[Bf-blender-cvs] [15a80ea05c] temp-cycles-denoising: Merge remote-tracking branch 'origin/master' into temp-cycles-denoising
Lukas Stockner
noreply at git.blender.org
Fri Mar 24 20:18:18 CET 2017
Commit: 15a80ea05c00a02620d0e2be61cc32addeec1fb1
Author: Lukas Stockner
Date: Sun Mar 12 04:18:45 2017 +0100
Branches: temp-cycles-denoising
https://developer.blender.org/rB15a80ea05c00a02620d0e2be61cc32addeec1fb1
Merge remote-tracking branch 'origin/master' into temp-cycles-denoising
Conflicts:
intern/cycles/device/CMakeLists.txt
intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_task.h
intern/cycles/device/opencl/opencl_split.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/kernel_path_branched.h
intern/cycles/kernel/kernel_path_surface.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
intern/cycles/kernel/kernels/cuda/kernel.cu
intern/cycles/kernel/split/kernel_buffer_update.h
intern/cycles/kernel/svm/svm_closure.h
intern/cycles/util/util_atomic.h
===================================================================
===================================================================
diff --cc intern/cycles/device/CMakeLists.txt
index 2478e3e226,a237345169..3dae2270cb
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@@ -3,7 -3,7 +3,8 @@@ set(IN
.
../graph
../kernel
+ ../filter
+ ../kernel/split
../kernel/svm
../kernel/osl
../util
@@@ -32,9 -32,9 +33,10 @@@ set(SR
device.cpp
device_cpu.cpp
device_cuda.cpp
+ device_denoising.cpp
device_multi.cpp
device_opencl.cpp
+ device_split_kernel.cpp
device_task.cpp
)
diff --cc intern/cycles/device/device_cpu.cpp
index de98f616fd,273c3b4893..f0366a4e90
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@@ -26,15 -26,14 +26,17 @@@
#include "device.h"
#include "device_intern.h"
+#include "device_denoising.h"
+ #include "device_split_kernel.h"
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_types.h"
+ #include "split/kernel_split_data.h"
#include "kernel_globals.h"
+#include "filter.h"
+
#include "osl_shader.h"
#include "osl_globals.h"
@@@ -51,33 -51,44 +54,40 @@@
CCL_NAMESPACE_BEGIN
+ class CPUDevice;
+
-class CPUSplitKernel : public DeviceSplitKernel {
- CPUDevice *device;
-public:
- explicit CPUSplitKernel(CPUDevice *device);
+/* Has to be outside of the class to be shared across template instantiations. */
+static bool logged_architecture = false;
- virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
- RenderTile& rtile,
- int num_global_elements,
- device_memory& kernel_globals,
- device_memory& kernel_data_,
- device_memory& split_data,
- device_memory& ray_state,
- device_memory& queue_index,
- device_memory& use_queues_flag,
- device_memory& work_pool_wgs);
-
- virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
- virtual int2 split_kernel_local_size();
- virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
- virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
-};
-
-class CPUDevice : public Device
-{
- static unordered_map<string, void*> kernel_functions;
-
- static void register_kernel_function(const char* name, void* func)
+template<typename F>
+class KernelFunctions {
+public:
++ KernelFunctions()
+ {
- kernel_functions[name] = func;
++ kernel = (F)NULL;
+ }
+
- static const char* get_arch_name()
+ KernelFunctions(F kernel_default,
+ F kernel_sse2,
+ F kernel_sse3,
+ F kernel_sse41,
+ F kernel_avx,
+ F kernel_avx2)
{
+ string architecture_name = "default";
+ kernel = kernel_default;
+
+ /* Silence potential warnings about unused variables
+ * when compiling without some architectures. */
+ (void)kernel_sse2;
+ (void)kernel_sse3;
+ (void)kernel_sse41;
+ (void)kernel_avx;
+ (void)kernel_avx2;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if(system_cpu_support_avx2()) {
- return "cpu_avx2";
+ architecture_name = "AVX2";
+ kernel = kernel_avx2;
}
else
#endif
@@@ -115,15 -121,23 +125,38 @@@
}
}
- template<typename F>
- static F get_kernel_function(string name)
- {
- name = string("kernel_") + get_arch_name() + "_" + name;
-
- unordered_map<string, void*>::iterator it = kernel_functions.find(name);
+ inline F operator()() const {
++ assert(kernel);
+ return kernel;
+ }
+protected:
+ F kernel;
+};
- if(it == kernel_functions.end()) {
- assert(!"kernel function not found");
- return NULL;
- }
++class CPUSplitKernel : public DeviceSplitKernel {
++ CPUDevice *device;
++public:
++ explicit CPUSplitKernel(CPUDevice *device);
+
- return (F)it->second;
- }
++ virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
++ RenderTile& rtile,
++ int num_global_elements,
++ device_memory& kernel_globals,
++ device_memory& kernel_data_,
++ device_memory& split_data,
++ device_memory& ray_state,
++ device_memory& queue_index,
++ device_memory& use_queues_flag,
++ device_memory& work_pool_wgs);
+
- friend class CPUSplitKernel;
++ virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
++ virtual int2 split_kernel_local_size();
++ virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
++ virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
++};
+
+class CPUDevice : public Device
+{
public:
TaskPool task_pool;
KernelGlobals kernel_globals;
@@@ -132,57 -146,80 +165,94 @@@
OSLGlobals osl_globals;
#endif
+ bool use_split_kernel;
+
+ DeviceRequestedFeatures requested_features;
-
++
+ KernelFunctions<void(*)(KernelGlobals *, float *, unsigned int *, int, int, int, int, int)> path_trace_kernel;
+ KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
+ KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
+ KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
+
+ KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
+ KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)> filter_get_feature_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
+ KernelFunctions<void(*)(int, int, int, float*, int, int, int, int)> filter_divide_combined_kernel;
+
+ KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
+ KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_blur_kernel;
+ KernelFunctions<void(*)(float*, float*, int*, int, int)> filter_nlm_calc_weight_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel;
+ KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel;
+
+ KernelFunctions<void(*)(int, float*, int, int, int, float*, int*, int*, int, float)> filter_construct_transform_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
+ KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)> filter_finalize_kernel;
+
++ KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
++ ccl_global uint*, int, int, int, int, int, int, int, int, ccl_global int*, int,
++ ccl_global char*, ccl_global unsigned int*, unsigned int, ccl_global float*)> data_init_kernel;
++ unordered_map<string, KernelFunctions<void(*)(KernelGlobals*, KernelData*)> > split_kernels;
++
+#define KERNEL_FUNCTIONS(name) \
+ KERNEL_NAME_EVAL(cpu, name), \
+ KERNEL_NAME_EVAL(cpu_sse2, name), \
+ KERNEL_NAME_EVAL(cpu_sse3, name), \
+ KERNEL_NAME_EVAL(cpu_sse41, name), \
+ KERNEL_NAME_EVAL(cpu_avx, name), \
+ KERNEL_NAME_EVAL(cpu_avx2, name)
+
CPUDevice(DeviceInfo& info, Stats &stats, bool background)
- : Device(info, stats, background)
+ : Device(info, stats, background),
- path_trace_kernel(KERNEL_FUNCTIONS(path_trace)),
- convert_to_half_float_kernel(KERNEL_FUNCTIONS(convert_to_half_float)),
- convert_to_byte_kernel(KERNEL_FUNCTIONS(convert_to_byte)),
- shader_kernel(KERNEL_FUNCTIONS(shader)),
- filter_divide_shadow_kernel(KERNEL_FUNCTIONS(filter_divide_shadow)),
- filter_get_feature_kernel(KERNEL_FUNCTIONS(filter_get_feature)),
- filter_combine_halves_kernel(KERNEL_FUNCTIONS(filter_combine_halves)),
- filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
- filter_nlm_calc_difference_kernel(KERNEL_FUNCTIONS(filter_nlm_calc_difference)),
- filter_nlm_blur_kernel(KERNEL_FUNCTIONS(filter_nlm_blur)),
- filter_nlm_calc_weight_kernel(KERNEL_FUNCTIONS(filter_nlm_calc_weight)),
- filter_nlm_update_output_kernel(KERNEL_FUNCTIONS(filter_nlm_update_output)),
- filter_nlm_normalize_kernel(KERNEL_FUNCTIONS(filter_nlm_normalize)),
- filter_construct_transform_kernel(KERNEL_FUNCTIONS(filter_construct_transform)),
- filter_nlm_construct_gramian_kernel(KERNEL_FUNCTIONS(filter_nlm_construct_gramian)),
- filter_finalize_kernel(KERNEL_FUNCTIONS(filter_finalize))
++#define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name))
++ REGISTER_KERNEL(path_trace),
++ REGISTER_KERNEL(convert_to_half_float),
++ REGISTER_KERNEL(convert_to_byte),
++ REGISTER_KERNEL(shader),
++ REGISTER_KERNEL(filter_divide_shadow),
++ REGISTER_KERNEL(filter_get_feature),
++ REGISTER_KERNEL(filter_combine_halves),
++ REGISTER_KERNEL(filter_divide_combined),
++ REGISTER_KER
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list