[Bf-blender-cvs] [15a80ea05c] temp-cycles-denoising: Merge remote-tracking branch 'origin/master' into temp-cycles-denoising

Lukas Stockner noreply at git.blender.org
Fri Mar 24 20:18:18 CET 2017


Commit: 15a80ea05c00a02620d0e2be61cc32addeec1fb1
Author: Lukas Stockner
Date:   Sun Mar 12 04:18:45 2017 +0100
Branches: temp-cycles-denoising
https://developer.blender.org/rB15a80ea05c00a02620d0e2be61cc32addeec1fb1

Merge remote-tracking branch 'origin/master' into temp-cycles-denoising

Conflicts:
	intern/cycles/device/CMakeLists.txt
	intern/cycles/device/device_cpu.cpp
	intern/cycles/device/device_cuda.cpp
	intern/cycles/device/device_task.h
	intern/cycles/device/opencl/opencl_split.cpp
	intern/cycles/kernel/CMakeLists.txt
	intern/cycles/kernel/kernel_path_branched.h
	intern/cycles/kernel/kernel_path_surface.h
	intern/cycles/kernel/kernel_shader.h
	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
	intern/cycles/kernel/kernels/cuda/kernel.cu
	intern/cycles/kernel/split/kernel_buffer_update.h
	intern/cycles/kernel/svm/svm_closure.h
	intern/cycles/util/util_atomic.h

===================================================================



===================================================================

diff --cc intern/cycles/device/CMakeLists.txt
index 2478e3e226,a237345169..3dae2270cb
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@@ -3,7 -3,7 +3,8 @@@ set(IN
  	.
  	../graph
  	../kernel
 +	../filter
+ 	../kernel/split
  	../kernel/svm
  	../kernel/osl
  	../util
@@@ -32,9 -32,9 +33,10 @@@ set(SR
  	device.cpp
  	device_cpu.cpp
  	device_cuda.cpp
 +	device_denoising.cpp
  	device_multi.cpp
  	device_opencl.cpp
+ 	device_split_kernel.cpp
  	device_task.cpp
  )
  
diff --cc intern/cycles/device/device_cpu.cpp
index de98f616fd,273c3b4893..f0366a4e90
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@@ -26,15 -26,14 +26,17 @@@
  
  #include "device.h"
  #include "device_intern.h"
 +#include "device_denoising.h"
+ #include "device_split_kernel.h"
  
  #include "kernel.h"
  #include "kernel_compat_cpu.h"
  #include "kernel_types.h"
+ #include "split/kernel_split_data.h"
  #include "kernel_globals.h"
  
 +#include "filter.h"
 +
  #include "osl_shader.h"
  #include "osl_globals.h"
  
@@@ -51,33 -51,44 +54,40 @@@
  
  CCL_NAMESPACE_BEGIN
  
+ class CPUDevice;
+ 
 -class CPUSplitKernel : public DeviceSplitKernel {
 -	CPUDevice *device;
 -public:
 -	explicit CPUSplitKernel(CPUDevice *device);
 +/* Has to be outside of the class to be shared across template instantiations. */
 +static bool logged_architecture = false;
  
 -	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
 -	                                            RenderTile& rtile,
 -	                                            int num_global_elements,
 -	                                            device_memory& kernel_globals,
 -	                                            device_memory& kernel_data_,
 -	                                            device_memory& split_data,
 -	                                            device_memory& ray_state,
 -	                                            device_memory& queue_index,
 -	                                            device_memory& use_queues_flag,
 -	                                            device_memory& work_pool_wgs);
 -
 -	virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
 -	virtual int2 split_kernel_local_size();
 -	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
 -	virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
 -};
 -
 -class CPUDevice : public Device
 -{
 -	static unordered_map<string, void*> kernel_functions;
 -
 -	static void register_kernel_function(const char* name, void* func)
 +template<typename F>
 +class KernelFunctions {
 +public:
++	KernelFunctions()
+ 	{
 -		kernel_functions[name] = func;
++		kernel = (F)NULL;
+ 	}
+ 
 -	static const char* get_arch_name()
 +	KernelFunctions(F kernel_default,
 +	                F kernel_sse2,
 +	                F kernel_sse3,
 +	                F kernel_sse41,
 +	                F kernel_avx,
 +	                F kernel_avx2)
  	{
 +		string architecture_name = "default";
 +		kernel = kernel_default;
 +
 +		/* Silence potential warnings about unused variables
 +		 * when compiling without some architectures. */
 +		(void)kernel_sse2;
 +		(void)kernel_sse3;
 +		(void)kernel_sse41;
 +		(void)kernel_avx;
 +		(void)kernel_avx2;
  #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
  		if(system_cpu_support_avx2()) {
 -			return "cpu_avx2";
 +			architecture_name = "AVX2";
 +			kernel = kernel_avx2;
  		}
  		else
  #endif
@@@ -115,15 -121,23 +125,38 @@@
  		}
  	}
  
 -	template<typename F>
 -	static F get_kernel_function(string name)
 -	{
 -		name = string("kernel_") + get_arch_name() + "_" + name;
 -
 -		unordered_map<string, void*>::iterator it = kernel_functions.find(name);
 +	inline F operator()() const {
++		assert(kernel);
 +		return kernel;
 +	}
 +protected:
 +	F kernel;
 +};
  
 -		if(it == kernel_functions.end()) {
 -			assert(!"kernel function not found");
 -			return NULL;
 -		}
++class CPUSplitKernel : public DeviceSplitKernel {
++	CPUDevice *device;
++public:
++	explicit CPUSplitKernel(CPUDevice *device);
+ 
 -		return (F)it->second;
 -	}
++	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
++	                                            RenderTile& rtile,
++	                                            int num_global_elements,
++	                                            device_memory& kernel_globals,
++	                                            device_memory& kernel_data_,
++	                                            device_memory& split_data,
++	                                            device_memory& ray_state,
++	                                            device_memory& queue_index,
++	                                            device_memory& use_queues_flag,
++	                                            device_memory& work_pool_wgs);
+ 
 -	friend class CPUSplitKernel;
++	virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
++	virtual int2 split_kernel_local_size();
++	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
++	virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
++};
+ 
 +class CPUDevice : public Device
 +{
  public:
  	TaskPool task_pool;
  	KernelGlobals kernel_globals;
@@@ -132,57 -146,80 +165,94 @@@
  	OSLGlobals osl_globals;
  #endif
  
+ 	bool use_split_kernel;
+ 
+ 	DeviceRequestedFeatures requested_features;
 -	
++
 +	KernelFunctions<void(*)(KernelGlobals *, float *, unsigned int *, int, int, int, int, int)>   path_trace_kernel;
 +	KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>       convert_to_half_float_kernel;
 +	KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)>       convert_to_byte_kernel;
 +	KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
 +
 +	KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int, bool)> filter_divide_shadow_kernel;
 +	KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int, bool)>               filter_get_feature_kernel;
 +	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)>                                     filter_combine_halves_kernel;
 +	KernelFunctions<void(*)(int, int, int, float*, int, int, int, int)>                                               filter_divide_combined_kernel;
 +
 +	KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
 +	KernelFunctions<void(*)(float*, float*, int*, int, int)>                                 filter_nlm_blur_kernel;
 +	KernelFunctions<void(*)(float*, float*, int*, int, int)>                                 filter_nlm_calc_weight_kernel;
 +	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)>       filter_nlm_update_output_kernel;
 +	KernelFunctions<void(*)(float*, float*, int*, int)>                                      filter_nlm_normalize_kernel;
 +
 +	KernelFunctions<void(*)(int, float*, int, int, int, float*, int*, int*, int, float)>                                         filter_construct_transform_kernel;
 +	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, float*, int*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
 +	KernelFunctions<void(*)(int, int, int, int, int, float*, int*, float*, float3*, int*, int)>                                  filter_finalize_kernel;
 +
++	KernelFunctions<void(*)(KernelGlobals *, ccl_constant KernelData*, ccl_global void*, int, ccl_global char*,
++	                       ccl_global uint*, int, int, int, int, int, int, int, int, ccl_global int*, int,
++	                       ccl_global char*, ccl_global unsigned int*, unsigned int, ccl_global float*)>        data_init_kernel;
++	unordered_map<string, KernelFunctions<void(*)(KernelGlobals*, KernelData*)> > split_kernels;
++
 +#define KERNEL_FUNCTIONS(name) \
 +	      KERNEL_NAME_EVAL(cpu, name), \
 +	      KERNEL_NAME_EVAL(cpu_sse2, name), \
 +	      KERNEL_NAME_EVAL(cpu_sse3, name), \
 +	      KERNEL_NAME_EVAL(cpu_sse41, name), \
 +	      KERNEL_NAME_EVAL(cpu_avx, name), \
 +	      KERNEL_NAME_EVAL(cpu_avx2, name)
 +
  	CPUDevice(DeviceInfo& info, Stats &stats, bool background)
 -	: Device(info, stats, background)
 +	: Device(info, stats, background),
- 	  path_trace_kernel(KERNEL_FUNCTIONS(path_trace)),
- 	  convert_to_half_float_kernel(KERNEL_FUNCTIONS(convert_to_half_float)),
- 	  convert_to_byte_kernel(KERNEL_FUNCTIONS(convert_to_byte)),
- 	  shader_kernel(KERNEL_FUNCTIONS(shader)),
- 	  filter_divide_shadow_kernel(KERNEL_FUNCTIONS(filter_divide_shadow)),
- 	  filter_get_feature_kernel(KERNEL_FUNCTIONS(filter_get_feature)),
- 	  filter_combine_halves_kernel(KERNEL_FUNCTIONS(filter_combine_halves)),
- 	  filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
- 	  filter_nlm_calc_difference_kernel(KERNEL_FUNCTIONS(filter_nlm_calc_difference)),
- 	  filter_nlm_blur_kernel(KERNEL_FUNCTIONS(filter_nlm_blur)),
- 	  filter_nlm_calc_weight_kernel(KERNEL_FUNCTIONS(filter_nlm_calc_weight)),
- 	  filter_nlm_update_output_kernel(KERNEL_FUNCTIONS(filter_nlm_update_output)),
- 	  filter_nlm_normalize_kernel(KERNEL_FUNCTIONS(filter_nlm_normalize)),
- 	  filter_construct_transform_kernel(KERNEL_FUNCTIONS(filter_construct_transform)),
- 	  filter_nlm_construct_gramian_kernel(KERNEL_FUNCTIONS(filter_nlm_construct_gramian)),
- 	  filter_finalize_kernel(KERNEL_FUNCTIONS(filter_finalize))
++#define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name))
++	  REGISTER_KERNEL(path_trace),
++	  REGISTER_KERNEL(convert_to_half_float),
++	  REGISTER_KERNEL(convert_to_byte),
++	  REGISTER_KERNEL(shader),
++	  REGISTER_KERNEL(filter_divide_shadow),
++	  REGISTER_KERNEL(filter_get_feature),
++	  REGISTER_KERNEL(filter_combine_halves),
++	  REGISTER_KERNEL(filter_divide_combined),
++	  REGISTER_KER

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list