[Bf-blender-cvs] [504608e] cycles_split_kernel: Cycles: Add CPU split kernels

Mai Lavelle noreply at git.blender.org
Wed Oct 26 16:42:56 CEST 2016


Commit: 504608e2ece6e18b3445a94a4000c51ea38ca30b
Author: Mai Lavelle
Date:   Tue Oct 25 17:59:34 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB504608e2ece6e18b3445a94a4000c51ea38ca30b

Cycles: Add CPU split kernels

This adds all split kernels for CPU devices except the `data_init` kernel,
which still needs to be written for CPU. While the kernels build and load
properly, they don't do anything yet.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_split_kernel.h
M	intern/cycles/device/opencl/opencl_split.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/kernel.h
M	intern/cycles/kernel/kernel_globals.h
M	intern/cycles/kernel/kernel_queues.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernel_work_stealing.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
A	intern/cycles/kernel/kernels/cpu/kernel_split.cpp
A	intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
A	intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
A	intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
A	intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
A	intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
M	intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
M	intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
M	intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
M	intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
M	intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
M	intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
M	intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
M	intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
M	intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
M	intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
M	intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl
M	intern/cycles/kernel/split/kernel_background_buffer_update.h
M	intern/cycles/kernel/split/kernel_split_common.h
M	intern/cycles/util/util_atomic.h
M	intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 858d759..e3f36ae 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -31,6 +31,7 @@
 #include "kernel.h"
 #include "kernel_compat_cpu.h"
 #include "kernel_types.h"
+#include "split/kernel_split_data.h"
 #include "kernel_globals.h"
 
 #include "osl_shader.h"
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index d679698..643a2fd 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -25,9 +25,6 @@ CCL_NAMESPACE_BEGIN
 
 /* Macro declarations used with split kernel */
 
-/* Macro to enable/disable work-stealing */
-#define __WORK_STEALING__
-
 #define SPLIT_KERNEL_LOCAL_SIZE_X 64
 #define SPLIT_KERNEL_LOCAL_SIZE_Y 1
 
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 9615501..4387b62 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -106,9 +106,6 @@ public:
 	string get_build_options(const DeviceRequestedFeatures& requested_features)
 	{
 		string build_options = "-D__SPLIT_KERNEL__ ";
-#ifdef __WORK_STEALING__
-		build_options += "-D__WORK_STEALING__ ";
-#endif
 		build_options += requested_features.get_build_options();
 
 		/* Set compute device build option. */
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f5ec9be..4e6d1ec 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -13,6 +13,7 @@ set(INC_SYS
 
 set(SRC
 	kernels/cpu/kernel.cpp
+	kernels/cpu/kernel_split.cpp
 	kernels/opencl/kernel.cl
 	kernels/opencl/kernel_data_init.cl
 	kernels/opencl/kernel_queue_enqueue.cl
@@ -313,25 +314,35 @@ if(CXX_HAS_SSE)
 		kernels/cpu/kernel_sse2.cpp
 		kernels/cpu/kernel_sse3.cpp
 		kernels/cpu/kernel_sse41.cpp
+		kernels/cpu/kernel_split_sse2.cpp
+		kernels/cpu/kernel_split_sse3.cpp
+		kernels/cpu/kernel_split_sse41.cpp
 	)
 
 	set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
 	set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
 	set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+	set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+	set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+	set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
 endif()
 
 if(CXX_HAS_AVX)
 	list(APPEND SRC
 		kernels/cpu/kernel_avx.cpp
+		kernels/cpu/kernel_split_avx.cpp
 	)
 	set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+	set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
 endif()
 
 if(CXX_HAS_AVX2)
 	list(APPEND SRC
 		kernels/cpu/kernel_avx2.cpp
+		kernels/cpu/kernel_split_avx2.cpp
 	)
 	set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+	set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
 endif()
 
 add_library(cycles_kernel
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 9279a94..bf0016a 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -28,6 +28,7 @@ CCL_NAMESPACE_BEGIN
 #define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
 
 struct KernelGlobals;
+struct KernelData;
 
 KernelGlobals *kernel_globals_create();
 void kernel_globals_free(KernelGlobals *kg);
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index e40ed05..a2d0057 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -64,6 +64,12 @@ typedef struct KernelGlobals {
 	/* Storage for decoupled volume steps. */
 	VolumeStep *decoupled_volume_steps[2];
 	int decoupled_volume_steps_index;
+
+	/* split kernel */
+	ShaderData *sd_input;
+	Intersection *isect_shadow;
+	SplitData split_data;
+	SplitParams split_param_data;
 } KernelGlobals;
 
 #endif  /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 1ca57ed..011610f 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -17,6 +17,8 @@
 #ifndef __KERNEL_QUEUE_H__
 #define __KERNEL_QUEUE_H__
 
+CCL_NAMESPACE_BEGIN
+
 /*
  * Queue utility functions for split kernel
  */
@@ -122,4 +124,6 @@ ccl_device unsigned int get_global_queue_index(
 	return my_gqidx;
 }
 
+CCL_NAMESPACE_END
+
 #endif // __KERNEL_QUEUE_H__
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 68e5296..2310b0f 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -63,17 +63,23 @@ CCL_NAMESPACE_BEGIN
 #  endif
 #  define __KERNEL_SHADING__
 #  define __KERNEL_ADV_SHADING__
-#  define __BRANCHED_PATH__
+#  ifndef __SPLIT_KERNEL__
+#    define __BRANCHED_PATH__
+#  endif
 #  ifdef WITH_OSL
 #    define __OSL__
 #  endif
-#  define __SUBSURFACE__
+#  ifndef __SPLIT_KERNEL__
+#    define __SUBSURFACE__
+#  endif
 #  define __CMJ__
-#  define __VOLUME__
-#  define __VOLUME_DECOUPLED__
-#  define __VOLUME_SCATTER__
-#  define __SHADOW_RECORD_ALL__
-#  define __VOLUME_RECORD_ALL__
+#  ifndef __SPLIT_KERNEL__
+#    define __VOLUME__
+#    define __VOLUME_DECOUPLED__
+#    define __VOLUME_SCATTER__
+#    define __SHADOW_RECORD_ALL__
+#    define __VOLUME_RECORD_ALL__
+#  endif
 #endif  /* __KERNEL_CPU__ */
 
 #ifdef __KERNEL_CUDA__
@@ -140,6 +146,7 @@ CCL_NAMESPACE_BEGIN
 #define __INTERSECTION_REFINE__
 #define __CLAMP_SAMPLE__
 #define __PATCH_EVAL__
+#define __WORK_STEALING__
 
 #ifdef __KERNEL_SHADING__
 #  define __SVM__
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 05dd362..353f13d 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -17,6 +17,8 @@
 #ifndef __KERNEL_WORK_STEALING_H__
 #define __KERNEL_WORK_STEALING_H__
 
+CCL_NAMESPACE_BEGIN
+
 /*
  * Utility functions for work stealing
  */
@@ -27,7 +29,7 @@
 #  pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
 #endif
 
-uint get_group_id_with_ray_index(uint ray_index,
+ccl_device uint get_group_id_with_ray_index(uint ray_index,
                                  uint tile_dim_x,
                                  uint tile_dim_y,
                                  uint parallel_samples,
@@ -44,7 +46,7 @@ uint get_group_id_with_ray_index(uint ray_index,
 	}
 }
 
-uint get_total_work(uint tile_dim_x,
+ccl_device uint get_total_work(uint tile_dim_x,
                     uint tile_dim_y,
                     uint grp_idx,
                     uint grp_idy,
@@ -71,7 +73,7 @@ uint get_total_work(uint tile_dim_x,
 
 /* Returns 0 in case there is no next work available */
 /* Returns 1 in case work assigned is valid */
-int get_next_work(ccl_global uint *work_pool,
+ccl_device int get_next_work(ccl_global uint *work_pool,
                   ccl_private uint *my_work,
                   uint tile_dim_x,
                   uint tile_dim_y,
@@ -101,7 +103,7 @@ int get_next_work(ccl_global uint *work_pool,
 
 /* This function assumes that the passed my_work is valid. */
 /* Decode sample number w.r.t. assigned my_work. */
-uint get_my_sample(uint my_work,
+ccl_device uint get_my_sample(uint my_work,
                    uint tile_dim_x,
                    uint tile_dim_y,
                    uint parallel_samples,
@@ -136,7 +138,7 @@ uint get_my_sample(uint my_work,
 }
 
 /* Decode pixel and tile position w.r.t. assigned my_work. */
-void get_pixel_tile_position(ccl_private uint *pixel_x,
+ccl_device void get_pixel_tile_position(ccl_private uint *pixel_x,
                              ccl_private uint *pixel_y,
                              ccl_private uint *tile_x,
                              ccl_private uint *tile_y,
@@ -190,4 +192,6 @@ void get_pixel_tile_position(ccl_private uint *pixel_x,
 
 #endif  /* __WORK_STEALING__ */
 
+CCL_NAMESPACE_END
+
 #endif  /* __KERNEL_WORK_STEALING_H__ */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index e948335..0271399 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -49,6 +49,20 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
                                        int offset,
                                        int sample);
 
+#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
+	void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
+
+DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
+DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
+DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
+DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
+DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
+DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
+DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
+DECLARE_SPLIT_KERNEL_FUNCTION(sum_all_radiance)
+
 void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func));
 
 #undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 5f3aac2..30ffb05 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -21,17 +21,38 @@
  */
 
 #include "kernel_compat_cpu.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_cpu_image.h"
-#include "kernel_film.h"


@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list