[Bf-blender-cvs] [504608e] cycles_split_kernel: Cycles: Add CPU split kernels
Mai Lavelle
noreply at git.blender.org
Wed Oct 26 16:42:56 CEST 2016
Commit: 504608e2ece6e18b3445a94a4000c51ea38ca30b
Author: Mai Lavelle
Date: Tue Oct 25 17:59:34 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB504608e2ece6e18b3445a94a4000c51ea38ca30b
Cycles: Add CPU split kernels
This adds all split kernels for CPU devices except the `data_init` kernel,
which still needs to be written for CPU. While the kernels build and load
properly, they don't do anything yet.
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_split_kernel.h
M intern/cycles/device/opencl/opencl_split.cpp
M intern/cycles/kernel/CMakeLists.txt
M intern/cycles/kernel/kernel.h
M intern/cycles/kernel/kernel_globals.h
M intern/cycles/kernel/kernel_queues.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernel_work_stealing.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
A intern/cycles/kernel/kernels/cpu/kernel_split.cpp
A intern/cycles/kernel/kernels/cpu/kernel_split_avx.cpp
A intern/cycles/kernel/kernels/cpu/kernel_split_avx2.cpp
A intern/cycles/kernel/kernels/cpu/kernel_split_sse2.cpp
A intern/cycles/kernel/kernels/cpu/kernel_split_sse3.cpp
A intern/cycles/kernel/kernels/cpu/kernel_split_sse41.cpp
M intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
M intern/cycles/kernel/kernels/opencl/kernel_data_init.cl
M intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
M intern/cycles/kernel/kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
M intern/cycles/kernel/kernels/opencl/kernel_lamp_emission.cl
M intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
M intern/cycles/kernel/kernels/opencl/kernel_queue_enqueue.cl
M intern/cycles/kernel/kernels/opencl/kernel_scene_intersect.cl
M intern/cycles/kernel/kernels/opencl/kernel_shader_eval.cl
M intern/cycles/kernel/kernels/opencl/kernel_shadow_blocked.cl
M intern/cycles/kernel/kernels/opencl/kernel_sum_all_radiance.cl
M intern/cycles/kernel/split/kernel_background_buffer_update.h
M intern/cycles/kernel/split/kernel_split_common.h
M intern/cycles/util/util_atomic.h
M intern/cycles/util/util_types.h
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 858d759..e3f36ae 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -31,6 +31,7 @@
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_types.h"
+#include "split/kernel_split_data.h"
#include "kernel_globals.h"
#include "osl_shader.h"
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index d679698..643a2fd 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -25,9 +25,6 @@ CCL_NAMESPACE_BEGIN
/* Macro declarations used with split kernel */
-/* Macro to enable/disable work-stealing */
-#define __WORK_STEALING__
-
#define SPLIT_KERNEL_LOCAL_SIZE_X 64
#define SPLIT_KERNEL_LOCAL_SIZE_Y 1
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 9615501..4387b62 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -106,9 +106,6 @@ public:
string get_build_options(const DeviceRequestedFeatures& requested_features)
{
string build_options = "-D__SPLIT_KERNEL__ ";
-#ifdef __WORK_STEALING__
- build_options += "-D__WORK_STEALING__ ";
-#endif
build_options += requested_features.get_build_options();
/* Set compute device build option. */
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f5ec9be..4e6d1ec 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -13,6 +13,7 @@ set(INC_SYS
set(SRC
kernels/cpu/kernel.cpp
+ kernels/cpu/kernel_split.cpp
kernels/opencl/kernel.cl
kernels/opencl/kernel_data_init.cl
kernels/opencl/kernel_queue_enqueue.cl
@@ -313,25 +314,35 @@ if(CXX_HAS_SSE)
kernels/cpu/kernel_sse2.cpp
kernels/cpu/kernel_sse3.cpp
kernels/cpu/kernel_sse41.cpp
+ kernels/cpu/kernel_split_sse2.cpp
+ kernels/cpu/kernel_split_sse3.cpp
+ kernels/cpu/kernel_split_sse41.cpp
)
set_source_files_properties(kernels/cpu/kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
set_source_files_properties(kernels/cpu/kernel_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_sse41.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE41_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX)
list(APPEND SRC
kernels/cpu/kernel_avx.cpp
+ kernels/cpu/kernel_split_avx.cpp
)
set_source_files_properties(kernels/cpu/kernel_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_avx.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX_KERNEL_FLAGS}")
endif()
if(CXX_HAS_AVX2)
list(APPEND SRC
kernels/cpu/kernel_avx2.cpp
+ kernels/cpu/kernel_split_avx2.cpp
)
set_source_files_properties(kernels/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
+ set_source_files_properties(kernels/cpu/kernel_split_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_KERNEL_FLAGS}")
endif()
add_library(cycles_kernel
diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h
index 9279a94..bf0016a 100644
--- a/intern/cycles/kernel/kernel.h
+++ b/intern/cycles/kernel/kernel.h
@@ -28,6 +28,7 @@ CCL_NAMESPACE_BEGIN
#define KERNEL_FUNCTION_FULL_NAME(name) KERNEL_NAME_EVAL(KERNEL_ARCH, name)
struct KernelGlobals;
+struct KernelData;
KernelGlobals *kernel_globals_create();
void kernel_globals_free(KernelGlobals *kg);
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index e40ed05..a2d0057 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -64,6 +64,12 @@ typedef struct KernelGlobals {
/* Storage for decoupled volume steps. */
VolumeStep *decoupled_volume_steps[2];
int decoupled_volume_steps_index;
+
+ /* split kernel */
+ ShaderData *sd_input;
+ Intersection *isect_shadow;
+ SplitData split_data;
+ SplitParams split_param_data;
} KernelGlobals;
#endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 1ca57ed..011610f 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -17,6 +17,8 @@
#ifndef __KERNEL_QUEUE_H__
#define __KERNEL_QUEUE_H__
+CCL_NAMESPACE_BEGIN
+
/*
* Queue utility functions for split kernel
*/
@@ -122,4 +124,6 @@ ccl_device unsigned int get_global_queue_index(
return my_gqidx;
}
+CCL_NAMESPACE_END
+
#endif // __KERNEL_QUEUE_H__
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 68e5296..2310b0f 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -63,17 +63,23 @@ CCL_NAMESPACE_BEGIN
# endif
# define __KERNEL_SHADING__
# define __KERNEL_ADV_SHADING__
-# define __BRANCHED_PATH__
+# ifndef __SPLIT_KERNEL__
+# define __BRANCHED_PATH__
+# endif
# ifdef WITH_OSL
# define __OSL__
# endif
-# define __SUBSURFACE__
+# ifndef __SPLIT_KERNEL__
+# define __SUBSURFACE__
+# endif
# define __CMJ__
-# define __VOLUME__
-# define __VOLUME_DECOUPLED__
-# define __VOLUME_SCATTER__
-# define __SHADOW_RECORD_ALL__
-# define __VOLUME_RECORD_ALL__
+# ifndef __SPLIT_KERNEL__
+# define __VOLUME__
+# define __VOLUME_DECOUPLED__
+# define __VOLUME_SCATTER__
+# define __SHADOW_RECORD_ALL__
+# define __VOLUME_RECORD_ALL__
+# endif
#endif /* __KERNEL_CPU__ */
#ifdef __KERNEL_CUDA__
@@ -140,6 +146,7 @@ CCL_NAMESPACE_BEGIN
#define __INTERSECTION_REFINE__
#define __CLAMP_SAMPLE__
#define __PATCH_EVAL__
+#define __WORK_STEALING__
#ifdef __KERNEL_SHADING__
# define __SVM__
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 05dd362..353f13d 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -17,6 +17,8 @@
#ifndef __KERNEL_WORK_STEALING_H__
#define __KERNEL_WORK_STEALING_H__
+CCL_NAMESPACE_BEGIN
+
/*
* Utility functions for work stealing
*/
@@ -27,7 +29,7 @@
# pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
-uint get_group_id_with_ray_index(uint ray_index,
+ccl_device uint get_group_id_with_ray_index(uint ray_index,
uint tile_dim_x,
uint tile_dim_y,
uint parallel_samples,
@@ -44,7 +46,7 @@ uint get_group_id_with_ray_index(uint ray_index,
}
}
-uint get_total_work(uint tile_dim_x,
+ccl_device uint get_total_work(uint tile_dim_x,
uint tile_dim_y,
uint grp_idx,
uint grp_idy,
@@ -71,7 +73,7 @@ uint get_total_work(uint tile_dim_x,
/* Returns 0 in case there is no next work available */
/* Returns 1 in case work assigned is valid */
-int get_next_work(ccl_global uint *work_pool,
+ccl_device int get_next_work(ccl_global uint *work_pool,
ccl_private uint *my_work,
uint tile_dim_x,
uint tile_dim_y,
@@ -101,7 +103,7 @@ int get_next_work(ccl_global uint *work_pool,
/* This function assumes that the passed my_work is valid. */
/* Decode sample number w.r.t. assigned my_work. */
-uint get_my_sample(uint my_work,
+ccl_device uint get_my_sample(uint my_work,
uint tile_dim_x,
uint tile_dim_y,
uint parallel_samples,
@@ -136,7 +138,7 @@ uint get_my_sample(uint my_work,
}
/* Decode pixel and tile position w.r.t. assigned my_work. */
-void get_pixel_tile_position(ccl_private uint *pixel_x,
+ccl_device void get_pixel_tile_position(ccl_private uint *pixel_x,
ccl_private uint *pixel_y,
ccl_private uint *tile_x,
ccl_private uint *tile_y,
@@ -190,4 +192,6 @@ void get_pixel_tile_position(ccl_private uint *pixel_x,
#endif /* __WORK_STEALING__ */
+CCL_NAMESPACE_END
+
#endif /* __KERNEL_WORK_STEALING_H__ */
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index e948335..0271399 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -49,6 +49,20 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
int offset,
int sample);
+#define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
+ void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
+
+DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
+DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
+DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
+DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
+DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
+DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
+DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
+DECLARE_SPLIT_KERNEL_FUNCTION(sum_all_radiance)
+
void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func));
#undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 5f3aac2..30ffb05 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -21,17 +21,38 @@
*/
#include "kernel_compat_cpu.h"
-#include "kernel_math.h"
-#include "kernel_types.h"
-#include "kernel_globals.h"
-#include "kernel_cpu_image.h"
-#include "kernel_film.h"
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list