[Bf-blender-cvs] [6850eeb] cycles_split_kernel: Cycles: Replace OpenCL work item functions with own versions

Mai Lavelle noreply at git.blender.org
Tue Oct 25 17:39:09 CEST 2016


Commit: 6850eebeaa235dff70dc0a8a4d2f4a15dfbf31f9
Author: Mai Lavelle
Date:   Tue Oct 25 17:19:05 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB6850eebeaa235dff70dc0a8a4d2f4a15dfbf31f9

Cycles: Replace OpenCL work item functions with own versions

===================================================================

M	intern/cycles/kernel/kernel_compat_cpu.h
M	intern/cycles/kernel/kernel_compat_opencl.h
M	intern/cycles/kernel/kernel_queues.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernel_work_stealing.h
M	intern/cycles/kernel/kernels/opencl/kernel.cl
M	intern/cycles/kernel/split/kernel_background_buffer_update.h
M	intern/cycles/kernel/split/kernel_data_init.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h
M	intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M	intern/cycles/kernel/split/kernel_lamp_emission.h
M	intern/cycles/kernel/split/kernel_next_iteration_setup.h
M	intern/cycles/kernel/split/kernel_queue_enqueue.h
M	intern/cycles/kernel/split/kernel_scene_intersect.h
M	intern/cycles/kernel/split/kernel_shader_eval.h
M	intern/cycles/kernel/split/kernel_shadow_blocked.h
M	intern/cycles/kernel/split/kernel_sum_all_radiance.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 9d1f3bd..e6aa8f8 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -44,6 +44,14 @@
 
 #define ccl_addr_space
 
+#define ccl_local_id(d) 0
+#define ccl_global_id(d) 0
+
+#define ccl_local_size(d) 1
+#define ccl_global_size(d) 1
+
+#define ccl_num_groups(d) 1
+
 /* On x86_64, versions of glibc < 2.16 have an issue where expf is
  * much slower than the double version.  This was fixed in glibc 2.16.
  */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index f076e3a..b60eb14 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -49,6 +49,14 @@
 #  define ccl_addr_space
 #endif
 
+#define ccl_local_id(d) get_local_id(d)
+#define ccl_global_id(d) get_global_id(d)
+
+#define ccl_local_size(d) get_local_size(d)
+#define ccl_global_size(d) get_global_size(d)
+
+#define ccl_num_groups(d) get_num_groups(d)
+
 /* Selective nodes compilation. */
 #ifndef __NODES_MAX_GROUP__
 #  define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index cf5614b..c423625 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -72,7 +72,7 @@ ccl_device void enqueue_ray_index_local(
         ccl_global int *Queue_data,                  /* Queues. */
         ccl_global int *Queue_index)                 /* To do global queue atomics. */
 {
-	int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0);
+	int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
 
 	/* Get local queue id .*/
 	unsigned int lqidx;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 734b446..68e5296 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -737,7 +737,7 @@ enum ShaderDataFlag {
 };
 
 #ifdef __SPLIT_KERNEL__
-#  define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0))
+#  define SD_THREAD (ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0))
 #  if !defined(__SPLIT_KERNEL_SOA__)
      /* ShaderData is stored as an Array-of-Structures */
 #    define ccl_soa_member(type, name) type soa_##name
@@ -745,7 +745,7 @@ enum ShaderDataFlag {
 #    define ccl_fetch_array(s, t, index) (&s[SD_THREAD].soa_##t[index])
 #  else
      /* ShaderData is stored as an Structure-of-Arrays */
-#    define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1))
+#    define SD_GLOBAL_SIZE (ccl_global_size(0) * ccl_global_size(1))
 #    define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t)
 #    define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0)
 #    define ccl_soa_member(type, name) type soa_##name
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 7d559b1..00d8a59 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -35,12 +35,12 @@ uint get_group_id_with_ray_index(uint ray_index,
 {
 	if(dim == 0) {
 		uint x_span = ray_index % (tile_dim_x * parallel_samples);
-		return x_span / get_local_size(0);
+		return x_span / ccl_local_size(0);
 	}
 	else /*if(dim == 1)*/ {
 		kernel_assert(dim == 1);
 		uint y_span = ray_index / (tile_dim_x * parallel_samples);
-		return y_span / get_local_size(1);
+		return y_span / ccl_local_size(1);
 	}
 }
 
@@ -51,17 +51,17 @@ uint get_total_work(uint tile_dim_x,
                     uint num_samples)
 {
 	uint threads_within_tile_border_x =
-		(grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
-		                                     : get_local_size(0);
+		(grp_idx == (ccl_num_groups(0) - 1)) ? tile_dim_x % ccl_local_size(0)
+		                                     : ccl_local_size(0);
 	uint threads_within_tile_border_y =
-		(grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
-		                                     : get_local_size(1);
+		(grp_idy == (ccl_num_groups(1) - 1)) ? tile_dim_y % ccl_local_size(1)
+		                                     : ccl_local_size(1);
 
 	threads_within_tile_border_x =
-		(threads_within_tile_border_x == 0) ? get_local_size(0)
+		(threads_within_tile_border_x == 0) ? ccl_local_size(0)
 		                                    : threads_within_tile_border_x;
 	threads_within_tile_border_y =
-		(threads_within_tile_border_y == 0) ? get_local_size(1)
+		(threads_within_tile_border_y == 0) ? ccl_local_size(1)
 		                                    : threads_within_tile_border_y;
 
 	return threads_within_tile_border_x *
@@ -94,7 +94,7 @@ int get_next_work(ccl_global uint *work_pool,
 	                                 grp_idx,
 	                                 grp_idy,
 	                                 num_samples);
-	uint group_index = grp_idy * get_num_groups(0) + grp_idx;
+	uint group_index = grp_idy * ccl_num_groups(0) + grp_idx;
 	*my_work = atomic_inc(&work_pool[group_index]);
 	return (*my_work < total_work) ? 1 : 0;
 }
@@ -118,17 +118,17 @@ uint get_my_sample(uint my_work,
 	                                           parallel_samples,
 	                                           1);
 	uint threads_within_tile_border_x =
-		(grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
-		                                     : get_local_size(0);
+		(grp_idx == (ccl_num_groups(0) - 1)) ? tile_dim_x % ccl_local_size(0)
+		                                     : ccl_local_size(0);
 	uint threads_within_tile_border_y =
-		(grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
-		                                     : get_local_size(1);
+		(grp_idy == (ccl_num_groups(1) - 1)) ? tile_dim_y % ccl_local_size(1)
+		                                     : ccl_local_size(1);
 
 	threads_within_tile_border_x =
-		(threads_within_tile_border_x == 0) ? get_local_size(0)
+		(threads_within_tile_border_x == 0) ? ccl_local_size(0)
 		                                    : threads_within_tile_border_x;
 	threads_within_tile_border_y =
-		(threads_within_tile_border_y == 0) ? get_local_size(1)
+		(threads_within_tile_border_y == 0) ? ccl_local_size(1)
 		                                    : threads_within_tile_border_y;
 
 	return my_work /
@@ -159,17 +159,17 @@ void get_pixel_tile_position(ccl_private uint *pixel_x,
 	                                           parallel_samples,
 	                                           1);
 	uint threads_within_tile_border_x =
-		(grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
-		                                     : get_local_size(0);
+		(grp_idx == (ccl_num_groups(0) - 1)) ? tile_dim_x % ccl_local_size(0)
+		                                     : ccl_local_size(0);
 	uint threads_within_tile_border_y =
-		(grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
-		                                     : get_local_size(1);
+		(grp_idy == (ccl_num_groups(1) - 1)) ? tile_dim_y % ccl_local_size(1)
+		                                     : ccl_local_size(1);
 
 	threads_within_tile_border_x =
-		(threads_within_tile_border_x == 0) ? get_local_size(0)
+		(threads_within_tile_border_x == 0) ? ccl_local_size(0)
 		                                    : threads_within_tile_border_x;
 	threads_within_tile_border_y =
-		(threads_within_tile_border_y == 0) ? get_local_size(1)
+		(threads_within_tile_border_y == 0) ? ccl_local_size(1)
 		                                    : threads_within_tile_border_y;
 
 	uint total_associated_pixels =
@@ -181,9 +181,9 @@ void get_pixel_tile_position(ccl_private uint *pixel_x,
 		work_group_pixel_index / threads_within_tile_border_x;
 
 	*pixel_x =
-		tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x;
+		tile_offset_x + (grp_idx * ccl_local_size(0)) + work_group_pixel_x;
 	*pixel_y =
-		tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y;
+		tile_offset_y + (grp_idy * ccl_local_size(1)) + work_group_pixel_y;
 	*tile_x = *pixel_x - tile_offset_x;
 	*tile_y = *pixel_y - tile_offset_y;
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index a68f978..03a27c8 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -67,8 +67,8 @@ __kernel void kernel_ocl_path_trace(
 	kg->name = name;
 #include "../../kernel_textures.h"
 
-	int x = sx + get_global_id(0);
-	int y = sy + get_global_id(1);
+	int x = sx + ccl_global_id(0);
+	int y = sy + ccl_global_id(1);
 
 	if(x < sx + sw && y < sy + sh)
 		kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
@@ -96,7 +96,7 @@ __kernel void kernel_ocl_shader(
 	kg->name = name;
 #include "../../kernel_textures.h"
 
-	int x = sx + get_global_id(0);
+	int x = sx + ccl_global_id(0);
 
 	if(x < sx + sw) {
 		kernel_shader_evaluate(kg,
@@ -128,7 +128,7 @@ __kernel void kernel_ocl_bake(
 	kg->name = name;
 #include "../../kernel_textures.h"
 
-	int x = sx + get_global_id(0);
+	int x = sx + ccl_global_id(0);
 
 	if(x < sx + sw) {
 #ifdef __NO_BAKING__
@@ -159,8 +159,8 @@ __kernel void kernel_ocl_convert_to_byte(
 	kg->name = name;
 #include "../../kernel_textures.h"
 
-	int x = sx + get_global_id(0);
-	int y = sy + get_global_id(1);
+	int x = sx + ccl_global_id(0);
+	int y = sy + ccl_global_id(1);
 
 	if(x < sx + sw && y < sy + sh)
 		kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
@@ -186,8 +186,8 @@ __kernel void kernel_ocl_convert_to_half_float(
 	kg->name = name;
 #include "../../kernel_textures.h"
 
-	int x = sx + get_global_id(0);
-	int y = sy + get_global_id(1);
+	int x = sx + ccl_global_id(0);
+	int y = sy + ccl_global_id(1);
 
 	if(x < sx + sw && y < sy + sh)
 		kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index 114bdf2..ea2d703 100644
--- a/intern/cycles/kernel/spl

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list