[Bf-blender-cvs] [84645df] cycles_kernel_split: Cycles kernel split: Cleanup of work stealing module

Sergey Sharybin noreply at git.blender.org
Fri May 8 16:22:04 CEST 2015


Commit: 84645dff808c177dbd197c2e80ec95a4c2347d0f
Author: Sergey Sharybin
Date:   Fri May 8 19:21:09 2015 +0500
Branches: cycles_kernel_split
https://developer.blender.org/rB84645dff808c177dbd197c2e80ec95a4c2347d0f

Cycles kernel split: Cleanup of work stealing module

===================================================================

M	intern/cycles/kernel/kernel_work_stealing.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 4833fac..9b83d97 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -27,113 +27,163 @@
 #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
 #endif
 
-unsigned int get_group_id_with_ray_index(unsigned int ray_index,
-                                         unsigned int tile_dim_x,
-                                         unsigned int tile_dim_y,
-                                         unsigned int parallel_samples,
-                                         int dim)
+uint get_group_id_with_ray_index(uint ray_index,
+                                 uint tile_dim_x,
+                                 uint tile_dim_y,
+                                 uint parallel_samples,
+                                 int dim)
 {
-	unsigned int retval;
 	if(dim == 0) {
-		unsigned int x_span = ray_index % (tile_dim_x * parallel_samples);
-		retval = x_span / get_local_size(0);
+		uint x_span = ray_index % (tile_dim_x * parallel_samples);
+		return x_span / get_local_size(0);
 	}
-	else if(dim == 1) {
-		unsigned int y_span = ray_index / (tile_dim_x * parallel_samples);
-		retval = y_span / get_local_size(1);
+	else /*if(dim == 1)*/ {
+		kernel_assert(dim == 1);
+		uint y_span = ray_index / (tile_dim_x * parallel_samples);
+		return y_span / get_local_size(1);
 	}
-	return retval;
 }
 
-unsigned int get_total_work(unsigned int tile_dim_x,
-                            unsigned int tile_dim_y,
-                            unsigned int grp_idx,
-                            unsigned int grp_idy,
-                            unsigned int num_samples)
+uint get_total_work(uint tile_dim_x,
+                    uint tile_dim_y,
+                    uint grp_idx,
+                    uint grp_idy,
+                    uint num_samples)
 {
-	unsigned int threads_within_tile_border_x;
-	unsigned int threads_within_tile_border_y;
-
-	threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
-	threads_within_tile_border_y = (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) : get_local_size(1);
-
-	threads_within_tile_border_x = (threads_within_tile_border_x == 0) ? get_local_size(0) : threads_within_tile_border_x;
-	threads_within_tile_border_y = (threads_within_tile_border_y == 0) ? get_local_size(1) : threads_within_tile_border_y;
-
-	return (threads_within_tile_border_x * threads_within_tile_border_y * num_samples);
+	uint threads_within_tile_border_x =
+		(grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+		                                     : get_local_size(0);
+	uint threads_within_tile_border_y =
+		(grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+		                                     : get_local_size(1);
+
+	threads_within_tile_border_x =
+		(threads_within_tile_border_x == 0) ? get_local_size(0)
+		                                    : threads_within_tile_border_x;
+	threads_within_tile_border_y =
+		(threads_within_tile_border_y == 0) ? get_local_size(1)
+		                                    : threads_within_tile_border_y;
+
+	return threads_within_tile_border_x *
+	       threads_within_tile_border_y *
+	       num_samples;
 }
 
 /* Returns 0 in case there is no next work available */
 /* Returns 1 in case work assigned is valid */
-int get_next_work(ccl_global unsigned int *work_pool,
-                  ccl_private unsigned int *my_work,
-                  unsigned int tile_dim_x,
-                  unsigned int tile_dim_y,
-                  unsigned int num_samples,
-                  unsigned int parallel_samples,
-                  unsigned int ray_index)
+int get_next_work(ccl_global uint *work_pool,
+                  ccl_private uint *my_work,
+                  uint tile_dim_x,
+                  uint tile_dim_y,
+                  uint num_samples,
+                  uint parallel_samples,
+                  uint ray_index)
 {
-		unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
-		unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
-		unsigned int total_work = get_total_work(tile_dim_x, tile_dim_y, grp_idx, grp_idy, num_samples);
-		unsigned int group_index = grp_idy * get_num_groups(0) + grp_idx;
-		*my_work = atomic_inc(&work_pool[group_index]);
-		return = (*my_work < total_work) ? 1 : 0;
+	uint grp_idx = get_group_id_with_ray_index(ray_index,
+	                                           tile_dim_x,
+	                                           tile_dim_y,
+	                                           parallel_samples,
+	                                           0);
+	uint grp_idy = get_group_id_with_ray_index(ray_index,
+	                                           tile_dim_x,
+	                                           tile_dim_y,
+	                                           parallel_samples,
+	                                           1);
+	uint total_work = get_total_work(tile_dim_x,
+	                                 tile_dim_y,
+	                                 grp_idx,
+	                                 grp_idy,
+	                                 num_samples);
+	uint group_index = grp_idy * get_num_groups(0) + grp_idx;
+	*my_work = atomic_inc(&work_pool[group_index]);
+	return (*my_work < total_work) ? 1 : 0;
 }
 
-/* This function assumes that the passed my_work is valid */
-/* Decode sample number w.r.t. assigned my_work */
-unsigned int get_my_sample(unsigned int my_work,
-                           unsigned int tile_dim_x,
-                           unsigned int tile_dim_y,
-                           unsigned int parallel_samples,
-                           unsigned int ray_index)
+/* This function assumes that the passed my_work is valid. */
+/* Decode sample number w.r.t. assigned my_work. */
+uint get_my_sample(uint my_work,
+                   uint tile_dim_x,
+                   uint tile_dim_y,
+                   uint parallel_samples,
+                   uint ray_index)
 {
-	unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
-	unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
-	unsigned int threads_within_tile_border_x;
-	unsigned int threads_within_tile_border_y;
-
-	threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
-	threads_within_tile_border_y = (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) : get_local_size(1);
-
-	threads_within_tile_border_x = (threads_within_tile_border_x == 0) ? get_local_size(0) : threads_within_tile_border_x;
-	threads_within_tile_border_y = (threads_within_tile_border_y == 0) ? get_local_size(1) : threads_within_tile_border_y;
-
-	return (my_work / (threads_within_tile_border_x * threads_within_tile_border_y));
+	uint grp_idx = get_group_id_with_ray_index(ray_index,
+	                                           tile_dim_x,
+	                                           tile_dim_y,
+	                                           parallel_samples,
+	                                           0);
+	uint grp_idy = get_group_id_with_ray_index(ray_index,
+	                                           tile_dim_x,
+	                                           tile_dim_y,
+	                                           parallel_samples,
+	                                           1);
+	uint threads_within_tile_border_x =
+		(grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+		                                     : get_local_size(0);
+	uint threads_within_tile_border_y =
+		(grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+		                                     : get_local_size(1);
+
+	threads_within_tile_border_x =
+		(threads_within_tile_border_x == 0) ? get_local_size(0)
+		                                    : threads_within_tile_border_x;
+	threads_within_tile_border_y =
+		(threads_within_tile_border_y == 0) ? get_local_size(1)
+		                                    : threads_within_tile_border_y;
+
+	return my_work /
+	       (threads_within_tile_border_x * threads_within_tile_border_y);
 }
 
-/* Decode pixel and tile position w.r.t. assigned my_work */
-void get_pixel_tile_position(ccl_private unsigned int *pixel_x,
-                             ccl_private unsigned int *pixel_y,
-                             ccl_private unsigned int *tile_x,
-                             ccl_private unsigned int *tile_y,
-                             unsigned int my_work,
-                             unsigned int tile_dim_x,
-                             unsigned int tile_dim_y,
-                             unsigned int tile_offset_x,
-                             unsigned int tile_offset_y,
-                             unsigned int parallel_samples,
-                             unsigned int ray_index)
+/* Decode pixel and tile position w.r.t. assigned my_work. */
+void get_pixel_tile_position(ccl_private uint *pixel_x,
+                             ccl_private uint *pixel_y,
+                             ccl_private uint *tile_x,
+                             ccl_private uint *tile_y,
+                             uint my_work,
+                             uint tile_dim_x,
+                             uint tile_dim_y,
+                             uint tile_offset_x,
+                             uint tile_offset_y,
+                             uint parallel_samples,
+                             uint ray_index)
 {
-	unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
-	unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
-	unsigned int threads_within_tile_border_x;
-	unsigned int threads_within_tile_border_y;
-
-	threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
-	thre

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list