[Bf-blender-cvs] [84645df] cycles_kernel_split: Cycles kernel split: Cleanup of work stealing module
Sergey Sharybin
noreply at git.blender.org
Fri May 8 16:22:04 CEST 2015
Commit: 84645dff808c177dbd197c2e80ec95a4c2347d0f
Author: Sergey Sharybin
Date: Fri May 8 19:21:09 2015 +0500
Branches: cycles_kernel_split
https://developer.blender.org/rB84645dff808c177dbd197c2e80ec95a4c2347d0f
Cycles kernel split: Cleanup of work stealing module
===================================================================
M intern/cycles/kernel/kernel_work_stealing.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 4833fac..9b83d97 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -27,113 +27,163 @@
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#endif
-unsigned int get_group_id_with_ray_index(unsigned int ray_index,
- unsigned int tile_dim_x,
- unsigned int tile_dim_y,
- unsigned int parallel_samples,
- int dim)
+uint get_group_id_with_ray_index(uint ray_index,
+ uint tile_dim_x,
+ uint tile_dim_y,
+ uint parallel_samples,
+ int dim)
{
- unsigned int retval;
if(dim == 0) {
- unsigned int x_span = ray_index % (tile_dim_x * parallel_samples);
- retval = x_span / get_local_size(0);
+ uint x_span = ray_index % (tile_dim_x * parallel_samples);
+ return x_span / get_local_size(0);
}
- else if(dim == 1) {
- unsigned int y_span = ray_index / (tile_dim_x * parallel_samples);
- retval = y_span / get_local_size(1);
+ else /*if(dim == 1)*/ {
+ kernel_assert(dim == 1);
+ uint y_span = ray_index / (tile_dim_x * parallel_samples);
+ return y_span / get_local_size(1);
}
- return retval;
}
-unsigned int get_total_work(unsigned int tile_dim_x,
- unsigned int tile_dim_y,
- unsigned int grp_idx,
- unsigned int grp_idy,
- unsigned int num_samples)
+uint get_total_work(uint tile_dim_x,
+ uint tile_dim_y,
+ uint grp_idx,
+ uint grp_idy,
+ uint num_samples)
{
- unsigned int threads_within_tile_border_x;
- unsigned int threads_within_tile_border_y;
-
- threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
- threads_within_tile_border_y = (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) : get_local_size(1);
-
- threads_within_tile_border_x = (threads_within_tile_border_x == 0) ? get_local_size(0) : threads_within_tile_border_x;
- threads_within_tile_border_y = (threads_within_tile_border_y == 0) ? get_local_size(1) : threads_within_tile_border_y;
-
- return (threads_within_tile_border_x * threads_within_tile_border_y * num_samples);
+ uint threads_within_tile_border_x =
+ (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+ : get_local_size(0);
+ uint threads_within_tile_border_y =
+ (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+ : get_local_size(1);
+
+ threads_within_tile_border_x =
+ (threads_within_tile_border_x == 0) ? get_local_size(0)
+ : threads_within_tile_border_x;
+ threads_within_tile_border_y =
+ (threads_within_tile_border_y == 0) ? get_local_size(1)
+ : threads_within_tile_border_y;
+
+ return threads_within_tile_border_x *
+ threads_within_tile_border_y *
+ num_samples;
}
/* Returns 0 in case there is no next work available */
/* Returns 1 in case work assigned is valid */
-int get_next_work(ccl_global unsigned int *work_pool,
- ccl_private unsigned int *my_work,
- unsigned int tile_dim_x,
- unsigned int tile_dim_y,
- unsigned int num_samples,
- unsigned int parallel_samples,
- unsigned int ray_index)
+int get_next_work(ccl_global uint *work_pool,
+ ccl_private uint *my_work,
+ uint tile_dim_x,
+ uint tile_dim_y,
+ uint num_samples,
+ uint parallel_samples,
+ uint ray_index)
{
- unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
- unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
- unsigned int total_work = get_total_work(tile_dim_x, tile_dim_y, grp_idx, grp_idy, num_samples);
- unsigned int group_index = grp_idy * get_num_groups(0) + grp_idx;
- *my_work = atomic_inc(&work_pool[group_index]);
- return = (*my_work < total_work) ? 1 : 0;
+ uint grp_idx = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 0);
+ uint grp_idy = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 1);
+ uint total_work = get_total_work(tile_dim_x,
+ tile_dim_y,
+ grp_idx,
+ grp_idy,
+ num_samples);
+ uint group_index = grp_idy * get_num_groups(0) + grp_idx;
+ *my_work = atomic_inc(&work_pool[group_index]);
+ return (*my_work < total_work) ? 1 : 0;
}
-/* This function assumes that the passed my_work is valid */
-/* Decode sample number w.r.t. assigned my_work */
-unsigned int get_my_sample(unsigned int my_work,
- unsigned int tile_dim_x,
- unsigned int tile_dim_y,
- unsigned int parallel_samples,
- unsigned int ray_index)
+/* This function assumes that the passed my_work is valid. */
+/* Decode sample number w.r.t. assigned my_work. */
+uint get_my_sample(uint my_work,
+ uint tile_dim_x,
+ uint tile_dim_y,
+ uint parallel_samples,
+ uint ray_index)
{
- unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
- unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
- unsigned int threads_within_tile_border_x;
- unsigned int threads_within_tile_border_y;
-
- threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
- threads_within_tile_border_y = (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1) : get_local_size(1);
-
- threads_within_tile_border_x = (threads_within_tile_border_x == 0) ? get_local_size(0) : threads_within_tile_border_x;
- threads_within_tile_border_y = (threads_within_tile_border_y == 0) ? get_local_size(1) : threads_within_tile_border_y;
-
- return (my_work / (threads_within_tile_border_x * threads_within_tile_border_y));
+ uint grp_idx = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 0);
+ uint grp_idy = get_group_id_with_ray_index(ray_index,
+ tile_dim_x,
+ tile_dim_y,
+ parallel_samples,
+ 1);
+ uint threads_within_tile_border_x =
+ (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
+ : get_local_size(0);
+ uint threads_within_tile_border_y =
+ (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
+ : get_local_size(1);
+
+ threads_within_tile_border_x =
+ (threads_within_tile_border_x == 0) ? get_local_size(0)
+ : threads_within_tile_border_x;
+ threads_within_tile_border_y =
+ (threads_within_tile_border_y == 0) ? get_local_size(1)
+ : threads_within_tile_border_y;
+
+ return my_work /
+ (threads_within_tile_border_x * threads_within_tile_border_y);
}
-/* Decode pixel and tile position w.r.t. assigned my_work */
-void get_pixel_tile_position(ccl_private unsigned int *pixel_x,
- ccl_private unsigned int *pixel_y,
- ccl_private unsigned int *tile_x,
- ccl_private unsigned int *tile_y,
- unsigned int my_work,
- unsigned int tile_dim_x,
- unsigned int tile_dim_y,
- unsigned int tile_offset_x,
- unsigned int tile_offset_y,
- unsigned int parallel_samples,
- unsigned int ray_index)
+/* Decode pixel and tile position w.r.t. assigned my_work. */
+void get_pixel_tile_position(ccl_private uint *pixel_x,
+ ccl_private uint *pixel_y,
+ ccl_private uint *tile_x,
+ ccl_private uint *tile_y,
+ uint my_work,
+ uint tile_dim_x,
+ uint tile_dim_y,
+ uint tile_offset_x,
+ uint tile_offset_y,
+ uint parallel_samples,
+ uint ray_index)
{
- unsigned int grp_idx = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 0);
- unsigned int grp_idy = get_group_id_with_ray_index(ray_index, tile_dim_x, tile_dim_y, parallel_samples, 1);
- unsigned int threads_within_tile_border_x;
- unsigned int threads_within_tile_border_y;
-
- threads_within_tile_border_x = (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0) : get_local_size(0);
- thre
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list