[Bf-blender-cvs] [6850eeb] cycles_split_kernel: Cycles: Replace OpenCL work item functions with own versions
Mai Lavelle
noreply at git.blender.org
Tue Oct 25 17:39:09 CEST 2016
Commit: 6850eebeaa235dff70dc0a8a4d2f4a15dfbf31f9
Author: Mai Lavelle
Date: Tue Oct 25 17:19:05 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB6850eebeaa235dff70dc0a8a4d2f4a15dfbf31f9
Cycles: Replace OpenCL work item functions with own versions
===================================================================
M intern/cycles/kernel/kernel_compat_cpu.h
M intern/cycles/kernel/kernel_compat_opencl.h
M intern/cycles/kernel/kernel_queues.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernel_work_stealing.h
M intern/cycles/kernel/kernels/opencl/kernel.cl
M intern/cycles/kernel/split/kernel_background_buffer_update.h
M intern/cycles/kernel/split/kernel_data_init.h
M intern/cycles/kernel/split/kernel_direct_lighting.h
M intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M intern/cycles/kernel/split/kernel_lamp_emission.h
M intern/cycles/kernel/split/kernel_next_iteration_setup.h
M intern/cycles/kernel/split/kernel_queue_enqueue.h
M intern/cycles/kernel/split/kernel_scene_intersect.h
M intern/cycles/kernel/split/kernel_shader_eval.h
M intern/cycles/kernel/split/kernel_shadow_blocked.h
M intern/cycles/kernel/split/kernel_sum_all_radiance.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 9d1f3bd..e6aa8f8 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -44,6 +44,14 @@
#define ccl_addr_space
+#define ccl_local_id(d) 0
+#define ccl_global_id(d) 0
+
+#define ccl_local_size(d) 1
+#define ccl_global_size(d) 1
+
+#define ccl_num_groups(d) 1
+
/* On x86_64, versions of glibc < 2.16 have an issue where expf is
* much slower than the double version. This was fixed in glibc 2.16.
*/
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index f076e3a..b60eb14 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -49,6 +49,14 @@
# define ccl_addr_space
#endif
+#define ccl_local_id(d) get_local_id(d)
+#define ccl_global_id(d) get_global_id(d)
+
+#define ccl_local_size(d) get_local_size(d)
+#define ccl_global_size(d) get_global_size(d)
+
+#define ccl_num_groups(d) get_num_groups(d)
+
/* Selective nodes compilation. */
#ifndef __NODES_MAX_GROUP__
# define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index cf5614b..c423625 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -72,7 +72,7 @@ ccl_device void enqueue_ray_index_local(
ccl_global int *Queue_data, /* Queues. */
ccl_global int *Queue_index) /* To do global queue atomics. */
{
- int lidx = get_local_id(1) * get_local_size(0) + get_local_id(0);
+ int lidx = ccl_local_id(1) * ccl_local_size(0) + ccl_local_id(0);
/* Get local queue id .*/
unsigned int lqidx;
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 734b446..68e5296 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -737,7 +737,7 @@ enum ShaderDataFlag {
};
#ifdef __SPLIT_KERNEL__
-# define SD_THREAD (get_global_id(1) * get_global_size(0) + get_global_id(0))
+# define SD_THREAD (ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0))
# if !defined(__SPLIT_KERNEL_SOA__)
/* ShaderData is stored as an Array-of-Structures */
# define ccl_soa_member(type, name) type soa_##name
@@ -745,7 +745,7 @@ enum ShaderDataFlag {
# define ccl_fetch_array(s, t, index) (&s[SD_THREAD].soa_##t[index])
# else
/* ShaderData is stored as an Structure-of-Arrays */
-# define SD_GLOBAL_SIZE (get_global_size(0) * get_global_size(1))
+# define SD_GLOBAL_SIZE (ccl_global_size(0) * ccl_global_size(1))
# define SD_FIELD_SIZE(t) sizeof(((struct ShaderData*)0)->t)
# define SD_OFFSETOF(t) ((char*)(&((struct ShaderData*)0)->t) - (char*)0)
# define ccl_soa_member(type, name) type soa_##name
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 7d559b1..00d8a59 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -35,12 +35,12 @@ uint get_group_id_with_ray_index(uint ray_index,
{
if(dim == 0) {
uint x_span = ray_index % (tile_dim_x * parallel_samples);
- return x_span / get_local_size(0);
+ return x_span / ccl_local_size(0);
}
else /*if(dim == 1)*/ {
kernel_assert(dim == 1);
uint y_span = ray_index / (tile_dim_x * parallel_samples);
- return y_span / get_local_size(1);
+ return y_span / ccl_local_size(1);
}
}
@@ -51,17 +51,17 @@ uint get_total_work(uint tile_dim_x,
uint num_samples)
{
uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
+ (grp_idx == (ccl_num_groups(0) - 1)) ? tile_dim_x % ccl_local_size(0)
+ : ccl_local_size(0);
uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
+ (grp_idy == (ccl_num_groups(1) - 1)) ? tile_dim_y % ccl_local_size(1)
+ : ccl_local_size(1);
threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
+ (threads_within_tile_border_x == 0) ? ccl_local_size(0)
: threads_within_tile_border_x;
threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
+ (threads_within_tile_border_y == 0) ? ccl_local_size(1)
: threads_within_tile_border_y;
return threads_within_tile_border_x *
@@ -94,7 +94,7 @@ int get_next_work(ccl_global uint *work_pool,
grp_idx,
grp_idy,
num_samples);
- uint group_index = grp_idy * get_num_groups(0) + grp_idx;
+ uint group_index = grp_idy * ccl_num_groups(0) + grp_idx;
*my_work = atomic_inc(&work_pool[group_index]);
return (*my_work < total_work) ? 1 : 0;
}
@@ -118,17 +118,17 @@ uint get_my_sample(uint my_work,
parallel_samples,
1);
uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
+ (grp_idx == (ccl_num_groups(0) - 1)) ? tile_dim_x % ccl_local_size(0)
+ : ccl_local_size(0);
uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
+ (grp_idy == (ccl_num_groups(1) - 1)) ? tile_dim_y % ccl_local_size(1)
+ : ccl_local_size(1);
threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
+ (threads_within_tile_border_x == 0) ? ccl_local_size(0)
: threads_within_tile_border_x;
threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
+ (threads_within_tile_border_y == 0) ? ccl_local_size(1)
: threads_within_tile_border_y;
return my_work /
@@ -159,17 +159,17 @@ void get_pixel_tile_position(ccl_private uint *pixel_x,
parallel_samples,
1);
uint threads_within_tile_border_x =
- (grp_idx == (get_num_groups(0) - 1)) ? tile_dim_x % get_local_size(0)
- : get_local_size(0);
+ (grp_idx == (ccl_num_groups(0) - 1)) ? tile_dim_x % ccl_local_size(0)
+ : ccl_local_size(0);
uint threads_within_tile_border_y =
- (grp_idy == (get_num_groups(1) - 1)) ? tile_dim_y % get_local_size(1)
- : get_local_size(1);
+ (grp_idy == (ccl_num_groups(1) - 1)) ? tile_dim_y % ccl_local_size(1)
+ : ccl_local_size(1);
threads_within_tile_border_x =
- (threads_within_tile_border_x == 0) ? get_local_size(0)
+ (threads_within_tile_border_x == 0) ? ccl_local_size(0)
: threads_within_tile_border_x;
threads_within_tile_border_y =
- (threads_within_tile_border_y == 0) ? get_local_size(1)
+ (threads_within_tile_border_y == 0) ? ccl_local_size(1)
: threads_within_tile_border_y;
uint total_associated_pixels =
@@ -181,9 +181,9 @@ void get_pixel_tile_position(ccl_private uint *pixel_x,
work_group_pixel_index / threads_within_tile_border_x;
*pixel_x =
- tile_offset_x + (grp_idx * get_local_size(0)) + work_group_pixel_x;
+ tile_offset_x + (grp_idx * ccl_local_size(0)) + work_group_pixel_x;
*pixel_y =
- tile_offset_y + (grp_idy * get_local_size(1)) + work_group_pixel_y;
+ tile_offset_y + (grp_idy * ccl_local_size(1)) + work_group_pixel_y;
*tile_x = *pixel_x - tile_offset_x;
*tile_y = *pixel_y - tile_offset_y;
}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl b/intern/cycles/kernel/kernels/opencl/kernel.cl
index a68f978..03a27c8 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -67,8 +67,8 @@ __kernel void kernel_ocl_path_trace(
kg->name = name;
#include "../../kernel_textures.h"
- int x = sx + get_global_id(0);
- int y = sy + get_global_id(1);
+ int x = sx + ccl_global_id(0);
+ int y = sy + ccl_global_id(1);
if(x < sx + sw && y < sy + sh)
kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
@@ -96,7 +96,7 @@ __kernel void kernel_ocl_shader(
kg->name = name;
#include "../../kernel_textures.h"
- int x = sx + get_global_id(0);
+ int x = sx + ccl_global_id(0);
if(x < sx + sw) {
kernel_shader_evaluate(kg,
@@ -128,7 +128,7 @@ __kernel void kernel_ocl_bake(
kg->name = name;
#include "../../kernel_textures.h"
- int x = sx + get_global_id(0);
+ int x = sx + ccl_global_id(0);
if(x < sx + sw) {
#ifdef __NO_BAKING__
@@ -159,8 +159,8 @@ __kernel void kernel_ocl_convert_to_byte(
kg->name = name;
#include "../../kernel_textures.h"
- int x = sx + get_global_id(0);
- int y = sy + get_global_id(1);
+ int x = sx + ccl_global_id(0);
+ int y = sy + ccl_global_id(1);
if(x < sx + sw && y < sy + sh)
kernel_film_convert_to_byte(kg, rgba, buffer, sample_scale, x, y, offset, stride);
@@ -186,8 +186,8 @@ __kernel void kernel_ocl_convert_to_half_float(
kg->name = name;
#include "../../kernel_textures.h"
- int x = sx + get_global_id(0);
- int y = sy + get_global_id(1);
+ int x = sx + ccl_global_id(0);
+ int y = sy + ccl_global_id(1);
if(x < sx + sw && y < sy + sh)
kernel_film_convert_to_half_float(kg, rgba, buffer, sample_scale, x, y, offset, stride);
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index 114bdf2..ea2d703 100644
--- a/intern/cycles/kernel/spl
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list