[Bf-blender-cvs] [124b888] cycles_split_kernel: Cycles: Replace OpenCL atomic functions with own versions
Mai Lavelle
noreply at git.blender.org
Tue Oct 25 17:39:10 CEST 2016
Commit: 124b88872e6ddf9ff06a4c6737c513c092da721e
Author: Mai Lavelle
Date: Tue Oct 25 17:35:49 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB124b88872e6ddf9ff06a4c6737c513c092da721e
Cycles: Replace OpenCL atomic functions with own versions
===================================================================
M intern/cycles/kernel/kernel_queues.h
M intern/cycles/kernel/kernel_work_stealing.h
M intern/cycles/kernel/split/kernel_background_buffer_update.h
M intern/cycles/kernel/split/kernel_data_init.h
M intern/cycles/kernel/split/kernel_direct_lighting.h
M intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M intern/cycles/kernel/split/kernel_lamp_emission.h
M intern/cycles/kernel/split/kernel_next_iteration_setup.h
M intern/cycles/kernel/split/kernel_queue_enqueue.h
M intern/cycles/kernel/split/kernel_scene_intersect.h
M intern/cycles/kernel/split/kernel_shader_eval.h
M intern/cycles/kernel/split/kernel_shadow_blocked.h
M intern/cycles/util/util_atomic.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index c423625..1ca57ed 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -35,7 +35,7 @@ ccl_device void enqueue_ray_index(
ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
{
/* This thread's queue index. */
- int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size);
+ int my_queue_index = atomic_inc_uint32((ccl_global uint*)&queue_index[queue_number]) + (queue_number * queue_size);
queues[my_queue_index] = ray_index;
}
@@ -77,15 +77,15 @@ ccl_device void enqueue_ray_index_local(
/* Get local queue id .*/
unsigned int lqidx;
if(enqueue_flag) {
- lqidx = atomic_inc(local_queue_atomics);
+ lqidx = atomic_inc_uint32(local_queue_atomics);
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
/* Get global queue offset. */
if(lidx == 0) {
- *local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics);
+ *local_queue_atomics = atomic_add_uint32((ccl_global uint*)&Queue_index[queue_number], *local_queue_atomics);
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
/* Get global queue index and enqueue ray. */
if(enqueue_flag) {
@@ -98,7 +98,7 @@ ccl_device unsigned int get_local_queue_index(
int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
ccl_local unsigned int *local_queue_atomics)
{
- int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]);
+ int my_lqidx = atomic_inc_uint32(&local_queue_atomics[queue_number]);
return my_lqidx;
}
@@ -107,7 +107,7 @@ ccl_device unsigned int get_global_per_queue_offset(
ccl_local unsigned int *local_queue_atomics,
ccl_global int* global_queue_atomics)
{
- unsigned int queue_offset = atomic_add(&global_queue_atomics[queue_number],
+ unsigned int queue_offset = atomic_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number],
local_queue_atomics[queue_number]);
return queue_offset;
}
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 00d8a59..05dd362 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -95,7 +95,7 @@ int get_next_work(ccl_global uint *work_pool,
grp_idy,
num_samples);
uint group_index = grp_idy * ccl_num_groups(0) + grp_idx;
- *my_work = atomic_inc(&work_pool[group_index]);
+ *my_work = atomic_inc_uint32(&work_pool[group_index]);
return (*my_work < total_work) ? 1 : 0;
}
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index ea2d703..ad88494 100644
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@@ -75,7 +75,7 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_queue_atomics = 0;
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
if(ray_index == 0) {
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 147839a..6aab435 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -133,7 +133,7 @@ ccl_device void kernel_data_init(
int group_index = get_group_id(1) * ccl_num_groups(0) + get_group_id(0);
work_pool_wgs[group_index] = 0;
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
#endif /* __WORK_STEALING__ */
/* Initialize queue data and queue index. */
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index 9ec4c7b..590c823 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -53,7 +53,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg)
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_queue_atomics = 0;
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
char enqueue_flag = 0;
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 03c4200..6858774 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -78,7 +78,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
local_queue_atomics_bg = 0;
local_queue_atomics_ao = 0;
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
char enqueue_flag = 0;
char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index f0d5db6..3e80cc4 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -50,7 +50,7 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_use_queues_flag = split_params->use_queues_flag[0];
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index;
if(local_use_queues_flag) {
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 4dfce6e..73acd3e 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -65,7 +65,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg)
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_queue_atomics = 0;
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
/* If we are here, then it means that scene-intersect kernel
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index 6fed762..57d09d8 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -58,7 +58,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
if(lidx < 2 ) {
local_queue_atomics[lidx] = 0;
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
int queue_number = -1;
@@ -73,7 +73,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
if(queue_number != -1) {
my_lqidx = get_local_queue_index(queue_number, local_queue_atomics);
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
if(lidx == 0) {
local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
@@ -85,7 +85,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
local_queue_atomics,
split_params->queue_index);
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
unsigned int my_gqidx;
if(queue_number != -1) {
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index 005683c..24a6635 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -71,7 +71,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_use_queues_flag = split_params->use_queues_flag[0];
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index;
if(local_use_queues_flag) {
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 9364847..4bb80ce 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -52,7 +52,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
local_queue_atomics = 0;
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
ray_index = get_ray_index(ray_index,
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h
index c86fd83..b5f2f3f 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h
@@ -55,7 +55,7 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg)
ao_queue_length = split_params->queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
dl_queue_length = split_params->queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
}
- barrier(CLK_LOCAL_MEM_FENCE);
+ ccl_barrier(CCL_LOCAL_MEM_FENCE);
/* flag determining if the current ray is to process shadow ray for AO or DL */
char shadow_blocked_type = -1;
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index 1d1e296..9c970de 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -32,6 +32,11 @@ ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value)
}
}
+#define atomic_inc_uint32(p) atomic_add_uint32((p), 1)
+
+#define CCL_LOCAL_MEM_FENCE 0
+#define ccl_barrier(flags) (void)0
+
#else /* __KERNEL_GPU__ */
#ifdef __KERNEL_OPENCL__
@@ -58,6 +63,12 @@ ccl_device_inline void atomic_add_float(volatile ccl_global float *source,
new_value.int_value) != prev_value.int_value);
}
+#define atomic_add_uint32(p, x) atomic_add((p), (x))
+#define atomic_inc_uint32(p) atomic_inc((p))
+
+#define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list