[Bf-blender-cvs] [3b8a84d] cycles_split_kernel: Cycles: Fix atomics in split kernel after switching away from OpenCL atomics
Mai Lavelle
noreply at git.blender.org
Thu Oct 27 17:48:58 CEST 2016
Commit: 3b8a84d99cfcd91303e7c0aa59489ec473345249
Author: Mai Lavelle
Date: Thu Oct 27 17:29:45 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB3b8a84d99cfcd91303e7c0aa59489ec473345249
Cycles: Fix atomics in split kernel after switching away from OpenCL atomics
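Blender's atomic helpers return the new value (after the operation), whereas OpenCL's atomic_add/atomic_inc return the old value. Make the OpenCL wrappers return the new value as well, and have the split kernel callers subtract the increment wherever they need the old value.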
===================================================================
M intern/cycles/kernel/kernel_queues.h
M intern/cycles/kernel/kernel_work_stealing.h
M intern/cycles/util/util_atomic.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 011610f..8d3176f 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -37,7 +37,7 @@ ccl_device void enqueue_ray_index(
ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
{
/* This thread's queue index. */
- int my_queue_index = atomic_inc_uint32((ccl_global uint*)&queue_index[queue_number]) + (queue_number * queue_size);
+ int my_queue_index = atomic_inc_uint32((ccl_global uint*)&queue_index[queue_number]) + (queue_number * queue_size)-1;
queues[my_queue_index] = ray_index;
}
@@ -79,13 +79,14 @@ ccl_device void enqueue_ray_index_local(
/* Get local queue id .*/
unsigned int lqidx;
if(enqueue_flag) {
- lqidx = atomic_inc_uint32(local_queue_atomics);
+ lqidx = atomic_inc_uint32(local_queue_atomics)-1;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
/* Get global queue offset. */
if(lidx == 0) {
- *local_queue_atomics = atomic_add_uint32((ccl_global uint*)&Queue_index[queue_number], *local_queue_atomics);
+ *local_queue_atomics = atomic_add_uint32((ccl_global uint*)&Queue_index[queue_number],
+ *local_queue_atomics) - *local_queue_atomics;
}
ccl_barrier(CCL_LOCAL_MEM_FENCE);
@@ -100,7 +101,7 @@ ccl_device unsigned int get_local_queue_index(
int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
ccl_local unsigned int *local_queue_atomics)
{
- int my_lqidx = atomic_inc_uint32(&local_queue_atomics[queue_number]);
+ int my_lqidx = atomic_inc_uint32(&local_queue_atomics[queue_number])-1;
return my_lqidx;
}
@@ -110,7 +111,7 @@ ccl_device unsigned int get_global_per_queue_offset(
ccl_global int* global_queue_atomics)
{
unsigned int queue_offset = atomic_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number],
- local_queue_atomics[queue_number]);
+ local_queue_atomics[queue_number]) - local_queue_atomics[queue_number];
return queue_offset;
}
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 353f13d..afb9ac7 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -97,7 +97,7 @@ ccl_device int get_next_work(ccl_global uint *work_pool,
grp_idy,
num_samples);
uint group_index = grp_idy * ccl_num_groups(0) + grp_idx;
- *my_work = atomic_inc_uint32(&work_pool[group_index]);
+ *my_work = atomic_inc_uint32(&work_pool[group_index])-1;
return (*my_work < total_work) ? 1 : 0;
}
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index 5053d59..27682e7 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -65,8 +65,8 @@ ccl_device_inline void atomic_add_float(volatile ccl_global float *source,
new_value.int_value) != prev_value.int_value);
}
-#define atomic_add_uint32(p, x) atomic_add((p), (x))
-#define atomic_inc_uint32(p) atomic_inc((p))
+#define atomic_add_uint32(p, x) (atomic_add((p), (x))+(x))
+#define atomic_inc_uint32(p) (atomic_inc((p))+1)
#define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
#define ccl_barrier(flags) barrier(flags)
More information about the Bf-blender-cvs
mailing list