[Bf-blender-cvs] [3b8a84d] cycles_split_kernel: Cycles: Fix atomics in split kernel after switching away from OpenCL atomics

Mai Lavelle noreply at git.blender.org
Thu Oct 27 17:48:58 CEST 2016


Commit: 3b8a84d99cfcd91303e7c0aa59489ec473345249
Author: Mai Lavelle
Date:   Thu Oct 27 17:29:45 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB3b8a84d99cfcd91303e7c0aa59489ec473345249

Cycles: Fix atomics in split kernel after switching away from OpenCL atomics

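Apparently Blender's atomics return the new value rather than the old value,
unlike the OpenCL atomics they replace. The OpenCL wrapper macros now add the
operand so both backends return the new value, and each call site subtracts
the amount it added to recover the previous value.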

===================================================================

M	intern/cycles/kernel/kernel_queues.h
M	intern/cycles/kernel/kernel_work_stealing.h
M	intern/cycles/util/util_atomic.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 011610f..8d3176f 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -37,7 +37,7 @@ ccl_device void enqueue_ray_index(
         ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
 {
 	/* This thread's queue index. */
-	int my_queue_index = atomic_inc_uint32((ccl_global uint*)&queue_index[queue_number]) + (queue_number * queue_size);
+	int my_queue_index = atomic_inc_uint32((ccl_global uint*)&queue_index[queue_number]) + (queue_number * queue_size)-1;
 	queues[my_queue_index] = ray_index;
 }
 
@@ -79,13 +79,14 @@ ccl_device void enqueue_ray_index_local(
 	/* Get local queue id .*/
 	unsigned int lqidx;
 	if(enqueue_flag) {
-		lqidx = atomic_inc_uint32(local_queue_atomics);
+		lqidx = atomic_inc_uint32(local_queue_atomics)-1;
 	}
 	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	/* Get global queue offset. */
 	if(lidx == 0) {
-		*local_queue_atomics = atomic_add_uint32((ccl_global uint*)&Queue_index[queue_number], *local_queue_atomics);
+		*local_queue_atomics = atomic_add_uint32((ccl_global uint*)&Queue_index[queue_number],
+		                                         *local_queue_atomics) - *local_queue_atomics;
 	}
 	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
@@ -100,7 +101,7 @@ ccl_device unsigned int get_local_queue_index(
         int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
         ccl_local unsigned int *local_queue_atomics)
 {
-	int my_lqidx = atomic_inc_uint32(&local_queue_atomics[queue_number]);
+	int my_lqidx = atomic_inc_uint32(&local_queue_atomics[queue_number])-1;
 	return my_lqidx;
 }
 
@@ -110,7 +111,7 @@ ccl_device unsigned int get_global_per_queue_offset(
         ccl_global int* global_queue_atomics)
 {
 	unsigned int queue_offset = atomic_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number],
-	                                       local_queue_atomics[queue_number]);
+	                                       local_queue_atomics[queue_number]) - local_queue_atomics[queue_number];
 	return queue_offset;
 }
 
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 353f13d..afb9ac7 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -97,7 +97,7 @@ ccl_device int get_next_work(ccl_global uint *work_pool,
 	                                 grp_idy,
 	                                 num_samples);
 	uint group_index = grp_idy * ccl_num_groups(0) + grp_idx;
-	*my_work = atomic_inc_uint32(&work_pool[group_index]);
+	*my_work = atomic_inc_uint32(&work_pool[group_index])-1;
 	return (*my_work < total_work) ? 1 : 0;
 }
 
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index 5053d59..27682e7 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -65,8 +65,8 @@ ccl_device_inline void atomic_add_float(volatile ccl_global float *source,
 	                       new_value.int_value) != prev_value.int_value);
 }
 
-#define atomic_add_uint32(p, x) atomic_add((p), (x))
-#define atomic_inc_uint32(p) atomic_inc((p))
+#define atomic_add_uint32(p, x) (atomic_add((p), (x))+(x))
+#define atomic_inc_uint32(p) (atomic_inc((p))+1)
 
 #define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
 #define ccl_barrier(flags) barrier(flags)




More information about the Bf-blender-cvs mailing list