[Bf-blender-cvs] [124b888] cycles_split_kernel: Cycles: Replace OpenCL atomic functions with own versions

Mai Lavelle noreply at git.blender.org
Tue Oct 25 17:39:10 CEST 2016


Commit: 124b88872e6ddf9ff06a4c6737c513c092da721e
Author: Mai Lavelle
Date:   Tue Oct 25 17:35:49 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB124b88872e6ddf9ff06a4c6737c513c092da721e

Cycles: Replace OpenCL atomic functions with own versions

===================================================================

M	intern/cycles/kernel/kernel_queues.h
M	intern/cycles/kernel/kernel_work_stealing.h
M	intern/cycles/kernel/split/kernel_background_buffer_update.h
M	intern/cycles/kernel/split/kernel_data_init.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h
M	intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M	intern/cycles/kernel/split/kernel_lamp_emission.h
M	intern/cycles/kernel/split/kernel_next_iteration_setup.h
M	intern/cycles/kernel/split/kernel_queue_enqueue.h
M	intern/cycles/kernel/split/kernel_scene_intersect.h
M	intern/cycles/kernel/split/kernel_shader_eval.h
M	intern/cycles/kernel/split/kernel_shadow_blocked.h
M	intern/cycles/util/util_atomic.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index c423625..1ca57ed 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -35,7 +35,7 @@ ccl_device void enqueue_ray_index(
         ccl_global int *queue_index) /* Array of size num_queues; Used for atomic increment. */
 {
 	/* This thread's queue index. */
-	int my_queue_index = atomic_inc(&queue_index[queue_number]) + (queue_number * queue_size);
+	int my_queue_index = atomic_inc_uint32((ccl_global uint*)&queue_index[queue_number]) + (queue_number * queue_size);
 	queues[my_queue_index] = ray_index;
 }
 
@@ -77,15 +77,15 @@ ccl_device void enqueue_ray_index_local(
 	/* Get local queue id .*/
 	unsigned int lqidx;
 	if(enqueue_flag) {
-		lqidx = atomic_inc(local_queue_atomics);
+		lqidx = atomic_inc_uint32(local_queue_atomics);
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	/* Get global queue offset. */
 	if(lidx == 0) {
-		*local_queue_atomics = atomic_add(&Queue_index[queue_number], *local_queue_atomics);
+		*local_queue_atomics = atomic_add_uint32((ccl_global uint*)&Queue_index[queue_number], *local_queue_atomics);
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	/* Get global queue index and enqueue ray. */
 	if(enqueue_flag) {
@@ -98,7 +98,7 @@ ccl_device unsigned int get_local_queue_index(
         int queue_number, /* Queue in which to enqueue the ray; -1 if no queue */
         ccl_local unsigned int *local_queue_atomics)
 {
-	int my_lqidx = atomic_inc(&local_queue_atomics[queue_number]);
+	int my_lqidx = atomic_inc_uint32(&local_queue_atomics[queue_number]);
 	return my_lqidx;
 }
 
@@ -107,7 +107,7 @@ ccl_device unsigned int get_global_per_queue_offset(
         ccl_local unsigned int *local_queue_atomics,
         ccl_global int* global_queue_atomics)
 {
-	unsigned int queue_offset = atomic_add(&global_queue_atomics[queue_number],
+	unsigned int queue_offset = atomic_add_uint32((ccl_global uint*)&global_queue_atomics[queue_number],
 	                                       local_queue_atomics[queue_number]);
 	return queue_offset;
 }
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index 00d8a59..05dd362 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -95,7 +95,7 @@ int get_next_work(ccl_global uint *work_pool,
 	                                 grp_idy,
 	                                 num_samples);
 	uint group_index = grp_idy * ccl_num_groups(0) + grp_idx;
-	*my_work = atomic_inc(&work_pool[group_index]);
+	*my_work = atomic_inc_uint32(&work_pool[group_index]);
 	return (*my_work < total_work) ? 1 : 0;
 }
 
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index ea2d703..ad88494 100644
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@@ -75,7 +75,7 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
 	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
 		local_queue_atomics = 0;
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
 	if(ray_index == 0) {
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 147839a..6aab435 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -133,7 +133,7 @@ ccl_device void kernel_data_init(
 		int group_index = get_group_id(1) * ccl_num_groups(0) + get_group_id(0);
 		work_pool_wgs[group_index] = 0;
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 #endif  /* __WORK_STEALING__ */
 
 	/* Initialize queue data and queue index. */
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index 9ec4c7b..590c823 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -53,7 +53,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg)
 	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
 		local_queue_atomics = 0;
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	char enqueue_flag = 0;
 	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 03c4200..6858774 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -78,7 +78,7 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(KernelGlobal
 		local_queue_atomics_bg = 0;
 		local_queue_atomics_ao = 0;
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	char enqueue_flag = 0;
 	char enqueue_flag_AO_SHADOW_RAY_CAST = 0;
diff --git a/intern/cycles/kernel/split/kernel_lamp_emission.h b/intern/cycles/kernel/split/kernel_lamp_emission.h
index f0d5db6..3e80cc4 100644
--- a/intern/cycles/kernel/split/kernel_lamp_emission.h
+++ b/intern/cycles/kernel/split/kernel_lamp_emission.h
@@ -50,7 +50,7 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
 	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
 		local_use_queues_flag = split_params->use_queues_flag[0];
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	int ray_index;
 	if(local_use_queues_flag) {
diff --git a/intern/cycles/kernel/split/kernel_next_iteration_setup.h b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
index 4dfce6e..73acd3e 100644
--- a/intern/cycles/kernel/split/kernel_next_iteration_setup.h
+++ b/intern/cycles/kernel/split/kernel_next_iteration_setup.h
@@ -65,7 +65,7 @@ ccl_device void kernel_next_iteration_setup(KernelGlobals *kg)
 	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
 		local_queue_atomics = 0;
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
 		/* If we are here, then it means that scene-intersect kernel
diff --git a/intern/cycles/kernel/split/kernel_queue_enqueue.h b/intern/cycles/kernel/split/kernel_queue_enqueue.h
index 6fed762..57d09d8 100644
--- a/intern/cycles/kernel/split/kernel_queue_enqueue.h
+++ b/intern/cycles/kernel/split/kernel_queue_enqueue.h
@@ -58,7 +58,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
 	if(lidx < 2 ) {
 		local_queue_atomics[lidx] = 0;
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	int queue_number = -1;
 
@@ -73,7 +73,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
 	if(queue_number != -1) {
 		my_lqidx = get_local_queue_index(queue_number, local_queue_atomics);
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	if(lidx == 0) {
 		local_queue_atomics[QUEUE_ACTIVE_AND_REGENERATED_RAYS] =
@@ -85,7 +85,7 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
 		                                    local_queue_atomics,
 		                                    split_params->queue_index);
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	unsigned int my_gqidx;
 	if(queue_number != -1) {
diff --git a/intern/cycles/kernel/split/kernel_scene_intersect.h b/intern/cycles/kernel/split/kernel_scene_intersect.h
index 005683c..24a6635 100644
--- a/intern/cycles/kernel/split/kernel_scene_intersect.h
+++ b/intern/cycles/kernel/split/kernel_scene_intersect.h
@@ -71,7 +71,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
 	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
 		local_use_queues_flag = split_params->use_queues_flag[0];
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	int ray_index;
 	if(local_use_queues_flag) {
diff --git a/intern/cycles/kernel/split/kernel_shader_eval.h b/intern/cycles/kernel/split/kernel_shader_eval.h
index 9364847..4bb80ce 100644
--- a/intern/cycles/kernel/split/kernel_shader_eval.h
+++ b/intern/cycles/kernel/split/kernel_shader_eval.h
@@ -52,7 +52,7 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
 	if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
 		local_queue_atomics = 0;
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
 	ray_index = get_ray_index(ray_index,
diff --git a/intern/cycles/kernel/split/kernel_shadow_blocked.h b/intern/cycles/kernel/split/kernel_shadow_blocked.h
index c86fd83..b5f2f3f 100644
--- a/intern/cycles/kernel/split/kernel_shadow_blocked.h
+++ b/intern/cycles/kernel/split/kernel_shadow_blocked.h
@@ -55,7 +55,7 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg)
 		ao_queue_length = split_params->queue_index[QUEUE_SHADOW_RAY_CAST_AO_RAYS];
 		dl_queue_length = split_params->queue_index[QUEUE_SHADOW_RAY_CAST_DL_RAYS];
 	}
-	barrier(CLK_LOCAL_MEM_FENCE);
+	ccl_barrier(CCL_LOCAL_MEM_FENCE);
 
 	/* flag determining if the current ray is to process shadow ray for AO or DL */
 	char shadow_blocked_type = -1;
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index 1d1e296..9c970de 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -32,6 +32,11 @@ ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value)
 	}
 }
 
+#define atomic_inc_uint32(p) atomic_add_uint32((p), 1)
+
+#define CCL_LOCAL_MEM_FENCE 0
+#define ccl_barrier(flags) (void)0
+
 #else  /* __KERNEL_GPU__ */
 
 #ifdef __KERNEL_OPENCL__
@@ -58,6 +63,12 @@ ccl_device_inline void atomic_add_float(volatile ccl_global float *source,
 	                       new_value.int_value) != prev_value.int_value);
 }
 
+#define atomic_add_uint32(p, x) atomic_add((p), (x))
+#define atomic_inc_uint32(p) atomic_inc((p))
+
+#define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list