[Bf-blender-cvs] [cb3b197] master: Cycles: Use utility define for restrict pointers

Sergey Sharybin noreply at git.blender.org
Mon Jul 11 14:04:23 CEST 2016


Commit: cb3b19730c4fa402c065e288330f4f1f197026ab
Author: Sergey Sharybin
Date:   Mon Jul 11 13:53:37 2016 +0200
Branches: master
https://developer.blender.org/rBcb3b19730c4fa402c065e288330f4f1f197026ab

Cycles: Use utility define for restrict pointers

This way restrict can be used for CUDA and OpenCL as well.

From quick tests in the areas I've been testing, this might give some
barely measurable % of speedup, but it increases register pressure.

So use of this qualifier is still really limited.

===================================================================

M	intern/cycles/kernel/bvh/qbvh_nodes.h
M	intern/cycles/kernel/kernel_compat_cuda.h
M	intern/cycles/kernel/kernel_compat_opencl.h
M	intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/kernel/bvh/qbvh_nodes.h b/intern/cycles/kernel/bvh/qbvh_nodes.h
index 6dfb1c0..a833f4b 100644
--- a/intern/cycles/kernel/bvh/qbvh_nodes.h
+++ b/intern/cycles/kernel/bvh/qbvh_nodes.h
@@ -22,27 +22,27 @@ struct QBVHStackItem {
 /* TOOD(sergey): Investigate if using intrinsics helps for both
  * stack item swap and float comparison.
  */
-ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a,
-                                      QBVHStackItem *__restrict b)
+ccl_device_inline void qbvh_item_swap(QBVHStackItem *ccl_restrict a,
+                                      QBVHStackItem *ccl_restrict b)
 {
 	QBVHStackItem tmp = *a;
 	*a = *b;
 	*b = tmp;
 }
 
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
-                                       QBVHStackItem *__restrict s2,
-                                       QBVHStackItem *__restrict s3)
+ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
+                                       QBVHStackItem *ccl_restrict s2,
+                                       QBVHStackItem *ccl_restrict s3)
 {
 	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
 	if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
 	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
 }
 
-ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
-                                       QBVHStackItem *__restrict s2,
-                                       QBVHStackItem *__restrict s3,
-                                       QBVHStackItem *__restrict s4)
+ccl_device_inline void qbvh_stack_sort(QBVHStackItem *ccl_restrict s1,
+                                       QBVHStackItem *ccl_restrict s2,
+                                       QBVHStackItem *ccl_restrict s3,
+                                       QBVHStackItem *ccl_restrict s4)
 {
 	if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
 	if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
@@ -53,7 +53,7 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
 
 /* Axis-aligned nodes intersection */
 
-ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
+ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *ccl_restrict kg,
                                                   const ssef& tnear,
                                                   const ssef& tfar,
 #ifdef __KERNEL_AVX2__
@@ -69,7 +69,7 @@ ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
                                                   const int far_y,
                                                   const int far_z,
                                                   const int node_addr,
-                                                  ssef *__restrict dist)
+                                                  ssef *ccl_restrict dist)
 {
 	const int offset = node_addr + 1;
 #ifdef __KERNEL_AVX2__
@@ -104,7 +104,7 @@ ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
 }
 
 ccl_device_inline int qbvh_aligned_node_intersect_robust(
-        KernelGlobals *__restrict kg,
+        KernelGlobals *ccl_restrict kg,
         const ssef& tnear,
         const ssef& tfar,
 #ifdef __KERNEL_AVX2__
@@ -121,7 +121,7 @@ ccl_device_inline int qbvh_aligned_node_intersect_robust(
         const int far_z,
         const int node_addr,
         const float difl,
-        ssef *__restrict dist)
+        ssef *ccl_restrict dist)
 {
 	const int offset = node_addr + 1;
 #ifdef __KERNEL_AVX2__
@@ -152,7 +152,7 @@ ccl_device_inline int qbvh_aligned_node_intersect_robust(
 /* Unaligned nodes intersection */
 
 ccl_device_inline int qbvh_unaligned_node_intersect(
-        KernelGlobals *__restrict kg,
+        KernelGlobals *ccl_restrict kg,
         const ssef& tnear,
         const ssef& tfar,
 #ifdef __KERNEL_AVX2__
@@ -168,7 +168,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
         const int far_y,
         const int far_z,
         const int node_addr,
-        ssef *__restrict dist)
+        ssef *ccl_restrict dist)
 {
 	const int offset = node_addr;
 	const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
@@ -236,7 +236,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect(
 }
 
 ccl_device_inline int qbvh_unaligned_node_intersect_robust(
-        KernelGlobals *__restrict kg,
+        KernelGlobals *ccl_restrict kg,
         const ssef& tnear,
         const ssef& tfar,
 #ifdef __KERNEL_AVX2__
@@ -253,7 +253,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
         const int far_z,
         const int node_addr,
         const float difl,
-        ssef *__restrict dist)
+        ssef *ccl_restrict dist)
 {
 	const int offset = node_addr;
 	const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
@@ -324,7 +324,7 @@ ccl_device_inline int qbvh_unaligned_node_intersect_robust(
  */
 
 ccl_device_inline int qbvh_node_intersect(
-        KernelGlobals *__restrict kg,
+        KernelGlobals *ccl_restrict kg,
         const ssef& tnear,
         const ssef& tfar,
 #ifdef __KERNEL_AVX2__
@@ -340,7 +340,7 @@ ccl_device_inline int qbvh_node_intersect(
         const int far_y,
         const int far_z,
         const int node_addr,
-        ssef *__restrict dist)
+        ssef *ccl_restrict dist)
 {
 	const int offset = node_addr;
 	const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
@@ -377,7 +377,7 @@ ccl_device_inline int qbvh_node_intersect(
 }
 
 ccl_device_inline int qbvh_node_intersect_robust(
-        KernelGlobals *__restrict kg,
+        KernelGlobals *ccl_restrict kg,
         const ssef& tnear,
         const ssef& tfar,
 #ifdef __KERNEL_AVX2__
@@ -394,7 +394,7 @@ ccl_device_inline int qbvh_node_intersect_robust(
         const int far_z,
         const int node_addr,
         const float difl,
-        ssef *__restrict dist)
+        ssef *ccl_restrict dist)
 {
 	const int offset = node_addr;
 	const float4 node = kernel_tex_fetch(__bvh_nodes, offset);
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 4231475..08f6f45 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -42,6 +42,7 @@
 #define ccl_constant
 #define ccl_may_alias
 #define ccl_addr_space
+#define ccl_restrict __restrict__
 
 /* No assert supported for CUDA */
 
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index a570844..8505cb8 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -39,6 +39,7 @@
 #define ccl_global __global
 #define ccl_local __local
 #define ccl_private __private
+#define ccl_restrict restrict
 
 #ifdef __SPLIT_KERNEL__
 #  define ccl_addr_space __global
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 972befa..257c6ad 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -37,6 +37,7 @@
 #define ccl_device_noinline static
 #define ccl_global
 #define ccl_constant
+#define ccl_restrict __restrict
 #define __KERNEL_WITH_SSE_ALIGN__
 
 #if defined(_WIN32) && !defined(FREE_WINDOWS)




More information about the Bf-blender-cvs mailing list