[Bf-blender-cvs] [6121c28501e] blender-v2.83-release: Fix T75895: Unable to Compile Cycles on NAVI/Linux

Jeroen Bakker noreply at git.blender.org
Thu Apr 30 15:07:18 CEST 2020


Commit: 6121c28501eff722717fb8b777f6004fb6d4e152
Author: Jeroen Bakker
Date:   Thu Apr 30 14:15:10 2020 +0200
Branches: blender-v2.83-release
https://developer.blender.org/rB6121c28501eff722717fb8b777f6004fb6d4e152

Fix T75895: Unable to Compile Cycles on NAVI/Linux

This patch adds compiler hints that prevent unrolling of the
nested for loops in the voronoi node.

Reviewed by: Brecht van Lommel

Differential Revision: https://developer.blender.org/D7574

===================================================================

M	intern/cycles/kernel/kernel_compat_cuda.h
M	intern/cycles/kernel/kernel_compat_opencl.h
M	intern/cycles/kernel/kernel_compat_optix.h
M	intern/cycles/kernel/svm/svm_voronoi.h
M	intern/cycles/util/util_defines.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 3c5a10540d5..4094e173da9 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -71,6 +71,7 @@ __device__ half __float2half(const float f)
 #define ccl_may_alias
 #define ccl_addr_space
 #define ccl_restrict __restrict__
+#define ccl_loop_no_unroll
 /* TODO(sergey): In theory we might use references with CUDA, however
  * performance impact yet to be investigated.
  */
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 4963f1cd196..35dc95ca10d 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -43,6 +43,7 @@
 #define ccl_local __local
 #define ccl_local_param __local
 #define ccl_private __private
+#define ccl_loop_no_unroll __attribute__((opencl_unroll_hint(1)))
 #define ccl_restrict restrict
 #define ccl_ref
 #define ccl_align(n) __attribute__((aligned(n)))
diff --git a/intern/cycles/kernel/kernel_compat_optix.h b/intern/cycles/kernel/kernel_compat_optix.h
index 7068acc3a32..970f5cf864c 100644
--- a/intern/cycles/kernel/kernel_compat_optix.h
+++ b/intern/cycles/kernel/kernel_compat_optix.h
@@ -70,6 +70,7 @@ __device__ half __float2half(const float f)
 #define ccl_private
 #define ccl_may_alias
 #define ccl_addr_space
+#define ccl_loop_no_unroll
 #define ccl_restrict __restrict__
 #define ccl_ref
 #define ccl_align(n) __align__(n)
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 2ad22592eef..f0fc0068fa2 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -684,7 +684,8 @@ ccl_device void voronoi_f1_4d(float4 coord,
   float4 targetPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
   for (int u = -1; u <= 1; u++) {
     for (int k = -1; k <= 1; k++) {
-      for (int j = -1; j <= 1; j++) {
+      ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+      {
         for (int i = -1; i <= 1; i++) {
           float4 cellOffset = make_float4(i, j, k, u);
           float4 pointPosition = cellOffset +
@@ -722,7 +723,8 @@ ccl_device void voronoi_smooth_f1_4d(float4 coord,
   float4 smoothPosition = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
   for (int u = -2; u <= 2; u++) {
     for (int k = -2; k <= 2; k++) {
-      for (int j = -2; j <= 2; j++) {
+      ccl_loop_no_unroll for (int j = -2; j <= 2; j++)
+      {
         for (int i = -2; i <= 2; i++) {
           float4 cellOffset = make_float4(i, j, k, u);
           float4 pointPosition = cellOffset +
@@ -765,7 +767,8 @@ ccl_device void voronoi_f2_4d(float4 coord,
   float4 positionF2 = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
   for (int u = -1; u <= 1; u++) {
     for (int k = -1; k <= 1; k++) {
-      for (int j = -1; j <= 1; j++) {
+      ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+      {
         for (int i = -1; i <= 1; i++) {
           float4 cellOffset = make_float4(i, j, k, u);
           float4 pointPosition = cellOffset +
@@ -803,7 +806,8 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
   float minDistance = 8.0f;
   for (int u = -1; u <= 1; u++) {
     for (int k = -1; k <= 1; k++) {
-      for (int j = -1; j <= 1; j++) {
+      ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+      {
         for (int i = -1; i <= 1; i++) {
           float4 cellOffset = make_float4(i, j, k, u);
           float4 vectorToPoint = cellOffset +
@@ -822,7 +826,8 @@ ccl_device void voronoi_distance_to_edge_4d(float4 coord, float randomness, floa
   minDistance = 8.0f;
   for (int u = -1; u <= 1; u++) {
     for (int k = -1; k <= 1; k++) {
-      for (int j = -1; j <= 1; j++) {
+      ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+      {
         for (int i = -1; i <= 1; i++) {
           float4 cellOffset = make_float4(i, j, k, u);
           float4 vectorToPoint = cellOffset +
@@ -851,7 +856,8 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
   float minDistance = 8.0f;
   for (int u = -1; u <= 1; u++) {
     for (int k = -1; k <= 1; k++) {
-      for (int j = -1; j <= 1; j++) {
+      ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+      {
         for (int i = -1; i <= 1; i++) {
           float4 cellOffset = make_float4(i, j, k, u);
           float4 pointPosition = cellOffset +
@@ -871,7 +877,8 @@ ccl_device void voronoi_n_sphere_radius_4d(float4 coord, float randomness, float
   float4 closestPointToClosestPoint = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
   for (int u = -1; u <= 1; u++) {
     for (int k = -1; k <= 1; k++) {
-      for (int j = -1; j <= 1; j++) {
+      ccl_loop_no_unroll for (int j = -1; j <= 1; j++)
+      {
         for (int i = -1; i <= 1; i++) {
           if (i == 0 && j == 0 && k == 0 && u == 0) {
             continue;
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 24a20a969ab..e8e414587fb 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -45,6 +45,7 @@
 #  define ccl_restrict __restrict
 #  define ccl_ref &
 #  define ccl_optional_struct_init
+#  define ccl_loop_no_unroll
 #  define __KERNEL_WITH_SSE_ALIGN__
 
 #  if defined(_WIN32) && !defined(FREE_WINDOWS)



More information about the Bf-blender-cvs mailing list