[Bf-blender-cvs] [1988665c3c3] master: Cleanup: make vector types make/print functions consistent between CPU and GPU

Tue Aug 9 16:07:27 CEST 2022

Commit: 1988665c3c310cbfb66b4e499408fa48c522545c
Author: Brecht Van Lommel
Date:   Fri Jul 29 14:40:50 2022 +0200
Branches: master
https://developer.blender.org/rB1988665c3c310cbfb66b4e499408fa48c522545c

Cleanup: make vector types make/print functions consistent between CPU and GPU

Now all the same ones are available on CPU and GPU, which was previously not
possible due to lack of operator overloading in OpenCL. Print functions are
no-ops on some GPUs.

Ref D15535

===================================================================

M	intern/cycles/kernel/device/metal/compat.h
M	intern/cycles/util/types.h
M	intern/cycles/util/types_float2.h
M	intern/cycles/util/types_float2_impl.h
M	intern/cycles/util/types_float3.h
M	intern/cycles/util/types_float3_impl.h
M	intern/cycles/util/types_float4.h
M	intern/cycles/util/types_float4_impl.h
M	intern/cycles/util/types_int3.h
M	intern/cycles/util/types_int3_impl.h
M	intern/cycles/util/types_int4.h
M	intern/cycles/util/types_int4_impl.h

===================================================================

diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h
index 80ee8ef5b57..b20cfca9a9c 100644
--- a/intern/cycles/kernel/device/metal/compat.h
+++ b/intern/cycles/kernel/device/metal/compat.h
@@ -189,35 +189,46 @@ void kernel_gpu_##name::run(thread MetalKernelContext& context, \
   } volume_write_lambda_pass{kg, this, state};
 
 /* make_type definitions with Metal style element initializers */
-#ifdef make_float2
-#  undef make_float2
-#endif
-#ifdef make_float3
-#  undef make_float3
-#endif
-#ifdef make_float4
-#  undef make_float4
-#endif
-#ifdef make_int2
-#  undef make_int2
-#endif
-#ifdef make_int3
-#  undef make_int3
-#endif
-#ifdef make_int4
-#  undef make_int4
-#endif
-#ifdef make_uchar4
-#  undef make_uchar4
-#endif
-
-#define make_float2(x, y) float2(x, y)
-#define make_float3(x, y, z) float3(x, y, z)
-#define make_float4(x, y, z, w) float4(x, y, z, w)
-#define make_int2(x, y) int2(x, y)
-#define make_int3(x, y, z) int3(x, y, z)
-#define make_int4(x, y, z, w) int4(x, y, z, w)
-#define make_uchar4(x, y, z, w) uchar4(x, y, z, w)
+ccl_device_forceinline float2 make_float2(const float x, const float y)
+{
+  return float2(x, y);
+}
+
+ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
+{
+  return float3(x, y, z);
+}
+
+ccl_device_forceinline float4 make_float4(const float x,
+                                          const float y,
+                                          const float z,
+                                          const float w)
+{
+  return float4(x, y, z, w);
+}
+
+ccl_device_forceinline int2 make_int2(const int x, const int y)
+{
+  return int2(x, y);
+}
+
+ccl_device_forceinline int3 make_int3(const int x, const int y, const int z)
+{
+  return int3(x, y, z);
+}
+
+ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
+{
+  return int4(x, y, z, w);
+}
+
+ccl_device_forceinline uchar4 make_uchar4(const uchar x,
+                                          const uchar y,
+                                          const uchar z,
+                                          const uchar w)
+{
+  return uchar4(x, y, z, w);
+}
 
 /* Math functions */
 
diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h
index 26031d9e0fd..d0d8c2941d7 100644
--- a/intern/cycles/util/types.h
+++ b/intern/cycles/util/types.h
@@ -71,6 +71,18 @@ ccl_device_inline bool is_power_of_two(size_t x)
 
 CCL_NAMESPACE_END
 
+/* Device side printf only tested on CUDA, may work on more GPU devices. */
+#if !defined(__KERNEL_GPU__) || defined(__KERNEL_CUDA__)
+#  define __KERNEL_PRINTF__
+#endif
+
+ccl_device_inline void print_float(ccl_private const char *label, const float a)
+{
+#ifdef __KERNEL_PRINTF__
+  printf("%s: %.8f\n", label, (double)a);
+#endif
+}
+
 /* Most GPU APIs matching native vector types, so we only need to implement them for
  * CPU and oneAPI. */
 #if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__)
diff --git a/intern/cycles/util/types_float2.h b/intern/cycles/util/types_float2.h
index f37aa1b4ad2..ea510ef832c 100644
--- a/intern/cycles/util/types_float2.h
+++ b/intern/cycles/util/types_float2.h
@@ -20,7 +20,8 @@ struct float2 {
 };
 
 ccl_device_inline float2 make_float2(float x, float y);
-ccl_device_inline void print_float2(const char *label, const float2 &a);
 #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
+ccl_device_inline void print_float2(ccl_private const char *label, const float2 a);
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float2_impl.h b/intern/cycles/util/types_float2_impl.h
index 9d1820fe17d..7ba7dee2e3a 100644
--- a/intern/cycles/util/types_float2_impl.h
+++ b/intern/cycles/util/types_float2_impl.h
@@ -31,11 +31,13 @@ ccl_device_inline float2 make_float2(float x, float y)
   float2 a = {x, y};
   return a;
 }
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
-ccl_device_inline void print_float2(const char *label, const float2 &a)
+ccl_device_inline void print_float2(ccl_private const char *label, const float2 a)
 {
+#ifdef __KERNEL_PRINTF__
   printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y);
+#endif
 }
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h
index 4e43e928657..87c6b1d3654 100644
--- a/intern/cycles/util/types_float3.h
+++ b/intern/cycles/util/types_float3.h
@@ -47,11 +47,12 @@ struct ccl_try_align(16) float3
 #  endif
 };
 
-ccl_device_inline float3 make_float3(float f);
 ccl_device_inline float3 make_float3(float x, float y, float z);
-ccl_device_inline void print_float3(const char *label, const float3 &a);
 #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
+ccl_device_inline float3 make_float3(float f);
+ccl_device_inline void print_float3(ccl_private const char *label, const float3 a);
+
 /* Smaller float3 for storage. For math operations this must be converted to float3, so that on the
  * CPU SIMD instructions can be used. */
 #if defined(__KERNEL_METAL__)
diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h
index cbd3f76dae4..da76ab2ab2a 100644
--- a/intern/cycles/util/types_float3_impl.h
+++ b/intern/cycles/util/types_float3_impl.h
@@ -56,38 +56,35 @@ __forceinline float &float3::operator[](int i)
 }
 #  endif
 
-ccl_device_inline float3 make_float3(float f)
+ccl_device_inline float3 make_float3(float x, float y, float z)
 {
-#  ifdef __KERNEL_GPU__
-  float3 a = {f, f, f};
+#  if defined(__KERNEL_GPU__)
+  return {x, y, z};
+#  elif defined(__KERNEL_SSE__)
+  return float3(_mm_set_ps(0.0f, z, y, x));
 #  else
-#    ifdef __KERNEL_SSE__
-  float3 a(_mm_set1_ps(f));
-#    else
-  float3 a = {f, f, f, f};
-#    endif
+  return {x, y, z, 0.0f};
 #  endif
-  return a;
 }
 
-ccl_device_inline float3 make_float3(float x, float y, float z)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline float3 make_float3(float f)
 {
-#  ifdef __KERNEL_GPU__
-  float3 a = {x, y, z};
-#  else
-#    ifdef __KERNEL_SSE__
-  float3 a(_mm_set_ps(0.0f, z, y, x));
-#    else
-  float3 a = {x, y, z, 0.0f};
-#    endif
-#  endif
-  return a;
+#if defined(__KERNEL_GPU__)
+  return make_float3(f, f, f);
+#elif defined(__KERNEL_SSE__)
+  return float3(_mm_set1_ps(f));
+#else
+  return {f, f, f, f};
+#endif
 }
 
-ccl_device_inline void print_float3(const char *label, const float3 &a)
+ccl_device_inline void print_float3(ccl_private const char *label, const float3 a)
 {
+#ifdef __KERNEL_PRINTF__
   printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z);
+#endif
 }
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float4.h b/intern/cycles/util/types_float4.h
index 6b301c47362..a347cfce9a1 100644
--- a/intern/cycles/util/types_float4.h
+++ b/intern/cycles/util/types_float4.h
@@ -40,10 +40,11 @@ struct ccl_try_align(16) float4
 #  endif
 };
 
-ccl_device_inline float4 make_float4(float f);
 ccl_device_inline float4 make_float4(float x, float y, float z, float w);
-ccl_device_inline float4 make_float4(const int4 &i);
-ccl_device_inline void print_float4(const char *label, const float4 &a);
 #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
+ccl_device_inline float4 make_float4(float f);
+ccl_device_inline float4 make_float4(const int4 i);
+ccl_device_inline void print_float4(ccl_private const char *label, const float4 a);
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float4_impl.h b/intern/cycles/util/types_float4_impl.h
index 77b4fbff788..420d9316926 100644
--- a/intern/cycles/util/types_float4_impl.h
+++ b/intern/cycles/util/types_float4_impl.h
@@ -52,40 +52,40 @@ __forceinline float &float4::operator[](int i)
 }
 #  endif
 
-ccl_device_inline float4 make_float4(float f)
+ccl_device_inline float4 make_float4(float x, float y, float z, float w)
 {
 #  ifdef __KERNEL_SSE__
-  float4 a(_mm_set1_ps(f));
+  return float4(_mm_set_ps(w, z, y, x));
 #  else
-  float4 a = {f, f, f, f};
+  return {x, y, z, w};
 #  endif
-  return a;
 }
 
-ccl_device_inline float4 make_float4(float x, float y, float z, float w)
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
+
+ccl_device_inline float4 make_float4(float f)
 {
-#  ifdef __KERNEL_SSE__
-  float4 a(_mm_set_ps(w, z, y, x));
-#  else
-  float4 a = {x, y, z, w};
-#  endif
-  return a;
+#ifdef __KERNEL_SSE__
+  return float4(_mm_set1_ps(f));
+#else
+  return make_float4(f, f, f, f);
+#endif
 }
 
-ccl_device_inline float4 make_float4(const int4 &i)
+ccl_device_inline float4 make_float4(const int4 i)
 {
-#  ifdef __KERNEL_SSE__
-  float4 a(_mm_cvtepi32_ps(i.m128));
-#  else
-  float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
-#  endif
-  return a;
+#ifdef __KERNEL_SSE__
+  return float4(_mm_cvtepi32_ps(i.m128));
+#else
+  return make_float4((float)i.x, (float)i.y, (float)i.z, (float)i.w);
+#endif
 }
 
-ccl_device_inline void print_float4(const char *label, const float4 &a)
+ccl_device_inline void print_float4(ccl_private const char *label, const float4 a)
 {
+#ifdef __KERNEL_PRINTF__
   printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w);
+#endif
 }
-#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int3.h b/intern/cycles/util/types_int3.h
index 3196b158ee9..e059ddd3660 100644
--- a/intern/cycles/util/types_int3.h
+++ b/intern/cycles/util/types_int3.h
@@ -44,9 +44,10 @@ struct ccl_try_align(16) int3
 #  endif
 };
 
-ccl_device_inline int3 make_int3(int i);
 ccl_device_inline int3 make_int3(int x, int y, int z);
-ccl_device_inline void print_int3(const char *label, const int3 &a);
 #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
+ccl_device_inline int3 make_int3(int i);
+ccl_device_inline void print_int3(ccl_private const char *label, const int3 a);
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_int3_impl.h b/intern/cycles/util/types_int3_impl.h
index abc0f4e4309..830dfa3c658 100644
--- a/intern/cycles/util/types_int3_impl.h
+++ b/intern/cycles/util/types_int3_impl.h
@@ -56,38 +56,35 @@ __forceinline int &int3::operator[](int i)
 }
 #  endif
 
-ccl_device_inline int3 make_int3(int i)
+ccl_device_inline int3 

@@ Diff output truncated at 10240 characters. @@