[Bf-blender-cvs] [79ab76e156d] master: Cleanup: simplifications and consistency for vector types

Thu Jul 28 21:34:40 CEST 2022

Commit: 79ab76e156d4bde937335be784cdf220294600d5
Author: Brecht Van Lommel
Date:   Thu Jul 28 19:57:30 2022 +0200
Branches: master
https://developer.blender.org/rB79ab76e156d4bde937335be784cdf220294600d5

Cleanup: simplifications and consistency for vector types

* OneAPI: remove separate float3 definition
* OneAPI: disable operator[] to match other GPUs
* OneAPI: make int3 compact to match other GPUs
* Use #pragma once
* Add __KERNEL_NATIVE_VECTOR_TYPES__ to simplify checks
* Remove unused vector3

===================================================================

M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/device/cpu/compat.h
M	intern/cycles/kernel/device/oneapi/compat.h
M	intern/cycles/util/CMakeLists.txt
M	intern/cycles/util/types.h
M	intern/cycles/util/types_float2.h
M	intern/cycles/util/types_float2_impl.h
M	intern/cycles/util/types_float3.h
M	intern/cycles/util/types_float3_impl.h
M	intern/cycles/util/types_float4.h
M	intern/cycles/util/types_float4_impl.h
M	intern/cycles/util/types_float8.h
M	intern/cycles/util/types_float8_impl.h
M	intern/cycles/util/types_int2.h
M	intern/cycles/util/types_int2_impl.h
M	intern/cycles/util/types_int3.h
M	intern/cycles/util/types_int3_impl.h
M	intern/cycles/util/types_int4.h
M	intern/cycles/util/types_int4_impl.h
M	intern/cycles/util/types_uchar2.h
M	intern/cycles/util/types_uchar2_impl.h
M	intern/cycles/util/types_uchar3.h
M	intern/cycles/util/types_uchar3_impl.h
M	intern/cycles/util/types_uchar4.h
M	intern/cycles/util/types_uchar4_impl.h
M	intern/cycles/util/types_uint2.h
M	intern/cycles/util/types_uint2_impl.h
M	intern/cycles/util/types_uint3.h
M	intern/cycles/util/types_uint3_impl.h
M	intern/cycles/util/types_uint4.h
M	intern/cycles/util/types_uint4_impl.h
M	intern/cycles/util/types_ushort4.h
D	intern/cycles/util/types_vector3.h
D	intern/cycles/util/types_vector3_impl.h

===================================================================

diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 8ecdac6ee27..dfcd75a135e 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -355,8 +355,6 @@ set(SRC_UTIL_HEADERS
   ../util/types_uint4.h
   ../util/types_uint4_impl.h
   ../util/types_ushort4.h
-  ../util/types_vector3.h
-  ../util/types_vector3_impl.h
 )
 
 set(LIB
diff --git a/intern/cycles/kernel/device/cpu/compat.h b/intern/cycles/kernel/device/cpu/compat.h
index 631e55e0d42..1e3e790ca1f 100644
--- a/intern/cycles/kernel/device/cpu/compat.h
+++ b/intern/cycles/kernel/device/cpu/compat.h
@@ -33,38 +33,4 @@ CCL_NAMESPACE_BEGIN
 
 #define kernel_assert(cond) assert(cond)
 
-/* Macros to handle different memory storage on different devices */
-
-#ifdef __KERNEL_SSE2__
-typedef vector3<sseb> sse3b;
-typedef vector3<ssef> sse3f;
-typedef vector3<ssei> sse3i;
-
-ccl_device_inline void print_sse3b(const char *label, sse3b &a)
-{
-  print_sseb(label, a.x);
-  print_sseb(label, a.y);
-  print_sseb(label, a.z);
-}
-
-ccl_device_inline void print_sse3f(const char *label, sse3f &a)
-{
-  print_ssef(label, a.x);
-  print_ssef(label, a.y);
-  print_ssef(label, a.z);
-}
-
-ccl_device_inline void print_sse3i(const char *label, sse3i &a)
-{
-  print_ssei(label, a.x);
-  print_ssei(label, a.y);
-  print_ssei(label, a.z);
-}
-
-#  if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
-typedef vector3<avxf> avx3f;
-#  endif
-
-#endif
-
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index 1b25259bcf5..ccd141aa892 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -149,25 +149,13 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
 /* clang-format on */
 
 /* Types */
+
 /* It's not possible to use sycl types like sycl::float3, sycl::int3, etc
- * because these types have different interfaces from blender version */
+ * because these types have different interfaces from the Blender versions. */
 
 using uchar = unsigned char;
 using sycl::half;
 
-struct float3 {
-  float x, y, z;
-};
-
-ccl_always_inline float3 make_float3(float x, float y, float z)
-{
-  return {x, y, z};
-}
-ccl_always_inline float3 make_float3(float x)
-{
-  return {x, x, x};
-}
-
 /* math functions */
 #define fabsf(x) sycl::fabs((x))
 #define copysignf(x, y) sycl::copysign((x), (y))
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 9bc9f00e142..b33bad79e6c 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -129,8 +129,6 @@ set(SRC_HEADERS
   types_uint4.h
   types_uint4_impl.h
   types_ushort4.h
-  types_vector3.h
-  types_vector3_impl.h
   unique_ptr.h
   vector.h
   version.h
diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h
index 031c2f7c4c1..26031d9e0fd 100644
--- a/intern/cycles/util/types.h
+++ b/intern/cycles/util/types.h
@@ -12,6 +12,7 @@
 
 #if !defined(__KERNEL_GPU__)
 #  include <stdint.h>
+#  include <stdio.h>
 #endif
 
 #include "util/defines.h"
@@ -70,6 +71,12 @@ ccl_device_inline bool is_power_of_two(size_t x)
 
 CCL_NAMESPACE_END
 
+/* Most GPU APIs have matching native vector types, so we only need to implement them for
+ * CPU and oneAPI. */
+#if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__)
+#  define __KERNEL_NATIVE_VECTOR_TYPES__
+#endif
+
 /* Vectorized types declaration. */
 #include "util/types_uchar2.h"
 #include "util/types_uchar3.h"
@@ -90,8 +97,6 @@ CCL_NAMESPACE_END
 #include "util/types_float4.h"
 #include "util/types_float8.h"
 
-#include "util/types_vector3.h"
-
 /* Vectorized types implementation. */
 #include "util/types_uchar2_impl.h"
 #include "util/types_uchar3_impl.h"
@@ -110,8 +115,6 @@ CCL_NAMESPACE_END
 #include "util/types_float4_impl.h"
 #include "util/types_float8_impl.h"
 
-#include "util/types_vector3_impl.h"
-
 /* SSE types. */
 #ifndef __KERNEL_GPU__
 #  include "util/sseb.h"
diff --git a/intern/cycles/util/types_float2.h b/intern/cycles/util/types_float2.h
index 07b9ec0986b..f37aa1b4ad2 100644
--- a/intern/cycles/util/types_float2.h
+++ b/intern/cycles/util/types_float2.h
@@ -1,8 +1,7 @@
 /* SPDX-License-Identifier: Apache-2.0
  * Copyright 2011-2022 Blender Foundation */
 
-#ifndef __UTIL_TYPES_FLOAT2_H__
-#define __UTIL_TYPES_FLOAT2_H__
+#pragma once
 
 #ifndef __UTIL_TYPES_H__
 #  error "Do not include this file directly, include util/types.h instead."
@@ -10,18 +9,18 @@
 
 CCL_NAMESPACE_BEGIN
 
-#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
+#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
 struct float2 {
   float x, y;
 
+#  ifndef __KERNEL_GPU__
   __forceinline float operator[](int i) const;
   __forceinline float &operator[](int i);
+#  endif
 };
 
 ccl_device_inline float2 make_float2(float x, float y);
 ccl_device_inline void print_float2(const char *label, const float2 &a);
-#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
 CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_FLOAT2_H__ */
diff --git a/intern/cycles/util/types_float2_impl.h b/intern/cycles/util/types_float2_impl.h
index 45fc90c52bd..9d1820fe17d 100644
--- a/intern/cycles/util/types_float2_impl.h
+++ b/intern/cycles/util/types_float2_impl.h
@@ -1,20 +1,16 @@
 /* SPDX-License-Identifier: Apache-2.0
  * Copyright 2011-2022 Blender Foundation */
 
-#ifndef __UTIL_TYPES_FLOAT2_IMPL_H__
-#define __UTIL_TYPES_FLOAT2_IMPL_H__
+#pragma once
 
 #ifndef __UTIL_TYPES_H__
 #  error "Do not include this file directly, include util/types.h instead."
 #endif
 
-#ifndef __KERNEL_GPU__
-#  include <cstdio>
-#endif
-
 CCL_NAMESPACE_BEGIN
 
-#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
+#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
+#  ifndef __KERNEL_GPU__
 __forceinline float float2::operator[](int i) const
 {
   util_assert(i >= 0);
@@ -28,6 +24,7 @@ __forceinline float &float2::operator[](int i)
   util_assert(i < 2);
   return *(&x + i);
 }
+#  endif
 
 ccl_device_inline float2 make_float2(float x, float y)
 {
@@ -39,8 +36,6 @@ ccl_device_inline void print_float2(const char *label, const float2 &a)
 {
   printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y);
 }
-#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
 CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_FLOAT2_IMPL_H__ */
diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h
index c7900acaa69..4e43e928657 100644
--- a/intern/cycles/util/types_float3.h
+++ b/intern/cycles/util/types_float3.h
@@ -1,8 +1,7 @@
 /* SPDX-License-Identifier: Apache-2.0
  * Copyright 2011-2022 Blender Foundation */
 
-#ifndef __UTIL_TYPES_FLOAT3_H__
-#define __UTIL_TYPES_FLOAT3_H__
+#pragma once
 
 #ifndef __UTIL_TYPES_H__
 #  error "Do not include this file directly, include util/types.h instead."
@@ -10,17 +9,28 @@
 
 CCL_NAMESPACE_BEGIN
 
-#if !defined(__KERNEL_GPU__)
+#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
 struct ccl_try_align(16) float3
 {
-#  ifdef __KERNEL_SSE__
+#  ifdef __KERNEL_GPU__
+  /* Compact structure for GPU. */
+  float x, y, z;
+#  else
+  /* SIMD aligned structure for CPU. */
+#    ifdef __KERNEL_SSE__
   union {
     __m128 m128;
     struct {
       float x, y, z, w;
     };
   };
+#    else
+  float x, y, z, w;
+#    endif
+#  endif
 
+#  ifdef __KERNEL_SSE__
+  /* Convenient constructors and operators for SIMD, otherwise default is enough. */
   __forceinline float3();
   __forceinline float3(const float3 &a);
   __forceinline explicit float3(const __m128 &a);
@@ -29,18 +39,18 @@ struct ccl_try_align(16) float3
   __forceinline operator __m128 &();
 
   __forceinline float3 &operator=(const float3 &a);
-#  else  /* __KERNEL_SSE__ */
-  float x, y, z, w;
-#  endif /* __KERNEL_SSE__ */
+#  endif
 
+#  ifndef __KERNEL_GPU__
   __forceinline float operator[](int i) const;
   __forceinline float &operator[](int i);
+#  endif
 };
 
 ccl_device_inline float3 make_float3(float f);
 ccl_device_inline float3 make_float3(float x, float y, float z);
 ccl_device_inline void print_float3(const char *label, const float3 &a);
-#endif /* !defined(__KERNEL_GPU__) */
+#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */
 
 /* Smaller float3 for storage. For math operations this must be converted to float3, so that on the
  * CPU SIMD instructions can be used. */
@@ -78,5 +88,3 @@ struct packed_float3 {
 static_assert(sizeof(packed_float3) == 12, "packed_float3 expected to be exactly 12 bytes");
 
 CCL_NAMESPACE_END
-
-#endif /* __UTIL_TYPES_FLOAT3_H__ */
diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h
index 2e6e864c8ea..cbd3f76dae4 100644
--- a/intern/cycles/util/types_float3_impl.h
+++ b/intern/cycles/util/types_float3_impl.h
@@ -1,20 +1,15 @@
 /* SPDX-License-Identifier: Apache-2.0
  * Copyright 2011-2022 Blender Foundation */
 
-#ifndef __UTIL_TYPES_FLOAT3_IMPL_H__
-#define __UTIL_TYPES_FLOAT3_IMPL_H__
+#pragma once
 
 #ifndef __UTIL_TYPES_H__
 #  error "Do not include this file directly, include util/types.h instead."
 #endif
 
-#ifndef __KERNEL_GPU__
-#  include <cstdio>
-#endif
-
 CCL_NAMESPACE_BEGIN
 
-#if !defined(__KERNEL_GPU__)
+#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
 #  ifdef __KERNEL_SSE__
 __forceinline float3::float3()
 {
@@ -45,6 +40,7 @@ __forceinline float3 &float3::operator=(const float3 &a)
 }
 #  endif /* __KERNEL_SSE__ */
 
+#  ifndef __KERNEL_GPU__
 __forceinline float float3::operator[](int i) const
 {
   util_assert(i >= 0);
@@ -58,23 +54,32 @@ __forceinline float &float3::operator[](int i)
   util_assert(i < 3);
   return *(&x + i);
 }
+#  endif
 
 ccl_device_inline float3 make_float3(float f)
 {
-#  ifdef __KERNEL_SSE__
-  float3 a(_mm_set1_ps(f));
+#  ifdef __KERNEL_GPU__
+  float3 a = {f, f, f};
 #  else
+#    ifdef __KERNEL_SSE__
+  float3 a(_mm_set1_ps(f));
+#    else
   float3 a = {f, f, f, f};
+#    endif
 #  endif
   return a;
 }
 
 ccl_device_inline float3 make_float3(float x, float y, float z)
 {
-#  ifdef __KERNEL_SSE__
-  float3 a(_mm_set_ps(0.0f, z, y, x));
+#  ifdef __KERNEL_GPU__
+  float3 a = {x, y, z};
 #  else
+#    ifdef __KERNE

@@ Diff output truncated at 10240 characters. @@