[Bf-blender-cvs] [ceb80600d66] cycles_oneapi: Cycles: Compact float3 for oneAPI backend

Stefan Werner noreply at git.blender.org
Tue Apr 12 20:33:15 CEST 2022


Commit: ceb80600d661bb1372d170d81fbb66af0237fa0b
Author: Stefan Werner
Date:   Thu Apr 7 16:09:15 2022 +0200
Branches: cycles_oneapi
https://developer.blender.org/rBceb80600d661bb1372d170d81fbb66af0237fa0b

Cycles: Compact float3 for oneAPI backend

The oneAPI backend was using 128 bits (16 bytes) for float3; now it uses
96 bits (12 bytes). This saves memory and may give a bit more speed.

===================================================================

M	intern/cycles/kernel/device/oneapi/compat.h
M	intern/cycles/util/types_float3.h
M	intern/cycles/util/types_float3_impl.h
M	intern/cycles/util/types_int4_impl.h

===================================================================

diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index eb924f48067..0c0ec827a71 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -156,6 +156,19 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
 typedef unsigned char uchar;
 using sycl::half;
 
+typedef struct float3 {
+  float x, y, z;
+} float3;
+
+ccl_always_inline float3 make_float3(float x, float y, float z)
+{
+  return {x, y, z};
+}
+ccl_always_inline float3 make_float3(float x)
+{
+  return {x, x, x};
+}
+
 /* math functions */
 #define fabsf(x) sycl::fabs((x))
 #define copysignf(x, y) sycl::copysign((x), (y))
diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h
index 7e28a62438e..4f51583b20c 100644
--- a/intern/cycles/util/types_float3.h
+++ b/intern/cycles/util/types_float3.h
@@ -10,7 +10,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
+#if !defined(__KERNEL_GPU__)
 struct ccl_try_align(16) float3
 {
 #  ifdef __KERNEL_SSE__
diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h
index c2ef6ab1f07..7fbceafb493 100644
--- a/intern/cycles/util/types_float3_impl.h
+++ b/intern/cycles/util/types_float3_impl.h
@@ -14,7 +14,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
+#if !defined(__KERNEL_GPU__)
 #  ifdef __KERNEL_SSE__
 __forceinline float3::float3()
 {
diff --git a/intern/cycles/util/types_int4_impl.h b/intern/cycles/util/types_int4_impl.h
index 088c51e62bf..11e1ede6705 100644
--- a/intern/cycles/util/types_int4_impl.h
+++ b/intern/cycles/util/types_int4_impl.h
@@ -83,6 +83,8 @@ ccl_device_inline int4 make_int4(const float3 &f)
 {
 #  ifdef __KERNEL_SSE__
   int4 a(_mm_cvtps_epi32(f.m128));
+#  elif defined(__KERNEL_ONEAPI__)
+  int4 a = {(int)f.x, (int)f.y, (int)f.z, 0};
 #  else
   int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
 #  endif



More information about the Bf-blender-cvs mailing list