[Bf-blender-cvs] [ceb80600d66] cycles_oneapi: Cycles: Compact float3 for oneAPI backend
Stefan Werner
noreply at git.blender.org
Tue Apr 12 20:33:15 CEST 2022
Commit: ceb80600d661bb1372d170d81fbb66af0237fa0b
Author: Stefan Werner
Date: Thu Apr 7 16:09:15 2022 +0200
Branches: cycles_oneapi
https://developer.blender.org/rBceb80600d661bb1372d170d81fbb66af0237fa0b
Cycles: Compact float3 for oneAPI backend
The oneAPI backend was using 128 bits for float3; now it's 96 bits,
saving memory and maybe giving a bit more speed.
===================================================================
M intern/cycles/kernel/device/oneapi/compat.h
M intern/cycles/util/types_float3.h
M intern/cycles/util/types_float3_impl.h
M intern/cycles/util/types_int4_impl.h
===================================================================
diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h
index eb924f48067..0c0ec827a71 100644
--- a/intern/cycles/kernel/device/oneapi/compat.h
+++ b/intern/cycles/kernel/device/oneapi/compat.h
@@ -156,6 +156,19 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
typedef unsigned char uchar;
using sycl::half;
+typedef struct float3 {
+ float x, y, z;
+} float3;
+
+ccl_always_inline float3 make_float3(float x, float y, float z)
+{
+ return {x, y, z};
+}
+ccl_always_inline float3 make_float3(float x)
+{
+ return {x, x, x};
+}
+
/* math functions */
#define fabsf(x) sycl::fabs((x))
#define copysignf(x, y) sycl::copysign((x), (y))
diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h
index 7e28a62438e..4f51583b20c 100644
--- a/intern/cycles/util/types_float3.h
+++ b/intern/cycles/util/types_float3.h
@@ -10,7 +10,7 @@
CCL_NAMESPACE_BEGIN
-#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
+#if !defined(__KERNEL_GPU__)
struct ccl_try_align(16) float3
{
# ifdef __KERNEL_SSE__
diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h
index c2ef6ab1f07..7fbceafb493 100644
--- a/intern/cycles/util/types_float3_impl.h
+++ b/intern/cycles/util/types_float3_impl.h
@@ -14,7 +14,7 @@
CCL_NAMESPACE_BEGIN
-#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
+#if !defined(__KERNEL_GPU__)
# ifdef __KERNEL_SSE__
__forceinline float3::float3()
{
diff --git a/intern/cycles/util/types_int4_impl.h b/intern/cycles/util/types_int4_impl.h
index 088c51e62bf..11e1ede6705 100644
--- a/intern/cycles/util/types_int4_impl.h
+++ b/intern/cycles/util/types_int4_impl.h
@@ -83,6 +83,8 @@ ccl_device_inline int4 make_int4(const float3 &f)
{
# ifdef __KERNEL_SSE__
int4 a(_mm_cvtps_epi32(f.m128));
+# elif defined(__KERNEL_ONEAPI__)
+ int4 a = {(int)f.x, (int)f.y, (int)f.z, 0};
# else
int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
# endif
More information about the Bf-blender-cvs
mailing list