[Bf-blender-cvs] [46976043314] master: Cleanup: use float3 SSE instead of ssef for voronoi texture.

Brecht Van Lommel noreply at git.blender.org
Sun Jul 15 00:46:46 CEST 2018


Commit: 4697604331482c394c8a148c54a8e942120b634f
Author: Brecht Van Lommel
Date:   Sat Jul 14 15:38:58 2018 +0200
Branches: master
https://developer.blender.org/rB4697604331482c394c8a148c54a8e942120b634f

Cleanup: use float3 SSE instead of ssef for voronoi texture.

===================================================================

M	intern/cycles/kernel/svm/svm_noise.h
M	intern/cycles/kernel/svm/svm_voronoi.h
M	intern/cycles/util/util_math.h
M	intern/cycles/util/util_math_float3.h
M	intern/cycles/util/util_math_int3.h

===================================================================

diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 38074f0faff..8c425ecf326 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -32,12 +32,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-#ifndef __KERNEL_SSE2__
-ccl_device int quick_floor(float x)
-{
-	return float_to_int(x) - ((x < 0) ? 1 : 0);
-}
-#else
+#ifdef __KERNEL_SSE2__
 ccl_device_inline ssei quick_floor_sse(const ssef& x)
 {
 	ssei b = truncatei(x);
@@ -46,18 +41,6 @@ ccl_device_inline ssei quick_floor_sse(const ssef& x)
 }
 #endif
 
-#ifndef __KERNEL_SSE2__
-ccl_device float bits_to_01(uint bits)
-{
-	return bits * (1.0f/(float)0xFFFFFFFF);
-}
-#else
-ccl_device_inline ssef bits_to_01_sse(const ssei& bits)
-{
-	return uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF);
-}
-#endif
-
 ccl_device uint hash(uint kx, uint ky, uint kz)
 {
 	// define some handy macros
@@ -129,7 +112,7 @@ ccl_device uint phash(int kx, int ky, int kz, int3 p)
 #ifndef __KERNEL_SSE2__
 ccl_device float floorfrac(float x, int* i)
 {
-	*i = quick_floor(x);
+	*i = quick_floor_to_int(x);
 	return x - *i;
 }
 #else
@@ -304,33 +287,27 @@ ccl_device float snoise(float3 p)
 }
 
 /* cell noise */
-#ifndef __KERNEL_SSE2__
-ccl_device_noinline float cellnoise(float3 p)
+ccl_device float cellnoise(float3 p)
 {
-	uint ix = quick_floor(p.x);
-	uint iy = quick_floor(p.y);
-	uint iz = quick_floor(p.z);
-
-	return bits_to_01(hash(ix, iy, iz));
+	int3 ip = quick_floor_to_int3(p);
+	return bits_to_01(hash(ip.x, ip.y, ip.z));
 }
 
-ccl_device float3 cellnoise_color(float3 p)
+ccl_device float3 cellnoise3(float3 p)
 {
-	float r = cellnoise(p);
-	float g = cellnoise(make_float3(p.y, p.x, p.z));
-	float b = cellnoise(make_float3(p.y, p.z, p.x));
-
+	int3 ip = quick_floor_to_int3(p);
+#ifndef __KERNEL_SSE__
+	float r = bits_to_01(hash(ip.x, ip.y, ip.z));
+	float g = bits_to_01(hash(ip.y, ip.x, ip.z));
+	float b = bits_to_01(hash(ip.y, ip.z, ip.x));
 	return make_float3(r, g, b);
-}
 #else
-ccl_device ssef cellnoise_color(const ssef& p)
-{
-	ssei ip = quick_floor_sse(p);
-	ssei ip_yxz = shuffle<1, 0, 2, 3>(ip);
-	ssei ip_xyy = shuffle<0, 1, 1, 3>(ip);
-	ssei ip_zzx = shuffle<2, 2, 0, 3>(ip);
-	return bits_to_01_sse(hash_sse(ip_xyy, ip_yxz, ip_zzx));
-}
+	ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
+	ssei ip_xyy = shuffle<0, 1, 1, 3>(ssei(ip.m128));
+	ssei ip_zzx = shuffle<2, 2, 0, 3>(ssei(ip.m128));
+	ssei bits = hash_sse(ip_xyy, ip_yxz, ip_zzx);
+	return float3(uint32_to_float(bits) * ssef(1.0f/(float)0xFFFFFFFF));
 #endif
+}
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 9bfb182544b..5d0b8a2a406 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -23,34 +23,19 @@ ccl_device float voronoi_F1_distance(float3 p)
 	/* returns squared distance in da */
 	float da = 1e10f;
 
-#ifndef __KERNEL_SSE2__
-	int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
+	int3 xyzi = quick_floor_to_int3(p);
 
 	for(int xx = -1; xx <= 1; xx++) {
 		for(int yy = -1; yy <= 1; yy++) {
 			for(int zz = -1; zz <= 1; zz++) {
-				float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
-				float3 vp = ip + cellnoise_color(ip);
+				int3 ip = xyzi + make_int3(xx, yy, zz);
+				float3 fp = make_float3(ip.x, ip.y, ip.z);
+				float3 vp = fp + cellnoise3(fp);
 				float d = len_squared(p - vp);
 				da = min(d, da);
 			}
 		}
 	}
-#else
-	ssef vec_p = load4f(p);
-	ssei xyzi = quick_floor_sse(vec_p);
-
-	for(int xx = -1; xx <= 1; xx++) {
-		for(int yy = -1; yy <= 1; yy++) {
-			for(int zz = -1; zz <= 1; zz++) {
-				ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
-				ssef vp = ip + cellnoise_color(ip);
-				float d = len_squared<1, 1, 1, 0>(vec_p - vp);
-				da = min(d, da);
-			}
-		}
-	}
-#endif
 
 	return da;
 }
@@ -59,37 +44,17 @@ ccl_device float3 voronoi_F1_color(float3 p)
 {
 	/* returns color of the nearest point */
 	float da = 1e10f;
-
-#ifndef __KERNEL_SSE2__
 	float3 pa;
-	int ix = floor_to_int(p.x), iy = floor_to_int(p.y), iz = floor_to_int(p.z);
 
-	for(int xx = -1; xx <= 1; xx++) {
-		for(int yy = -1; yy <= 1; yy++) {
-			for(int zz = -1; zz <= 1; zz++) {
-				float3 ip = make_float3(ix + xx, iy + yy, iz + zz);
-				float3 vp = ip + cellnoise_color(ip);
-				float d = len_squared(p - vp);
-
-				if(d < da) {
-					da = d;
-					pa = vp;
-				}
-			}
-		}
-	}
-
-	return cellnoise_color(pa);
-#else
-	ssef pa, vec_p = load4f(p);
-	ssei xyzi = quick_floor_sse(vec_p);
+	int3 xyzi = quick_floor_to_int3(p);
 
 	for(int xx = -1; xx <= 1; xx++) {
 		for(int yy = -1; yy <= 1; yy++) {
 			for(int zz = -1; zz <= 1; zz++) {
-				ssef ip = ssef(xyzi + ssei(xx, yy, zz, 0));
-				ssef vp = ip + cellnoise_color(ip);
-				float d = len_squared<1, 1, 1, 0>(vec_p - vp);
+				int3 ip = xyzi + make_int3(xx, yy, zz);
+				float3 fp = make_float3(ip.x, ip.y, ip.z);
+				float3 vp = fp + cellnoise3(fp);
+				float d = len_squared(p - vp);
 
 				if(d < da) {
 					da = d;
@@ -99,9 +64,7 @@ ccl_device float3 voronoi_F1_color(float3 p)
 		}
 	}
 
-	ssef color = cellnoise_color(pa);
-	return (float3 &)color;
-#endif
+	return cellnoise3(pa);
 }
 
 ccl_device_noinline float4 svm_voronoi(NodeVoronoiColoring coloring, float3 p)
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index fd3199f209f..85cbd18b7ba 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -266,6 +266,11 @@ ccl_device_inline int floor_to_int(float f)
 	return float_to_int(floorf(f));
 }
 
+ccl_device_inline int quick_floor_to_int(float x)
+{
+	return float_to_int(x) - ((x < 0) ? 1 : 0);
+}
+
 ccl_device_inline int ceil_to_int(float f)
 {
 	return float_to_int(ceilf(f));
@@ -550,6 +555,11 @@ ccl_device_inline float xor_signmask(float x, int y)
 	return __int_as_float(__float_as_int(x) ^ y);
 }
 
+ccl_device float bits_to_01(uint bits)
+{
+	return bits * (1.0f/(float)0xFFFFFFFF);
+}
+
 /* projections */
 ccl_device_inline float2 map_to_tube(const float3 co)
 {
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index f5149fe13ed..e42ded76c75 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -377,6 +377,18 @@ ccl_device_inline bool isequal_float3(const float3 a, const float3 b)
 #endif
 }
 
+ccl_device_inline int3 quick_floor_to_int3(const float3 a)
+{
+#ifdef __KERNEL_SSE__
+	int3 b = int3(_mm_cvttps_epi32(a.m128));
+	int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f))));
+	/* Unsaturated add 0xffffffff is the same as subtract 1. */
+	return b + isneg;
+#else
+	return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z));
+#endif
+}
+
 ccl_device_inline bool isfinite3_safe(float3 v)
 {
 	return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z);
diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h
index 6eef8517665..81b10f31f4a 100644
--- a/intern/cycles/util/util_math_int3.h
+++ b/intern/cycles/util/util_math_int3.h
@@ -91,6 +91,24 @@ ccl_device_inline bool operator<(const int3 &a, const int3 &b)
 {
 	return a.x < b.x && a.y < b.y && a.z < b.z;
 }
+
+ccl_device_inline int3 operator+(const int3 &a, const int3 &b)
+{
+#ifdef __KERNEL_SSE__
+	return int3(_mm_add_epi32(a.m128, b.m128));
+#else
+	return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
+#endif
+}
+
+ccl_device_inline int3 operator-(const int3 &a, const int3 &b)
+{
+#ifdef __KERNEL_SSE__
+	return int3(_mm_sub_epi32(a.m128, b.m128));
+#else
+	return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
+#endif
+}
 #endif  /* !__KERNEL_OPENCL__ */
 
 CCL_NAMESPACE_END



More information about the Bf-blender-cvs mailing list