[Bf-blender-cvs] [10077229d6f] soc-2019-cycles-procedural: SSE noise implementation
OmarSquircleArt
noreply at git.blender.org
Wed Jul 3 13:52:41 CEST 2019
Commit: 10077229d6f30d7d6f3e42b9a5cca63db1493a91
Author: OmarSquircleArt
Date: Wed Jul 3 13:53:32 2019 +0200
Branches: soc-2019-cycles-procedural
https://developer.blender.org/rB10077229d6f30d7d6f3e42b9a5cca63db1493a91
SSE noise implementation
===================================================================
M intern/cycles/kernel/svm/svm_musgrave.h
M intern/cycles/kernel/svm/svm_noise.h
M intern/cycles/kernel/svm/svm_noisetex.h
M intern/cycles/kernel/svm/svm_texture.h
M intern/cycles/kernel/svm/svm_wave.h
M intern/cycles/render/nodes.cpp
M intern/cycles/util/util_hash.h
M intern/cycles/util/util_math.h
M intern/cycles/util/util_ssef.h
M intern/cycles/util/util_ssei.h
===================================================================
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 67fb5ca6241..efbf4d20f88 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -34,14 +34,14 @@ ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity
int i;
for (i = 0; i < float_to_int(octaves); i++) {
- value += snoise(p) * pwr;
+ value += snoise_3d(p) * pwr;
pwr *= pwHL;
p *= lacunarity;
}
rmd = octaves - floorf(octaves);
if (rmd != 0.0f)
- value += rmd * snoise(p) * pwr;
+ value += rmd * snoise_3d(p) * pwr;
return value;
}
@@ -65,14 +65,14 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p,
int i;
for (i = 0; i < float_to_int(octaves); i++) {
- value *= (pwr * snoise(p) + 1.0f);
+ value *= (pwr * snoise_3d(p) + 1.0f);
pwr *= pwHL;
p *= lacunarity;
}
rmd = octaves - floorf(octaves);
if (rmd != 0.0f)
- value *= (rmd * pwr * snoise(p) + 1.0f); /* correct? */
+ value *= (rmd * pwr * snoise_3d(p) + 1.0f); /* correct? */
return value;
}
@@ -94,11 +94,11 @@ ccl_device_noinline float noise_musgrave_hetero_terrain(
int i;
/* first unscaled octave of function; later octaves are scaled */
- value = offset + snoise(p);
+ value = offset + snoise_3d(p);
p *= lacunarity;
for (i = 1; i < float_to_int(octaves); i++) {
- increment = (snoise(p) + offset) * pwr * value;
+ increment = (snoise_3d(p) + offset) * pwr * value;
value += increment;
pwr *= pwHL;
p *= lacunarity;
@@ -106,7 +106,7 @@ ccl_device_noinline float noise_musgrave_hetero_terrain(
rmd = octaves - floorf(octaves);
if (rmd != 0.0f) {
- increment = (snoise(p) + offset) * pwr * value;
+ increment = (snoise_3d(p) + offset) * pwr * value;
value += rmd * increment;
}
@@ -129,7 +129,7 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
float pwr = pwHL;
int i;
- result = snoise(p) + offset;
+ result = snoise_3d(p) + offset;
weight = gain * result;
p *= lacunarity;
@@ -137,7 +137,7 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
if (weight > 1.0f)
weight = 1.0f;
- signal = (snoise(p) + offset) * pwr;
+ signal = (snoise_3d(p) + offset) * pwr;
pwr *= pwHL;
result += weight * signal;
weight *= gain * signal;
@@ -146,7 +146,7 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
rmd = octaves - floorf(octaves);
if (rmd != 0.0f)
- result += rmd * ((snoise(p) + offset) * pwr);
+ result += rmd * ((snoise_3d(p) + offset) * pwr);
return result;
}
@@ -167,7 +167,7 @@ ccl_device_noinline float noise_musgrave_ridged_multi_fractal(
float pwr = pwHL;
int i;
- signal = offset - fabsf(snoise(p));
+ signal = offset - fabsf(snoise_3d(p));
signal *= signal;
result = signal;
weight = 1.0f;
@@ -175,7 +175,7 @@ ccl_device_noinline float noise_musgrave_ridged_multi_fractal(
for (i = 1; i < float_to_int(octaves); i++) {
p *= lacunarity;
weight = saturate(signal * gain);
- signal = offset - fabsf(snoise(p));
+ signal = offset - fabsf(snoise_3d(p));
signal *= signal;
signal *= weight;
result += signal * pwr;
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index a4824076066..dd7d7178101 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -32,270 +32,559 @@
CCL_NAMESPACE_BEGIN
-#ifdef __KERNEL_SSE2__
-ccl_device_inline ssei quick_floor_sse(const ssef &x)
+/* **** Perlin Noise **** */
+
+/* The following functions compute 1D, 2D, 3D, and 4D Perlin noise.
+ * The code is based on the OSL noise code for compatibility.
+ * See oslnoise.h
+ */
+
+/* An alternative to Hermite interpolation that has zero first and
+ * second derivatives at t = 0 and t = 1.
+ * Described in Ken Perlin's "Improving Noise" [2002].
+ */
+ccl_device float fade(float t)
{
- ssei b = truncatei(x);
- ssei isneg = cast((x < ssef(0.0f)).m128);
- return b + isneg; // unsaturated add 0xffffffff is the same as subtract -1
+ return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
}
-#endif
-#ifdef __KERNEL_SSE2__
-ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz)
-{
-# define rot(x, k) (((x) << (k)) | (srl(x, 32 - (k))))
-# define xor_rot(a, b, c) \
- do { \
- a = a ^ b; \
- a = a - rot(b, c); \
- } while (0)
-
- uint len = 3;
- ssei magic = ssei(0xdeadbeef + (len << 2) + 13);
- ssei a = magic + kx;
- ssei b = magic + ky;
- ssei c = magic + kz;
-
- xor_rot(c, b, 14);
- xor_rot(a, c, 11);
- xor_rot(b, a, 25);
- xor_rot(c, b, 16);
- xor_rot(a, c, 4);
- xor_rot(b, a, 14);
- xor_rot(c, b, 24);
-
- return c;
-# undef rot
-# undef xor_rot
+ccl_device_inline float negate_if(float val, int condition)
+{
+ return (condition) ? -val : val;
}
-#endif
-#if 0 // unused
-ccl_device int imod(int a, int b)
+ccl_device float grad1(int hash, float x)
{
- a %= b;
- return a < 0 ? a + b : a;
+ int h = hash & 15;
+ float g = 1 + (h & 7);
+ return negate_if(g, h & 8) * x;
}
-ccl_device uint phash(int kx, int ky, int kz, int3 p)
+ccl_device_noinline float perlin_1d(float x)
{
- return hash_uint3(imod(kx, p.x), imod(ky, p.y), imod(kz, p.z));
+ int X;
+ float fx = floorfrac(x, &X);
+ float u = fade(fx);
+
+ return mix(grad1(hash_uint(X), fx), grad1(hash_uint(X + 1), fx - 1.0f), u);
}
-#endif
+/* 2D, 3D, and 4D noise can be accelerated using SSE, so we do a separate
+ * implementation for the SSE kernels.
+ */
#ifndef __KERNEL_SSE2__
-ccl_device float floorfrac(float x, int *i)
+/* Bilinear Interpolation:
+ *
+ * v2 v3
+ * @ + + + + @ y
+ * + + ^
+ * + + |
+ * + + |
+ * @ + + + + @ @------> x
+ * v0 v1
+ *
+ */
+ccl_device float bi_mix(float v0, float v1, float v2, float v3, float x, float y)
{
- *i = quick_floor_to_int(x);
- return x - *i;
+ float x1 = 1.0f - x;
+ return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x);
}
-#else
-ccl_device_inline ssef floorfrac_sse(const ssef &x, ssei *i)
+
+/* Trilinear Interpolation:
+ *
+ * v6 v7
+ * @ + + + + + + @
+ * +\ +\
+ * + \ + \
+ * + \ + \
+ * + \ v4 + \ v5
+ * + @ + + + +++ + @ z
+ * + + + + y ^
+ * v2 @ + +++ + + + @ v3 + \ |
+ * \ + \ + \ |
+ * \ + \ + \|
+ * \ + \ + +---------> x
+ * \+ \+
+ * @ + + + + + + @
+ * v0 v1
+ */
+ccl_device float tri_mix(float v0,
+ float v1,
+ float v2,
+ float v3,
+ float v4,
+ float v5,
+ float v6,
+ float v7,
+ float x,
+ float y,
+ float z)
{
- *i = quick_floor_sse(x);
- return x - ssef(*i);
+ float x1 = 1.0f - x;
+ float y1 = 1.0f - y;
+ float z1 = 1.0f - z;
+ return z1 * (y1 * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x)) +
+ z * (y1 * (v4 * x1 + v5 * x) + y * (v6 * x1 + v7 * x));
}
-#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float fade(float t)
+ccl_device float grad2(int hash, float x, float y)
{
- return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
+ int h = hash & 7;
+ float u = h < 4 ? x : y;
+ float v = 2.0f * (h < 4 ? y : x);
+ return negate_if(u, h & 1) + negate_if(v, h & 2);
}
-#else
-ccl_device_inline ssef fade_sse(const ssef *t)
+
+ccl_device float grad3(int hash, float x, float y, float z)
{
- ssef a = madd(*t, ssef(6.0f), ssef(-15.0f));
- ssef b = madd(*t, a, ssef(10.0f));
- return ((*t) * (*t)) * ((*t) * b);
+ int h = hash & 15;
+ float u = h < 8 ? x : y;
+ float vt = ((h == 12) || (h == 14)) ? x : z;
+ float v = h < 4 ? y : vt;
+ return negate_if(u, h & 1) + negate_if(v, h & 2);
}
-#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float nerp(float t, float a, float b)
+ccl_device float grad4(int hash, float x, float y, float z, float w)
{
- return (1.0f - t) * a + t * b;
+ int h = hash & 31;
+ float u = h < 24 ? x : y;
+ float v = h < 16 ? y : z;
+ float s = h < 8 ? z : w;
+ return negate_if(u, h & 1) + negate_if(v, h & 2) + negate_if(s, h & 4);
}
-#else
-ccl_device_inline ssef nerp_sse(const ssef &t, const ssef &a, const ssef &b)
+
+ccl_device_noinline float perlin_2d(float x, float y)
{
- ssef x1 = (ssef(1.0f) - t) * a;
- return madd(t, b, x1);
+ int X;
+ int Y;
+
+ float fx = floorfrac(x, &X);
+ float fy = floorfrac(y, &Y);
+
+ float u = fade(fx);
+ float v = fade(fy);
+
+ float r = bi_mix(grad2(hash_uint2(X, Y), fx, fy),
+ grad2(hash_uint2(X + 1, Y), fx - 1.0f, fy),
+ grad2(hash_uint2(X, Y + 1), fx, fy - 1.0f),
+ grad2(hash_uint2(X + 1, Y + 1), fx - 1.0f, fy - 1.0f),
+ u,
+ v);
+
+ return r;
}
-#endif
-#ifndef __KERNEL_SSE2__
-ccl_device float grad(int hash, float x, float y, float z)
+ccl_device_noinline float perlin_3d(float x, float y, float z)
{
- // use vectors pointing to the edges of the cube
- int h = hash & 15;
- float u = h < 8 ? x : y;
- float vt = ((h == 12) | (h == 14)) ? x : z;
- float v = h < 4 ? y : vt;
- return ((h & 1) ? -u : u) + ((h & 2) ? -v : v);
+ int X;
+ int Y;
+ int Z;
+
+ float fx = floorfrac(x, &X);
+ float fy = floorfrac(y, &Y);
+ float fz = floorfrac(z, &Z);
+
+ float u = fade(fx);
+ float v = fade(fy);
+ float w = fade(fz);
+
+ float r = tri_mix(grad3(hash_uint3(X, Y, Z), fx, fy, fz),
+ grad3(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz),
+ grad3(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz),
+ grad3(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz),
+ grad3(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f),
+ grad3(hash_uint3(X + 1, Y, Z + 1), fx - 1.0
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list