[Bf-blender-cvs] [3a243ad83fb] blender2.8: Eevee: Attempt to optimize GTAO shader.

Clément Foucault noreply at git.blender.org
Fri Jun 23 02:52:56 CEST 2017


Commit: 3a243ad83fb5f485ecc6fdc2bcc65a93a9f5ea53
Author: Clément Foucault
Date:   Fri Jun 23 02:52:15 2017 +0200
Branches: blender2.8
https://developer.blender.org/rB3a243ad83fb5f485ecc6fdc2bcc65a93a9f5ea53

Eevee: Attempt to optimize GTAO shader.

Unroll the horizon search loop. Use a fast approximation of acos.
On NVIDIA Linux, unrolling the second loop results in very long compilation times.

===================================================================

M	source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
M	source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl

===================================================================

diff --git a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
index 65ff09c4eae..b044cacf1b1 100644
--- a/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/ambient_occlusion_lib.glsl
@@ -5,28 +5,21 @@
 
 #define MAX_PHI_STEP 32
 /* NOTICE : this is multiplied by 2 */
-#define MAX_THETA_STEP 6.0
+#define MAX_THETA_STEP 12
 
 uniform sampler2D minMaxDepthTex;
 uniform float aoDistance;
 uniform float aoSamples;
 uniform float aoFactor;
 
-float sample_depth(vec2 co, int level)
+float get_max_horizon(vec2 co, vec3 x, float h, float lod)
 {
-	return textureLod(minMaxDepthTex, co, float(level)).g;
-}
-
-float get_max_horizon(vec2 co, vec3 x, float h, float step)
-{
-	if (co.x > 1.0 || co.x < 0.0 || co.y > 1.0 || co.y < 0.0)
-		return h;
-
-	float depth = sample_depth(co, int(step));
+	float depth = textureLod(minMaxDepthTex, co, floor(lod)).g;
 
 	/* Background case */
-	if (depth == 1.0)
-		return h;
+	/* this is really slow and is only a problem
+	 * if the far clip plane is near enough to notice */
+	// depth += step(1.0, depth) * 1e20;
 
 	vec3 s = get_view_space_from_depth(co, depth); /* s View coordinate */
 	vec3 omega_s = s - x;
@@ -39,6 +32,124 @@ float get_max_horizon(vec2 co, vec3 x, float h, float step)
 	return mix(h, max_h, blend);
 }
 
+void search_step(
+        vec2 t_phi, vec3 x, vec2 x_, float rand, vec2 pixel_ratio,
+        inout float j, inout float ofs, inout float h1, inout float h2)
+{
+	ofs += ofs; /* Step size is doubled each iteration */
+
+	vec2 s_ = t_phi * ofs * rand * pixel_ratio; /* s^ Screen coordinate */
+	vec2 co;
+
+	co = x_ + s_;
+	h1 = get_max_horizon(co, x, h1, j);
+
+	co = x_ - s_;
+	h2 = get_max_horizon(co, x, h2, j);
+
+	j += 0.5;
+}
+
+void search_horizon(
+        vec2 t_phi, vec3 x, vec2 x_, float rand,
+        float max_dist, vec2 pixel_ratio, float pixel_len,
+        inout float h1, inout float h2)
+{
+	float ofs = 1.5 * pixel_len;
+	float j = 0.0;
+
+#if 0 /* manually unrolled bellow */
+	for (int i = 0; i < MAX_THETA_STEP; i++) {
+		search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+		if (ofs > max_dist)
+			return;
+	}
+#endif
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+	if (ofs > max_dist)	return;
+
+	search_step(t_phi, x, x_, rand, pixel_ratio, j, ofs, h1, h2);
+}
+
+void integrate_slice(
+        float iter, vec3 x, vec3 normal, vec2 x_, vec2 noise,
+        float max_dist, vec2 pixel_ratio, float pixel_len,
+        inout float visibility, inout vec3 bent_normal)
+{
+	float phi = M_PI * ((noise.r + iter) / aoSamples);
+
+	/* Rotate with random direction to get jittered result. */
+	vec2 t_phi = vec2(cos(phi), sin(phi)); /* Screen space direction */
+
+	/* Search maximum horizon angles h1 and h2 */
+	float h1 = -1.0, h2 = -1.0; /* init at cos(pi) */
+	search_horizon(t_phi, x, x_, noise.g, max_dist, pixel_ratio, pixel_len, h1, h2);
+
+	/* (Slide 54) */
+	h1 = -fast_acos(h1);
+	h2 = fast_acos(h2);
+
+	/* Projecting Normal to Plane P defined by t_phi and omega_o */
+	vec3 h = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */
+	vec3 t = vec3(-t_phi, 0.0);
+	vec3 n_proj = normal - h * dot(h, normal);
+	float n_proj_len = max(1e-16, length(n_proj));
+
+	/* Clamping thetas (slide 58) */
+	float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0);
+	float n = sign(dot(n_proj, t)) * fast_acos(cos_n); /* Angle between view vec and normal */
+	h1 = n + max(h1 - n, -M_PI_2);
+	h2 = n + min(h2 - n, M_PI_2);
+
+	/* Solving inner integral */
+	float sin_n = sin(n);
+	float h1_2 = 2.0 * h1;
+	float h2_2 = 2.0 * h2;
+	float vd = (-cos(h1_2 - n) + cos_n + h1_2 * sin_n) + (-cos(h2_2 - n) + cos_n + h2_2 * sin_n);
+	vd *= 0.25 * n_proj_len;
+	visibility += vd;
+
+#ifdef USE_BENT_NORMAL
+	/* Finding Bent normal */
+	float b_angle = (h1 + h2) / 2.0;
+	/* The 0.5 factor below is here to equilibrate the accumulated vectors.
+	 * (sin(b_angle) * -t_phi) will accumulate to (phi_step * result_nor.xy * 0.5).
+	 * (cos(b_angle) * 0.5) will accumulate to (phi_step * result_nor.z * 0.5). */
+	/* Weight sample by vd */
+	bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle) * 0.5) * vd;
+#endif
+}
+
 void gtao(vec3 normal, vec3 position, vec2 noise, out float visibility
 #ifdef USE_BENT_NORMAL
 	, out vec3 bent_normal
@@ -66,62 +177,12 @@ void gtao(vec3 normal, vec3 position, vec2 noise, out float visibility
 	visibility = 0.0;
 #ifdef USE_BENT_NORMAL
 	bent_normal = vec3(0.0);
+#else
+	vec3 bent_normal = vec3(0.0);
 #endif
-	for (float i = 0.0; i < aoSamples && i < MAX_PHI_STEP; i++) {
-		float phi = M_PI * ((noise.r + i) / aoSamples);
-
-		/* Rotate with random direction to get jittered result. */
-		vec2 t_phi = vec2(cos(phi), sin(phi)); /* Screen space direction */
-
-		/* Search maximum horizon angles h1 and h2 */
-		float h1 = -1.0, h2 = -1.0; /* init at cos(pi) */
-		float ofs = 1.5 * pixel_len;
-		for (float j = 0.0; ofs < max_dist && j < MAX_THETA_STEP; j += 0.5) {
-			ofs += ofs; /* Step size is doubled each iteration */
-
-			vec2 s_ = t_phi * ofs * noise.g * pixel_ratio; /* s^ Screen coordinate */
-			vec2 co;
-
-			co = x_ + s_;
-			h1 = get_max_horizon(co, x, h1, j);
-
-			co = x_ - s_;
-			h2 = get_max_horizon(co, x, h2, j);
-		}
-
-		/* (Slide 54) */
-		h1 = -acos(h1);
-		h2 = acos(h2);
-
-		/* Projecting Normal to Plane P defined by t_phi and omega_o */
-		vec3 h = vec3(t_phi.y, -t_phi.x, 0.0); /* Normal vector to Integration plane */
-		vec3 t = vec3(-t_phi, 0.0);
-		vec3 n_proj = normal - h * dot(h, normal);
-		float n_proj_len = max(1e-16, length(n_proj));
-
-		/* Clamping thetas (slide 58) */
-		float cos_n = clamp(n_proj.z / n_proj_len, -1.0, 1.0);
-		float n = sign(dot(n_proj, t)) * acos(cos_n); /* Angle between view vec and normal */
-		h1 = n + max(h1 - n, -M_PI_2);
-		h2 = n + min(h2 - n, M_PI_2);
-
-		/* Solving inner integral */
-		float sin_n = sin(n);
-		float h1_2 = 2.0 * h1;
-		float h2_2 = 2.0 * h2;
-		float vd = (-cos(h1_2 - n) + cos_n + h1_2 * sin_n) + (-cos(h2_2 - n) + cos_n + h2_2 * sin_n);
-		vd *= 0.25 * n_proj_len;
-		visibility += vd;
-
-#ifdef USE_BENT_NORMAL
-		/* Finding Bent normal */
-		float b_angle = (h1 + h2) / 2.0;
-		/* The 0.5 factor below is here to equilibrate the accumulated vectors.
-		 * (sin(b_angle) * -t_phi) will accumulate to (phi_step * result_nor.xy * 0.5).
-		 * (cos(b_angle) * 0.5) will accumulate to (phi_step * result_nor.z * 0.5). */
-		/* Weight sample by vd */
-		bent_normal += vec3(sin(b_angle) * -t_phi, cos(b_angle) * 0.5) * vd;
-#endif
+	for (float i = 0.0; i < MAX_PHI_STEP; i++) {
+		if (i >= aoSamples) break;
+		integrate_slice(i, x, normal, x_, noise, max_dist, pixel_ratio, pixel_len, visibility, bent_normal);
 	}
 
 	visibility = clamp(visibility / aoSamples, 1e-8, 1.0);
diff --git a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
index 6ce4b2f7501..d4f1781ae6c 100644
--- a/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
+++ b/source/blender/draw/engines/eevee/shaders/bsdf_common_lib.glsl
@@ -146,6 +146,22 @@ float distance_squared(vec3 a, vec3 b) { a -= b; return dot(a, a); }
 
 float inverse_distance(vec3 V) { return max( 1 / length(V), 1e-8); }
 
+/* ------- Fast Math ------- */
+
+/* [Drobot2014a] Low Level Optimizations for GCN */
+float fast_sqrt(float x)
+{
+	return intBitsToFloat(0x1fbd1df5 + (floatBitsToInt(x) >> 1));
+}
+
+/* [Eberly2014] GPGPU Programming for Games and Science */
+float fast_acos(float x)
+{
+	float res = -0.156583 * abs(x) + M_PI_2;
+	res *= fast_sqrt(1.0 - abs(x));
+	return (x >= 0) ? res : M_PI - res;
+}
+
 float line_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec3 planeorigin, vec3 planenormal)
 {
 	return dot(planenormal, planeorigin - lineorigin) / dot(planenormal, linedirection);




More information about the Bf-blender-cvs mailing list