[Bf-blender-cvs] [6d8e308eae4] blender2.8: GPUShader: Optimize Multisample resolve shader.

Clément Foucault noreply at git.blender.org
Wed May 2 20:54:31 CEST 2018


Commit: 6d8e308eae4c980a093582c29084ae20ee814972
Author: Clément Foucault
Date:   Mon Apr 23 23:07:58 2018 +0200
Branches: blender2.8
https://developer.blender.org/rB6d8e308eae4c980a093582c29084ae20ee814972

GPUShader: Optimize Multisample resolve shader.

Group all fetches together, without interleaved ALU instructions, to let the compiler optimize.

Also fetch the color samples only if needed (i.e. when at least one depth sample in a group of four is not 1.0).

Went from 3.86 ms to 1.11-2.22 ms (min-max) for the 16-sample resolve pass
on my NVIDIA card.

===================================================================

M	source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl

===================================================================

diff --git a/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl b/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl
index de1fd8b6b58..57362c88320 100644
--- a/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_image_multisample_resolve_frag.glsl
@@ -8,60 +8,117 @@ out vec4 fragColor;
 #error "Too many samples"
 #endif
 
+// #define USE_DEPTH_WEIGHTING
+
 void main()
 {
 	ivec2 texel = ivec2(gl_FragCoord.xy);
 
-	float depth = 1.0;
-	depth = min(depth, texelFetch(depthMulti, texel, 0).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 1).r);
+	bvec4 b1, b2, b3, b4;
+	vec4 w1, w2, w3, w4;
+	vec4 d1, d2, d3, d4;
+	vec4 c1, c2, c3, c4, c5, c6, c7, c8;
+	vec4 c9, c10, c11, c12, c13, c14, c15, c16;
+	d1 = d2 = d3 = d4 = vec4(1.0);
+	w1 = w2 = w3 = w4 = vec4(0.0);
+	c1 = c2 = c3 = c4 = c5 = c6 = c7 = c8 = vec4(0.0);
+	c9 = c10 = c11 = c12 = c13 = c14 = c15 = c16 = vec4(0.0);
+
+	/* Depth */
+
+	d1.x = texelFetch(depthMulti, texel, 0).r;
+	d1.y = texelFetch(depthMulti, texel, 1).r;
+#if SAMPLES > 2
+	d1.z = texelFetch(depthMulti, texel, 2).r;
+	d1.w = texelFetch(depthMulti, texel, 3).r;
+#endif
+#if SAMPLES > 4
+	d2.x = texelFetch(depthMulti, texel, 4).r;
+	d2.y = texelFetch(depthMulti, texel, 5).r;
+	d2.z = texelFetch(depthMulti, texel, 6).r;
+	d2.w = texelFetch(depthMulti, texel, 7).r;
+#endif
+#if SAMPLES > 8
+	d3.x = texelFetch(depthMulti, texel, 8).r;
+	d3.y = texelFetch(depthMulti, texel, 9).r;
+	d3.z = texelFetch(depthMulti, texel, 10).r;
+	d3.w = texelFetch(depthMulti, texel, 11).r;
+	d4.x = texelFetch(depthMulti, texel, 12).r;
+	d4.y = texelFetch(depthMulti, texel, 13).r;
+	d4.z = texelFetch(depthMulti, texel, 14).r;
+	d4.w = texelFetch(depthMulti, texel, 15).r;
+#endif
+
+	/* COLOR */
+	b1 = notEqual(d1, vec4(1.0));
+	if (any(b1)) {
+		c1 = texelFetch(colorMulti, texel, 0);
+		c2 = texelFetch(colorMulti, texel, 1);
 #if SAMPLES > 2
-	depth = min(depth, texelFetch(depthMulti, texel, 2).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 3).r);
+		c3 = texelFetch(colorMulti, texel, 2);
+		c4 = texelFetch(colorMulti, texel, 3);
 #endif
+		w1 = vec4(b1);
+	}
 #if SAMPLES > 4
-	depth = min(depth, texelFetch(depthMulti, texel, 4).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 5).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 6).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 7).r);
+	b2 = notEqual(d2, vec4(1.0));
+	if (any(b2)) {
+		c5 = texelFetch(colorMulti, texel, 4);
+		c6 = texelFetch(colorMulti, texel, 5);
+		c7 = texelFetch(colorMulti, texel, 6);
+		c8 = texelFetch(colorMulti, texel, 7);
+		w2 = vec4(b2);
+	}
 #endif
 #if SAMPLES > 8
-	depth = min(depth, texelFetch(depthMulti, texel, 8).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 9).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 10).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 11).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 12).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 13).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 14).r);
-	depth = min(depth, texelFetch(depthMulti, texel, 15).r);
+	b3 = notEqual(d3, vec4(1.0));
+	if (any(b3)) {
+		c9 = texelFetch(colorMulti, texel, 8);
+		c10 = texelFetch(colorMulti, texel, 9);
+		c11 = texelFetch(colorMulti, texel, 10);
+		c12 = texelFetch(colorMulti, texel, 11);
+		w3 = vec4(b3);
+	}
+	b4 = notEqual(d4, vec4(1.0));
+	if (any(b4)) {
+		c13 = texelFetch(colorMulti, texel, 12);
+		c14 = texelFetch(colorMulti, texel, 13);
+		c15 = texelFetch(colorMulti, texel, 14);
+		c16 = texelFetch(colorMulti, texel, 15);
+		w4 = vec4(b4);
+	}
+#endif
+
+#if SAMPLES > 8
+	d1 = min(d1, min(d3, d4));
+#endif
+#if SAMPLES > 4
+	d1 = min(d1, d2);
+#endif
+#if SAMPLES > 2
+	d1.xy = min(d1.xy, d1.zw);
+#endif
+	gl_FragDepth = min(d1.x, d1.y);
+
+#ifdef USE_DEPTH_WEIGHTING
+	c1  *= w1.x; c2  *= w1.y; c3  *= w1.z; c4  *= w1.w;
+	c5  *= w2.x; c6  *= w2.y; c7  *= w2.z; c8  *= w2.w;
+	c9  *= w3.x; c10 *= w3.y; c11 *= w3.z; c12 *= w3.w;
+	c13 *= w4.x; c14 *= w4.y; c15 *= w4.z; c16 *= w4.w;
 #endif
 
-	vec4 color = vec4(0.0);
-	color += texelFetch(colorMulti, texel, 0);
-	color += texelFetch(colorMulti, texel, 1);
+	c1 =  c1 + c2;
 #if SAMPLES > 2
-	color += texelFetch(colorMulti, texel, 2);
-	color += texelFetch(colorMulti, texel, 3);
+	c1 += c3 + c4;
 #endif
 #if SAMPLES > 4
-	color += texelFetch(colorMulti, texel, 4);
-	color += texelFetch(colorMulti, texel, 5);
-	color += texelFetch(colorMulti, texel, 6);
-	color += texelFetch(colorMulti, texel, 7);
+	c1 += c5 + c6 + c7 + c8;
 #endif
 #if SAMPLES > 8
-	color += texelFetch(colorMulti, texel, 8);
-	color += texelFetch(colorMulti, texel, 9);
-	color += texelFetch(colorMulti, texel, 10);
-	color += texelFetch(colorMulti, texel, 11);
-	color += texelFetch(colorMulti, texel, 12);
-	color += texelFetch(colorMulti, texel, 13);
-	color += texelFetch(colorMulti, texel, 14);
-	color += texelFetch(colorMulti, texel, 15);
+	c1 += c9 + c10 + c11 + c12 + c13 + c14 + c15 + c16;
 #endif
 
 	const float inv_samples = 1.0 / float(SAMPLES);
 
-	fragColor = color * inv_samples;
-	gl_FragDepth = depth;
+	fragColor = c1 * inv_samples;
 }



More information about the Bf-blender-cvs mailing list