[Bf-blender-cvs] [9abacf38fd4] blender2.8: Eevee: SSR: Making ray count a define rather than an uniform.

Clément Foucault noreply at git.blender.org
Wed Sep 13 15:29:44 CEST 2017


Commit: 9abacf38fd4f3b0de4b6d9a4644b9cb1155482c9
Author: Clément Foucault
Date:   Wed Sep 13 15:29:13 2017 +0200
Branches: blender2.8
https://developer.blender.org/rB9abacf38fd4f3b0de4b6d9a4644b9cb1155482c9

Eevee: SSR: Making ray count a define rather than a uniform.

The branching introduced by the uniform caused problems on Mesa + AMD in the resolve stage.
This patch creates one shader per sample count, without branching.
This improves performance in the single-ray-per-pixel case (3.0 ms versus 3.6 ms in my testing).

===================================================================

M	source/blender/draw/engines/eevee/eevee_effects.c
M	source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl

===================================================================

diff --git a/source/blender/draw/engines/eevee/eevee_effects.c b/source/blender/draw/engines/eevee/eevee_effects.c
index b18d6455893..bcc9986d671 100644
--- a/source/blender/draw/engines/eevee/eevee_effects.c
+++ b/source/blender/draw/engines/eevee/eevee_effects.c
@@ -60,9 +60,10 @@ typedef struct EEVEE_LightProbeData {
 
 /* SSR shader variations */
 enum {
-	SSR_RESOLVE      = (1 << 0),
-	SSR_FULL_TRACE   = (1 << 1),
-	SSR_MAX_SHADER   = (1 << 2),
+	SSR_SAMPLES      = (1 << 0) | (1 << 1),
+	SSR_RESOLVE      = (1 << 2),
+	SSR_FULL_TRACE   = (1 << 3),
+	SSR_MAX_SHADER   = (1 << 4),
 };
 
 static struct {
@@ -198,8 +199,11 @@ static struct GPUShader *eevee_effects_ssr_shader_get(int options)
 		char *ssr_shader_str = BLI_dynstr_get_cstring(ds_frag);
 		BLI_dynstr_free(ds_frag);
 
+		int samples = (SSR_SAMPLES & options) + 1;
+
 		DynStr *ds_defines = BLI_dynstr_new();
 		BLI_dynstr_appendf(ds_defines, SHADER_DEFINES);
+		BLI_dynstr_appendf(ds_defines, "#define RAY_COUNT %d\n", samples);
 		if (options & SSR_RESOLVE) {
 			BLI_dynstr_appendf(ds_defines, "#define STEP_RESOLVE\n");
 		}
@@ -856,6 +860,7 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
 
 	if ((effects->enabled_effects & EFFECT_SSR) != 0) {
 		int options = (effects->reflection_trace_full) ? SSR_FULL_TRACE : 0;
+		options |= (effects->ssr_ray_count - 1);
 
 		struct GPUShader *trace_shader = eevee_effects_ssr_shader_get(options);
 		struct GPUShader *resolve_shader = eevee_effects_ssr_shader_get(SSR_RESOLVE | options);
@@ -871,7 +876,6 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
 		DRW_shgroup_uniform_vec4(grp, "viewvecs[0]", (float *)stl->g_data->viewvecs, 2);
 		DRW_shgroup_uniform_vec2(grp, "mipRatio[0]", (float *)stl->g_data->mip_ratio, 10);
 		DRW_shgroup_uniform_vec4(grp, "ssrParameters", &effects->ssr_quality, 1);
-		DRW_shgroup_uniform_int(grp, "rayCount", &effects->ssr_ray_count, 1);
 		DRW_shgroup_uniform_int(grp, "planar_count", &sldata->probes->num_planar, 1);
 		DRW_shgroup_uniform_float(grp, "maxRoughness", &effects->ssr_max_roughness, 1);
 		DRW_shgroup_uniform_buffer(grp, "planarDepth", &vedata->txl->planar_depth);
@@ -900,10 +904,15 @@ void EEVEE_effects_cache_init(EEVEE_SceneLayerData *sldata, EEVEE_Data *vedata)
 		DRW_shgroup_uniform_buffer(grp, "probeCubes", &sldata->probe_pool);
 		DRW_shgroup_uniform_buffer(grp, "probePlanars", &vedata->txl->planar_pool);
 		DRW_shgroup_uniform_buffer(grp, "hitBuffer0", &stl->g_data->ssr_hit_output[0]);
-		DRW_shgroup_uniform_buffer(grp, "hitBuffer1", (effects->ssr_ray_count < 2) ? &stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[1]);
-		DRW_shgroup_uniform_buffer(grp, "hitBuffer2", (effects->ssr_ray_count < 3) ? &stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[2]);
-		DRW_shgroup_uniform_buffer(grp, "hitBuffer3", (effects->ssr_ray_count < 4) ? &stl->g_data->ssr_hit_output[0] : &stl->g_data->ssr_hit_output[3]);
-		DRW_shgroup_uniform_int(grp, "rayCount", &effects->ssr_ray_count, 1);
+		if (effects->ssr_ray_count > 1) {
+			DRW_shgroup_uniform_buffer(grp, "hitBuffer1", &stl->g_data->ssr_hit_output[1]);
+		}
+		if (effects->ssr_ray_count > 2) {
+			DRW_shgroup_uniform_buffer(grp, "hitBuffer2", &stl->g_data->ssr_hit_output[2]);
+		}
+		if (effects->ssr_ray_count > 3) {
+			DRW_shgroup_uniform_buffer(grp, "hitBuffer3", &stl->g_data->ssr_hit_output[3]);
+		}
 		DRW_shgroup_call_add(grp, quad, NULL);
 	}
 
diff --git a/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl b/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl
index 0a958404385..1f3c7822124 100644
--- a/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl
+++ b/source/blender/draw/engines/eevee/shaders/effect_ssr_frag.glsl
@@ -111,7 +111,6 @@ void main()
 	if (dot(speccol_roughness.rgb, vec3(1.0)) == 0.0)
 		discard;
 
-
 	float roughness = speccol_roughness.a;
 	float roughnessSquared = max(1e-3, roughness * roughness);
 	float a2 = roughnessSquared * roughnessSquared;
@@ -129,8 +128,6 @@ void main()
 	vec3 T, B;
 	make_orthonormal_basis(N, T, B); /* Generate tangent space */
 
-	float ray_ofs = 1.0 / float(rayCount);
-
 	/* Planar Reflections */
 	for (int i = 0; i < MAX_PLANAR && i < planar_count; ++i) {
 		PlanarData pd = planars_data[i];
@@ -144,20 +141,31 @@ void main()
 			tracePosition = transform_point(ViewMatrix, tracePosition);
 			vec3 planeNormal = transform_direction(ViewMatrix, pd.pl_normal);
 
-			/* TODO : Raytrace together if textureGather is supported. */
 			hitData0 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand, 0.0);
-			if (rayCount > 1) hitData1 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 * ray_ofs);
-			if (rayCount > 2) hitData2 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0,  1.0, -1.0), 2.0 * ray_ofs);
-			if (rayCount > 3) hitData3 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, -1.0,  1.0), 3.0 * ray_ofs);
+#if (RAY_COUNT > 1)
+			hitData1 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 / float(RAY_COUNT));
+#endif
+#if (RAY_COUNT > 2)
+			hitData2 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0,  1.0, -1.0), 2.0 / float(RAY_COUNT));
+#endif
+#if (RAY_COUNT > 3)
+			hitData3 = do_planar_ssr(i, V, N, T, B, planeNormal, tracePosition, a2, rand.xzy * vec3(1.0, -1.0,  1.0), 3.0 / float(RAY_COUNT));
+#endif
 			return;
 		}
 	}
 
 	/* TODO : Raytrace together if textureGather is supported. */
 	hitData0 = do_ssr(V, N, T, B, viewPosition, a2, rand, 0.0);
-	if (rayCount > 1) hitData1 = do_ssr(V, N, T, B, viewPosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 * ray_ofs);
-	if (rayCount > 2) hitData2 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0,  1.0, -1.0), 2.0 * ray_ofs);
-	if (rayCount > 3) hitData3 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0, -1.0,  1.0), 3.0 * ray_ofs);
+#if (RAY_COUNT > 1)
+	hitData1 = do_ssr(V, N, T, B, viewPosition, a2, rand.xyz * vec3(1.0, -1.0, -1.0), 1.0 / float(RAY_COUNT));
+#endif
+#if (RAY_COUNT > 2)
+	hitData2 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0,  1.0, -1.0), 2.0 / float(RAY_COUNT));
+#endif
+#if (RAY_COUNT > 3)
+	hitData3 = do_ssr(V, N, T, B, viewPosition, a2, rand.xzy * vec3(1.0, -1.0,  1.0), 3.0 / float(RAY_COUNT));
+#endif
 }
 
 #else /* STEP_RESOLVE */
@@ -395,21 +403,21 @@ void main()
 			ssr_accum += get_ssr_sample(hitBuffer0, pd, planar_index, worldPosition, N, V,
 			                            roughnessSquared, cone_tan, source_uvs,
 			                            texture_size, target_texel, weight_acc);
-			if (rayCount > 1) {
-				ssr_accum += get_ssr_sample(hitBuffer1, pd, planar_index, worldPosition, N, V,
-				                            roughnessSquared, cone_tan, source_uvs,
-				                            texture_size, target_texel, weight_acc);
-			}
-			if (rayCount > 2) {
-				ssr_accum += get_ssr_sample(hitBuffer2, pd, planar_index, worldPosition, N, V,
-				                            roughnessSquared, cone_tan, source_uvs,
-				                            texture_size, target_texel, weight_acc);
-			}
-			if (rayCount > 3) {
-				ssr_accum += get_ssr_sample(hitBuffer3, pd, planar_index, worldPosition, N, V,
-				                            roughnessSquared, cone_tan, source_uvs,
-				                            texture_size, target_texel, weight_acc);
-			}
+#if (RAY_COUNT > 1)
+			ssr_accum += get_ssr_sample(hitBuffer1, pd, planar_index, worldPosition, N, V,
+			                            roughnessSquared, cone_tan, source_uvs,
+			                            texture_size, target_texel, weight_acc);
+#endif
+#if (RAY_COUNT > 2)
+			ssr_accum += get_ssr_sample(hitBuffer2, pd, planar_index, worldPosition, N, V,
+			                            roughnessSquared, cone_tan, source_uvs,
+			                            texture_size, target_texel, weight_acc);
+#endif
+#if (RAY_COUNT > 3)
+			ssr_accum += get_ssr_sample(hitBuffer3, pd, planar_index, worldPosition, N, V,
+			                            roughnessSquared, cone_tan, source_uvs,
+			                            texture_size, target_texel, weight_acc);
+#endif
 		}
 	}



More information about the Bf-blender-cvs mailing list