[Bf-blender-cvs] [23cc453] master: Fix T48732: New GGX breaks OpenCL kernel

Sergey Sharybin noreply at git.blender.org
Tue Jun 28 14:16:12 CEST 2016


Commit: 23cc453975c42069bac21e849b1bf7e3e60cd8e7
Author: Sergey Sharybin
Date:   Tue Jun 28 17:11:17 2016 +0500
Branches: master
https://developer.blender.org/rB23cc453975c42069bac21e849b1bf7e3e60cd8e7

Fix T48732: New GGX breaks OpenCL kernel

Make sure we don't perform any implicit address space conversion.

This is a bit annoying, but less intrusive approaches (like using a temporary
private variable in the .cl kernel) do not work correctly here.

Using the generic address space would help on the code side here, but as far
as I know it would be somewhat slower due to the extra work involved.

===================================================================

M	intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
M	intern/cycles/kernel/kernel_random.h
M	intern/cycles/kernel/kernel_shader.h

===================================================================

diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
index 8f8e19d..afd4a8d 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -98,9 +98,10 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const boo
 
 	for(int order = 0; order < 10; order++) {
 		/* Sample microfacet height and normal */
-		if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float(lcg_state)))
+		if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float_addrspace(lcg_state)))
 			break;
-		float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float(lcg_state), lcg_step_float(lcg_state)));
+		float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float_addrspace(lcg_state),
+		                                                   lcg_step_float_addrspace(lcg_state)));
 
 #ifdef MF_MULTI_DIFFUSE
 		if(order == 0) {
@@ -128,14 +129,16 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const boo
 			/* Bounce from the microfacet. */
 #ifdef MF_MULTI_GLASS
 			bool next_outside;
-			wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float(lcg_state), &next_outside);
+			wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float_addrspace(lcg_state), &next_outside);
 			if(!next_outside) {
 				outside = !outside;
 				wr = -wr;
 				hr = -hr;
 			}
 #elif defined(MF_MULTI_DIFFUSE)
-			wr = mf_sample_phase_diffuse(wm, lcg_step_float(lcg_state), lcg_step_float(lcg_state));
+			wr = mf_sample_phase_diffuse(wm,
+			                             lcg_step_float_addrspace(lcg_state),
+			                             lcg_step_float_addrspace(lcg_state));
 #else /* MF_MULTI_GLOSSY */
 			wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
 #endif
@@ -179,13 +182,14 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const
 	int order;
 	for(order = 0; order < 10; order++) {
 		/* Sample microfacet height. */
-		if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float(lcg_state))) {
+		if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float_addrspace(lcg_state))) {
 			/* The random walk has left the surface. */
 			*wo = outside? wr: -wr;
 			return throughput;
 		}
 		/* Sample microfacet normal. */
-		float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float(lcg_state), lcg_step_float(lcg_state)));
+		float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float_addrspace(lcg_state),
+		                                                   lcg_step_float_addrspace(lcg_state)));
 
 		/* First-bounce color is already accounted for in mix weight. */
 		if(order > 0)
@@ -194,14 +198,16 @@ ccl_device float3 MF_FUNCTION_FULL_NAME(mf_sample)(float3 wi, float3 *wo, const
 		/* Bounce from the microfacet. */
 #ifdef MF_MULTI_GLASS
 		bool next_outside;
-		wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float(lcg_state), &next_outside);
+		wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float_addrspace(lcg_state), &next_outside);
 		if(!next_outside) {
 			hr = -hr;
 			wr = -wr;
 			outside = !outside;
 		}
 #elif defined(MF_MULTI_DIFFUSE)
-		wr = mf_sample_phase_diffuse(wm, lcg_step_float(lcg_state), lcg_step_float(lcg_state));
+		wr = mf_sample_phase_diffuse(wm,
+		                             lcg_step_float_addrspace(lcg_state),
+		                             lcg_step_float_addrspace(lcg_state));
 #else /* MF_MULTI_GLOSSY */
 		wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
 #endif
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index bf3c25d..94598e2 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -232,14 +232,14 @@ ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG
 
 /* Linear Congruential Generator */
 
-ccl_device uint lcg_step_uint(ccl_addr_space uint *rng)
+ccl_device uint lcg_step_uint(uint *rng)
 {
 	/* implicit mod 2^32 */
 	*rng = (1103515245*(*rng) + 12345);
 	return *rng;
 }
 
-ccl_device float lcg_step_float(ccl_addr_space uint *rng)
+ccl_device float lcg_step_float(uint *rng)
 {
 	/* implicit mod 2^32 */
 	*rng = (1103515245*(*rng) + 12345);
@@ -314,5 +314,21 @@ ccl_device_inline uint lcg_state_init(RNG *rng, const ccl_addr_space PathState *
 	return lcg_init(*rng + state->rng_offset + state->sample*scramble);
 }
 
+/* TODO(sergey): For until we can use generic address space from OpenCL 2.0. */
+
+ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space RNG *rng,
+                                                const ccl_addr_space PathState *state,
+                                                uint scramble)
+{
+	return lcg_init(*rng + state->rng_offset + state->sample*scramble);
+}
+
+ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
+{
+	/* implicit mod 2^32 */
+	*rng = (1103515245*(*rng) + 12345);
+	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+}
+
 CCL_NAMESPACE_END
 
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 3a4770f..765baa2 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -827,7 +827,7 @@ ccl_device float3 shader_holdout_eval(KernelGlobals *kg, ShaderData *sd)
 
 /* Surface Evaluation */
 
-ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, RNG *rng,
+ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
 	ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
 {
 	ccl_fetch(sd, num_closure) = 0;
@@ -851,7 +851,7 @@ ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, RNG *rng,
 	}
 
 	if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
-		ccl_fetch(sd, lcg_state) = lcg_state_init(rng, state, 0xb4bc3953);
+		ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953);
 	}
 }




More information about the Bf-blender-cvs mailing list