[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [57317] trunk/blender/intern/cycles/kernel : Fix #35665: cycles CUDA crash after recent changes.

Brecht Van Lommel brechtvanlommel at pandora.be
Sun Jun 9 18:37:05 CEST 2013


Revision: 57317
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=57317
Author:   blendix
Date:     2013-06-09 16:37:04 +0000 (Sun, 09 Jun 2013)
Log Message:
-----------
Fix #35665: Cycles CUDA crash after recent changes. This works around a compiler
bug in CUDA 4.2 (solved in 5.5) affecting typedef'd function parameters, by
passing RNG by pointer instead of by value.

Modified Paths:
--------------
    trunk/blender/intern/cycles/kernel/kernel_path.h
    trunk/blender/intern/cycles/kernel/kernel_random.h

Modified: trunk/blender/intern/cycles/kernel/kernel_path.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_path.h	2013-06-09 16:18:23 UTC (rev 57316)
+++ trunk/blender/intern/cycles/kernel/kernel_path.h	2013-06-09 16:37:04 UTC (rev 57317)
@@ -233,7 +233,7 @@
 	return result;
 }
 
-__device float4 kernel_path_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer)
+__device float4 kernel_path_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
 {
 	/* initialize */
 	PathRadiance L;
@@ -271,7 +271,7 @@
 			}
 
 			extmax = kernel_data.curve_kernel_data.maximum_width;
-			lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d);
+			lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
 		}
 
 		bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
@@ -399,7 +399,7 @@
 
 			/* do bssrdf scatter step if we picked a bssrdf closure */
 			if(sc) {
-				uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
+				uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
 				subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
 			}
 		}
@@ -538,7 +538,7 @@
 
 #ifdef __NON_PROGRESSIVE__
 
-__device void kernel_path_indirect(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer,
+__device void kernel_path_indirect(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer,
 	float3 throughput, int num_samples, int num_total_samples,
 	float min_ray_pdf, float ray_pdf, PathState state, int rng_offset, PathRadiance *L)
 {
@@ -644,7 +644,7 @@
 
 			/* do bssrdf scatter step if we picked a bssrdf closure */
 			if(sc) {
-				uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
+				uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
 				subsurface_scatter_step(kg, &sd, state.flag, sc, &lcg_state, false);
 			}
 		}
@@ -767,7 +767,7 @@
 	}
 }
 
-__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG rng, int sample,
+__device_noinline void kernel_path_non_progressive_lighting(KernelGlobals *kg, RNG *rng, int sample,
 	ShaderData *sd, float3 throughput, float num_samples_adjust,
 	float min_ray_pdf, float ray_pdf, PathState state,
 	int rng_offset, PathRadiance *L, __global float *buffer)
@@ -830,13 +830,13 @@
 		for(int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
 			int num_samples = ceil_to_int(num_samples_adjust*light_select_num_samples(kg, i));
 			float num_samples_inv = num_samples_adjust/(num_samples*kernel_data.integrator.num_all_lights);
-			RNG lamp_rng = cmj_hash(rng, i);
+			RNG lamp_rng = cmj_hash(*rng, i);
 
 			if(kernel_data.integrator.pdf_triangles != 0.0f)
 				num_samples_inv *= 0.5f;
 
 			for(int j = 0; j < num_samples; j++) {
-				float2 light_uv = path_rng_2D(kg, lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
+				float2 light_uv = path_rng_2D(kg, &lamp_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_LIGHT_U);
 				float light_u = light_uv.x;
 				float light_v = light_uv.y;
 
@@ -905,7 +905,7 @@
 		num_samples = ceil_to_int(num_samples_adjust*num_samples);
 
 		float num_samples_inv = num_samples_adjust/num_samples;
-		RNG bsdf_rng = cmj_hash(rng, i);
+		RNG bsdf_rng = cmj_hash(*rng, i);
 
 		for(int j = 0; j < num_samples; j++) {
 			/* sample BSDF */
@@ -913,7 +913,7 @@
 			BsdfEval bsdf_eval;
 			float3 bsdf_omega_in;
 			differential3 bsdf_domega_in;
-			float2 bsdf_uv = path_rng_2D(kg, bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
+			float2 bsdf_uv = path_rng_2D(kg, &bsdf_rng, sample*num_samples + j, aa_samples*num_samples, rng_offset + PRNG_BSDF_U);
 			float bsdf_u = bsdf_uv.x;
 			float bsdf_v = bsdf_uv.y;
 			int label;
@@ -964,7 +964,7 @@
 	}
 }
 
-__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG rng, int sample, Ray ray, __global float *buffer)
+__device float4 kernel_path_non_progressive(KernelGlobals *kg, RNG *rng, int sample, Ray ray, __global float *buffer)
 {
 	/* initialize */
 	PathRadiance L;
@@ -997,7 +997,7 @@
 			}
 
 			extmax = kernel_data.curve_kernel_data.maximum_width;
-			lcg_state = lcg_init(rng + rng_offset + sample*0x51633e2d);
+			lcg_state = lcg_init(*rng + rng_offset + sample*0x51633e2d);
 		}
 
 		if(!scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax)) {
@@ -1090,7 +1090,7 @@
 					continue;
 
 				/* set up random number generator */
-				uint lcg_state = lcg_init(rng + rng_offset + sample*0x68bc21eb);
+				uint lcg_state = lcg_init(*rng + rng_offset + sample*0x68bc21eb);
 				int num_samples = kernel_data.integrator.subsurface_samples;
 				float num_samples_inv = 1.0f/num_samples;
 
@@ -1163,7 +1163,7 @@
 	float lens_u = 0.0f, lens_v = 0.0f;
 
 	if(kernel_data.cam.aperturesize > 0.0f) {
-		float2 lens_uv = path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U);
+		float2 lens_uv = path_rng_2D(kg, &rng, sample, num_samples, PRNG_LENS_U);
 		lens_u = lens_uv.x;
 		lens_v = lens_uv.y;
 	}
@@ -1172,7 +1172,7 @@
 
 #ifdef __CAMERA_MOTION__
 	if(kernel_data.cam.shuttertime != -1.0f)
-		time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
+		time = path_rng_1D(kg, &rng, sample, num_samples, PRNG_TIME);
 #endif
 
 	camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray);
@@ -1184,10 +1184,10 @@
 #ifdef __NON_PROGRESSIVE__
 		if(kernel_data.integrator.progressive)
 #endif
-			L = kernel_path_progressive(kg, rng, sample, ray, buffer);
+			L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
 #ifdef __NON_PROGRESSIVE__
 		else
-			L = kernel_path_non_progressive(kg, rng, sample, ray, buffer);
+			L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
 #endif
 	}
 	else

Modified: trunk/blender/intern/cycles/kernel/kernel_random.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_random.h	2013-06-09 16:18:23 UTC (rev 57316)
+++ trunk/blender/intern/cycles/kernel/kernel_random.h	2013-06-09 16:37:04 UTC (rev 57317)
@@ -102,10 +102,10 @@
 	return index;
 }
 
-__device_inline float path_rng(KernelGlobals *kg, RNG rng, int sample, int dimension)
+__device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension)
 {
 #ifdef __SOBOL_FULL_SCREEN__
-	uint result = sobol_dimension(kg, rng, dimension);
+	uint result = sobol_dimension(kg, *rng, dimension);
 	float r = (float)result * (1.0f/(float)0xFFFFFFFF);
 	return r;
 #else
@@ -117,20 +117,20 @@
 	float shift;
 
 	if(dimension & 1)
-		shift = (rng >> 16)*(1.0f/(float)0xFFFF);
+		shift = (*rng >> 16)*(1.0f/(float)0xFFFF);
 	else
-		shift = (rng & 0xFFFF)*(1.0f/(float)0xFFFF);
+		shift = (*rng & 0xFFFF)*(1.0f/(float)0xFFFF);
 
 	return r + shift - floorf(r + shift);
 #endif
 }
 
-__device_inline float path_rng_1D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension)
+__device_inline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension)
 {
 #ifdef __CMJ__
 	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
 		/* correlated multi-jittered */
-		int p = rng + dimension;
+		int p = *rng + dimension;
 		return cmj_sample_1D(sample, num_samples, p);
 	}
 #endif
@@ -139,12 +139,12 @@
 	return path_rng(kg, rng, sample, dimension);
 }
 
-__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG rng, int sample, int num_samples, int dimension)
+__device_inline float2 path_rng_2D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension)
 {
 #ifdef __CMJ__
 	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
 		/* correlated multi-jittered */
-		int p = rng + dimension;
+		int p = *rng + dimension;
 		return cmj_sample_2D(sample, num_samples, p);
 	}
 #endif
@@ -184,7 +184,7 @@
 		*fy = 0.5f;
 	}
 	else {
-		float2 fxy = path_rng_2D(kg, *rng, sample, num_samples, PRNG_FILTER_U);
+		float2 fxy = path_rng_2D(kg, rng, sample, num_samples, PRNG_FILTER_U);
 
 		*fx = fxy.x;
 		*fy = fxy.y;




More information about the Bf-blender-cvs mailing list