[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [59032] branches/soc-2013-dingto/intern/ cycles: Cycles / Non-Progressive integrator:

Thomas Dinges blender at dingto.org
Fri Aug 9 19:23:54 CEST 2013


Revision: 59032
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=59032
Author:   dingto
Date:     2013-08-09 17:23:54 +0000 (Fri, 09 Aug 2013)
Log Message:
-----------
Cycles / Non-Progressive integrator:
* Code refactor to split the GPU kernel into two, one for each integrator.
This way we can enable Non-Progressive integrator on GPU in trunk without a performance drop.

Thanks to Brecht for some help and review! 

Modified Paths:
--------------
    branches/soc-2013-dingto/intern/cycles/device/device_cuda.cpp
    branches/soc-2013-dingto/intern/cycles/device/device_task.h
    branches/soc-2013-dingto/intern/cycles/kernel/kernel.cl
    branches/soc-2013-dingto/intern/cycles/kernel/kernel.cpp
    branches/soc-2013-dingto/intern/cycles/kernel/kernel.cu
    branches/soc-2013-dingto/intern/cycles/kernel/kernel_path.h
    branches/soc-2013-dingto/intern/cycles/kernel/kernel_sse2.cpp
    branches/soc-2013-dingto/intern/cycles/kernel/kernel_sse3.cpp
    branches/soc-2013-dingto/intern/cycles/render/session.cpp

Modified: branches/soc-2013-dingto/intern/cycles/device/device_cuda.cpp
===================================================================
--- branches/soc-2013-dingto/intern/cycles/device/device_cuda.cpp	2013-08-09 11:41:01 UTC (rev 59031)
+++ branches/soc-2013-dingto/intern/cycles/device/device_cuda.cpp	2013-08-09 17:23:54 UTC (rev 59032)
@@ -558,7 +558,7 @@
 		}
 	}
 
-	void path_trace(RenderTile& rtile, int sample)
+	void path_trace(RenderTile& rtile, int sample, bool progressive)
 	{
 		if(have_error())
 			return;
@@ -570,7 +570,10 @@
 		CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
 
 		/* get kernel function */
-		cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
+		if(progressive)
+			cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace_progressive"))
+		else
+			cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace_non_progressive"))
 		
 		/* pass in parameters */
 		int offset = 0;
@@ -914,6 +917,8 @@
 		if(task->type == DeviceTask::PATH_TRACE) {
 			RenderTile tile;
 			
+			bool progressive = task->integrator_progressive;
+			
 			/* keep rendering tiles until done */
 			while(task->acquire_tile(this, tile)) {
 				int start_sample = tile.start_sample;
@@ -925,7 +930,7 @@
 							break;
 					}
 
-					path_trace(tile, sample);
+					path_trace(tile, sample, progressive);
 
 					tile.sample = sample + 1;
 

Modified: branches/soc-2013-dingto/intern/cycles/device/device_task.h
===================================================================
--- branches/soc-2013-dingto/intern/cycles/device/device_task.h	2013-08-09 11:41:01 UTC (rev 59031)
+++ branches/soc-2013-dingto/intern/cycles/device/device_task.h	2013-08-09 17:23:54 UTC (rev 59032)
@@ -65,6 +65,7 @@
 	boost::function<bool(void)> get_cancel;
 
 	bool need_finish_queue;
+	bool integrator_progressive;
 protected:
 	double last_update_time;
 };

Modified: branches/soc-2013-dingto/intern/cycles/kernel/kernel.cl
===================================================================
--- branches/soc-2013-dingto/intern/cycles/kernel/kernel.cl	2013-08-09 11:41:01 UTC (rev 59031)
+++ branches/soc-2013-dingto/intern/cycles/kernel/kernel.cl	2013-08-09 17:23:54 UTC (rev 59032)
@@ -51,7 +51,7 @@
 	int y = sy + get_global_id(1);
 
 	if(x < sx + sw && y < sy + sh)
-		kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+		kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
 }
 
 __kernel void kernel_ocl_tonemap(

Modified: branches/soc-2013-dingto/intern/cycles/kernel/kernel.cpp
===================================================================
--- branches/soc-2013-dingto/intern/cycles/kernel/kernel.cpp	2013-08-09 11:41:01 UTC (rev 59031)
+++ branches/soc-2013-dingto/intern/cycles/kernel/kernel.cpp	2013-08-09 17:23:54 UTC (rev 59032)
@@ -90,7 +90,10 @@
 
 void kernel_cpu_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
 {
-	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+	if(kernel_data.integrator.progressive)
+		kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
+	else
+		kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
 }
 
 /* Tonemapping */

Modified: branches/soc-2013-dingto/intern/cycles/kernel/kernel.cu
===================================================================
--- branches/soc-2013-dingto/intern/cycles/kernel/kernel.cu	2013-08-09 11:41:01 UTC (rev 59031)
+++ branches/soc-2013-dingto/intern/cycles/kernel/kernel.cu	2013-08-09 17:23:54 UTC (rev 59032)
@@ -26,15 +26,24 @@
 #include "kernel_path.h"
 #include "kernel_displace.h"
 
-extern "C" __global__ void kernel_cuda_path_trace(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+extern "C" __global__ void kernel_cuda_path_trace_progressive(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
 {
 	int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
 	int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
 
 	if(x < sx + sw && y < sy + sh)
-		kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride);
+		kernel_path_trace_progressive(NULL, buffer, rng_state, sample, x, y, offset, stride);
 }
 
+extern "C" __global__ void kernel_cuda_path_trace_non_progressive(float *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
+{
+	int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+	int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
+
+	if(x < sx + sw && y < sy + sh)
+		kernel_path_trace_non_progressive(NULL, buffer, rng_state, sample, x, y, offset, stride);
+}
+
 extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float *buffer, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
 {
 	int x = sx + blockDim.x*blockIdx.x + threadIdx.x;

Modified: branches/soc-2013-dingto/intern/cycles/kernel/kernel_path.h
===================================================================
--- branches/soc-2013-dingto/intern/cycles/kernel/kernel_path.h	2013-08-09 11:41:01 UTC (rev 59031)
+++ branches/soc-2013-dingto/intern/cycles/kernel/kernel_path.h	2013-08-09 17:23:54 UTC (rev 59032)
@@ -1134,20 +1134,8 @@
 
 #endif
 
-__device void kernel_path_trace(KernelGlobals *kg,
-	__global float *buffer, __global uint *rng_state,
-	int sample, int x, int y, int offset, int stride)
+__device_inline void kernel_path_trace_setup(KernelGlobals *kg, __global uint *rng_state, int sample, int x, int y, RNG *rng, Ray *ray)
 {
-	/* buffer offset */
-	int index = offset + x + y*stride;
-	int pass_stride = kernel_data.film.pass_stride;
-
-	rng_state += index;
-	buffer += index*pass_stride;
-
-	/* initialize random numbers */
-	RNG rng;
-
 	float filter_u;
 	float filter_v;
 #ifdef __CMJ__
@@ -1156,38 +1144,82 @@
 	int num_samples = 0;
 #endif
 
-	path_rng_init(kg, rng_state, sample, num_samples, &rng, x, y, &filter_u, &filter_v);
+	path_rng_init(kg, rng_state, sample, num_samples, rng, x, y, &filter_u, &filter_v);
 
 	/* sample camera ray */
-	Ray ray;
 
 	float lens_u = 0.0f, lens_v = 0.0f;
 
 	if(kernel_data.cam.aperturesize > 0.0f)
-		path_rng_2D(kg, &rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
+		path_rng_2D(kg, rng, sample, num_samples, PRNG_LENS_U, &lens_u, &lens_v);
 
 	float time = 0.0f;
 
 #ifdef __CAMERA_MOTION__
 	if(kernel_data.cam.shuttertime != -1.0f)
-		time = path_rng_1D(kg, &rng, sample, num_samples, PRNG_TIME);
+		time = path_rng_1D(kg, rng, sample, num_samples, PRNG_TIME);
 #endif
 
-	camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, &ray);
+	camera_sample(kg, x, y, filter_u, filter_v, lens_u, lens_v, time, ray);
+}
 
+__device void kernel_path_trace_progressive(KernelGlobals *kg,
+	__global float *buffer, __global uint *rng_state,
+	int sample, int x, int y, int offset, int stride)
+{
+	/* buffer offset */
+	int index = offset + x + y*stride;
+	int pass_stride = kernel_data.film.pass_stride;
+
+	rng_state += index;
+	buffer += index*pass_stride;
+
+	/* initialize random numbers and ray */
+	RNG rng;
+	Ray ray;
+
+	kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
+
 	/* integrate */
 	float4 L;
 
-	if (ray.t != 0.0f) {
+	if (ray.t != 0.0f)
+		L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
+	else
+		L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+	/* accumulate result in output buffer */
+	kernel_write_pass_float4(buffer, sample, L);
+
+	path_rng_end(kg, rng_state, rng);
+}
+
+__device void kernel_path_trace_non_progressive(KernelGlobals *kg,
+	__global float *buffer, __global uint *rng_state,
+	int sample, int x, int y, int offset, int stride)
+{
+	/* buffer offset */
+	int index = offset + x + y*stride;
+	int pass_stride = kernel_data.film.pass_stride;
+
+	rng_state += index;
+	buffer += index*pass_stride;
+
+	/* initialize random numbers and ray */
+	RNG rng;
+	Ray ray;
+
+	kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
+
+	/* integrate */
+	float4 L;
+
+	if (ray.t != 0.0f)
 #ifdef __NON_PROGRESSIVE__
-		if(kernel_data.integrator.progressive)
+		L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
+#else
+		L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
 #endif
-			L = kernel_path_progressive(kg, &rng, sample, ray, buffer);
-#ifdef __NON_PROGRESSIVE__
-		else
-			L = kernel_path_non_progressive(kg, &rng, sample, ray, buffer);
-#endif
-	}
 	else
 		L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 

Modified: branches/soc-2013-dingto/intern/cycles/kernel/kernel_sse2.cpp
===================================================================
--- branches/soc-2013-dingto/intern/cycles/kernel/kernel_sse2.cpp	2013-08-09 11:41:01 UTC (rev 59031)
+++ branches/soc-2013-dingto/intern/cycles/kernel/kernel_sse2.cpp	2013-08-09 17:23:54 UTC (rev 59032)
@@ -39,7 +39,10 @@
 
 void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
 {
-	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+	if(kernel_data.integrator.progressive)
+		kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
+	else
+		kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
 }
 
 /* Tonemapping */

Modified: branches/soc-2013-dingto/intern/cycles/kernel/kernel_sse3.cpp
===================================================================
--- branches/soc-2013-dingto/intern/cycles/kernel/kernel_sse3.cpp	2013-08-09 11:41:01 UTC (rev 59031)
+++ branches/soc-2013-dingto/intern/cycles/kernel/kernel_sse3.cpp	2013-08-09 17:23:54 UTC (rev 59032)
@@ -41,7 +41,10 @@
 
 void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
 {
-	kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
+	if(kernel_data.integrator.progressive)
+		kernel_path_trace_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
+	else
+		kernel_path_trace_non_progressive(kg, buffer, rng_state, sample, x, y, offset, stride);
 }
 
 /* Tonemapping */

Modified: branches/soc-2013-dingto/intern/cycles/render/session.cpp
===================================================================

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list