[Bf-blender-cvs] [562392c] soc-2016-cycles_denoising: Cycles: Implement tile overscan for GPU denoising

Lukas Stockner noreply at git.blender.org
Mon Jun 20 22:51:48 CEST 2016


Commit: 562392cc85c1d3f44da29d67294e9a1d91153cfc
Author: Lukas Stockner
Date:   Mon Jun 20 22:44:39 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB562392cc85c1d3f44da29d67294e9a1d91153cfc

Cycles: Implement tile overscan for GPU denoising

This commit adds support for tile overscan: a larger tile is rendered internally
and only its center area is shown. This is needed for GPU denoising, since the regular
approach of keeping the neighboring tiles around would require far too much memory.
Since tiles are generally quite large on GPUs, the added overhead is comparatively small.

===================================================================

M	intern/cycles/blender/blender_session.cpp
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/kernel/kernel_filter.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/render/buffers.cpp
M	intern/cycles/render/buffers.h
M	intern/cycles/render/session.cpp
M	intern/cycles/render/session.h
M	intern/cycles/render/tile.cpp
M	intern/cycles/render/tile.h

===================================================================

diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index f538f57..ebda63f 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -390,10 +390,10 @@ static void add_pass(BL::RenderEngine& b_engine,
 void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool do_update_only, bool highlight)
 {
 	BufferParams& params = rtile.buffers->params;
-	int x = params.full_x - session->tile_manager.params.full_x;
-	int y = params.full_y - session->tile_manager.params.full_y;
-	int w = params.width;
-	int h = params.height;
+	int x = params.full_x + params.overscan - session->tile_manager.params.full_x;
+	int y = params.full_y + params.overscan - session->tile_manager.params.full_y;
+	int w = params.final_width;
+	int h = params.final_height;
 
 	/* get render result */
 	BL::RenderResult b_rr = begin_render_result(b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str());
@@ -502,7 +502,8 @@ void BlenderSession::render()
 
 		buffer_params.passes = passes;
 		buffer_params.denoising_passes = b_layer_iter->keep_denoise_data() || b_layer_iter->denoise_result();
-		session->tile_manager.denoise = b_layer_iter->denoise_result();
+		session->tile_manager.schedule_denoising = b_layer_iter->denoise_result();
+		session->params.denoise_result = b_layer_iter->denoise_result();
 		scene->film->denoising_passes = buffer_params.denoising_passes;
 		scene->film->denoise_flags = 0;
 		if(b_layer_iter->denoise_diffuse_direct()) scene->film->denoise_flags |= DENOISE_DIFFUSE_DIR;
@@ -759,7 +760,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
 	BufferParams& params = buffers->params;
 	float exposure = scene->film->exposure;
 
-	vector<float> pixels(params.width*params.height*4);
+	vector<float> pixels(params.final_width*params.final_height*4);
 
 	/* Adjust absolute sample number to the range. */
 	int sample = rtile.sample;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 0c4fed1..0ea6973 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -222,8 +222,8 @@ public:
 		RenderTile tile;
 
 		void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
-		void(*filter_estimate_params_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*);
-		void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*);
+		void(*filter_estimate_params_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, int4);
+		void(*filter_final_pass_kernel)(KernelGlobals*, int, float**, int, int, int*, int*, int*, int*, void*, int4);
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
 		if(system_cpu_support_avx2()) {
@@ -314,14 +314,15 @@ public:
 				int tile_y[4] = {rtiles[0].y, rtiles[3].y, rtiles[6].y, rtiles[6].y+rtiles[6].h};
 				FilterStorage *storages = new FilterStorage[tile.w*tile.h];
 
-				for(int y = tile.y; y < tile.y + tile.h; y++) {
-					for(int x = tile.x; x < tile.x + tile.w; x++) {
-						filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages);
+				int4 filter_rect = make_int4(tile.x, tile.y, tile.x + tile.w, tile.y + tile.h);
+				for(int y = filter_rect.y; y < filter_rect.w; y++) {
+					for(int x = filter_rect.x; x < filter_rect.z; x++) {
+						filter_estimate_params_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
 					}
 				}
-				for(int y = tile.y; y < tile.y + tile.h; y++) {
-					for(int x = tile.x; x < tile.x + tile.w; x++) {
-						filter_final_pass_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages);
+				for(int y = filter_rect.y; y < filter_rect.w; y++) {
+					for(int x = filter_rect.x; x < filter_rect.z; x++) {
+						filter_final_pass_kernel(&kg, sample, buffers, x, y, tile_x, tile_y, offsets, strides, storages, filter_rect);
 					}
 				}
 			}
diff --git a/intern/cycles/kernel/kernel_filter.h b/intern/cycles/kernel/kernel_filter.h
index 4971f0d..3dc2ee0 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -123,9 +123,9 @@ ccl_device_inline bool filter_firefly_rejection(float3 pixel_color, float pixel_
  * - Start of the next upper/right neighbor (not accessed)
  * buffers contains the nine buffer pointers (y-major ordering, starting with the lower left tile), offset and stride the respective parameters of the tile.
  */
-ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, FilterStorage *storage)
+ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, FilterStorage *storage, int4 filter_rect)
 {
-	storage += (y - tile_y[1])*(tile_y[2] - tile_y[1]) + (x - tile_x[1]);
+	storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + (x-filter_rect.x);
 
 	/* Temporary storage, used in different steps of the algorithm. */
 	float tempmatrix[(2*DENOISE_FEATURES+1)*(2*DENOISE_FEATURES+1)], tempvector[2*DENOISE_FEATURES+1];
@@ -350,9 +350,9 @@ ccl_device void kernel_filter_estimate_params(KernelGlobals *kg, int sample, flo
 
 
 
-ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, FilterStorage *storage)
+ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float **buffers, int x, int y, int *tile_x, int *tile_y, int *offset, int *stride, FilterStorage *storage, int4 filter_rect)
 {
-	storage += (y - tile_y[1])*(tile_y[2] - tile_y[1]) + (x - tile_x[1]);
+	storage += (y-filter_rect.y)*(filter_rect.z-filter_rect.x) + (x-filter_rect.x);
 	float *buffer, features[DENOISE_FEATURES];
 
 	/* === Get center pixel. === */
@@ -372,9 +372,9 @@ ccl_device void kernel_filter_final_pass(KernelGlobals *kg, int sample, float **
 	/* Apply a median filter to the 3x3 window aroung the current pixel. */
 	int sort_idx = 0;
 	float global_bandwidths[9];
-	for(int py = max(y-1, tile_y[1]); py < min(y+2, tile_y[2]); py++) {
-		for(int px = max(x-1, tile_x[1]); px < min(x+2, tile_x[2]); px++) {
-			int ofs = (py-y)*(tile_y[2] - tile_y[1]) + (px-x);
+	for(int py = max(y-1, filter_rect.y); py < min(y+2, filter_rect.w); py++) {
+		for(int px = max(x-1, filter_rect.x); px < min(x+2, filter_rect.z); px++) {
+			int ofs = (py-y)*(filter_rect.z - filter_rect.x) + (px-x);
 			if(storage[ofs].rank != rank) continue;
 			global_bandwidths[sort_idx++] = storage[ofs].global_bandwidth;
 		}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index b719a79..fd0ebfa 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -58,7 +58,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_estimate_params)(KernelGlobals *kg,
                                                        int *tile_y,
                                                        int *offset,
                                                        int *stride,
-                                                       void *storage);
+                                                       void *storage,
+                                                       int4 filter_rect);
 
 void KERNEL_FUNCTION_FULL_NAME(filter_final_pass)(KernelGlobals *kg,
                                                   int sample,
@@ -69,6 +70,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_final_pass)(KernelGlobals *kg,
                                                   int *tile_y,
                                                   int *offset,
                                                   int *stride,
-                                                  void *storage);
+                                                  void *storage,
+                                                  int4 filter_rect);
 
 #undef KERNEL_ARCH
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 9e468b4..26fc871 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -140,23 +140,25 @@ void KERNEL_FUNCTION_FULL_NAME(filter_estimate_params)(KernelGlobals *kg,
                                                        int *tile_y,
                                                        int *offset,
                                                        int *stride,
-                                                       void *storage)
+                                                       void *storage,
+                                                       int4 filter_rect)
 {
-	kernel_filter_estimate_params(kg, sample, buffers, x, y, tile_x, tile_y, offset, stride, (FilterStorage*) storage);
+	kernel_filter_estimate_params(kg, sample, buffers, x, y, tile_x, tile_y, offset, stride, (FilterStorage*) storage, filter_rect);
 }
 
 void KERNEL_FUNCTION_FULL_NAME(filter_final_pass)(KernelGlobals *kg,
-                                                       int sample,
-                                                       float** buffers,
-                                                       int x,
-                                                       int y,
-                                                       int *tile_x,
-                                                       int *tile_y,
-                                                       int *offset,
-                                                       int *stride,
-                                                       void *storage)
+                                                  int sample,
+                                                  float** buffers,
+                                                  int x,
+                                                  int y,
+                                                  int *tile_x,
+                        

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list