[Bf-blender-cvs] [f7bac653cf] temp-cycles-denoising: Cycles Denoising: Remove tile overscan when using GPU rendering

Lukas Stockner noreply at git.blender.org
Fri Mar 24 20:18:32 CET 2017


Commit: f7bac653cfd32e44912d36c55785bea0dd04cde9
Author: Lukas Stockner
Date:   Fri Feb 17 03:48:12 2017 +0100
Branches: temp-cycles-denoising
https://developer.blender.org/rBf7bac653cfd32e44912d36c55785bea0dd04cde9

Cycles Denoising: Remove tile overscan when using GPU rendering

Denoising a pixel requires access to the other pixels surrounding it. On the CPU, this is solved by waiting for the neighboring tiles to be rendered before the central tile is denoised.
On the GPU, it was handled by rendering larger tiles internally and discarding the overscan area after denoising. That saved a bit of memory, but wasted computation (with 256x256 tiles and a half-window of 8, each tile was rendered internally as 272x272, so about 13% more pixels than the visible tile were rendered and never actually seen).

Also, supporting overscan tiles made the code more complex. So, this commit removes the overscan code and uses the CPU approach on GPUs as well.

===================================================================

M	intern/cycles/blender/blender_session.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_denoising.cpp
M	intern/cycles/device/device_denoising.h
M	intern/cycles/render/buffers.cpp
M	intern/cycles/render/buffers.h
M	intern/cycles/render/session.cpp
M	intern/cycles/render/tile.h

===================================================================

diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index fedd61e791..747ab569c7 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -392,11 +392,10 @@ static void end_render_result(BL::RenderEngine& b_engine,
 void BlenderSession::do_write_update_render_tile(RenderTile& rtile, bool do_update_only, bool highlight)
 {
 	BufferParams& params = rtile.buffers->params;
-	BufferParams& full_params = session->tile_manager.state.buffer;
-	int x = rtile.x + params.overscan - full_params.full_x;
-	int y = rtile.y + params.overscan - full_params.full_y;
-	int w = rtile.w - 2*params.overscan;
-	int h = rtile.h - 2*params.overscan;
+	int x = params.full_x - session->tile_manager.params.full_x;
+	int y = params.full_y - session->tile_manager.params.full_y;
+	int w = params.width;
+	int h = params.height;
 
 	/* get render result */
 	BL::RenderResult b_rr = begin_render_result(b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str());
@@ -460,7 +459,6 @@ void BlenderSession::render()
 
 	/* get buffer parameters */
 	SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
-	const bool is_cpu = session_params.device.type == DEVICE_CPU;
 	BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
 
 	/* render each layer */
@@ -506,7 +504,7 @@ void BlenderSession::render()
 
 		buffer_params.passes = passes;
 		buffer_params.denoising_data_pass = b_layer_iter->denoise_result();
-		session->tile_manager.schedule_denoising = (b_layer_iter->denoise_result() && is_cpu) && !getenv("CPU_OVERSCAN");
+		session->tile_manager.schedule_denoising = b_layer_iter->denoise_result();
 		session->params.denoise_result = b_layer_iter->denoise_result();
 		scene->film->denoising_data_pass = buffer_params.denoising_data_pass;
 		scene->film->denoising_flags = 0;
@@ -760,12 +758,9 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
 		return;
 
 	BufferParams& params = buffers->params;
-	float exposure = scene? scene->film->exposure : 1.0f;
-
-	int4 rect = make_int4(rtile.x + params.overscan, rtile.y + params.overscan,
-	                      rtile.x+rtile.w - params.overscan, rtile.y+rtile.h - params.overscan);
+	float exposure = scene->film->exposure;
 
-	vector<float> pixels((rect.w-rect.y)*(rect.z-rect.x)*4);
+	vector<float> pixels(params.width*params.height*4);
 
 	/* Adjust absolute sample number to the range. */
 	int sample = rtile.sample;
@@ -786,7 +781,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
 			int components = b_pass.channels();
 
 			/* copy pixels */
-			if(!buffers->get_pass_rect(pass_type, exposure, sample, components, rect, &pixels[0]))
+			if(!buffers->get_pass_rect(pass_type, exposure, sample, components, &pixels[0]))
 				memset(&pixels[0], 0, pixels.size()*sizeof(float));
 
 			b_pass.rect(&pixels[0]);
@@ -795,7 +790,7 @@ void BlenderSession::do_write_update_render_result(BL::RenderResult& b_rr,
 	else {
 		/* copy combined pass */
 		BL::RenderPass b_combined_pass(b_rlay.passes.find_by_type(BL::RenderPass::type_COMBINED, b_rview_name.c_str()));
-		if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, rect, &pixels[0]))
+		if(buffers->get_pass_rect(PASS_COMBINED, exposure, sample, 4, &pixels[0]))
 			b_combined_pass.rect(&pixels[0]);
 	}
 
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 5043844171..bdabc29d84 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1225,15 +1225,18 @@ public:
 		return !have_error();
 	}
 
-	void denoise(RenderTile &rtile, const DeviceTask &task, int sample)
+	void denoise(RenderTile &rtile, const DeviceTask &task)
 	{
 		DenoisingTask denoising(this);
 
-		int overscan = rtile.buffers->params.overscan;
-		denoising.filter_area = make_int4(rtile.x + overscan, rtile.y + overscan, rtile.w - 2*overscan, rtile.h - 2*overscan);
-		denoising.render_buffer.samples = sample;
+		denoising.filter_area = make_int4(rtile.x, rtile.y, rtile.w, rtile.h);
+		denoising.render_buffer.samples = rtile.sample;
+
+		RenderTile rtiles[9];
+		rtiles[4] = rtile;
+		task.get_neighbor_tiles(rtiles);
+		denoising.tiles_from_rendertiles(rtiles);
 
-		denoising.tiles_from_single_tile(rtile);
 		denoising.init_from_devicetask(task);
 
 		denoising.functions.construct_transform = function_bind(&CUDADevice::denoising_construct_transform, this, &denoising);
@@ -1713,15 +1716,13 @@ public:
 							task->update_progress(&tile, tile.w*tile.h);
 						}
 					}
-
-					if(tile.buffers->params.overscan && !task->get_cancel()) { /* TODO(lukas) Works, but seems hacky? */
-						denoise(tile, *task, tile.start_sample + tile.num_samples);
-					}
 				}
 				else if(tile.task == RenderTile::DENOISE) {
-					int sample = tile.start_sample + tile.num_samples;
-					denoise(tile, *task, sample);
-					tile.sample = sample;
+					tile.sample = tile.start_sample + tile.num_samples;
+
+					denoise(tile, *task);
+
+					task->update_progress(&tile, tile.w*tile.h);
 				}
 
 				task->release_tile(tile);
diff --git a/intern/cycles/device/device_denoising.cpp b/intern/cycles/device/device_denoising.cpp
index 3f5130f233..73f190c8e7 100644
--- a/intern/cycles/device/device_denoising.cpp
+++ b/intern/cycles/device/device_denoising.cpp
@@ -42,26 +42,6 @@ void DenoisingTask::init_from_devicetask(const DeviceTask &task)
 	                 min(tiles->y[3], filter_area.y + filter_area.w + half_window));
 }
 
-void DenoisingTask::tiles_from_single_tile(const RenderTile &tile)
-{
-	tiles = (TilesInfo*) tiles_mem.resize(sizeof(TilesInfo)/sizeof(int));
-
-	tiles->x[0] = tile.x;
-	tiles->x[1] = tile.x;
-	tiles->x[2] = tile.x+tile.w;
-	tiles->x[3] = tile.x+tile.w;
-	tiles->y[0] = tile.y;
-	tiles->y[1] = tile.y;
-	tiles->y[2] = tile.y+tile.h;
-	tiles->y[3] = tile.y+tile.h;
-	std::fill(tiles->buffers, tiles->buffers+9, (device_ptr) 0);
-	std::fill(tiles->offsets, tiles->offsets+9, 0);
-	std::fill(tiles->strides, tiles->strides+9, 0);
-	tiles->buffers[4] = tile.buffer;
-	tiles->offsets[4] = tile.offset;
-	tiles->strides[4] = tile.stride;
-}
-
 void DenoisingTask::tiles_from_rendertiles(RenderTile *rtiles)
 {
 	tiles = (TilesInfo*) tiles_mem.resize(sizeof(TilesInfo)/sizeof(int));
diff --git a/intern/cycles/device/device_denoising.h b/intern/cycles/device/device_denoising.h
index be67f2dfde..07dac7087b 100644
--- a/intern/cycles/device/device_denoising.h
+++ b/intern/cycles/device/device_denoising.h
@@ -46,7 +46,6 @@ public:
 
 	TilesInfo *tiles;
 	device_vector<int> tiles_mem;
-	void tiles_from_single_tile(const RenderTile &tile);
 	void tiles_from_rendertiles(RenderTile *rtiles);
 
 	int4 rect;
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 8e14c505a0..b73f8d9b7d 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -41,13 +41,10 @@ BufferParams::BufferParams()
 	full_y = 0;
 	full_width = 0;
 	full_height = 0;
-	final_width = 0;
-	final_height = 0;
 
 	denoising_data_pass = false;
 	denoising_clean_pass = false;
 	denoising_split_pass = false;
-	overscan = 0;
 
 	Pass::add(PASS_COMBINED, passes);
 }
@@ -66,9 +63,6 @@ bool BufferParams::modified(const BufferParams& params)
 		&& height == params.height
 		&& full_width == params.full_width
 		&& full_height == params.full_height
-		&& final_width == params.final_width
-		&& final_height == params.final_height
-		&& overscan == params.overscan
 		&& Pass::equals(passes, params.passes));
 }
 
@@ -176,29 +170,8 @@ bool RenderBuffers::copy_from_device()
 	return true;
 }
 
-/* When calling from the BlenderSession, rect is in final image coordinates.
- * To make addressing the buffer easier, rect is brought to "buffer coordinates"
- * where the buffer starts at (0, 0) and ends at (width, height). */
-int4 RenderBuffers::rect_to_local(int4 rect) {
-	rect.x -= params.full_x;
-	rect.y -= params.full_y;
-	rect.z -= params.full_x;
-	rect.w -= params.full_y;
-	assert(rect.x >= 0 && rect.y >= 0 && rect.z <= params.width && rect.w <= params.height);
-	return rect;
-}
-
-/* Helper macro that loops over all the pixels in the rect.
- * First, the buffer pointer is shifted to the starting point of the rect.
- * Then, after each line, the buffer pointer is shifted to the start of the next one. */
-#define FOREACH_PIXEL in += (rect.y*params.width + rect.x)*pass_stride; \
-                      for(int y = rect.y; y < rect.w; y++, in += (params.width + rect.x - rect.z)*pass_stride) \
-                          for(int x = rect.x; x < rect.z; x++, in += pass_stride, pixels += components)
-
-bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, int4 rect, float *pixels)
+bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels)
 {
-	rect = rect_to_local(rect);
-
 	int pass_offset = 0;
 
 	for(size_t j = 0; j < params.passes.size(); j++) {
@@ -215,19 +188,22 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
 		float scale = (pass.filter)? 1.0f/(float)sample: 1.0f;
 		float scale_exposure = (pass.exposure)? scale*exposure: scale;
 
+		int size = params.width*params.height;
 
 		if(components == 1) {
 			assert(pass.components == components);
 
 			/* scalar */
 			if(type == PASS_DEPTH) {
-				FOREACH_PIXEL {
-					pixels[0] = (in[0] == 0.0f)? 1e10f: in[0]*scale_exposure;
+				for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
+					float f = *in;
+					pixels[0] = (f == 0.0f)? 1e10f: f*scale_exposure;
 				}
 			}
 			else if(type == PASS_MIST) {
-				FOREACH_PIXEL {
-						pixels[0] = saturate(in[0]*scale_exposure);
+				for(int i = 0; i < size; i++, in += pass_stride, pixels++) {
+					float f = *in;
+					pixels[0] = saturate(f*scale_exposure);
 				}
 			}
 #ifdef WITH_CYCLES_DEBUG
@@ -236,14 +212,16 @@ bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int
 			        type == PASS_BVH_INTERSECTIONS ||
 			        type == PASS_RAY_BOUNCES)
 			{
-				FOREACH_PIXE

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list