[Bf-blender-cvs] [6eefa6dd3b] temp-cycles-denoising: Cycles Denoising: Deduplicate split/mega kernel code in the CPUDevice

Lukas Stockner noreply at git.blender.org
Fri Mar 24 20:18:21 CET 2017


Commit: 6eefa6dd3b397c650b5e4f3dabf8499d13afc53a
Author: Lukas Stockner
Date:   Tue Mar 14 02:20:38 2017 +0100
Branches: temp-cycles-denoising
https://developer.blender.org/rB6eefa6dd3b397c650b5e4f3dabf8499d13afc53a

Cycles Denoising: Deduplicate split/mega kernel code in the CPUDevice

As a side effect, denoising is now also executed when using the split kernel, but the result is black since the kernel doesn't write the data yet.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_memory.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index f0366a4e90..8b3f30f1b7 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -364,12 +364,7 @@ public:
 	void thread_run(DeviceTask *task)
 	{
 		if(task->type == DeviceTask::RENDER) {
-			if(!use_split_kernel) {
-				thread_render(*task);
-			}
-			else {
-				thread_path_trace_split(*task);
-			}
+			thread_render(*task);
 		}
 		else if(task->type == DeviceTask::FILM_CONVERT)
 			thread_film_convert(*task);
@@ -587,48 +582,63 @@ public:
 		}
 		return true;
 	}
- 
-	void thread_path_trace_split(DeviceTask& task)
+
+	void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
 	{
-		if(task_pool.canceled()) {
-			if(task.need_finish_queue == false)
-				return;
-		}
+		float *render_buffer = (float*)tile.buffer;
+		uint *rng_state = (uint*)tile.rng_state;
+		int start_sample = tile.start_sample;
+		int end_sample = tile.start_sample + tile.num_samples;
 
-		RenderTile tile;
+		for(int sample = start_sample; sample < end_sample; sample++) {
+#ifdef WITH_CYCLES_DEBUG_FPE
+			scoped_fpe fpe(FPE_ENABLED);
+#endif
+			if(task.get_cancel() || task_pool.canceled()) {
+				if(task.need_finish_queue == false)
+					break;
+			}
 
-		CPUSplitKernel split_kernel(this);
+			for(int y = tile.y; y < tile.y + tile.h; y++) {
+				for(int x = tile.x; x < tile.x + tile.w; x++) {
+					path_trace_kernel()(kg, render_buffer, rng_state,
+					                    sample, x, y, tile.offset, tile.stride);
+				}
+			}
 
-		/* allocate buffer for kernel globals */
-		device_only_memory<KernelGlobals> kgbuffer;
-		kgbuffer.resize(1);
-		mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
+			tile.sample = sample + 1;
 
-		KernelGlobals *kg = (KernelGlobals*)kgbuffer.device_pointer;
-		*kg = thread_kernel_globals_init();
+#ifdef WITH_CYCLES_DEBUG_FPE
+			fpe.restore();
+#endif
+			task.update_progress(&tile, tile.w*tile.h);
+		}
+	}
 
-		requested_features.max_closure = MAX_CLOSURE;
-		if(!split_kernel.load_kernels(requested_features)) {
-			thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
-			mem_free(kgbuffer);
+	void denoise(DeviceTask &task, RenderTile &tile)
+	{
+		tile.sample = tile.start_sample + tile.num_samples;
 
-			return;
-		}
+		DenoisingTask denoising(this);
+		denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
+		denoising.render_buffer.samples = tile.sample;
 
-		while(task.acquire_tile(this, tile)) {
-			device_memory data;
-			split_kernel.path_trace(&task, tile, kgbuffer, data);
+		RenderTile rtiles[9];
+		rtiles[4] = tile;
+		task.get_neighbor_tiles(rtiles);
+		denoising.tiles_from_rendertiles(rtiles);
 
-			task.release_tile(tile);
+		denoising.init_from_devicetask(task);
+		denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
+		denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
+		denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
+		denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
+		denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
+		denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
 
-			if(task_pool.canceled()) {
-				if(task.need_finish_queue == false)
-					break;
-			}
-		}
+		denoising.run_denoising();
 
-		thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
-		mem_free(kgbuffer);
+		task.update_progress(&tile, tile.w*tile.h);
 	}
 
 	void thread_render(DeviceTask& task)
@@ -638,84 +648,39 @@ public:
 				return;
 		}
 
-		KernelGlobals kg = thread_kernel_globals_init();
-		RenderTile tile;
-
-		while(task.acquire_tile(this, tile)) {
-			float *render_buffer = (float*)tile.buffer;
-
-			if(tile.task == RenderTile::PATH_TRACE) {
-				uint *rng_state = (uint*)tile.rng_state;
-				int start_sample = tile.start_sample;
-				int end_sample = tile.start_sample + tile.num_samples;
+		/* allocate buffer for kernel globals */
+		device_only_memory<KernelGlobals> kgbuffer;
+		kgbuffer.resize(1);
+		mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);
 
-				for(int sample = start_sample; sample < end_sample; sample++) {
-#ifdef WITH_CYCLES_DEBUG_FPE
-					scoped_fpe fpe(FPE_ENABLED);
-#endif
-					if(task.get_cancel() || task_pool.canceled()) {
-						if(task.need_finish_queue == false)
-							break;
-					}
+		KernelGlobals *kg = (KernelGlobals*)kgbuffer.device_pointer;
+		*kg = thread_kernel_globals_init();
 
-					for(int y = tile.y; y < tile.y + tile.h; y++) {
-						for(int x = tile.x; x < tile.x + tile.w; x++) {
-							path_trace_kernel()(&kg, render_buffer, rng_state,
-							                    sample, x, y, tile.offset, tile.stride);
-						}
-					}
+		CPUSplitKernel *split_kernel = NULL;
+		if(use_split_kernel) {
+			split_kernel = new CPUSplitKernel(this);
+			requested_features.max_closure = MAX_CLOSURE;
+			if(!split_kernel->load_kernels(requested_features)) {
+				thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
+				mem_free(kgbuffer);
 
-					tile.sample = sample + 1;
+				return;
+			}
+		}
 
-#ifdef WITH_CYCLES_DEBUG_FPE
-					fpe.restore();
-#endif
-					task.update_progress(&tile, tile.w*tile.h);
+		RenderTile tile;
+		while(task.acquire_tile(this, tile)) {
+			if(tile.task == RenderTile::PATH_TRACE) {
+				if(use_split_kernel) {
+					device_memory data;
+					split_kernel->path_trace(&task, tile, kgbuffer, data);
 				}
-
-				if(tile.buffers->params.overscan && !task.get_cancel()) {
-					DenoisingTask denoising(this);
-
-					int overscan = tile.buffers->params.overscan;
-					denoising.filter_area = make_int4(tile.x + overscan, tile.y + overscan, tile.w - 2*overscan, tile.h - 2*overscan);
-					denoising.render_buffer.samples = end_sample;
-
-					denoising.tiles_from_single_tile(tile);
-					denoising.init_from_devicetask(task);
-
-					denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
-					denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
-					denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
-					denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
-					denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
-					denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
-
-					denoising.run_denoising();
+				else {
+					path_trace(task, tile, kg);
 				}
 			}
 			else if(tile.task == RenderTile::DENOISE) {
-				tile.sample = tile.start_sample + tile.num_samples;
-
-				DenoisingTask denoising(this);
-				denoising.filter_area = make_int4(tile.x, tile.y, tile.w, tile.h);
-				denoising.render_buffer.samples = tile.sample;
-
-				RenderTile rtiles[9];
-				rtiles[4] = tile;
-				task.get_neighbor_tiles(rtiles);
-				denoising.tiles_from_rendertiles(rtiles);
-
-				denoising.init_from_devicetask(task);
-				denoising.functions.construct_transform = function_bind(&CPUDevice::denoising_construct_transform, this, &denoising);
-				denoising.functions.reconstruct = function_bind(&CPUDevice::denoising_reconstruct, this, _1, _2, _3, _4, _5, &denoising);
-				denoising.functions.divide_shadow = function_bind(&CPUDevice::denoising_divide_shadow, this, _1, _2, _3, _4, _5, &denoising);
-				denoising.functions.non_local_means = function_bind(&CPUDevice::denoising_non_local_means, this, _1, _2, _3, _4, &denoising);
-				denoising.functions.combine_halves = function_bind(&CPUDevice::denoising_combine_halves, this, _1, _2, _3, _4, _5, _6, &denoising);
-				denoising.functions.get_feature = function_bind(&CPUDevice::denoising_get_feature, this, _1, _2, _3, _4, &denoising);
-
-				denoising.run_denoising();
-
-				task.update_progress(&tile, tile.w*tile.h);
+				denoise(task, tile);
 			}
 
 			task.release_tile(tile);
@@ -726,7 +691,9 @@ public:
 			}
 		}
 
-		thread_kernel_globals_free(&kg);
+		thread_kernel_globals_free((KernelGlobals*)kgbuffer.device_pointer);
+		mem_free(kgbuffer);
+		delete split_kernel;
 	}
 
 	void thread_film_convert(DeviceTask& task)
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index ce7f46c8f3..4c6a69b36f 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -224,9 +224,7 @@ public:
 	device_only_memory()
 	{
 		data_type = device_type_traits<T>::data_type;
-		data_elements = device_type_traits<T>::num_elements;
-
-		assert(data_elements > 0);
+		data_elements = max(device_type_traits<T>::num_elements, 1);
 	}
 
 	void resize(size_t num)




More information about the Bf-blender-cvs mailing list