[Bf-blender-cvs] [2f92506ad80] temp-cycles-denoising: Cycles Denoising: Hopefully fix Multi-GPU denoising

Lukas Stockner noreply at git.blender.org
Sun Mar 26 21:47:23 CEST 2017


Commit: 2f92506ad803fab727dda41a1a34affb264d8e63
Author: Lukas Stockner
Date:   Sun Mar 26 21:14:13 2017 +0200
Branches: temp-cycles-denoising
https://developer.blender.org/rB2f92506ad803fab727dda41a1a34affb264d8e63

Cycles Denoising: Hopefully fix Multi-GPU denoising

The problem is that tiles are currently allocated only on the GPU that renders them.
However, when a GPU is supposed to denoise a tile, it also needs all 8 neighboring tiles in its own memory.

Therefore, the code now also allocates the neighboring tiles on the denoising GPU and copies them to it.

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_multi.cpp
M	intern/cycles/device/device_task.h
M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/render/buffers.h
M	intern/cycles/render/session.cpp
M	intern/cycles/render/session.h

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index bd26cb66ba3..e924e40b99d 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -312,6 +312,8 @@ public:
 	/* multi device */
 	virtual void map_tile(Device * /*sub_device*/, RenderTile& /*tile*/) {}
 	virtual int device_number(Device * /*sub_device*/) { return 0; }
+	virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
+	virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {}
 
 	/* static */
 	static Device *create(DeviceInfo& info, Stats &stats, bool background = true);
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 45e3fe491e4..126f3fe90c4 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -643,13 +643,15 @@ public:
 
 		RenderTile rtiles[9];
 		rtiles[4] = tile;
-		task.get_neighbor_tiles(rtiles);
+		task.get_neighbor_tiles(rtiles, this);
 		denoising.tiles_from_rendertiles(rtiles);
 
 		denoising.init_from_devicetask(task);
 
 		denoising.run_denoising();
 
+		task.release_neighbor_tiles(rtiles, this);
+
 		task.update_progress(&tile, tile.w*tile.h);
 	}
 
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index daccc4d1225..a19d02f7039 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1260,13 +1260,14 @@ public:
 
 		RenderTile rtiles[9];
 		rtiles[4] = rtile;
-		task.get_neighbor_tiles(rtiles);
+		task.get_neighbor_tiles(rtiles, this);
 		denoising.tiles_from_rendertiles(rtiles);
 
 		denoising.init_from_devicetask(task);
 
-
 		denoising.run_denoising();
+
+		task.release_neighbor_tiles(rtiles, this);
 	}
 
 	void path_trace(RenderTile& rtile, int sample, bool branched)
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 3368fd3d756..0f579d87f1a 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -299,6 +299,40 @@ public:
 		return -1;
 	}
 
+	void map_neighbor_tiles(Device * sub_device, RenderTile * tiles)
+	{
+		for(int i = 0; i < 9; i++) {
+			/* If the tile isn't already allocated on the current device,
+			 * allocate and copy it now.
+			 * Note that this temporarily modifies the RenderBuffers,
+			 * so this function is not threadsafe. */
+			if(tiles[i].buffers->device != sub_device) {
+				device_vector<float> &mem = tiles[i].buffers->buffer;
+
+				device_ptr original_ptr = mem.device_pointer;
+				mem.device_pointer = 0;
+				sub_device->mem_alloc("Temporary memory for neighboring tile", mem, MEM_READ_WRITE);
+				sub_device->mem_copy_to(mem);
+				tiles[i].buffer = mem.device_pointer;
+				mem.device_pointer = original_ptr;
+			}
+		}
+	}
+
+	void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
+	{
+		for(int i = 0; i < 9; i++) {
+			if(tiles[i].buffers->device != sub_device) {
+				device_vector<float> &mem = tiles[i].buffers->buffer;
+
+				device_ptr original_ptr = mem.device_pointer;
+				mem.device_pointer = tiles[i].buffer;
+				sub_device->mem_free(mem);
+				mem.device_pointer = original_ptr;
+			}
+		}
+	}
+
 	int get_split_task_count(DeviceTask& task)
 	{
 		int total_tasks = 0;
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 9b958e159dc..e41d3b74ba6 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -65,7 +65,8 @@ public:
 	function<void(RenderTile&)> update_tile_sample;
 	function<void(RenderTile&)> release_tile;
 	function<bool(void)> get_cancel;
-	function<void(RenderTile*)> get_neighbor_tiles;
+	function<void(RenderTile*, Device*)> get_neighbor_tiles;
+	function<void(RenderTile*, Device*)> release_neighbor_tiles;
 
 	int denoising_radius;
 	float denoising_pca_threshold;
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index db1efe14e74..4cac759f8fc 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -946,12 +946,14 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task)
 
 	RenderTile rtiles[9];
 	rtiles[4] = rtile;
-	task.get_neighbor_tiles(rtiles);
+	task.get_neighbor_tiles(rtiles, this);
 	denoising.tiles_from_rendertiles(rtiles);
 
 	denoising.init_from_devicetask(task);
 
 	denoising.run_denoising();
+
+	task.release_neighbor_tiles(rtiles, this);
 }
 
 void OpenCLDeviceBase::shader(DeviceTask& task)
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index e415b822f7f..767014a6fa1 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -79,6 +79,8 @@ public:
 	/* random number generator state */
 	device_vector<uint> rng_state;
 
+	Device *device;
+
 	explicit RenderBuffers(Device *device);
 	~RenderBuffers();
 
@@ -89,8 +91,6 @@ public:
 
 protected:
 	void device_free();
-
-	Device *device;
 };
 
 /* Display Buffer
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 507cac1b167..fd5a6a17378 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -501,8 +501,10 @@ void Session::release_tile(RenderTile& rtile)
 	update_status_time();
 }
 
-void Session::get_neighbor_tiles(RenderTile *tiles)
+void Session::get_neighbor_tiles(RenderTile *tiles, Device *tile_device)
 {
+	thread_scoped_lock buffers_lock(buffers_mutex);
+
 	int center_idx = tiles[4].tile_index;
 	assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
 	BufferParams buffer_params = tile_manager.params;
@@ -538,6 +540,13 @@ void Session::get_neighbor_tiles(RenderTile *tiles)
 	}
 
 	assert(tiles[4].buffers);
+	device->map_neighbor_tiles(tile_device, tiles);
+}
+
+void Session::release_neighbor_tiles(RenderTile *tiles, Device *tile_device)
+{
+	thread_scoped_lock buffers_lock(buffers_mutex);
+	device->unmap_neighbor_tiles(tile_device, tiles);
 }
 
 void Session::run_cpu()
@@ -955,7 +964,8 @@ void Session::render()
 	
 	task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
 	task.release_tile = function_bind(&Session::release_tile, this, _1);
-	task.get_neighbor_tiles = function_bind(&Session::get_neighbor_tiles, this, _1);
+	task.get_neighbor_tiles = function_bind(&Session::get_neighbor_tiles, this, _1, _2);
+	task.release_neighbor_tiles = function_bind(&Session::release_neighbor_tiles, this, _1, _2);
 	task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
 	task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
 	task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 1d80e804fc8..aedecffc144 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -198,7 +198,9 @@ protected:
 	bool acquire_tile(Device *tile_device, RenderTile& tile);
 	void update_tile_sample(RenderTile& tile);
 	void release_tile(RenderTile& tile);
-	void get_neighbor_tiles(RenderTile *tiles);
+
+	void get_neighbor_tiles(RenderTile *tiles, Device *tile_device);
+	void release_neighbor_tiles(RenderTile *tiles, Device *tile_device);
 
 	bool device_use_gl;




More information about the Bf-blender-cvs mailing list