[Bf-blender-cvs] [2f92506ad80] temp-cycles-denoising: Cycles Denoising: Hopefully fix Multi-GPU denoising
Lukas Stockner
noreply at git.blender.org
Sun Mar 26 21:47:23 CEST 2017
Commit: 2f92506ad803fab727dda41a1a34affb264d8e63
Author: Lukas Stockner
Date: Sun Mar 26 21:14:13 2017 +0200
Branches: temp-cycles-denoising
https://developer.blender.org/rB2f92506ad803fab727dda41a1a34affb264d8e63
Cycles Denoising: Hopefully fix Multi-GPU denoising
The problem there is that currently tiles get allocated on the GPU that's used to render them.
However, if a GPU is supposed to denoise a tile, it needs all 8 neighbors in its memory as well.
Therefore, the code now allocates and copies the tiles on the denoising GPU as well.
===================================================================
M intern/cycles/device/device.h
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/device/device_multi.cpp
M intern/cycles/device/device_task.h
M intern/cycles/device/opencl/opencl_base.cpp
M intern/cycles/render/buffers.h
M intern/cycles/render/session.cpp
M intern/cycles/render/session.h
===================================================================
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index bd26cb66ba3..e924e40b99d 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -312,6 +312,8 @@ public:
/* multi device */
virtual void map_tile(Device * /*sub_device*/, RenderTile& /*tile*/) {}
virtual int device_number(Device * /*sub_device*/) { return 0; }
+ virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {} /* no-op in the base class */
+ virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTile * /*tiles*/) {} /* no-op in the base class; counterpart of map_neighbor_tiles() */
/* static */
static Device *create(DeviceInfo& info, Stats &stats, bool background = true);
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 45e3fe491e4..126f3fe90c4 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -643,13 +643,15 @@ public:
RenderTile rtiles[9];
rtiles[4] = tile;
- task.get_neighbor_tiles(rtiles);
+ task.get_neighbor_tiles(rtiles, this);
denoising.tiles_from_rendertiles(rtiles);
denoising.init_from_devicetask(task);
denoising.run_denoising();
+ task.release_neighbor_tiles(rtiles, this);
+
task.update_progress(&tile, tile.w*tile.h);
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index daccc4d1225..a19d02f7039 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1260,13 +1260,14 @@ public:
RenderTile rtiles[9];
rtiles[4] = rtile;
- task.get_neighbor_tiles(rtiles);
+ task.get_neighbor_tiles(rtiles, this);
denoising.tiles_from_rendertiles(rtiles);
denoising.init_from_devicetask(task);
-
denoising.run_denoising();
+
+ task.release_neighbor_tiles(rtiles, this);
}
void path_trace(RenderTile& rtile, int sample, bool branched)
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 3368fd3d756..0f579d87f1a 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -299,6 +299,40 @@ public:
return -1;
}
+ /* Make the 9 tiles of a denoising neighborhood usable on sub_device.
+ * tiles points to an array of 9 RenderTiles; index 4 is the center tile.
+ * For each tile whose RenderBuffers belong to a different device, memory is
+ * allocated on sub_device, filled through mem_copy_to(), and tiles[i].buffer
+ * is set to that new allocation. The RenderBuffers' own device pointer is
+ * restored before returning.
+ * Every call must be paired with unmap_neighbor_tiles() on the same array. */
+ void map_neighbor_tiles(Device * sub_device, RenderTile * tiles)
+ {
+ for(int i = 0; i < 9; i++) {
+ /* The caller only guarantees buffers for the center tile, so a neighbor
+ * slot may have none (presumably at the image border - TODO confirm).
+ * There is nothing to map for such a slot. */
+ if(!tiles[i].buffers) {
+ continue;
+ }
+ /* If the tile isn't already allocated on the current device,
+ * allocate and copy it now.
+ * Note that this temporarily modifies the RenderBuffers,
+ * so this function is not threadsafe. */
+ if(tiles[i].buffers->device != sub_device) {
+ device_vector<float> &mem = tiles[i].buffers->buffer;
+
+ /* Clear the pointer so mem_alloc() creates a fresh allocation,
+ * then put the original pointer back once the tile has its own copy. */
+ device_ptr original_ptr = mem.device_pointer;
+ mem.device_pointer = 0;
+ sub_device->mem_alloc("Temporary memory for neighboring tile", mem, MEM_READ_WRITE);
+ sub_device->mem_copy_to(mem);
+ tiles[i].buffer = mem.device_pointer;
+ mem.device_pointer = original_ptr;
+ }
+ }
+ }
+
+ /* Release the temporary allocations made by map_neighbor_tiles().
+ * tiles points to the same array of 9 RenderTiles that was mapped.
+ * For each tile whose RenderBuffers belong to a different device than
+ * sub_device, the memory referenced by tiles[i].buffer is freed on sub_device.
+ * Like the mapping step, this temporarily modifies the RenderBuffers. */
+ void unmap_neighbor_tiles(Device * sub_device, RenderTile * tiles)
+ {
+ for(int i = 0; i < 9; i++) {
+ /* Slots without buffers were never mapped, so there is nothing to free
+ * and dereferencing them would be invalid. */
+ if(!tiles[i].buffers) {
+ continue;
+ }
+ if(tiles[i].buffers->device != sub_device) {
+ device_vector<float> &mem = tiles[i].buffers->buffer;
+
+ /* Swap in the temporary allocation's pointer so mem_free() releases
+ * that one, then restore the RenderBuffers' original pointer. */
+ device_ptr original_ptr = mem.device_pointer;
+ mem.device_pointer = tiles[i].buffer;
+ sub_device->mem_free(mem);
+ mem.device_pointer = original_ptr;
+ }
+ }
+ }
+
int get_split_task_count(DeviceTask& task)
{
int total_tasks = 0;
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 9b958e159dc..e41d3b74ba6 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -65,7 +65,8 @@ public:
function<void(RenderTile&)> update_tile_sample;
function<void(RenderTile&)> release_tile;
function<bool(void)> get_cancel;
- function<void(RenderTile*)> get_neighbor_tiles;
+ function<void(RenderTile*, Device*)> get_neighbor_tiles;
+ function<void(RenderTile*, Device*)> release_neighbor_tiles;
int denoising_radius;
float denoising_pca_threshold;
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index db1efe14e74..4cac759f8fc 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -946,12 +946,14 @@ void OpenCLDeviceBase::denoise(RenderTile &rtile, const DeviceTask &task)
RenderTile rtiles[9];
rtiles[4] = rtile;
- task.get_neighbor_tiles(rtiles);
+ task.get_neighbor_tiles(rtiles, this);
denoising.tiles_from_rendertiles(rtiles);
denoising.init_from_devicetask(task);
denoising.run_denoising();
+
+ task.release_neighbor_tiles(rtiles, this);
}
void OpenCLDeviceBase::shader(DeviceTask& task)
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index e415b822f7f..767014a6fa1 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -79,6 +79,8 @@ public:
/* random number generator state */
device_vector<uint> rng_state;
+ Device *device;
+
explicit RenderBuffers(Device *device);
~RenderBuffers();
@@ -89,8 +91,6 @@ public:
protected:
void device_free();
-
- Device *device;
};
/* Display Buffer
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 507cac1b167..fd5a6a17378 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -501,8 +501,10 @@ void Session::release_tile(RenderTile& rtile)
update_status_time();
}
-void Session::get_neighbor_tiles(RenderTile *tiles)
+void Session::get_neighbor_tiles(RenderTile *tiles, Device *tile_device)
{
+ thread_scoped_lock buffers_lock(buffers_mutex);
+
int center_idx = tiles[4].tile_index;
assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE);
BufferParams buffer_params = tile_manager.params;
@@ -538,6 +540,13 @@ void Session::get_neighbor_tiles(RenderTile *tiles)
}
assert(tiles[4].buffers);
+ device->map_neighbor_tiles(tile_device, tiles);
+}
+
+void Session::release_neighbor_tiles(RenderTile *tiles, Device *tile_device)
+{ /* Counterpart of get_neighbor_tiles(): passes the tile array back to the device for unmapping. */
+ thread_scoped_lock buffers_lock(buffers_mutex); /* held for the whole call, as in get_neighbor_tiles() */
+ device->unmap_neighbor_tiles(tile_device, tiles);
}
void Session::run_cpu()
@@ -955,7 +964,8 @@ void Session::render()
task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
task.release_tile = function_bind(&Session::release_tile, this, _1);
- task.get_neighbor_tiles = function_bind(&Session::get_neighbor_tiles, this, _1);
+ task.get_neighbor_tiles = function_bind(&Session::get_neighbor_tiles, this, _1, _2);
+ task.release_neighbor_tiles = function_bind(&Session::release_neighbor_tiles, this, _1, _2);
task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 1d80e804fc8..aedecffc144 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -198,7 +198,9 @@ protected:
bool acquire_tile(Device *tile_device, RenderTile& tile);
void update_tile_sample(RenderTile& tile);
void release_tile(RenderTile& tile);
- void get_neighbor_tiles(RenderTile *tiles);
+
+ void get_neighbor_tiles(RenderTile *tiles, Device *tile_device);
+ void release_neighbor_tiles(RenderTile *tiles, Device *tile_device);
bool device_use_gl;
More information about the Bf-blender-cvs
mailing list