[Bf-blender-cvs] [3d5dbc1c449] blender-v3.0-release: Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge
Patrick Mours
noreply at git.blender.org
Tue Jan 11 10:38:37 CET 2022
Commit: 3d5dbc1c44907c73d2e6e57a146cbadaea9623bd
Author: Patrick Mours
Date: Mon Dec 6 14:58:35 2021 +0100
Branches: blender-v3.0-release
https://developer.blender.org/rB3d5dbc1c44907c73d2e6e57a146cbadaea9623bd
Cycles: Reintroduce device-only memory handling that got lost in Cycles X merge
Somehow only a part of rBf4f8b6dde32b0438e0b97a6d8ebeb89802987127 ended up in
Cycles X, causing the issue that commit fixed, "OPTIX_ERROR_INVALID_VALUE" when the
system is out of memory, to show up again.
This adds the missing changes to fix that problem.
Maniphest Tasks: T93620
Differential Revision: https://developer.blender.org/D13488
===================================================================
M intern/cycles/device/cpu/device_impl.cpp
M intern/cycles/device/cuda/device_impl.cpp
M intern/cycles/device/optix/device_impl.cpp
===================================================================
diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp
index 68dec7f0af2..5db89d1e4fb 100644
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -134,8 +134,7 @@ void CPUDevice::mem_alloc(device_memory &mem)
<< string_human_readable_size(mem.memory_size()) << ")";
}
- if (mem.type == MEM_DEVICE_ONLY) {
- assert(!mem.host_pointer);
+ if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
void *data = util_aligned_malloc(mem.memory_size(), alignment);
mem.device_pointer = (device_ptr)data;
@@ -194,7 +193,7 @@ void CPUDevice::mem_free(device_memory &mem)
tex_free((device_texture &)mem);
}
else if (mem.device_pointer) {
- if (mem.type == MEM_DEVICE_ONLY) {
+ if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
util_aligned_free((void *)mem.device_pointer);
}
mem.device_pointer = 0;
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index 20945796a2d..8c5779f4a72 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -680,7 +680,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
void *shared_pointer = 0;
- if (mem_alloc_result != CUDA_SUCCESS && can_map_host) {
+ if (mem_alloc_result != CUDA_SUCCESS && can_map_host && mem.type != MEM_DEVICE_ONLY) {
if (mem.shared_pointer) {
/* Another device already allocated host memory. */
mem_alloc_result = CUDA_SUCCESS;
@@ -703,8 +703,14 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
}
if (mem_alloc_result != CUDA_SUCCESS) {
- status = " failed, out of device and host memory";
- set_error("System is out of GPU and shared host memory");
+ if (mem.type == MEM_DEVICE_ONLY) {
+ status = " failed, out of device memory";
+ set_error("System is out of GPU memory");
+ }
+ else {
+ status = " failed, out of device and host memory";
+ set_error("System is out of GPU and shared host memory");
+ }
}
if (mem.name) {
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index f230f865f60..b33b5e21eee 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -44,14 +44,14 @@
CCL_NAMESPACE_BEGIN
OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
- : device(device), queue(device), state(device, "__denoiser_state")
+ : device(device), queue(device), state(device, "__denoiser_state", true)
{
}
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: CUDADevice(info, stats, profiler),
sbt_data(this, "__sbt", MEM_READ_ONLY),
- launch_params(this, "__params"),
+ launch_params(this, "__params", false),
denoiser_(this)
{
/* Make the CUDA context current. */
@@ -507,7 +507,7 @@ class OptiXDevice::DenoiseContext {
: denoise_params(task.params),
render_buffers(task.render_buffers),
buffer_params(task.buffer_params),
- guiding_buffer(device, "denoiser guiding passes buffer"),
+ guiding_buffer(device, "denoiser guiding passes buffer", true),
num_samples(task.num_samples)
{
num_input_passes = 1;
@@ -1001,6 +1001,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
const OptixBuildInput &build_input,
uint16_t num_motion_steps)
{
+ /* Allocate and build acceleration structures only one at a time, to prevent parallel builds
+ * from running out of memory (since both original and compacted acceleration structure memory
+ * may be allocated at the same time for the duration of this function). The builds would
+ * otherwise happen on the same CUDA stream anyway. */
+ static thread_mutex mutex;
+ thread_scoped_lock lock(mutex);
+
const CUDAContextScope scope(this);
const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
@@ -1026,13 +1033,14 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
/* Allocate required output buffers. */
- device_only_memory<char> temp_mem(this, "optix temp as build mem");
+ device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
if (!temp_mem.device_pointer) {
/* Make sure temporary memory allocation succeeded. */
return false;
}
+ /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
device_only_memory<char> &out_data = *bvh->as_data;
if (operation == OPTIX_BUILD_OPERATION_BUILD) {
assert(out_data.device == this);
@@ -1081,12 +1089,13 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
/* There is no point compacting if the size does not change. */
if (compacted_size < sizes.outputSizeInBytes) {
- device_only_memory<char> compacted_data(this, "optix compacted as");
+ device_only_memory<char> compacted_data(this, "optix compacted as", false);
compacted_data.alloc_to_device(compacted_size);
- if (!compacted_data.device_pointer)
+ if (!compacted_data.device_pointer) {
/* Do not compact if memory allocation for compacted acceleration structure fails.
* Can just use the uncompacted one then, so succeed here regardless. */
return !have_error();
+ }
optix_assert(optixAccelCompact(
context, NULL, out_handle, compacted_data.device_pointer, compacted_size, &out_handle));
@@ -1097,6 +1106,8 @@ bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
std::swap(out_data.device_size, compacted_data.device_size);
std::swap(out_data.device_pointer, compacted_data.device_pointer);
+ /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
+ */
}
}
@@ -1185,7 +1196,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
const float4 pw = make_float4(
curve_radius[ka], curve_radius[k0], curve_radius[k1], curve_radius[kb]);
- /* Convert Catmull-Rom data to Bezier spline. */
+ /* Convert Catmull-Rom data to B-spline. */
static const float4 cr2bsp0 = make_float4(+7, -4, +5, -2) / 6.f;
static const float4 cr2bsp1 = make_float4(-2, 11, -4, +1) / 6.f;
static const float4 cr2bsp2 = make_float4(+1, -4, 11, -2) / 6.f;
More information about the Bf-blender-cvs
mailing list