[Bf-blender-cvs] [98a5c924fca] master: Cycles: Metal readiness: Specify DeviceQueue::enqueue arg types
Michael Jones
noreply at git.blender.org
Mon Nov 29 15:56:13 CET 2021
Commit: 98a5c924fca00b4b39e75a4fc16585cfa040398c
Author: Michael Jones
Date: Mon Nov 29 14:49:53 2021 +0000
Branches: master
https://developer.blender.org/rB98a5c924fca00b4b39e75a4fc16585cfa040398c
Cycles: Metal readiness: Specify DeviceQueue::enqueue arg types
This patch adds new arg-type parameters to `DeviceQueue::enqueue` and its overrides. This is in preparation for the Metal backend, which needs this information for correct argument encoding.
Ref T92212
Reviewed By: brecht
Maniphest Tasks: T92212
Differential Revision: https://developer.blender.org/D13357
===================================================================
M intern/cycles/device/cuda/device_impl.cpp
M intern/cycles/device/cuda/queue.cpp
M intern/cycles/device/cuda/queue.h
M intern/cycles/device/hip/device_impl.cpp
M intern/cycles/device/hip/queue.cpp
M intern/cycles/device/hip/queue.h
M intern/cycles/device/optix/device_impl.cpp
M intern/cycles/device/optix/queue.cpp
M intern/cycles/device/optix/queue.h
M intern/cycles/device/queue.h
M intern/cycles/integrator/pass_accessor_gpu.cpp
M intern/cycles/integrator/path_trace_work_gpu.cpp
M intern/cycles/integrator/shader_eval.cpp
===================================================================
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index e05fef3897c..ee55e6dc632 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -477,10 +477,10 @@ void CUDADevice::reserve_local_memory(const uint kernel_features)
* still to make it faster. */
CUDADeviceQueue queue(this);
- void *d_path_index = nullptr;
- void *d_render_buffer = nullptr;
+ device_ptr d_path_index = 0;
+ device_ptr d_render_buffer = 0;
int d_work_size = 0;
- void *args[] = {&d_path_index, &d_render_buffer, &d_work_size};
+ DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
queue.init_execution();
queue.enqueue(test_kernel, 1, args);
diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp
index 09352a84181..880d7ca4cf2 100644
--- a/intern/cycles/device/cuda/queue.cpp
+++ b/intern/cycles/device/cuda/queue.cpp
@@ -89,7 +89,9 @@ bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const
return cuda_device_->kernels.available(kernel);
}
-bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
+bool CUDADeviceQueue::enqueue(DeviceKernel kernel,
+ const int work_size,
+ DeviceKernelArguments const &args)
{
if (cuda_device_->have_error()) {
return false;
@@ -133,7 +135,7 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar
1,
shared_mem_bytes,
cuda_stream_,
- args,
+ const_cast<void**>(args.values),
0),
"enqueue");
diff --git a/intern/cycles/device/cuda/queue.h b/intern/cycles/device/cuda/queue.h
index 28613cda071..0836af12098 100644
--- a/intern/cycles/device/cuda/queue.h
+++ b/intern/cycles/device/cuda/queue.h
@@ -42,7 +42,9 @@ class CUDADeviceQueue : public DeviceQueue {
virtual bool kernel_available(DeviceKernel kernel) const override;
- virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
+ virtual bool enqueue(DeviceKernel kernel,
+ const int work_size,
+ DeviceKernelArguments const &args) override;
virtual bool synchronize() override;
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index 53c4f3f0b3f..4f1cbabc89b 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -440,10 +440,10 @@ void HIPDevice::reserve_local_memory(const uint kernel_features)
* still to make it faster. */
HIPDeviceQueue queue(this);
- void *d_path_index = nullptr;
- void *d_render_buffer = nullptr;
+ device_ptr d_path_index = 0;
+ device_ptr d_render_buffer = 0;
int d_work_size = 0;
- void *args[] = {&d_path_index, &d_render_buffer, &d_work_size};
+ DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
queue.init_execution();
queue.enqueue(test_kernel, 1, args);
diff --git a/intern/cycles/device/hip/queue.cpp b/intern/cycles/device/hip/queue.cpp
index 0f053ccbeb5..42841324ed6 100644
--- a/intern/cycles/device/hip/queue.cpp
+++ b/intern/cycles/device/hip/queue.cpp
@@ -89,7 +89,9 @@ bool HIPDeviceQueue::kernel_available(DeviceKernel kernel) const
return hip_device_->kernels.available(kernel);
}
-bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
+bool HIPDeviceQueue::enqueue(DeviceKernel kernel,
+ const int work_size,
+ DeviceKernelArguments const &args)
{
if (hip_device_->have_error()) {
return false;
@@ -132,7 +134,7 @@ bool HIPDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *arg
1,
shared_mem_bytes,
hip_stream_,
- args,
+ const_cast<void**>(args.values),
0),
"enqueue");
diff --git a/intern/cycles/device/hip/queue.h b/intern/cycles/device/hip/queue.h
index 95d1afaff0f..8040d367798 100644
--- a/intern/cycles/device/hip/queue.h
+++ b/intern/cycles/device/hip/queue.h
@@ -42,7 +42,9 @@ class HIPDeviceQueue : public DeviceQueue {
virtual bool kernel_available(DeviceKernel kernel) const override;
- virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
+ virtual bool enqueue(DeviceKernel kernel,
+ const int work_size,
+ DeviceKernelArguments const &args) override;
virtual bool synchronize() override;
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index b82b1281eb8..1d893d9c65b 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -667,22 +667,22 @@ bool OptiXDevice::denoise_filter_guiding_preprocess(DenoiseContext &context)
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
- const_cast<int *>(&context.guiding_params.pass_stride),
- const_cast<int *>(&context.guiding_params.pass_albedo),
- const_cast<int *>(&context.guiding_params.pass_normal),
- &context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.offset),
- const_cast<int *>(&buffer_params.stride),
- const_cast<int *>(&buffer_params.pass_stride),
- const_cast<int *>(&context.pass_sample_count),
- const_cast<int *>(&context.pass_denoising_albedo),
- const_cast<int *>(&context.pass_denoising_normal),
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height),
- const_cast<int *>(&context.num_samples)};
+ DeviceKernelArguments args(&context.guiding_params.device_pointer,
+ &context.guiding_params.pass_stride,
+ &context.guiding_params.pass_albedo,
+ &context.guiding_params.pass_normal,
+ &context.render_buffers->buffer.device_pointer,
+ &buffer_params.offset,
+ &buffer_params.stride,
+ &buffer_params.pass_stride,
+ &context.pass_sample_count,
+ &context.pass_denoising_albedo,
+ &context.pass_denoising_normal,
+ &buffer_params.full_x,
+ &buffer_params.full_y,
+ &buffer_params.width,
+ &buffer_params.height,
+ &context.num_samples);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
@@ -693,11 +693,11 @@ bool OptiXDevice::denoise_filter_guiding_set_fake_albedo(DenoiseContext &context
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {const_cast<device_ptr *>(&context.guiding_params.device_pointer),
- const_cast<int *>(&context.guiding_params.pass_stride),
- const_cast<int *>(&context.guiding_params.pass_albedo),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height)};
+ DeviceKernelArguments args(&context.guiding_params.device_pointer,
+ &context.guiding_params.pass_stride,
+ &context.guiding_params.pass_albedo,
+ &buffer_params.width,
+ &buffer_params.height);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}
@@ -793,15 +793,15 @@ bool OptiXDevice::denoise_filter_color_preprocess(DenoiseContext &context, const
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {&context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.width),
- const_cast<int *>(&buffer_params.height),
- const_cast<int *>(&buffer_params.offset),
- const_cast<int *>(&buffer_params.stride),
- const_cast<int *>(&buffer_params.pass_stride),
- const_cast<int *>(&pass.denoised_offset)};
+ DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
+ &buffer_params.full_x,
+ &buffer_params.full_y,
+ &buffer_params.width,
+ &buffer_params.height,
+ &buffer_params.offset,
+ &buffer_params.stride,
+ &buffer_params.pass_stride,
+ &pass.denoised_offset);
return denoiser_.queue.enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}
@@ -813,20 +813,20 @@ bool OptiXDevice::denoise_filter_color_postprocess(DenoiseContext &context,
const int work_size = buffer_params.width * buffer_params.height;
- void *args[] = {&context.render_buffers->buffer.device_pointer,
- const_cast<int *>(&buffer_params.full_x),
- const_cast<int *>(&buffer_params.full_y),
- const_cast<int *>(&buffer_params.wid
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list