[Bf-blender-cvs] [555cdae6de8] cycles-x: Cleanup: add mechanism for devices to not have a megakernel
Brecht Van Lommel
noreply at git.blender.org
Wed Apr 28 20:07:50 CEST 2021
Commit: 555cdae6de861edba6173d7ea9912131cb3e5ee8
Author: Brecht Van Lommel
Date: Wed Apr 28 16:59:01 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB555cdae6de861edba6173d7ea9912131cb3e5ee8
Cleanup: add mechanism for devices to not have a megakernel
===================================================================
M intern/cycles/device/cuda/kernel.cpp
M intern/cycles/device/cuda/kernel.h
M intern/cycles/device/cuda/queue.cpp
M intern/cycles/device/cuda/queue.h
M intern/cycles/device/device_queue.h
M intern/cycles/integrator/path_trace_work_gpu.cpp
===================================================================
diff --git a/intern/cycles/device/cuda/kernel.cpp b/intern/cycles/device/cuda/kernel.cpp
index 0dd776769d2..793c9efe72a 100644
--- a/intern/cycles/device/cuda/kernel.cpp
+++ b/intern/cycles/device/cuda/kernel.cpp
@@ -48,6 +48,11 @@ const CUDADeviceKernel &CUDADeviceKernels::get(DeviceKernel kernel) const
return kernels_[(int)kernel];
}
+bool CUDADeviceKernels::available(DeviceKernel kernel) const
+{
+ return kernels_[(int)kernel].function != nullptr;
+}
+
CCL_NAMESPACE_END
#endif /* WITH_CUDA*/
diff --git a/intern/cycles/device/cuda/kernel.h b/intern/cycles/device/cuda/kernel.h
index dba8702f691..b489547a350 100644
--- a/intern/cycles/device/cuda/kernel.h
+++ b/intern/cycles/device/cuda/kernel.h
@@ -33,10 +33,10 @@ class CUDADevice;
/* CUDA kernel and associate occupancy information. */
class CUDADeviceKernel {
public:
- CUfunction function;
+ CUfunction function = nullptr;
- int num_threads_per_block;
- int min_blocks;
+ int num_threads_per_block = 0;
+ int min_blocks = 0;
};
/* Cache of CUDA kernels for each DeviceKernel. */
@@ -44,6 +44,7 @@ class CUDADeviceKernels {
public:
void load(CUDADevice *device);
const CUDADeviceKernel &get(DeviceKernel kernel) const;
+ bool available(DeviceKernel kernel) const;
protected:
CUDADeviceKernel kernels_[DEVICE_KERNEL_NUM];
diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp
index 9ee0298aa2a..32af06d85df 100644
--- a/intern/cycles/device/cuda/queue.cpp
+++ b/intern/cycles/device/cuda/queue.cpp
@@ -47,6 +47,11 @@ void CUDADeviceQueue::init_execution()
debug_init_execution();
}
+bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const
+{
+ return cuda_device_->kernels.available(kernel);
+}
+
bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
{
if (cuda_device_->have_error()) {
diff --git a/intern/cycles/device/cuda/queue.h b/intern/cycles/device/cuda/queue.h
index 59e10bcd711..acdcfd8a69e 100644
--- a/intern/cycles/device/cuda/queue.h
+++ b/intern/cycles/device/cuda/queue.h
@@ -37,6 +37,8 @@ class CUDADeviceQueue : public DeviceQueue {
virtual void init_execution() override;
+ virtual bool kernel_available(DeviceKernel kernel) const override;
+
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
virtual bool synchronize() override;
diff --git a/intern/cycles/device/device_queue.h b/intern/cycles/device/device_queue.h
index 5d556ec44dc..dffa3ce26b7 100644
--- a/intern/cycles/device/device_queue.h
+++ b/intern/cycles/device/device_queue.h
@@ -43,6 +43,9 @@ class DeviceQueue {
* Use this method after device synchronization has finished before enqueueing any kernels. */
virtual void init_execution() = 0;
+ /* Test if an optional device kernel is available. */
+ virtual bool kernel_available(DeviceKernel kernel) const = 0;
+
/* Enqueue kernel execution.
*
* Execute the kernel work_size times on the device.
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index 3590a788529..d477c92da13 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -204,7 +204,8 @@ bool PathTraceWorkGPU::enqueue_path_iteration()
const int max_num_paths = get_max_num_paths();
const float megakernel_threshold = 0.02f;
- const bool use_megakernel = (num_paths < megakernel_threshold * max_num_paths);
+ const bool use_megakernel = queue_->kernel_available(DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) &&
+ (num_paths < megakernel_threshold * max_num_paths);
if (use_megakernel) {
enqueue_path_iteration(DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL);
More information about the Bf-blender-cvs
mailing list