[Bf-blender-cvs] [40dd4041be1] cycles-x: Cycles X: restore shader raytracing for AO and bevel nodes

Brecht Van Lommel noreply at git.blender.org
Fri Jun 4 15:10:38 CEST 2021


Commit: 40dd4041be10eb0b45a911c1ca98430e9eed53a0
Author: Brecht Van Lommel
Date:   Thu May 27 12:32:55 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB40dd4041be10eb0b45a911c1ca98430e9eed53a0

Cycles X: restore shader raytracing for AO and bevel nodes

Adds a dedicated shade_surface_raytrace kernel so that shader raytracing no
longer slows down the other shading kernels on the GPU, as it did before.

Differential Revision: https://developer.blender.org/D11418

===================================================================

M	intern/cycles/device/cuda/queue.cpp
M	intern/cycles/device/device_kernel.cpp
M	intern/cycles/device/optix/device_impl.cpp
M	intern/cycles/device/optix/device_impl.h
M	intern/cycles/device/optix/queue.cpp
M	intern/cycles/integrator/path_trace_work_gpu.cpp
M	intern/cycles/integrator/path_trace_work_gpu.h
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/device/cuda/kernel.cu
A	intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
M	intern/cycles/kernel/integrator/integrator_intersect_closest.h
M	intern/cycles/kernel/integrator/integrator_megakernel.h
M	intern/cycles/kernel/integrator/integrator_shade_surface.h
M	intern/cycles/kernel/integrator/integrator_subsurface.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/svm/svm.h
M	intern/cycles/kernel/svm/svm_ao.h
M	intern/cycles/kernel/svm/svm_bevel.h

===================================================================

diff --git a/intern/cycles/device/cuda/queue.cpp b/intern/cycles/device/cuda/queue.cpp
index 02315b8e116..1746f45f840 100644
--- a/intern/cycles/device/cuda/queue.cpp
+++ b/intern/cycles/device/cuda/queue.cpp
@@ -105,6 +105,7 @@ bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *ar
     case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
     case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
     case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
     case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
     case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
     case DEVICE_KERNEL_INTEGRATOR_RESET:
diff --git a/intern/cycles/device/device_kernel.cpp b/intern/cycles/device/device_kernel.cpp
index d9ee7838c63..e8edbe5582d 100644
--- a/intern/cycles/device/device_kernel.cpp
+++ b/intern/cycles/device/device_kernel.cpp
@@ -40,6 +40,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
       return "integrator_shade_shadow";
     case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
       return "integrator_shade_surface";
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
+      return "integrator_shade_surface_raytrace";
     case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
       return "integrator_shade_volume";
     case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 57d1c101d0f..558fb09b3e0 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -188,12 +188,6 @@ bool OptiXDevice::load_kernels(const DeviceRequestedFeatures &requested_features
     return true;
   }
 
-  /* TODO: Shader raytracing requires OptiX to overwrite the shading kernels too! */
-  if (requested_features.nodes_features & NODE_FEATURE_RAYTRACE) {
-    set_error("AO and Bevel shader nodes are not currently supported with OptiX");
-    return false;
-  }
-
   const CUDAContextScope scope(this);
 
   /* Unload existing OptiX module and pipelines first. */
@@ -277,7 +271,12 @@ bool OptiXDevice::load_kernels(const DeviceRequestedFeatures &requested_features
             "the Optix SDK to be able to compile Optix kernels on demand).");
         return false;
       }
-      ptx_filename = compile_kernel(requested_features, "kernel", "optix", true);
+      ptx_filename = compile_kernel(requested_features,
+                                    (requested_features.nodes_features & NODE_FEATURE_RAYTRACE) ?
+                                        "kernel_shader_raytrace" :
+                                        "kernel",
+                                    "optix",
+                                    true);
     }
     if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) {
       set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str()));
@@ -383,17 +382,17 @@ bool OptiXDevice::load_kernels(const DeviceRequestedFeatures &requested_features
 
   /* Shader raytracing replaces some functions with direct callables. */
   if (requested_features.nodes_features & NODE_FEATURE_RAYTRACE) {
-    group_descs[PG_CALL + 0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
-    group_descs[PG_CALL + 0].callables.moduleDC = optix_module;
-    group_descs[PG_CALL + 0].callables.entryFunctionNameDC = "__direct_callable__svm_eval_nodes";
-    group_descs[PG_CALL + 1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
-    group_descs[PG_CALL + 1].callables.moduleDC = optix_module;
-    group_descs[PG_CALL + 1].callables.entryFunctionNameDC =
-        "__direct_callable__kernel_volume_shadow";
-    group_descs[PG_CALL + 2].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
-    group_descs[PG_CALL + 2].callables.moduleDC = optix_module;
-    group_descs[PG_CALL + 2].callables.entryFunctionNameDC =
-        "__direct_callable__subsurface_scatter_multi_setup";
+    group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
+    group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module;
+    group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.entryFunctionName =
+        "__raygen__kernel_optix_integrator_shade_surface_raytrace";
+    group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+    group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module;
+    group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC = "__direct_callable__svm_node_ao";
+    group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+    group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
+    group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
+        "__direct_callable__svm_node_bevel";
   }
 
   optix_assert(optixProgramGroupCreate(
@@ -437,6 +436,44 @@ bool OptiXDevice::load_kernels(const DeviceRequestedFeatures &requested_features
   link_options.overrideUsesMotionBlur = motion_blur;
 #  endif
 
+  if (requested_features.nodes_features & NODE_FEATURE_RAYTRACE) {
+    /* Create shader raytracing pipeline. */
+    vector<OptixProgramGroup> pipeline_groups;
+    pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+    pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
+    pipeline_groups.push_back(groups[PG_MISS]);
+    pipeline_groups.push_back(groups[PG_HITD]);
+    pipeline_groups.push_back(groups[PG_HITS]);
+    pipeline_groups.push_back(groups[PG_HITL]);
+#  if OPTIX_ABI_VERSION >= 36
+    if (motion_blur) {
+      pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+      pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+    }
+#  endif
+    pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
+    pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
+
+    optix_assert(optixPipelineCreate(context,
+                                     &pipeline_options,
+                                     &link_options,
+                                     pipeline_groups.data(),
+                                     pipeline_groups.size(),
+                                     nullptr,
+                                     0,
+                                     &pipelines[PIP_SHADE_RAYTRACE]));
+
+    /* Combine ray generation and trace continuation stack size. */
+    const unsigned int css = stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG +
+                             link_options.maxTraceDepth * trace_css;
+    const unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC,
+                                      stack_size[PG_CALL_SVM_BEVEL].dssDC);
+
+    /* Set stack size depending on pipeline options. */
+    optix_assert(optixPipelineSetStackSize(
+        pipelines[PIP_SHADE_RAYTRACE], 0, dss, css, motion_blur ? 3 : 2));
+  }
+
   { /* Create intersection-only pipeline. */
     vector<OptixProgramGroup> pipeline_groups;
     pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h
index 24b62b23abc..4f3a3a3effe 100644
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -33,6 +33,7 @@ enum {
   PG_RGEN_INTERSECT_CLOSEST,
   PG_RGEN_INTERSECT_SHADOW,
   PG_RGEN_INTERSECT_SUBSURFACE,
+  PG_RGEN_SHADE_SURFACE_RAYTRACE,
   PG_MISS,
   PG_HITD, /* Default hit group. */
   PG_HITS, /* __SHADOW_RECORD_ALL__ hit group. */
@@ -41,12 +42,24 @@ enum {
   PG_HITD_MOTION,
   PG_HITS_MOTION,
 #  endif
-  PG_CALL,
-  NUM_PROGRAM_GROUPS = PG_CALL + 3
+  PG_CALL_SVM_AO,
+  PG_CALL_SVM_BEVEL,
+  NUM_PROGRAM_GROUPS
 };
 
+static const int MISS_PROGRAM_GROUP_OFFSET = PG_MISS;
+static const int NUM_MIS_PROGRAM_GROUPS = 1;
+static const int HIT_PROGAM_GROUP_OFFSET = PG_HITD;
+#  if OPTIX_ABI_VERSION >= 36
+static const int NUM_HIT_PROGRAM_GROUPS = 5;
+#  else
+static const int NUM_HIT_PROGRAM_GROUPS = 3;
+#  endif
+static const int CALLABLE_PROGRAM_GROUPS_BASE = PG_CALL_SVM_AO;
+static const int NUM_CALLABLE_PROGRAM_GROUPS = 2;
+
 /* List of OptiX pipelines. */
-enum { PIP_MEGAKERNEL, PIP_INTERSECT, NUM_PIPELINES };
+enum { PIP_SHADE_RAYTRACE, PIP_INTERSECT, NUM_PIPELINES };
 
 /* A single shader binding table entry. */
 struct SbtRecord {
diff --git a/intern/cycles/device/optix/queue.cpp b/intern/cycles/device/optix/queue.cpp
index 5860632c364..a13743fcddf 100644
--- a/intern/cycles/device/optix/queue.cpp
+++ b/intern/cycles/device/optix/queue.cpp
@@ -40,7 +40,8 @@ void OptiXDeviceQueue::init_execution()
 
 static bool is_optix_specific_kernel(DeviceKernel kernel)
 {
-  return (kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
+  return (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
+          kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
           kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
           kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE);
 }
@@ -73,12 +74,25 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
                         sizeof(device_ptr),
                         cuda_stream_));
 
+  if (kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE) {
+    cuda_device_assert(
+        cuda_device_,
+        cuMemcpyHtoDAsync(launch_params_ptr + offsetof(KernelParamsOptiX, render_buffer),
+                          args[1],  // &d_render_buffer
+                          sizeof(device_ptr),
+                          cuda_stream_));
+  }
+
   cuda_device_assert(cuda_device_, cuStreamSynchronize(cuda_stream_));
 
   OptixPipeline pipeline = nullptr;
   OptixShaderBindingTable sbt_params = {};
 
   switch (kernel) {
+    case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
+      pipeline = optix_device->pipelines[PIP_SHADE_RAYTRACE];
+      sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_SHADE_SURFACE_RAYTRACE * sizeof(SbtRecord);
+      break;
     case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
       pipeline = optix_device->pipelines[PIP_INTERSECT];
       sbt_params.raygenRecord = sbt_data_ptr + PG_RGEN_INTERSECT_CLOSEST * sizeof(SbtRecord);
@@ -132,18 +146,14 @@ bool OptiXDeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *a
       return false;
   }
 
-  sbt_params.missRecordBase = sbt_data_ptr + PG_MISS * sizeof(SbtRecord);
+  sbt_params.missReco

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list