[Bf-blender-cvs] [c10546f5e9f] master: Cycles: Add support for shader raytracing in OptiX

Patrick Mours noreply at git.blender.org
Fri Dec 4 13:04:39 CET 2020


Commit: c10546f5e9fe2a300b6a21e1e16b22c93060d0e9
Author: Patrick Mours
Date:   Thu Dec 3 12:19:36 2020 +0100
Branches: master
https://developer.blender.org/rBc10546f5e9fe2a300b6a21e1e16b22c93060d0e9

Cycles: Add support for shader raytracing in OptiX

Support for the AO and bevel shader nodes requires calling "optixTrace" from within the shading
VM, which is only allowed from functions inlined into the raygen program or from callables. This
patch therefore converts the shading VM to use direct callables to make it work. To prevent
performance regressions, a separate kernel module is compiled and used only when shader
raytracing is requested.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D9733

===================================================================

M	intern/cycles/device/device_optix.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/kernel_subsurface.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernel_volume.h
M	intern/cycles/kernel/svm/svm.h

===================================================================

diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 95234845f98..682540a51fd 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -141,7 +141,8 @@ class OptiXDevice : public CUDADevice {
     PG_BAKE,  // kernel_bake_evaluate
     PG_DISP,  // kernel_displace_evaluate
     PG_BACK,  // kernel_background_evaluate
-    NUM_PROGRAM_GROUPS
+    PG_CALL,
+    NUM_PROGRAM_GROUPS = PG_CALL + 3
   };
 
   // List of OptiX pipelines
@@ -334,11 +335,6 @@ class OptiXDevice : public CUDADevice {
       set_error("OptiX backend does not support baking yet");
       return false;
     }
-    // Disable shader raytracing support for now, since continuation callables are slow
-    if (requested_features.use_shader_raytrace) {
-      set_error("OptiX backend does not support 'Ambient Occlusion' and 'Bevel' shader nodes yet");
-      return false;
-    }
 
     const CUDAContextScope scope(cuContext);
 
@@ -410,7 +406,9 @@ class OptiXDevice : public CUDADevice {
     }
 
     {  // Load and compile PTX module with OptiX kernels
-      string ptx_data, ptx_filename = path_get("lib/kernel_optix.ptx");
+      string ptx_data, ptx_filename = path_get(requested_features.use_shader_raytrace ?
+                                                   "lib/kernel_optix_shader_raytrace.ptx" :
+                                                   "lib/kernel_optix.ptx");
       if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
         if (!getenv("OPTIX_ROOT_DIR")) {
           set_error(
@@ -525,6 +523,21 @@ class OptiXDevice : public CUDADevice {
       group_descs[PG_BACK].raygen.entryFunctionName = "__raygen__kernel_optix_background";
     }
 
+    // Shader raytracing replaces some functions with direct callables
+    if (requested_features.use_shader_raytrace) {
+      group_descs[PG_CALL + 0].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+      group_descs[PG_CALL + 0].callables.moduleDC = optix_module;
+      group_descs[PG_CALL + 0].callables.entryFunctionNameDC = "__direct_callable__svm_eval_nodes";
+      group_descs[PG_CALL + 1].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+      group_descs[PG_CALL + 1].callables.moduleDC = optix_module;
+      group_descs[PG_CALL + 1].callables.entryFunctionNameDC =
+          "__direct_callable__kernel_volume_shadow";
+      group_descs[PG_CALL + 2].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
+      group_descs[PG_CALL + 2].callables.moduleDC = optix_module;
+      group_descs[PG_CALL + 2].callables.entryFunctionNameDC =
+          "__direct_callable__subsurface_scatter_multi_setup";
+    }
+
     check_result_optix_ret(optixProgramGroupCreate(
         context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, 0, groups));
 
@@ -564,33 +577,51 @@ class OptiXDevice : public CUDADevice {
 #  endif
 
     {  // Create path tracing pipeline
-      OptixProgramGroup pipeline_groups[] = {
-        groups[PG_RGEN],
-        groups[PG_MISS],
-        groups[PG_HITD],
-        groups[PG_HITS],
-        groups[PG_HITL],
+      vector<OptixProgramGroup> pipeline_groups;
+      pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+      pipeline_groups.push_back(groups[PG_RGEN]);
+      pipeline_groups.push_back(groups[PG_MISS]);
+      pipeline_groups.push_back(groups[PG_HITD]);
+      pipeline_groups.push_back(groups[PG_HITS]);
+      pipeline_groups.push_back(groups[PG_HITL]);
 #  if OPTIX_ABI_VERSION >= 36
-        groups[PG_HITD_MOTION],
-        groups[PG_HITS_MOTION],
+      if (motion_blur) {
+        pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+        pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+      }
 #  endif
-      };
-      check_result_optix_ret(
-          optixPipelineCreate(context,
-                              &pipeline_options,
-                              &link_options,
-                              pipeline_groups,
-                              (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])),
-                              nullptr,
-                              0,
-                              &pipelines[PIP_PATH_TRACE]));
+      if (requested_features.use_shader_raytrace) {
+        pipeline_groups.push_back(groups[PG_CALL + 0]);
+        pipeline_groups.push_back(groups[PG_CALL + 1]);
+        pipeline_groups.push_back(groups[PG_CALL + 2]);
+      }
+
+      check_result_optix_ret(optixPipelineCreate(context,
+                                                 &pipeline_options,
+                                                 &link_options,
+                                                 pipeline_groups.data(),
+                                                 pipeline_groups.size(),
+                                                 nullptr,
+                                                 0,
+                                                 &pipelines[PIP_PATH_TRACE]));
 
       // Combine ray generation and trace continuation stack size
       const unsigned int css = stack_size[PG_RGEN].cssRG + link_options.maxTraceDepth * trace_css;
+      // Max direct callable depth is one of the following, so combine accordingly
+      // - __raygen__ -> svm_eval_nodes
+      // - __raygen__ -> kernel_volume_shadow -> svm_eval_nodes
+      // - __raygen__ -> subsurface_scatter_multi_setup -> svm_eval_nodes
+      const unsigned int dss = stack_size[PG_CALL + 0].dssDC +
+                               std::max(stack_size[PG_CALL + 1].dssDC,
+                                        stack_size[PG_CALL + 2].dssDC);
 
       // Set stack size depending on pipeline options
       check_result_optix_ret(
-          optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE], 0, 0, css, (motion_blur ? 3 : 2)));
+          optixPipelineSetStackSize(pipelines[PIP_PATH_TRACE],
+                                    0,
+                                    requested_features.use_shader_raytrace ? dss : 0,
+                                    css,
+                                    motion_blur ? 3 : 2));
     }
 
     // Only need to create shader evaluation pipeline if one of these features is used:
@@ -599,37 +630,51 @@ class OptiXDevice : public CUDADevice {
                                           requested_features.use_true_displacement;
 
     if (use_shader_eval_pipeline) {  // Create shader evaluation pipeline
-      OptixProgramGroup pipeline_groups[] = {
-        groups[PG_BAKE],
-        groups[PG_DISP],
-        groups[PG_BACK],
-        groups[PG_MISS],
-        groups[PG_HITD],
-        groups[PG_HITS],
-        groups[PG_HITL],
+      vector<OptixProgramGroup> pipeline_groups;
+      pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
+      pipeline_groups.push_back(groups[PG_BAKE]);
+      pipeline_groups.push_back(groups[PG_DISP]);
+      pipeline_groups.push_back(groups[PG_BACK]);
+      pipeline_groups.push_back(groups[PG_MISS]);
+      pipeline_groups.push_back(groups[PG_HITD]);
+      pipeline_groups.push_back(groups[PG_HITS]);
+      pipeline_groups.push_back(groups[PG_HITL]);
 #  if OPTIX_ABI_VERSION >= 36
-        groups[PG_HITD_MOTION],
-        groups[PG_HITS_MOTION],
+      if (motion_blur) {
+        pipeline_groups.push_back(groups[PG_HITD_MOTION]);
+        pipeline_groups.push_back(groups[PG_HITS_MOTION]);
+      }
 #  endif
-      };
-      check_result_optix_ret(
-          optixPipelineCreate(context,
-                              &pipeline_options,
-                              &link_options,
-                              pipeline_groups,
-                              (sizeof(pipeline_groups) / sizeof(pipeline_groups[0])),
-                              nullptr,
-                              0,
-                              &pipelines[PIP_SHADER_EVAL]));
+      if (requested_features.use_shader_raytrace) {
+        pipeline_groups.push_back(groups[PG_CALL + 0]);
+        pipeline_groups.push_back(groups[PG_CALL + 1]);
+        pipeline_groups.push_back(groups[PG_CALL + 2]);
+      }
+
+      check_result_optix_ret(optixPipelineCreate(context,
+                                                 &pipeline_options,
+                                                 &link_options,
+                                                 pipeline_groups.data(),
+                                                 pipeline_groups.size(),
+                                                 nullptr,
+                                                 0,
+                                                 &pipelines[PIP_SHADER_EVAL]));
 
       // Calculate continuation stack size based on the maximum of all ray generation stack sizes
       const unsigned int css = std::max(stack_size[PG_BAKE].cssRG,
                                         std::max(stack_size[PG_DISP].cssRG,
                                                  stack_size[PG_BACK].cssRG)) +
                                link_options.maxTraceDepth * trace_css;
+      const unsigned int dss = stack_size[PG_CALL + 0].dssDC +
+                               std::max(stack_size[PG_CALL + 1].dssDC,
+                                        stack_size[PG_CALL + 2].dssDC);
 
-      check_result_optix_ret(optixPipelineSetStackSize(
-          pipelines[PIP_SHADER_EVAL], 0, 0, css, (pipeline_options.usesMotionBlur ? 3 : 2)));
+      check_result_optix_ret(
+          optixPipelineSetStackSize(pipelines[PIP_SHADER_EVAL],
+                                    0,
+                                    requested_features.use_shader_raytrace ? dss : 0,
+                                    css,
+                                    motion_blur ? 3 : 2));
     }
 
     // Clean up program group objects
@@ -734,6 +779,9 @@ class OptiXDevice : public CUDADevice {
 #  else
       sbt_params.hitgroupRecordCount = 3;  // PG_HITD, PG_HITS, PG_HITL
 #  endif
+      sbt_params.callablesRecordBase = sbt_data.device_pointer + PG_CALL * sizeof(SbtRecord);
+      sbt_params.callablesRecordCount = 3;
+      sbt_params.callablesRecordStrideInBytes = sizeof(SbtRecord);
 
       // Launch the ray generation program
       check_result_optix(optixLaunch(pipelines[PIP_PATH_TRACE],
@@ -1061,6 +1109,9 @@ class OptiXDevice : public CUDADevice {
 #  else
       sbt_params.hitgrou

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list