[Bf-blender-cvs] [091bb14364d] cycles_procedural_api: Cycles: add support for BVH refit in OptiX

Thu Nov 5 18:57:50 CET 2020

Commit: 091bb14364d97a4450ff6275ca8defd4c01186a4
Author: Kévin Dietrich
Date:   Thu Oct 15 17:34:58 2020 +0200
Branches: cycles_procedural_api
https://developer.blender.org/rB091bb14364d97a4450ff6275ca8defd4c01186a4

Cycles: add support for BVH refit in OptiX

In order to achieve that, we cache the OptiX data handles in the Geometry structure, and we do not free the memory from the original BVH build. When building the BVH for a refit, we simply tell OptiX that the build is supposed to be an update. For this, we should also tell OptiX that the initial build should allow for updates; however, this is only supported in viewport renders for now.

I have not done any profiling to check how fast things are here; however, it seems that the bottleneck is still data transfer.

Differential Revision: https://developer.blender.org/D9353

===================================================================

M	intern/cycles/bvh/bvh_optix.cpp
M	intern/cycles/device/device_optix.cpp
M	intern/cycles/render/geometry.cpp
M	intern/cycles/render/geometry.h

===================================================================

diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp
index 0527c0eeda8..51611a4aaf6 100644
--- a/intern/cycles/bvh/bvh_optix.cpp
+++ b/intern/cycles/bvh/bvh_optix.cpp
@@ -216,8 +216,7 @@ void BVHOptiX::pack_nodes(const BVHNode *)
 
 void BVHOptiX::refit_nodes()
 {
-  // TODO(pmours): Implement?
-  VLOG(1) << "Refit is not yet implemented for OptiX BVH.";
+  geometry[0]->do_optix_refit = true;
 }
 
 BVHNode *BVHOptiX::widen_children_nodes(const BVHNode *)
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index 0d9c8dc7ce4..a14aef35063 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -1078,23 +1078,23 @@ class OptiXDevice : public CUDADevice {
 
   bool build_optix_bvh(const OptixBuildInput &build_input,
                        uint16_t num_motion_steps,
-                       OptixTraversableHandle &out_handle)
+                       OptixTraversableHandle &out_handle,
+                       CUdeviceptr &out_data_ptr,
+                       OptixBuildOperation operation)
   {
-    out_handle = 0;
-
     const CUDAContextScope scope(cuContext);
 
     // Compute memory usage
     OptixAccelBufferSizes sizes = {};
     OptixAccelBuildOptions options;
-    options.operation = OPTIX_BUILD_OPERATION_BUILD;
+    options.operation = operation;
     if (background) {
       // Prefer best performance and lowest memory consumption in background
       options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
     }
     else {
       // Prefer fast updates in viewport
-      options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD;
+      options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
     }
 
     options.motionOptions.numKeys = num_motion_steps;
@@ -1120,7 +1120,14 @@ class OptiXDevice : public CUDADevice {
     }
 
     CUdeviceptr out_data = 0;
-    check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+    if (operation == OPTIX_BUILD_OPERATION_BUILD) {
+      check_result_cuda_ret(cuMemAlloc(&out_data, sizes.outputSizeInBytes));
+      out_data_ptr = out_data;
+    }
+    else {
+      out_data = out_data_ptr;
+    }
+
     as_mem.push_back(out_data);
 
     // Finally build the acceleration structure
@@ -1187,10 +1194,21 @@ class OptiXDevice : public CUDADevice {
     unordered_map<Geometry *, OptixTraversableHandle> geometry;
     geometry.reserve(bvh->geometry.size());
 
-    // Free all previous acceleration structures
+    // Free all previous acceleration structures which can not be refit
+    std::set<CUdeviceptr> refit_mem;
+
+    for (Geometry *geom : bvh->geometry) {
+      if (geom->do_optix_refit) {
+        refit_mem.insert(geom->optix_data_handle);
+      }
+    }
+
     for (CUdeviceptr mem : as_mem) {
-      cuMemFree(mem);
+      if (refit_mem.find(mem) == refit_mem.end()) {
+        cuMemFree(mem);
+      }
     }
+
     as_mem.clear();
 
     // Build bottom level acceleration structures (BLAS)
@@ -1201,6 +1219,21 @@ class OptiXDevice : public CUDADevice {
       if (geometry.find(geom) != geometry.end())
         continue;
 
+      OptixTraversableHandle handle;
+      OptixBuildOperation operation;
+      CUdeviceptr out_data;
+      // Refit is only possible in viewport for now.
+      if (ob->geometry->do_optix_refit && !background) {
+        out_data = geom->optix_data_handle;
+        handle = geom->optix_handle;
+        operation = OPTIX_BUILD_OPERATION_UPDATE;
+      }
+      else {
+        out_data = 0;
+        handle = 0;
+        operation = OPTIX_BUILD_OPERATION_BUILD;
+      }
+
       if (geom->type == Geometry::HAIR) {
         // Build BLAS for curve primitives
         Hair *const hair = static_cast<Hair *const>(ob->geometry);
@@ -1364,9 +1397,11 @@ class OptiXDevice : public CUDADevice {
         }
 
         // Allocate memory for new BLAS and build it
-        OptixTraversableHandle handle;
-        if (build_optix_bvh(build_input, num_motion_steps, handle)) {
+        if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) {
           geometry.insert({ob->geometry, handle});
+          geom->optix_data_handle = out_data;
+          geom->optix_handle = handle;
+          geom->do_optix_refit = false;
         }
         else {
           return false;
@@ -1436,9 +1471,11 @@ class OptiXDevice : public CUDADevice {
         build_input.triangleArray.primitiveIndexOffset = mesh->optix_prim_offset;
 
         // Allocate memory for new BLAS and build it
-        OptixTraversableHandle handle;
-        if (build_optix_bvh(build_input, num_motion_steps, handle)) {
+        if (build_optix_bvh(build_input, num_motion_steps, handle, out_data, operation)) {
           geometry.insert({ob->geometry, handle});
+          geom->optix_data_handle = out_data;
+          geom->optix_handle = handle;
+          geom->do_optix_refit = false;
         }
         else {
           return false;
@@ -1612,7 +1649,9 @@ class OptiXDevice : public CUDADevice {
     build_input.instanceArray.instances = instances.device_pointer;
     build_input.instanceArray.numInstances = num_instances;
 
-    return build_optix_bvh(build_input, 0, tlas_handle);
+    CUdeviceptr out_data = 0;
+    tlas_handle = 0;
+    return build_optix_bvh(build_input, 0, tlas_handle, out_data, OPTIX_BUILD_OPERATION_BUILD);
   }
 
   void const_copy_to(const char *name, void *host, size_t size) override
diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp
index 9457d5e0205..65a869875bc 100644
--- a/intern/cycles/render/geometry.cpp
+++ b/intern/cycles/render/geometry.cpp
@@ -74,6 +74,9 @@ Geometry::Geometry(const NodeType *node_type, const Type type)
   attr_map_offset = 0;
   optix_prim_offset = 0;
   prim_offset = 0;
+  do_optix_refit = false;
+  optix_handle = 0;
+  optix_data_handle = 0;
 }
 
 Geometry::~Geometry()
diff --git a/intern/cycles/render/geometry.h b/intern/cycles/render/geometry.h
index 9367c4a9de7..85df927f7f4 100644
--- a/intern/cycles/render/geometry.h
+++ b/intern/cycles/render/geometry.h
@@ -90,6 +90,9 @@ class Geometry : public Node {
   /* Update Flags */
   bool need_update;
   bool need_update_rebuild;
+  bool do_optix_refit;
+  unsigned long long optix_handle;
+  unsigned long long optix_data_handle;
 
   /* Index into scene->geometry (only valid during update) */
   size_t index;