[Bf-blender-cvs] [2d994de77c3] master: Cycles: MetalRT optimisation for subsurface intersection queries

Michael Jones noreply at git.blender.org
Mon Feb 6 20:12:31 CET 2023


Commit: 2d994de77c35a6e8a8a9c78935a3f8ed7d147f7d
Author: Michael Jones
Date:   Mon Feb 6 19:09:51 2023 +0000
Branches: master
https://developer.blender.org/rB2d994de77c35a6e8a8a9c78935a3f8ed7d147f7d

Cycles: MetalRT optimisation for subsurface intersection queries

This patch optimises subsurface intersection queries on MetalRT. Currently, intersect_local traverses from the scene root and retrospectively discards all non-local hits. Using a lookup table of bottom-level acceleration structures (BLAS), we can instead explicitly query only the relevant instance. On M1 Max, with MetalRT selected, this can give a render speedup of 15-20% for scenes like Monster, which make heavy use of subsurface scattering.

Patch authored by Marco Giordano.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D17153

===================================================================

M	intern/cycles/device/metal/bvh.h
M	intern/cycles/device/metal/bvh.mm
M	intern/cycles/device/metal/device_impl.h
M	intern/cycles/device/metal/device_impl.mm
M	intern/cycles/device/metal/kernel.h
M	intern/cycles/device/metal/kernel.mm
M	intern/cycles/device/metal/queue.mm
M	intern/cycles/kernel/device/metal/bvh.h
M	intern/cycles/kernel/device/metal/compat.h
M	intern/cycles/kernel/device/metal/kernel.metal

===================================================================

diff --git a/intern/cycles/device/metal/bvh.h b/intern/cycles/device/metal/bvh.h
index 519cbf00294..5448a3ae41d 100644
--- a/intern/cycles/device/metal/bvh.h
+++ b/intern/cycles/device/metal/bvh.h
@@ -21,6 +21,7 @@ class BVHMetal : public BVH {
 
   API_AVAILABLE(macos(11.0))
   vector<id<MTLAccelerationStructure>> blas_array;
+  vector<uint32_t> blas_lookup;
 
   bool motion_blur = false;
 
diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm
index a7fd64d3c98..c692b762d86 100644
--- a/intern/cycles/device/metal/bvh.mm
+++ b/intern/cycles/device/metal/bvh.mm
@@ -816,6 +816,11 @@ bool BVHMetal::build_TLAS(Progress &progress,
 
     uint32_t instance_index = 0;
     uint32_t motion_transform_index = 0;
+
+    // allocate look up buffer for worst case scenario
+    uint64_t count = objects.size();
+    blas_lookup.resize(count);
+
     for (Object *ob : objects) {
       /* Skip non-traceable objects */
       if (!ob->is_traceable())
@@ -843,12 +848,15 @@ bool BVHMetal::build_TLAS(Progress &progress,
       /* Set user instance ID to object index */
       int object_index = ob->get_device_index();
       uint32_t user_id = uint32_t(object_index);
+      int currIndex = instance_index++;
+      assert(user_id < blas_lookup.size());
+      blas_lookup[user_id] = accel_struct_index;
 
       /* Bake into the appropriate descriptor */
       if (motion_blur) {
         MTLAccelerationStructureMotionInstanceDescriptor *instances =
             (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
-        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
+        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];
 
         desc.accelerationStructureIndex = accel_struct_index;
         desc.userID = user_id;
@@ -894,7 +902,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
       else {
         MTLAccelerationStructureUserIDInstanceDescriptor *instances =
             (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
-        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
+        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];
 
         desc.accelerationStructureIndex = accel_struct_index;
         desc.userID = user_id;
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index a10962b4e45..2b89ebf19c9 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -74,6 +74,11 @@ class MetalDevice : public Device {
   id<MTLBuffer> texture_bindings_3d = nil;
   std::vector<id<MTLTexture>> texture_slot_map;
 
+  /* BLAS encoding & lookup */
+  id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
+  id<MTLBuffer> blas_buffer = nil;
+  id<MTLBuffer> blas_lookup_buffer = nil;
+
   bool use_metalrt = false;
   MetalPipelineType kernel_specialization_level = PSO_GENERIC;
 
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index 35298822e41..aadf5e02934 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -192,6 +192,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
         arg_desc_as.dataType = MTLDataTypeInstanceAccelerationStructure;
         arg_desc_as.access = MTLArgumentAccessReadOnly;
 
+        MTLArgumentDescriptor *arg_desc_ptrs = [[MTLArgumentDescriptor alloc] init];
+        arg_desc_ptrs.dataType = MTLDataTypePointer;
+        arg_desc_ptrs.access = MTLArgumentAccessReadOnly;
+
         MTLArgumentDescriptor *arg_desc_ift = [[MTLArgumentDescriptor alloc] init];
         arg_desc_ift.dataType = MTLDataTypeIntersectionFunctionTable;
         arg_desc_ift.access = MTLArgumentAccessReadOnly;
@@ -204,14 +208,28 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
         [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_shadow */
         arg_desc_ift.index = index++;
         [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local */
+        arg_desc_ift.index = index++;
+        [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local_prim */
+        arg_desc_ptrs.index = index++;
+        [ancillary_desc addObject:[arg_desc_ptrs copy]]; /* blas array */
+        arg_desc_ptrs.index = index++;
+        [ancillary_desc addObject:[arg_desc_ptrs copy]]; /* look up table for blas */
 
         [arg_desc_ift release];
         [arg_desc_as release];
+        [arg_desc_ptrs release];
       }
     }
 
     mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc];
 
+    // preparing the blas arg encoder
+    MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init];
+    arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure;
+    arg_desc_blas.access = MTLArgumentAccessReadOnly;
+    mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]];
+    [arg_desc_blas release];
+
     for (int i = 0; i < ancillary_desc.count; i++) {
       [ancillary_desc[i] release];
     }
@@ -1240,6 +1258,33 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
     if (@available(macos 11.0, *)) {
       if (bvh->params.top_level) {
         bvhMetalRT = bvh_metal;
+
+        // allocate required buffers for BLAS array
+        uint64_t count = bvhMetalRT->blas_array.size();
+        uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
+        blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
+        stats.mem_alloc(blas_buffer.allocatedSize);
+
+        for (uint64_t i = 0; i < count; ++i) {
+          [mtlBlasArgEncoder setArgumentBuffer:blas_buffer
+                                        offset:i * mtlBlasArgEncoder.encodedLength];
+          [mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
+        }
+
+        count = bvhMetalRT->blas_lookup.size();
+        bufferSize = sizeof(uint32_t) * count;
+        blas_lookup_buffer = [mtlDevice newBufferWithLength:bufferSize
+                                                    options:default_storage_mode];
+        stats.mem_alloc(blas_lookup_buffer.allocatedSize);
+
+        memcpy([blas_lookup_buffer contents],
+               bvhMetalRT -> blas_lookup.data(),
+               blas_lookup_buffer.allocatedSize);
+
+        if (default_storage_mode == MTLResourceStorageModeManaged) {
+          [blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
+          [blas_lookup_buffer didModifyRange:NSMakeRange(0, blas_lookup_buffer.length)];
+        }
       }
     }
   }
diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h
index 212671f52a0..0225c5c4947 100644
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -19,6 +19,8 @@ enum {
   METALRT_FUNC_SHADOW_BOX,
   METALRT_FUNC_LOCAL_TRI,
   METALRT_FUNC_LOCAL_BOX,
+  METALRT_FUNC_LOCAL_TRI_PRIM,
+  METALRT_FUNC_LOCAL_BOX_PRIM,
   METALRT_FUNC_CURVE_RIBBON,
   METALRT_FUNC_CURVE_RIBBON_SHADOW,
   METALRT_FUNC_CURVE_ALL,
@@ -28,7 +30,13 @@ enum {
   METALRT_FUNC_NUM
 };
 
-enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM };
+enum {
+  METALRT_TABLE_DEFAULT,
+  METALRT_TABLE_SHADOW,
+  METALRT_TABLE_LOCAL,
+  METALRT_TABLE_LOCAL_PRIM,
+  METALRT_TABLE_NUM
+};
 
 /* Pipeline State Object types */
 enum MetalPipelineType {
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 2ed230ee657..d9e977f1ed6 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -524,6 +524,8 @@ void MetalKernelPipeline::compile()
           "__anyhit__cycles_metalrt_shadow_all_hit_box",
           "__anyhit__cycles_metalrt_local_hit_tri",
           "__anyhit__cycles_metalrt_local_hit_box",
+          "__anyhit__cycles_metalrt_local_hit_tri_prim",
+          "__anyhit__cycles_metalrt_local_hit_box_prim",
           "__intersection__curve_ribbon",
           "__intersection__curve_ribbon_shadow",
           "__intersection__curve_all",
@@ -614,11 +616,17 @@ void MetalKernelPipeline::compile()
                          rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
                          rt_intersection_function[METALRT_FUNC_LOCAL_BOX],
                          nil];
+    table_functions[METALRT_TABLE_LOCAL_PRIM] = [NSArray
+        arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI_PRIM],
+                         rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM],
+                         rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM],
+                         nil];
 
     NSMutableSet *unique_functions = [NSMutableSet
         setWithArray:table_functions[METALRT_TABLE_DEFAULT]];
     [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]];
     [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]];
+    [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL_PRIM]];
 
     if (kernel_has_intersection(device_kernel)) {
       linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]]
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 9137e9b1fb0..b824b75ccf4 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -482,6 +482,12 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
       if (metal_device_->bvhMetalRT) {
         id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
         [metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
+        [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
+                                                  offset:0
+                                                 atIndex:7];
+        [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer
+                                                  offset:0
+  

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list