[Bf-blender-cvs] [fd77a28031d] master: Cycles: bake transparent shadows for hair

Brecht Van Lommel noreply at git.blender.org
Tue Oct 19 15:30:41 CEST 2021


Commit: fd77a28031daff3122ded3a1cb37a7fb44feedf6
Author: Brecht Van Lommel
Date:   Mon Sep 20 16:16:11 2021 +0200
Branches: master
https://developer.blender.org/rBfd77a28031daff3122ded3a1cb37a7fb44feedf6

Cycles: bake transparent shadows for hair

These transparent shadows can be expensive to evaluate. Especially on the
GPU they can lead to poor occupancy when only some pixels require many kernel
launches to trace and evaluate many layers of transparency.

Baked transparency allows tracing a single ray in many cases by accumulating
the throughput directly in the intersection program without recording hits
or evaluating shaders. Transparency is baked at curve vertices and
interpolated; for most shaders this will look practically the same as actual
shader evaluation.

Fixes T91428, performance regression with spring demo file due to transparent
hair, and makes it render significantly faster than Blender 2.93.

Differential Revision: https://developer.blender.org/D12880

===================================================================

M	intern/cycles/bvh/bvh_embree.cpp
M	intern/cycles/device/cpu/kernel.cpp
M	intern/cycles/device/cpu/kernel.h
M	intern/cycles/device/device_kernel.cpp
M	intern/cycles/integrator/shader_eval.cpp
M	intern/cycles/integrator/shader_eval.h
M	intern/cycles/kernel/bvh/bvh.h
M	intern/cycles/kernel/bvh/bvh_embree.h
M	intern/cycles/kernel/bvh/bvh_shadow_all.h
M	intern/cycles/kernel/bvh/bvh_util.h
M	intern/cycles/kernel/device/cpu/kernel_arch.h
M	intern/cycles/kernel/device/cpu/kernel_arch_impl.h
M	intern/cycles/kernel/device/gpu/kernel.h
M	intern/cycles/kernel/device/optix/kernel.cu
M	intern/cycles/kernel/geom/geom_shader_data.h
M	intern/cycles/kernel/integrator/integrator_intersect_shadow.h
M	intern/cycles/kernel/integrator/integrator_shade_shadow.h
M	intern/cycles/kernel/integrator/integrator_state.h
M	intern/cycles/kernel/kernel_bake.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/render/attribute.cpp
M	intern/cycles/render/geometry.cpp
M	intern/cycles/render/hair.cpp
M	intern/cycles/render/hair.h

===================================================================

diff --git a/intern/cycles/bvh/bvh_embree.cpp b/intern/cycles/bvh/bvh_embree.cpp
index 343d62dedf4..cd19e009bf3 100644
--- a/intern/cycles/bvh/bvh_embree.cpp
+++ b/intern/cycles/bvh/bvh_embree.cpp
@@ -80,31 +80,49 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
       Intersection current_isect;
       kernel_embree_convert_hit(kg, ray, hit, &current_isect);
 
-      /* If no transparent shadows, all light is blocked. */
+      /* If no transparent shadows or max number of hits exceeded, all light is blocked. */
       const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
-      if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->max_hits == 0) {
+      if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->num_hits >= ctx->max_hits) {
         ctx->opaque_hit = true;
         return;
       }
 
+      ++ctx->num_hits;
+
+      /* Always use baked shadow transparency for curves. */
+      if (current_isect.type & PRIMITIVE_ALL_CURVE) {
+        ctx->throughput *= intersection_curve_shadow_transparency(
+            kg, current_isect.object, current_isect.prim, current_isect.u);
+
+        if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
+          ctx->opaque_hit = true;
+          return;
+        }
+        else {
+          *args->valid = 0;
+          return;
+        }
+      }
+
       /* Test if we need to record this transparent intersection. */
-      if (ctx->num_hits < ctx->max_hits || ray->tfar < ctx->max_t) {
+      const uint max_record_hits = min(ctx->max_hits, INTEGRATOR_SHADOW_ISECT_SIZE);
+      if (ctx->num_recorded_hits < max_record_hits || ray->tfar < ctx->max_t) {
         /* If maximum number of hits was reached, replace the intersection with the
          * highest distance. We want to find the N closest intersections. */
-        const int num_recorded_hits = min(ctx->num_hits, ctx->max_hits);
-        int isect_index = num_recorded_hits;
-        if (num_recorded_hits + 1 >= ctx->max_hits) {
+        const uint num_recorded_hits = min(ctx->num_recorded_hits, max_record_hits);
+        uint isect_index = num_recorded_hits;
+        if (num_recorded_hits + 1 >= max_record_hits) {
           float max_t = ctx->isect_s[0].t;
-          int max_recorded_hit = 0;
+          uint max_recorded_hit = 0;
 
-          for (int i = 1; i < num_recorded_hits; ++i) {
+          for (uint i = 1; i < num_recorded_hits; ++i) {
             if (ctx->isect_s[i].t > max_t) {
               max_recorded_hit = i;
               max_t = ctx->isect_s[i].t;
             }
           }
 
-          if (num_recorded_hits >= ctx->max_hits) {
+          if (num_recorded_hits >= max_record_hits) {
             isect_index = max_recorded_hit;
           }
 
@@ -118,10 +136,9 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
         ctx->isect_s[isect_index] = current_isect;
       }
 
-      /* Always increase the number of hits, even beyond ray.max_hits so that
-       * the caller can detect this as and consider it opaque, or trace another
-       * ray. */
-      ++ctx->num_hits;
+      /* Always increase the number of recorded hits, even beyond the maximum,
+       * so that we can detect this and trace another ray if needed. */
+      ++ctx->num_recorded_hits;
 
       /* This tells Embree to continue tracing. */
       *args->valid = 0;
@@ -160,7 +177,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
 
       if (ctx->lcg_state) {
         /* See triangle_intersect_subsurface() for the native equivalent. */
-        for (int i = min(ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
+        for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
           if (local_isect->hits[i].t == ray->tfar) {
             /* This tells Embree to continue tracing. */
             *args->valid = 0;
diff --git a/intern/cycles/device/cpu/kernel.cpp b/intern/cycles/device/cpu/kernel.cpp
index 91282390e27..bbad2f3147d 100644
--- a/intern/cycles/device/cpu/kernel.cpp
+++ b/intern/cycles/device/cpu/kernel.cpp
@@ -44,6 +44,7 @@ CPUKernels::CPUKernels()
       /* Shader evaluation. */
       REGISTER_KERNEL(shader_eval_displace),
       REGISTER_KERNEL(shader_eval_background),
+      REGISTER_KERNEL(shader_eval_curve_shadow_transparency),
       /* Adaptive sampling. */
       REGISTER_KERNEL(adaptive_sampling_convergence_check),
       REGISTER_KERNEL(adaptive_sampling_filter_x),
diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h
index 2db09057e44..3787fe37a33 100644
--- a/intern/cycles/device/cpu/kernel.h
+++ b/intern/cycles/device/cpu/kernel.h
@@ -58,6 +58,7 @@ class CPUKernels {
 
   ShaderEvalFunction shader_eval_displace;
   ShaderEvalFunction shader_eval_background;
+  ShaderEvalFunction shader_eval_curve_shadow_transparency;
 
   /* Adaptive stopping. */
 
diff --git a/intern/cycles/device/device_kernel.cpp b/intern/cycles/device/device_kernel.cpp
index ceaddee4756..e0833331b77 100644
--- a/intern/cycles/device/device_kernel.cpp
+++ b/intern/cycles/device/device_kernel.cpp
@@ -74,6 +74,8 @@ const char *device_kernel_as_string(DeviceKernel kernel)
       return "shader_eval_displace";
     case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
       return "shader_eval_background";
+    case DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
+      return "shader_eval_curve_shadow_transparency";
 
       /* Film. */
 
diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp
index cfc30056f7d..3de7bb6fd16 100644
--- a/intern/cycles/integrator/shader_eval.cpp
+++ b/intern/cycles/integrator/shader_eval.cpp
@@ -122,6 +122,9 @@ bool ShaderEval::eval_cpu(Device *device,
         case SHADER_EVAL_BACKGROUND:
           kernels.shader_eval_background(kg, input_data, output_data, work_index);
           break;
+        case SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
+          kernels.shader_eval_curve_shadow_transparency(kg, input_data, output_data, work_index);
+          break;
       }
     });
   });
@@ -144,6 +147,9 @@ bool ShaderEval::eval_gpu(Device *device,
     case SHADER_EVAL_BACKGROUND:
       kernel = DEVICE_KERNEL_SHADER_EVAL_BACKGROUND;
       break;
+    case SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY:
+      kernel = DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY;
+      break;
   };
 
   /* Create device queue. */
diff --git a/intern/cycles/integrator/shader_eval.h b/intern/cycles/integrator/shader_eval.h
index 013fad17d4f..43b6b1bdd47 100644
--- a/intern/cycles/integrator/shader_eval.h
+++ b/intern/cycles/integrator/shader_eval.h
@@ -30,6 +30,7 @@ class Progress;
 enum ShaderEvalType {
   SHADER_EVAL_DISPLACE,
   SHADER_EVAL_BACKGROUND,
+  SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY,
 };
 
 /* ShaderEval class performs shader evaluation for background light and displacement. */
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 0d9ba7e6369..813ac15711e 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -367,12 +367,13 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
                                                      ccl_private const Ray *ray,
                                                      uint visibility,
                                                      uint max_hits,
-                                                     ccl_private uint *num_hits)
+                                                     ccl_private uint *num_recorded_hits,
+                                                     ccl_private float *throughput)
 {
 #  ifdef __KERNEL_OPTIX__
   uint p0 = state;
-  uint p1 = 0; /* Unused */
-  uint p2 = 0; /* Number of hits. */
+  uint p1 = __float_as_uint(1.0f); /* Throughput. */
+  uint p2 = 0;                     /* Number of hits. */
   uint p3 = max_hits;
   uint p4 = visibility;
   uint p5 = false;
@@ -382,7 +383,6 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
     ray_mask = 0xFF;
   }
 
-  *num_hits = 0; /* Initialize hit count to zero. */
   optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0,
              ray->P,
              ray->D,
@@ -402,12 +402,14 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
              p4,
              p5);
 
-  *num_hits = p2;
+  *num_recorded_hits = uint16_unpack_from_uint_0(p2);
+  *throughput = __uint_as_float(p1);
 
   return p5;
 #  else /* __KERNEL_OPTIX__ */
   if (!scene_intersect_valid(ray)) {
-    *num_hits = 0;
+    *num_recorded_hits = 0;
+    *throughput = 1.0f;
     return false;
   }
 
@@ -422,7 +424,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
     kernel_embree_setup_ray(*ray, rtc_ray, visibility);
     rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
 
-    *num_hits = ctx.num_hits;
+    *num_recorded_hits = ctx.num_recorded_hits;
+    *throughput = ctx.throughput;
     return ctx.opaque_hit;
   }
 #    endif /* __EMBREE__ */
@@ -431,21 +434,25 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
   if (kernel_data.bvh.have_motion) {
 #      ifdef __HAIR__
     if (kernel_data.bvh.have_curves) {
-      return bvh_intersect_shadow_all_hair_motion(kg, ray, state, visibility, max_hits, num_hits);
+      return bvh_intersect_shadow_all_hair_motion(
+          kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
     }
 #      endif /* __HAIR__ */
 
-    return bvh_intersect_shadow_all_motion(kg, ray, state, visibility, max_hits, num_hits);
+    return bvh_intersect_shadow_all_motion(
+        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
   }
 #    endif   /* __OBJECT_MOTION__ */
 
 #    ifdef __HAIR__
   if (kernel_data.bvh.have_curves) {
-    return bvh_intersect_shadow_all_hair(kg, ray, state, visibility, max_hits, num_hits);
+    return bvh_intersect_shadow_all_hair(
+        kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
   }
 #    endif /* __HAIR__ */
 
-  return bvh_intersect_shadow_all(kg, ray, state, visibility, max_hits, num_hits);
+  return bvh_interse

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list