[Bf-blender-cvs] [ce4ab25a57e] soc-2022-many-lights-sampling: Fix CUDA error in wdas_cloud by refactoring code

Brecht Van Lommel noreply at git.blender.org
Thu Dec 1 21:17:04 CET 2022


Commit: ce4ab25a57ee07f6e2a7322247c17ff97d277e4e
Author: Brecht Van Lommel
Date:   Wed Nov 30 22:41:24 2022 +0100
Branches: soc-2022-many-lights-sampling
https://developer.blender.org/rBce4ab25a57ee07f6e2a7322247c17ff97d277e4e

Fix CUDA error in wdas_cloud by refactoring code

This looks like a compiler bug as it does not happen with -O0, and trying to
identify any particular line of code as causing the issue seems impossible.

Instead do some random changes that seem to sidestep the issue:
* Change inline flags in light tree
* Reduce lifetime of LightSample
* Share emitter sampling between light tree and distribution

===================================================================

M	intern/cycles/kernel/integrator/shade_volume.h
M	intern/cycles/kernel/light/distribution.h
M	intern/cycles/kernel/light/sample.h
M	intern/cycles/kernel/light/tree.h
M	intern/cycles/kernel/light/triangle.h
M	intern/cycles/kernel/types.h
M	intern/cycles/scene/light.cpp

===================================================================

diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index a23efd5738b..dbc28a2de0f 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -685,15 +685,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
 #  endif /* __DENOISING_FEATURES__ */
 }
 
-/* Path tracing: sample point on light and evaluate light shader, then
- * queue shadow ray to be traced. */
-ccl_device_forceinline bool integrate_volume_sample_light(
+/* Path tracing: sample point on light for equiangular sampling. */
+ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
     KernelGlobals kg,
     IntegratorState state,
     ccl_private const Ray *ccl_restrict ray,
     ccl_private const ShaderData *ccl_restrict sd,
     ccl_private const RNGState *ccl_restrict rng_state,
-    ccl_private LightSample *ccl_restrict ls)
+    ccl_private float3 *ccl_restrict P)
 {
   /* Test if there is a light or BSDF that needs direct light. */
   if (!kernel_data.integrator.use_direct_light) {
@@ -705,6 +704,7 @@ ccl_device_forceinline bool integrate_volume_sample_light(
   const uint bounce = INTEGRATOR_STATE(state, path, bounce);
   const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
 
+  LightSample ls;
   if (!light_sample_from_volume_segment(kg,
                                         rand_light.x,
                                         rand_light.y,
@@ -714,14 +714,20 @@ ccl_device_forceinline bool integrate_volume_sample_light(
                                         ray->tmax - ray->tmin,
                                         bounce,
                                         path_flag,
-                                        ls)) {
+                                        &ls)) {
+    return false;
+  }
+
+  if (ls.shader & SHADER_EXCLUDE_SCATTER) {
     return false;
   }
 
-  if (ls->shader & SHADER_EXCLUDE_SCATTER) {
+  if (ls.t == FLT_MAX) {
     return false;
   }
 
+  *P = ls.P;
+
   return true;
 }
 
@@ -737,8 +743,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
 #  ifdef __PATH_GUIDING__
     ccl_private const Spectrum unlit_throughput,
 #  endif
-    ccl_private const Spectrum throughput,
-    ccl_private LightSample *ccl_restrict ls)
+    ccl_private const Spectrum throughput)
 {
   PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
 
@@ -756,6 +761,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
    * Additionally we could end up behind the light or outside a spot light cone, which might
    * waste a sample. Though on the other hand it would be possible to prevent that with
    * equiangular sampling restricted to a smaller sub-segment where the light has influence. */
+  LightSample ls;
   {
     const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
     const uint bounce = INTEGRATOR_STATE(state, path, bounce);
@@ -771,12 +777,12 @@ ccl_device_forceinline void integrate_volume_direct_light(
                                     SD_BSDF_HAS_TRANSMISSION,
                                     bounce,
                                     path_flag,
-                                    ls)) {
+                                    &ls)) {
       return;
     }
   }
 
-  if (ls->shader & SHADER_EXCLUDE_SCATTER) {
+  if (ls.shader & SHADER_EXCLUDE_SCATTER) {
     return;
   }
 
@@ -788,32 +794,32 @@ ccl_device_forceinline void integrate_volume_direct_light(
    * non-constant light sources. */
   ShaderDataTinyStorage emission_sd_storage;
   ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
-  const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
+  const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
   if (is_zero(light_eval)) {
     return;
   }
 
   /* Evaluate BSDF. */
   BsdfEval phase_eval ccl_optional_struct_init;
-  float phase_pdf = volume_shader_phase_eval(kg, state, sd, phases, ls->D, &phase_eval);
+  float phase_pdf = volume_shader_phase_eval(kg, state, sd, phases, ls.D, &phase_eval);
 
-  if (ls->shader & SHADER_USE_MIS) {
-    float mis_weight = light_sample_mis_weight_nee(kg, ls->pdf, phase_pdf);
+  if (ls.shader & SHADER_USE_MIS) {
+    float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, phase_pdf);
     bsdf_eval_mul(&phase_eval, mis_weight);
   }
 
-  bsdf_eval_mul(&phase_eval, light_eval / ls->pdf);
+  bsdf_eval_mul(&phase_eval, light_eval / ls.pdf);
 
   /* Path termination. */
   const float terminate = path_state_rng_light_termination(kg, rng_state);
-  if (light_sample_terminate(kg, ls, &phase_eval, terminate)) {
+  if (light_sample_terminate(kg, &ls, &phase_eval, terminate)) {
     return;
   }
 
   /* Create shadow ray. */
   Ray ray ccl_optional_struct_init;
-  light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray);
-  const bool is_light = light_sample_is_light(ls);
+  light_sample_to_volume_shadow_ray(kg, sd, &ls, P, &ray);
+  const bool is_light = light_sample_is_light(&ls);
 
   /* Branch off shadow kernel. */
   IntegratorShadowState shadow_state = integrator_shadow_path_init(
@@ -878,8 +884,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
 
   /* Write Lightgroup, +1 as lightgroup is int but we need to encode into a uint8_t. */
   INTEGRATOR_STATE_WRITE(
-      shadow_state, shadow_path, lightgroup) = (ls->type != LIGHT_BACKGROUND) ?
-                                                   ls->group + 1 :
+      shadow_state, shadow_path, lightgroup) = (ls.type != LIGHT_BACKGROUND) ?
+                                                   ls.group + 1 :
                                                    kernel_data.background.lightgroup + 1;
 
 #  ifdef __PATH_GUIDING__
@@ -1009,10 +1015,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
   /* TODO: distant lights are ignored now, but could instead use even distribution. */
   LightSample ls ccl_optional_struct_init;
   const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE);
+  float3 equiangular_P = zero_float3();
   const bool have_equiangular_sample = need_light_sample &&
-                                       integrate_volume_sample_light(
-                                           kg, state, ray, &sd, &rng_state, &ls) &&
-                                       (ls.t != FLT_MAX);
+                                       integrate_volume_equiangular_sample_light(
+                                           kg, state, ray, &sd, &rng_state, &equiangular_P);
 
   VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
                                                 volume_stack_sample_method(kg, state) :
@@ -1042,7 +1048,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
                                  render_buffer,
                                  step_size,
                                  direct_sample_method,
-                                 ls.P,
+                                 equiangular_P,
                                  result);
 
   /* Perform path termination. The intersect_closest will have already marked this path
@@ -1109,8 +1115,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
 #  ifdef __PATH_GUIDING__
                                   unlit_throughput,
 #  endif
-                                  result.direct_throughput,
-                                  &ls);
+                                  result.direct_throughput);
   }
 
   /* Indirect light.
diff --git a/intern/cycles/kernel/light/distribution.h b/intern/cycles/kernel/light/distribution.h
index 45d6811d521..adf01faa68b 100644
--- a/intern/cycles/kernel/light/distribution.h
+++ b/intern/cycles/kernel/light/distribution.h
@@ -55,42 +55,20 @@ ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
                                                    const float3 P,
                                                    const int bounce,
                                                    const uint32_t path_flag,
-                                                   ccl_private LightSample *ls)
+                                                   ccl_private int *emitter_object,
+                                                   ccl_private int *emitter_prim,
+                                                   ccl_private int *emitter_shader_flag,
+                                                   ccl_private float *emitter_pdf_selection)
 {
   /* Sample light index from distribution. */
   const int index = light_distribution_sample(kg, &randu);
   ccl_global const KernelLightDistribution *kdistribution = &kernel_data_fetch(light_distribution,
                                                                                index);
-  const int prim = kdistribution->prim;
 
-  if (prim >= 0) {
-    /* Mesh light. */
-    const int object = kdistribution->mesh_light.object_id;
-
-    /* Exclude synthetic meshes from shadow catcher pass. */
-    if ((path_flag & PATH_RAY_SHADOW_CATCHER_PASS) &&
-        !(kernel_data_fetch(object_flag, object) & SD_OBJECT_SHADOW_CATCHER)) {
-      return false;
-    }
-
-    const int shader_flag = kdistribution->mesh_light.shader_flag;
-    triangle_light_sample<in_volume_segment>(kg, prim, object, randu, randv, time, ls, P);
-    ls->shader |= shader_flag;
-    return (ls->pdf > 0.0f);
-  }
-
-  const int lamp = -prim - 1;
-
-  if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
-    return false;
-  }
-
-  if (!light_sample<in_volume_segment>(kg, lamp, randu, randv, P, path_flag, ls)) {
-    return false;
-  }
-
-  ls->pdf_selection = kernel_data.integrator.distribution_pdf_lights;
-  ls->pdf *= ls->pdf_selection;
+  *emitter_object = kdistribution->mesh_light.object_id;
+  *emitter_prim = kdistribution->prim;
+  *emitter_shader_flag = kdistribution->mesh_light.shader_flag;
+  *emitter_pdf_selection = kernel_data.integrator.distribution_pdf_lights;
 
   return true;
 }
diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h
index a462d0e01ac..f9c6b7988b2 100644
--- a/intern/cycle

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list