[Bf-blender-cvs] [ce4ab25a57e] soc-2022-many-lights-sampling: Fix CUDA error in wdas_cloud by refactoring code
Brecht Van Lommel
noreply at git.blender.org
Thu Dec 1 21:17:04 CET 2022
Commit: ce4ab25a57ee07f6e2a7322247c17ff97d277e4e
Author: Brecht Van Lommel
Date: Wed Nov 30 22:41:24 2022 +0100
Branches: soc-2022-many-lights-sampling
https://developer.blender.org/rBce4ab25a57ee07f6e2a7322247c17ff97d277e4e
Fix CUDA error in wdas_cloud by refactoring code
This looks like a compiler bug as it does not happen with -O0, and trying to
identify any particular line of code as causing the issue seems impossible.
Instead, make some arbitrary changes that seem to sidestep the issue:
* Change inline flags in light tree
* Reduce lifetime of LightSample
* Share emitter sampling between light tree and distribution
===================================================================
M intern/cycles/kernel/integrator/shade_volume.h
M intern/cycles/kernel/light/distribution.h
M intern/cycles/kernel/light/sample.h
M intern/cycles/kernel/light/tree.h
M intern/cycles/kernel/light/triangle.h
M intern/cycles/kernel/types.h
M intern/cycles/scene/light.cpp
===================================================================
diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h
index a23efd5738b..dbc28a2de0f 100644
--- a/intern/cycles/kernel/integrator/shade_volume.h
+++ b/intern/cycles/kernel/integrator/shade_volume.h
@@ -685,15 +685,14 @@ ccl_device_forceinline void volume_integrate_heterogeneous(
# endif /* __DENOISING_FEATURES__ */
}
-/* Path tracing: sample point on light and evaluate light shader, then
- * queue shadow ray to be traced. */
-ccl_device_forceinline bool integrate_volume_sample_light(
+/* Path tracing: sample point on light for equiangular sampling. */
+ccl_device_forceinline bool integrate_volume_equiangular_sample_light(
KernelGlobals kg,
IntegratorState state,
ccl_private const Ray *ccl_restrict ray,
ccl_private const ShaderData *ccl_restrict sd,
ccl_private const RNGState *ccl_restrict rng_state,
- ccl_private LightSample *ccl_restrict ls)
+ ccl_private float3 *ccl_restrict P)
{
/* Test if there is a light or BSDF that needs direct light. */
if (!kernel_data.integrator.use_direct_light) {
@@ -705,6 +704,7 @@ ccl_device_forceinline bool integrate_volume_sample_light(
const uint bounce = INTEGRATOR_STATE(state, path, bounce);
const float2 rand_light = path_state_rng_2D(kg, rng_state, PRNG_LIGHT);
+ LightSample ls;
if (!light_sample_from_volume_segment(kg,
rand_light.x,
rand_light.y,
@@ -714,14 +714,20 @@ ccl_device_forceinline bool integrate_volume_sample_light(
ray->tmax - ray->tmin,
bounce,
path_flag,
- ls)) {
+ &ls)) {
+ return false;
+ }
+
+ if (ls.shader & SHADER_EXCLUDE_SCATTER) {
return false;
}
- if (ls->shader & SHADER_EXCLUDE_SCATTER) {
+ if (ls.t == FLT_MAX) {
return false;
}
+ *P = ls.P;
+
return true;
}
@@ -737,8 +743,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
# ifdef __PATH_GUIDING__
ccl_private const Spectrum unlit_throughput,
# endif
- ccl_private const Spectrum throughput,
- ccl_private LightSample *ccl_restrict ls)
+ ccl_private const Spectrum throughput)
{
PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_DIRECT_LIGHT);
@@ -756,6 +761,7 @@ ccl_device_forceinline void integrate_volume_direct_light(
* Additionally we could end up behind the light or outside a spot light cone, which might
* waste a sample. Though on the other hand it would be possible to prevent that with
* equiangular sampling restricted to a smaller sub-segment where the light has influence. */
+ LightSample ls;
{
const uint32_t path_flag = INTEGRATOR_STATE(state, path, flag);
const uint bounce = INTEGRATOR_STATE(state, path, bounce);
@@ -771,12 +777,12 @@ ccl_device_forceinline void integrate_volume_direct_light(
SD_BSDF_HAS_TRANSMISSION,
bounce,
path_flag,
- ls)) {
+ &ls)) {
return;
}
}
- if (ls->shader & SHADER_EXCLUDE_SCATTER) {
+ if (ls.shader & SHADER_EXCLUDE_SCATTER) {
return;
}
@@ -788,32 +794,32 @@ ccl_device_forceinline void integrate_volume_direct_light(
* non-constant light sources. */
ShaderDataTinyStorage emission_sd_storage;
ccl_private ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
- const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, ls, sd->time);
+ const Spectrum light_eval = light_sample_shader_eval(kg, state, emission_sd, &ls, sd->time);
if (is_zero(light_eval)) {
return;
}
/* Evaluate BSDF. */
BsdfEval phase_eval ccl_optional_struct_init;
- float phase_pdf = volume_shader_phase_eval(kg, state, sd, phases, ls->D, &phase_eval);
+ float phase_pdf = volume_shader_phase_eval(kg, state, sd, phases, ls.D, &phase_eval);
- if (ls->shader & SHADER_USE_MIS) {
- float mis_weight = light_sample_mis_weight_nee(kg, ls->pdf, phase_pdf);
+ if (ls.shader & SHADER_USE_MIS) {
+ float mis_weight = light_sample_mis_weight_nee(kg, ls.pdf, phase_pdf);
bsdf_eval_mul(&phase_eval, mis_weight);
}
- bsdf_eval_mul(&phase_eval, light_eval / ls->pdf);
+ bsdf_eval_mul(&phase_eval, light_eval / ls.pdf);
/* Path termination. */
const float terminate = path_state_rng_light_termination(kg, rng_state);
- if (light_sample_terminate(kg, ls, &phase_eval, terminate)) {
+ if (light_sample_terminate(kg, &ls, &phase_eval, terminate)) {
return;
}
/* Create shadow ray. */
Ray ray ccl_optional_struct_init;
- light_sample_to_volume_shadow_ray(kg, sd, ls, P, &ray);
- const bool is_light = light_sample_is_light(ls);
+ light_sample_to_volume_shadow_ray(kg, sd, &ls, P, &ray);
+ const bool is_light = light_sample_is_light(&ls);
/* Branch off shadow kernel. */
IntegratorShadowState shadow_state = integrator_shadow_path_init(
@@ -878,8 +884,8 @@ ccl_device_forceinline void integrate_volume_direct_light(
/* Write Lightgroup, +1 as lightgroup is int but we need to encode into a uint8_t. */
INTEGRATOR_STATE_WRITE(
- shadow_state, shadow_path, lightgroup) = (ls->type != LIGHT_BACKGROUND) ?
- ls->group + 1 :
+ shadow_state, shadow_path, lightgroup) = (ls.type != LIGHT_BACKGROUND) ?
+ ls.group + 1 :
kernel_data.background.lightgroup + 1;
# ifdef __PATH_GUIDING__
@@ -1009,10 +1015,10 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
/* TODO: distant lights are ignored now, but could instead use even distribution. */
LightSample ls ccl_optional_struct_init;
const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE);
+ float3 equiangular_P = zero_float3();
const bool have_equiangular_sample = need_light_sample &&
- integrate_volume_sample_light(
- kg, state, ray, &sd, &rng_state, &ls) &&
- (ls.t != FLT_MAX);
+ integrate_volume_equiangular_sample_light(
+ kg, state, ray, &sd, &rng_state, &equiangular_P);
VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ?
volume_stack_sample_method(kg, state) :
@@ -1042,7 +1048,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
render_buffer,
step_size,
direct_sample_method,
- ls.P,
+ equiangular_P,
result);
/* Perform path termination. The intersect_closest will have already marked this path
@@ -1109,8 +1115,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
# ifdef __PATH_GUIDING__
unlit_throughput,
# endif
- result.direct_throughput,
- &ls);
+ result.direct_throughput);
}
/* Indirect light.
diff --git a/intern/cycles/kernel/light/distribution.h b/intern/cycles/kernel/light/distribution.h
index 45d6811d521..adf01faa68b 100644
--- a/intern/cycles/kernel/light/distribution.h
+++ b/intern/cycles/kernel/light/distribution.h
@@ -55,42 +55,20 @@ ccl_device_noinline bool light_distribution_sample(KernelGlobals kg,
const float3 P,
const int bounce,
const uint32_t path_flag,
- ccl_private LightSample *ls)
+ ccl_private int *emitter_object,
+ ccl_private int *emitter_prim,
+ ccl_private int *emitter_shader_flag,
+ ccl_private float *emitter_pdf_selection)
{
/* Sample light index from distribution. */
const int index = light_distribution_sample(kg, &randu);
ccl_global const KernelLightDistribution *kdistribution = &kernel_data_fetch(light_distribution,
index);
- const int prim = kdistribution->prim;
- if (prim >= 0) {
- /* Mesh light. */
- const int object = kdistribution->mesh_light.object_id;
-
- /* Exclude synthetic meshes from shadow catcher pass. */
- if ((path_flag & PATH_RAY_SHADOW_CATCHER_PASS) &&
- !(kernel_data_fetch(object_flag, object) & SD_OBJECT_SHADOW_CATCHER)) {
- return false;
- }
-
- const int shader_flag = kdistribution->mesh_light.shader_flag;
- triangle_light_sample<in_volume_segment>(kg, prim, object, randu, randv, time, ls, P);
- ls->shader |= shader_flag;
- return (ls->pdf > 0.0f);
- }
-
- const int lamp = -prim - 1;
-
- if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
- return false;
- }
-
- if (!light_sample<in_volume_segment>(kg, lamp, randu, randv, P, path_flag, ls)) {
- return false;
- }
-
- ls->pdf_selection = kernel_data.integrator.distribution_pdf_lights;
- ls->pdf *= ls->pdf_selection;
+ *emitter_object = kdistribution->mesh_light.object_id;
+ *emitter_prim = kdistribution->prim;
+ *emitter_shader_flag = kdistribution->mesh_light.shader_flag;
+ *emitter_pdf_selection = kernel_data.integrator.distribution_pdf_lights;
return true;
}
diff --git a/intern/cycles/kernel/light/sample.h b/intern/cycles/kernel/light/sample.h
index a462d0e01ac..f9c6b7988b2 100644
--- a/intern/cycle
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list