[Bf-blender-cvs] [fd52dc58dde] master: Cycles: GPU code generation optimizations for direct lighting

Patrick Mours noreply at git.blender.org
Mon Aug 26 10:35:28 CEST 2019


Commit: fd52dc58ddea642b7162b01538c0c6c48abb0507
Author: Patrick Mours
Date:   Sun Aug 25 18:11:41 2019 +0200
Branches: master
https://developer.blender.org/rBfd52dc58ddea642b7162b01538c0c6c48abb0507

Cycles: GPU code generation optimizations for direct lighting

Use a single loop to iterate over all lights, reducing divergence and the
amount of code that has to be generated. Moving ray intersection calls out of
conditionals also helps the OptiX compiler.

Ref D5363

===================================================================

M	intern/cycles/kernel/kernel_light.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_path_surface.h
M	intern/cycles/kernel/kernel_path_volume.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 758e91159b6..ce908ce0fe2 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -1076,7 +1076,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
   int len = kernel_data.integrator.num_distribution + 1;
   float r = *randu;
 
-  while (len > 0) {
+  do {
     int half_len = len >> 1;
     int middle = first + half_len;
 
@@ -1087,7 +1087,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
       first = middle + 1;
       len = len - half_len - 1;
     }
-  }
+  } while (len > 0);
 
   /* Clamping should not be needed but float rounding errors seem to
    * make this fail on rare occasions. */
@@ -1104,42 +1104,49 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
 
 /* Generic Light */
 
-ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
+ccl_device_inline bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
 {
   return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
 }
 
-ccl_device_noinline bool light_sample(
-    KernelGlobals *kg, float randu, float randv, float time, float3 P, int bounce, LightSample *ls)
+ccl_device_noinline bool light_sample(KernelGlobals *kg,
+                                      int lamp,
+                                      float randu,
+                                      float randv,
+                                      float time,
+                                      float3 P,
+                                      int bounce,
+                                      LightSample *ls)
 {
-  /* sample index */
-  int index = light_distribution_sample(kg, &randu);
-
-  /* fetch light data */
-  const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution,
-                                                                              index);
-  int prim = kdistribution->prim;
-
-  if (prim >= 0) {
-    int object = kdistribution->mesh_light.object_id;
-    int shader_flag = kdistribution->mesh_light.shader_flag;
+  if (lamp < 0) {
+    /* sample index */
+    int index = light_distribution_sample(kg, &randu);
+
+    /* fetch light data */
+    const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(
+        __light_distribution, index);
+    int prim = kdistribution->prim;
+
+    if (prim >= 0) {
+      int object = kdistribution->mesh_light.object_id;
+      int shader_flag = kdistribution->mesh_light.shader_flag;
+
+      triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
+      ls->shader |= shader_flag;
+      return (ls->pdf > 0.0f);
+    }
 
-    triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
-    ls->shader |= shader_flag;
-    return (ls->pdf > 0.0f);
+    lamp = -prim - 1;
   }
-  else {
-    int lamp = -prim - 1;
 
-    if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
-      return false;
-    }
-
-    return lamp_light_sample(kg, lamp, randu, randv, P, ls);
+  if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
+    return false;
   }
+
+  return lamp_light_sample(kg, lamp, randu, randv, P, ls);
 }
 
-ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
+ccl_device_inline int light_select_num_samples(KernelGlobals *kg, int index)
 {
   return kernel_tex_fetch(__lights, index).samples;
 }
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 1e8d54a23bf..63be0a7f505 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -474,12 +474,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 #    endif /* __SUBSURFACE__ */
 
 #    if defined(__EMISSION__)
-        if (kernel_data.integrator.use_direct_light) {
-          int all = (kernel_data.integrator.sample_all_lights_indirect) ||
-                    (state->flag & PATH_RAY_SHADOW_CATCHER);
-          kernel_branched_path_surface_connect_light(
-              kg, sd, emission_sd, state, throughput, 1.0f, L, all);
-        }
+        int all = (kernel_data.integrator.sample_all_lights_indirect) ||
+                  (state->flag & PATH_RAY_SHADOW_CATCHER);
+        kernel_branched_path_surface_connect_light(
+            kg, sd, emission_sd, state, throughput, 1.0f, L, all);
 #    endif /* defined(__EMISSION__) */
 
 #    ifdef __VOLUME__
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 07444a98d82..d299106ea96 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -32,140 +32,100 @@ ccl_device_noinline_cpu void kernel_branched_path_surface_connect_light(
 {
 #  ifdef __EMISSION__
   /* sample illumination from lights to find path contribution */
-  if (!(sd->flag & SD_BSDF_HAS_EVAL))
-    return;
-
-  Ray light_ray;
   BsdfEval L_light;
-  bool is_lamp;
-
-#    ifdef __OBJECT_MOTION__
-  light_ray.time = sd->time;
-#    endif
-
-  if (sample_all_lights) {
-    /* lamp sampling */
-    for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-      if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
-        continue;
-
-      int num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i));
-      float num_samples_inv = num_samples_adjust /
-                              (num_samples * kernel_data.integrator.num_all_lights);
-      uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
-
-      for (int j = 0; j < num_samples; j++) {
-        float light_u, light_v;
-        path_branched_rng_2D(
-            kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-        float terminate = path_branched_rng_light_termination(
-            kg, lamp_rng_hash, state, j, num_samples);
 
-        LightSample ls;
-        if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
-          /* The sampling probability returned by lamp_light_sample assumes that all lights were
-           * sampled.
-           * However, this code only samples lamps, so if the scene also had mesh lights, the real
-           * probability is twice as high. */
-          if (kernel_data.integrator.pdf_triangles != 0.0f)
-            ls.pdf *= 2.0f;
+  int num_lights = 0;
+  if (kernel_data.integrator.use_direct_light) {
+    if (sample_all_lights) {
+      num_lights = kernel_data.integrator.num_all_lights;
+      if (kernel_data.integrator.pdf_triangles != 0.0f) {
+        num_lights += 1;
+      }
+    }
+    else {
+      num_lights = 1;
+    }
+  }
 
-          if (direct_emission(
-                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-            /* trace shadow ray */
-            float3 shadow;
-
-            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-              /* accumulate */
-              path_radiance_accum_light(L,
-                                        state,
-                                        throughput * num_samples_inv,
-                                        &L_light,
-                                        shadow,
-                                        num_samples_inv,
-                                        is_lamp);
-            }
-            else {
-              path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light);
-            }
-          }
+  for (int i = 0; i < num_lights; i++) {
+    /* sample one light at random */
+    int num_samples = 1;
+    int num_all_lights = 1;
+    uint lamp_rng_hash = state->rng_hash;
+    bool double_pdf = false;
+    bool is_mesh_light = false;
+    bool is_lamp = false;
+
+    if (sample_all_lights) {
+      /* lamp sampling */
+      is_lamp = i < kernel_data.integrator.num_all_lights;
+      if (is_lamp) {
+        if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) {
+          continue;
         }
+        num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i));
+        num_all_lights = kernel_data.integrator.num_all_lights;
+        lamp_rng_hash = cmj_hash(state->rng_hash, i);
+        double_pdf = kernel_data.integrator.pdf_triangles != 0.0f;
+      }
+      /* mesh light sampling */
+      else {
+        num_samples = ceil_to_int(num_samples_adjust * kernel_data.integrator.mesh_light_samples);
+        double_pdf = kernel_data.integrator.num_all_lights != 0;
+        is_mesh_light = true;
       }
     }
 
-    /* mesh light sampling */
-    if (kernel_data.integrator.pdf_triangles != 0.0f) {
-      int num_samples = ceil_to_int(num_samples_adjust *
-                                    kernel_data.integrator.mesh_light_samples);
-      float num_samples_inv = num_samples_adjust / num_samples;
+    float num_samples_inv = num_samples_adjust / (num_samples * num_all_lights);
 
-      for (int j = 0; j < num_samples; j++) {
+    for (int j = 0; j < num_samples; j++) {
+      Ray light_ray;
+      light_ray.t = 0.0f; /* reset ray */
+#    ifdef __OBJECT_MOTION__
+      light_ray.time = sd->time;
+#    endif
+      bool has_emission = false;
+
+      if (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)) {
         float light_u, light_v;
         path_branched_rng_2D(
-            kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+            kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
         float terminate = path_branched_rng_light_termination(
-            kg, state->rng_hash, state, j, num_samples);
+            kg, lamp_rng_hash, state, j, num_samples);
 
         /* only sample triangle lights */
-        if (kernel_data.integrator.num_all_lights)
+        if (is_mesh_light && double_pdf) {
           light_u = 0.5f * light_u;
+        }
 
         LightSample ls;
-        if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-          /* Same as above, probability needs to be corrected since the sampling was forced to
-           * select a mesh l

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list