[Bf-blender-cvs] [db257e679a6] master: Cycles: remove workaround to pass ray by value

Patrick Mours noreply at git.blender.org
Mon Aug 26 10:35:21 CEST 2019


Commit: db257e679a63b1a6a5e361a0b1906e89e8de50cf
Author: Patrick Mours
Date:   Wed Aug 21 12:06:26 2019 +0200
Branches: master
https://developer.blender.org/rBdb257e679a63b1a6a5e361a0b1906e89e8de50cf

Cycles: remove workaround to pass ray by value

CUDA now works correctly without the workaround, and passing the ray by pointer instead of by value is more efficient.

Ref D5363

===================================================================

M	intern/cycles/kernel/bvh/bvh.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_shadow.h
M	intern/cycles/kernel/kernel_subsurface.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/osl/osl_services.cpp
M	intern/cycles/kernel/svm/svm_ao.h
M	intern/cycles/kernel/svm/svm_bevel.h

===================================================================

diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index be0f05285e8..162b2fb5cdb 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -177,24 +177,23 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray)
   return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x);
 }
 
-/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
 ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
-                                          const Ray ray,
+                                          const Ray *ray,
                                           const uint visibility,
                                           Intersection *isect)
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT);
 
-  if (!scene_intersect_valid(&ray)) {
+  if (!scene_intersect_valid(ray)) {
     return false;
   }
 #ifdef __EMBREE__
   if (kernel_data.bvh.scene) {
-    isect->t = ray.t;
+    isect->t = ray->t;
     CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
     IntersectContext rtc_ctx(&ctx);
     RTCRayHit ray_hit;
-    kernel_embree_setup_rayhit(ray, ray_hit, visibility);
+    kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
     rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
     if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
         ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
@@ -207,42 +206,43 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
 #ifdef __OBJECT_MOTION__
   if (kernel_data.bvh.have_motion) {
 #  ifdef __HAIR__
-    if (kernel_data.bvh.have_curves)
-      return bvh_intersect_hair_motion(kg, &ray, isect, visibility);
+    if (kernel_data.bvh.have_curves) {
+      return bvh_intersect_hair_motion(kg, ray, isect, visibility);
+    }
 #  endif /* __HAIR__ */
 
-    return bvh_intersect_motion(kg, &ray, isect, visibility);
+    return bvh_intersect_motion(kg, ray, isect, visibility);
   }
 #endif /* __OBJECT_MOTION__ */
 
 #ifdef __HAIR__
-  if (kernel_data.bvh.have_curves)
-    return bvh_intersect_hair(kg, &ray, isect, visibility);
+  if (kernel_data.bvh.have_curves) {
+    return bvh_intersect_hair(kg, ray, isect, visibility);
+  }
 #endif /* __HAIR__ */
 
 #ifdef __KERNEL_CPU__
 
 #  ifdef __INSTANCING__
-  if (kernel_data.bvh.have_instancing)
-    return bvh_intersect_instancing(kg, &ray, isect, visibility);
+  if (kernel_data.bvh.have_instancing) {
+    return bvh_intersect_instancing(kg, ray, isect, visibility);
+  }
 #  endif /* __INSTANCING__ */
-
-  return bvh_intersect(kg, &ray, isect, visibility);
+  return bvh_intersect(kg, ray, isect, visibility);
 #else /* __KERNEL_CPU__ */
 
 #  ifdef __INSTANCING__
-  return bvh_intersect_instancing(kg, &ray, isect, visibility);
+  return bvh_intersect_instancing(kg, ray, isect, visibility);
 #  else
-  return bvh_intersect(kg, &ray, isect, visibility);
+  return bvh_intersect(kg, ray, isect, visibility);
 #  endif /* __INSTANCING__ */
 
 #endif /* __KERNEL_CPU__ */
 }
 
 #ifdef __BVH_LOCAL__
-/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
 ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
-                                                const Ray ray,
+                                                const Ray *ray,
                                                 LocalIntersection *local_isect,
                                                 int local_object,
                                                 uint *lcg_state,
@@ -250,7 +250,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
 
-  if (!scene_intersect_valid(&ray)) {
+  if (!scene_intersect_valid(ray)) {
     local_isect->num_hits = 0;
     return false;
   }
@@ -264,19 +264,19 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
     ctx.sss_object_id = local_object;
     IntersectContext rtc_ctx(&ctx);
     RTCRay rtc_ray;
-    kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+    kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
 
     /* Get the Embree scene for this intersection. */
     RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
     if (geom) {
-      float3 P = ray.P;
-      float3 dir = ray.D;
-      float3 idir = ray.D;
+      float3 P = ray->P;
+      float3 dir = ray->D;
+      float3 idir = ray->D;
       const int object_flag = kernel_tex_fetch(__object_flag, local_object);
       if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
         Transform ob_itfm;
         rtc_ray.tfar = bvh_instance_motion_push(
-            kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm);
+            kg, local_object, ray, &P, &dir, &idir, ray->t, &ob_itfm);
         /* bvh_instance_motion_push() returns the inverse transform but
          * it's not needed here. */
         (void)ob_itfm;
@@ -299,10 +299,10 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
 #  endif /* __EMBREE__ */
 #  ifdef __OBJECT_MOTION__
   if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits);
+    return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
   }
 #  endif /* __OBJECT_MOTION__ */
-  return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits);
+  return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
 }
 #endif
 
@@ -377,15 +377,18 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
   if (!scene_intersect_valid(ray)) {
     return false;
   }
+
 #  ifdef __OBJECT_MOTION__
   if (kernel_data.bvh.have_motion) {
     return bvh_intersect_volume_motion(kg, ray, isect, visibility);
   }
 #  endif /* __OBJECT_MOTION__ */
+
 #  ifdef __KERNEL_CPU__
 #    ifdef __INSTANCING__
-  if (kernel_data.bvh.have_instancing)
+  if (kernel_data.bvh.have_instancing) {
     return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
+  }
 #    endif /* __INSTANCING__ */
   return bvh_intersect_volume(kg, ray, isect, visibility);
 #  else /* __KERNEL_CPU__ */
@@ -422,15 +425,18 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
     rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
     return rtc_ray.tfar == -INFINITY;
   }
-#  endif
+#  endif /* __EMBREE__ */
+
 #  ifdef __OBJECT_MOTION__
   if (kernel_data.bvh.have_motion) {
     return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
   }
 #  endif /* __OBJECT_MOTION__ */
+
 #  ifdef __INSTANCING__
-  if (kernel_data.bvh.have_instancing)
+  if (kernel_data.bvh.have_instancing) {
     return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
+  }
 #  endif /* __INSTANCING__ */
   return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
 }
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index c7e49930701..1e8d54a23bf 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -65,7 +65,7 @@ ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg,
     ray->t = kernel_data.background.ao_distance;
   }
 
-  bool hit = scene_intersect(kg, *ray, visibility, isect);
+  bool hit = scene_intersect(kg, ray, visibility, isect);
 
 #ifdef __KERNEL_DEBUG__
   if (state->flag & PATH_RAY_CAMERA) {
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 6640f64518a..c02d7d77faf 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -103,7 +103,7 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
                                       Intersection *isect,
                                       float3 *shadow)
 {
-  const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
+  const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
 #ifdef __VOLUME__
   if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
     /* Apply attenuation from current volume shader. */
@@ -318,7 +318,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
       if (bounce >= kernel_data.integrator.transparent_max_bounce) {
         return true;
       }
-      if (!scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) {
+      if (!scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) {
         break;
       }
       if (!shader_transparent_shadow(kg, isect)) {
@@ -374,7 +374,7 @@ ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg,
                                                    Intersection *isect,
                                                    float3 *shadow)
 {
-  bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
+  bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
   bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false;
   return shadow_blocked_transparent_stepped_loop(
       kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow);
@@ -433,7 +433,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
    * TODO(sergey): Check why using record-all behavior causes slowdown in such
    * cases. Could that be caused by a higher spill pressure?
    */
-  const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect);
+  const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect);
   const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false;
   if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) {
     return shadow_blocked_transparent_stepped_loop(
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 7510e50a962..8dc1904058d 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -222,7 +222,7 @@ ccl_device_

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list