[Bf-blender-cvs] [dde740b] master: Cycles / CUDA: Change inline rules for BVH intersection functions.

Thomas Dinges noreply at git.blender.org
Sun Oct 5 03:54:37 CEST 2014


Commit: dde740bcd723f3fc149b44d06155396fb425ce4a
Author: Thomas Dinges
Date:   Sun Oct 5 03:53:51 2014 +0200
Branches: master
https://developer.blender.org/rBdde740bcd723f3fc149b44d06155396fb425ce4a

Cycles / CUDA: Change inline rules for BVH intersection functions.

* On sm_30 and above there is no change (these functions were already not inlined before); this just fixes a speed regression from yesterday (6359c36ba407).
* On sm_2x (tested with sm_21), I get a nice 8% speedup in the bmw scene with this. As a bonus, cubin compilation time and memory usage are significantly reduced. The regular cubin size went from 2.5MB to 2.0MB, and the Experimental one from 3.8MB to 2.5MB.

===================================================================

M	intern/cycles/kernel/geom/geom_bvh.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index 8c47202..c5336e0 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -28,6 +28,13 @@
 
 CCL_NAMESPACE_BEGIN
 
+/* Don't inline intersect functions on GPU, this is faster */
+#ifdef __KERNEL_GPU__
+#define ccl_device_intersect ccl_device_noinline
+#else
+#define ccl_device_intersect ccl_device_inline
+#endif
+
 /* BVH intersection function variations */
 
 #define BVH_INSTANCING			1
@@ -161,7 +168,7 @@ CCL_NAMESPACE_BEGIN
 #include "geom_bvh_volume.h"
 #endif
 
-ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
+ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
 					 uint *lcg_state, float difl, float extmax)
 {
 #ifdef __OBJECT_MOTION__
@@ -200,7 +207,7 @@ ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const
 }
 
 #ifdef __SUBSURFACE__
-ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
 {
 #ifdef __OBJECT_MOTION__
 	if(kernel_data.bvh.have_motion) {
@@ -239,7 +246,7 @@ ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *
 #endif
 
 #ifdef __SHADOW_RECORD_ALL__
-ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
 {
 #ifdef __OBJECT_MOTION__
 	if(kernel_data.bvh.have_motion) {
@@ -267,7 +274,7 @@ ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *
 #endif
 
 #ifdef __VOLUME__
-ccl_device_inline bool scene_intersect_volume(KernelGlobals *kg,
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
                             const Ray *ray,
                             Intersection *isect)
 {




More information about the Bf-blender-cvs mailing list