[Bf-blender-cvs] [5a384fa] dyntopo_holes: Cycles / CUDA: Change inline rules for BVH intersection functions.
Thomas Dinges
noreply at git.blender.org
Sun Oct 5 14:54:19 CEST 2014
Commit: 5a384faaa1aa3be4305fd1a661696fd92671c1b7
Author: Thomas Dinges
Date: Sun Oct 5 03:53:51 2014 +0200
Branches: dyntopo_holes
https://developer.blender.org/rB5a384faaa1aa3be4305fd1a661696fd92671c1b7
Cycles / CUDA: Change inline rules for BVH intersection functions.
* On sm_30 and above there is no change (these functions were already not inlined before); this just fixes a speed regression from yesterday's commit 6359c36ba407.
* On sm_2x (tested with sm_21), I get a nice 8% speedup in the bmw scene with this. As a bonus, cubin compilation time and memory usage are significantly reduced: the regular cubin size went from 2.5MB to 2.0MB, and the experimental one from 3.8MB to 2.5MB.
===================================================================
M intern/cycles/kernel/geom/geom_bvh.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index 8c47202..c5336e0 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -28,6 +28,13 @@
CCL_NAMESPACE_BEGIN
+/* Don't inline intersect functions on GPU, this is faster */
+#ifdef __KERNEL_GPU__
+#define ccl_device_intersect ccl_device_noinline
+#else
+#define ccl_device_intersect ccl_device_inline
+#endif
+
/* BVH intersection function variations */
#define BVH_INSTANCING 1
@@ -161,7 +168,7 @@ CCL_NAMESPACE_BEGIN
#include "geom_bvh_volume.h"
#endif
-ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
+ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect,
uint *lcg_state, float difl, float extmax)
{
#ifdef __OBJECT_MOTION__
@@ -200,7 +207,7 @@ ccl_device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const
}
#ifdef __SUBSURFACE__
-ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -239,7 +246,7 @@ ccl_device_inline uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *
#endif
#ifdef __SHADOW_RECORD_ALL__
-ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
+ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *ray, Intersection *isect, uint max_hits, uint *num_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
@@ -267,7 +274,7 @@ ccl_device_inline bool scene_intersect_shadow_all(KernelGlobals *kg, const Ray *
#endif
#ifdef __VOLUME__
-ccl_device_inline bool scene_intersect_volume(KernelGlobals *kg,
+ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
const Ray *ray,
Intersection *isect)
{
More information about the Bf-blender-cvs
mailing list