[Bf-blender-cvs] [8bca34f] master: Cycles: Avoid having ShaderData on the stack
Sergey Sharybin
noreply at git.blender.org
Wed Nov 25 09:01:29 CET 2015
Commit: 8bca34fe326d10cc2f20df7fa541179e9ba835d2
Author: Sergey Sharybin
Date: Sun Nov 22 15:00:29 2015 +0500
Branches: master
https://developer.blender.org/rB8bca34fe326d10cc2f20df7fa541179e9ba835d2
Cycles: Avoid having ShaderData on the stack
This commit introduces an SSS-oriented intersection structure which replaces the
old logic of having separate arrays for intersections and shader data, and
encapsulates all the data needed for SSS evaluation.
This gives a huge stack memory saving on GPU. In my own experiments it gave a 25%
memory usage reduction on a GTX560Ti (722MB vs. 946MB).
Unfortunately, this caused a performance loss of about 20%, which only happens on GPU.
This is perhaps due to a different memory access pattern. It will hopefully be solved
in the future.
As the famous saying goes: won in memory, lost in time (which is also valid the
other way around).
===================================================================
M intern/cycles/kernel/geom/geom_bvh.h
M intern/cycles/kernel/geom/geom_bvh_subsurface.h
M intern/cycles/kernel/geom/geom_motion_triangle.h
M intern/cycles/kernel/geom/geom_qbvh_subsurface.h
M intern/cycles/kernel/geom/geom_triangle_intersect.h
M intern/cycles/kernel/kernel_path.h
M intern/cycles/kernel/kernel_path_branched.h
M intern/cycles/kernel/kernel_subsurface.h
M intern/cycles/kernel/kernel_types.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index 3d0d406..cea5050 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -255,38 +255,81 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, con
}
#ifdef __SUBSURFACE__
-ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
+ const Ray *ray,
+ SubsurfaceIntersection *ss_isect,
+ int subsurface_object,
+ uint *lcg_state,
+ int max_hits)
{
#ifdef __OBJECT_MOTION__
if(kernel_data.bvh.have_motion) {
#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ if(kernel_data.bvh.have_curves) {
+ return bvh_intersect_subsurface_hair_motion(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
+ }
#endif /* __HAIR__ */
- return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ return bvh_intersect_subsurface_motion(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
}
#endif /* __OBJECT_MOTION__ */
-#ifdef __HAIR__
- if(kernel_data.bvh.have_curves)
- return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#ifdef __HAIR__
+ if(kernel_data.bvh.have_curves) {
+ return bvh_intersect_subsurface_hair(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
+ }
#endif /* __HAIR__ */
#ifdef __KERNEL_CPU__
#ifdef __INSTANCING__
- if(kernel_data.bvh.have_instancing)
- return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ if(kernel_data.bvh.have_instancing) {
+ return bvh_intersect_subsurface_instancing(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
+ }
#endif /* __INSTANCING__ */
- return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ return bvh_intersect_subsurface(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
#else /* __KERNEL_CPU__ */
#ifdef __INSTANCING__
- return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ return bvh_intersect_subsurface_instancing(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
#else
- return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+ return bvh_intersect_subsurface(kg,
+ ray,
+ ss_isect,
+ subsurface_object,
+ lcg_state,
+ max_hits);
#endif /* __INSTANCING__ */
#endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index a093b9b..b9f1a46 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -30,9 +30,9 @@
*
*/
-ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
const Ray *ray,
- Intersection *isect_array,
+ SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
@@ -60,7 +60,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
int object = OBJECT_NONE;
float isect_t = ray->t;
- uint num_hits = 0;
+ ss_isect->num_hits = 0;
#if BVH_FEATURE(BVH_MOTION)
Transform ob_itfm;
@@ -210,7 +210,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
- triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ triangle_intersect_subsurface(kg,
+ &isect_precalc,
+ ss_isect,
+ P,
+ object,
+ primAddr,
+ isect_t,
+ lcg_state,
+ max_hits);
}
break;
}
@@ -223,7 +231,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
if(tri_object != subsurface_object)
continue;
- motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+ motion_triangle_intersect_subsurface(kg,
+ ss_isect,
+ P,
+ dir,
+ ray->time,
+ object,
+ primAddr,
+ isect_t,
+ lcg_state,
+ max_hits);
}
break;
}
@@ -301,13 +318,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
}
#endif /* FEATURE(BVH_INSTANCING) */
} while(nodeAddr != ENTRYPOINT_SENTINEL);
-
- return num_hits;
}
-ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
+ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
const Ray *ray,
- Intersection *isect_array,
+ SubsurfaceIntersection *ss_isect,
int subsurface_object,
uint *lcg_state,
int max_hits)
@@ -316,7 +331,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
if(kernel_data.bvh.use_qbvh) {
return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
ray,
- isect_array,
+ ss_isect,
subsurface_object,
lcg_state,
max_hits);
@@ -327,7 +342,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
kernel_assert(kernel_data.bvh.use_qbvh == false);
return BVH_FUNCTION_FULL_NAME(BVH)(kg,
ray,
- isect_array,
+ ss_isect,
subsurface_object,
lcg_state,
max_hits);
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 86f93f2..a7b3f5c 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -358,8 +358,17 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection
* multiple hits we pick a single random primitive as the intersection point. */
#ifdef __SUBSURFACE__
-ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
- float3 P, float3 dir, float time, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
+ccl_device_inline void motion_triangle_intersect_subsurface(
+ KernelGlobals *kg,
+ SubsurfaceIntersection *ss_isect,
+ float3 P,
+ float3 dir,
+ float time,
+ int object,
+ int triAddr,
+ float tmax,
+ uint *lcg_state,
+ int max_hits)
{
/* primitive index for vertex location lookup */
int prim = kernel_tex_fetch(__prim_index, triAddr);
@@ -373,30 +382,34 @@ ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, I
float t, u, v;
if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list