[Bf-blender-cvs] [8bca34f] master: Cycles: Avoid having ShaderData on the stack

Sergey Sharybin noreply at git.blender.org
Wed Nov 25 09:01:29 CET 2015


Commit: 8bca34fe326d10cc2f20df7fa541179e9ba835d2
Author: Sergey Sharybin
Date:   Sun Nov 22 15:00:29 2015 +0500
Branches: master
https://developer.blender.org/rB8bca34fe326d10cc2f20df7fa541179e9ba835d2

Cycles: Avoid having ShaderData on the stack

This commit introduces an SSS-oriented intersection structure which replaces
the old logic of having separate arrays for just intersections and shader data,
and encapsulates all the data needed for SSS evaluation.

This gives a huge stack memory saving on GPU. In my own experiments it gave a
25% memory usage reduction on GTX560Ti (722MB vs. 946MB).

Unfortunately, this gave some performance loss of 20%, which only happens on
GPU. This is perhaps due to a different memory access pattern. Will be solved
in the future, hopefully.

Famous saying: won in memory - lost in time (which is also valid the other way
around).

===================================================================

M	intern/cycles/kernel/geom/geom_bvh.h
M	intern/cycles/kernel/geom/geom_bvh_subsurface.h
M	intern/cycles/kernel/geom/geom_motion_triangle.h
M	intern/cycles/kernel/geom/geom_qbvh_subsurface.h
M	intern/cycles/kernel/geom/geom_triangle_intersect.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_path_branched.h
M	intern/cycles/kernel/kernel_subsurface.h
M	intern/cycles/kernel/kernel_types.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index 3d0d406..cea5050 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -255,38 +255,81 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, const Ray *ray, con
 }
 
 #ifdef __SUBSURFACE__
-ccl_device_intersect uint scene_intersect_subsurface(KernelGlobals *kg, const Ray *ray, Intersection *isect, int subsurface_object, uint *lcg_state, int max_hits)
+ccl_device_intersect void scene_intersect_subsurface(KernelGlobals *kg,
+                                                     const Ray *ray,
+                                                     SubsurfaceIntersection *ss_isect,
+                                                     int subsurface_object,
+                                                     uint *lcg_state,
+                                                     int max_hits)
 {
 #ifdef __OBJECT_MOTION__
 	if(kernel_data.bvh.have_motion) {
 #ifdef __HAIR__
-		if(kernel_data.bvh.have_curves)
-			return bvh_intersect_subsurface_hair_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+		if(kernel_data.bvh.have_curves) {
+			return bvh_intersect_subsurface_hair_motion(kg,
+			                                            ray,
+			                                            ss_isect,
+			                                            subsurface_object,
+			                                            lcg_state,
+			                                            max_hits);
+		}
 #endif /* __HAIR__ */
 
-		return bvh_intersect_subsurface_motion(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+		return bvh_intersect_subsurface_motion(kg,
+		                                       ray,
+		                                       ss_isect,
+		                                       subsurface_object,
+		                                       lcg_state,
+		                                       max_hits);
 	}
 #endif /* __OBJECT_MOTION__ */
 
-#ifdef __HAIR__ 
-	if(kernel_data.bvh.have_curves)
-		return bvh_intersect_subsurface_hair(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+#ifdef __HAIR__
+	if(kernel_data.bvh.have_curves) {
+		return bvh_intersect_subsurface_hair(kg,
+		                                     ray,
+		                                     ss_isect,
+		                                     subsurface_object,
+		                                     lcg_state,
+		                                     max_hits);
+	}
 #endif /* __HAIR__ */
 
 #ifdef __KERNEL_CPU__
 
 #ifdef __INSTANCING__
-	if(kernel_data.bvh.have_instancing)
-		return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+	if(kernel_data.bvh.have_instancing) {
+		return bvh_intersect_subsurface_instancing(kg,
+		                                           ray,
+		                                           ss_isect,
+		                                           subsurface_object,
+		                                           lcg_state,
+		                                           max_hits);
+	}
 #endif /* __INSTANCING__ */
 
-	return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+	return bvh_intersect_subsurface(kg,
+	                                ray,
+	                                ss_isect,
+	                                subsurface_object,
+	                                lcg_state,
+	                                max_hits);
 #else /* __KERNEL_CPU__ */
 
 #ifdef __INSTANCING__
-	return bvh_intersect_subsurface_instancing(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+	return bvh_intersect_subsurface_instancing(kg,
+	                                           ray,
+	                                           ss_isect,
+	                                           subsurface_object,
+	                                           lcg_state,
+	                                           max_hits);
 #else
-	return bvh_intersect_subsurface(kg, ray, isect, subsurface_object, lcg_state, max_hits);
+	return bvh_intersect_subsurface(kg,
+	                                ray,
+	                                ss_isect,
+	                                subsurface_object,
+	                                lcg_state,
+	                                max_hits);
 #endif /* __INSTANCING__ */
 
 #endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index a093b9b..b9f1a46 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -30,9 +30,9 @@
  *
  */
 
-ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                             const Ray *ray,
-                                            Intersection *isect_array,
+                                            SubsurfaceIntersection *ss_isect,
                                             int subsurface_object,
                                             uint *lcg_state,
                                             int max_hits)
@@ -60,7 +60,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	int object = OBJECT_NONE;
 	float isect_t = ray->t;
 
-	uint num_hits = 0;
+	ss_isect->num_hits = 0;
 
 #if BVH_FEATURE(BVH_MOTION)
 	Transform ob_itfm;
@@ -210,7 +210,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
 								if(tri_object != subsurface_object)
 									continue;
-								triangle_intersect_subsurface(kg, &isect_precalc, isect_array, P, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+								triangle_intersect_subsurface(kg,
+								                              &isect_precalc,
+								                              ss_isect,
+								                              P,
+								                              object,
+								                              primAddr,
+								                              isect_t,
+								                              lcg_state,
+								                              max_hits);
 							}
 							break;
 						}
@@ -223,7 +231,16 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 								uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
 								if(tri_object != subsurface_object)
 									continue;
-								motion_triangle_intersect_subsurface(kg, isect_array, P, dir, ray->time, object, primAddr, isect_t, &num_hits, lcg_state, max_hits);
+								motion_triangle_intersect_subsurface(kg,
+								                                     ss_isect,
+								                                     P,
+								                                     dir,
+								                                     ray->time,
+								                                     object,
+								                                     primAddr,
+								                                     isect_t,
+								                                     lcg_state,
+								                                     max_hits);
 							}
 							break;
 						}
@@ -301,13 +318,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 		}
 #endif  /* FEATURE(BVH_INSTANCING) */
 	} while(nodeAddr != ENTRYPOINT_SENTINEL);
-
-	return num_hits;
 }
 
-ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
+ccl_device_inline void BVH_FUNCTION_NAME(KernelGlobals *kg,
                                          const Ray *ray,
-                                         Intersection *isect_array,
+                                         SubsurfaceIntersection *ss_isect,
                                          int subsurface_object,
                                          uint *lcg_state,
                                          int max_hits)
@@ -316,7 +331,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
 	if(kernel_data.bvh.use_qbvh) {
 		return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
 		                                    ray,
-		                                    isect_array,
+		                                    ss_isect,
 		                                    subsurface_object,
 		                                    lcg_state,
 		                                    max_hits);
@@ -327,7 +342,7 @@ ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
 		kernel_assert(kernel_data.bvh.use_qbvh == false);
 		return BVH_FUNCTION_FULL_NAME(BVH)(kg,
 		                                   ray,
-		                                   isect_array,
+		                                   ss_isect,
 		                                   subsurface_object,
 		                                   lcg_state,
 		                                   max_hits);
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 86f93f2..a7b3f5c 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -358,8 +358,17 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection
  * multiple hits we pick a single random primitive as the intersection point. */
 
 #ifdef __SUBSURFACE__
-ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, Intersection *isect_array,
-	float3 P, float3 dir, float time, int object, int triAddr, float tmax, uint *num_hits, uint *lcg_state, int max_hits)
+ccl_device_inline void motion_triangle_intersect_subsurface(
+        KernelGlobals *kg,
+        SubsurfaceIntersection *ss_isect,
+        float3 P,
+        float3 dir,
+        float time,
+        int object,
+        int triAddr,
+        float tmax,
+        uint *lcg_state,
+        int max_hits)
 {
 	/* primitive index for vertex location lookup */
 	int prim = kernel_tex_fetch(__prim_index, triAddr);
@@ -373,30 +382,34 @@ ccl_device_inline void motion_triangle_intersect_subsurface(KernelGlobals *kg, I
 	float t, u, v;
 
 	if(ray_triangle_intersect_uv(P, dir, tmax, verts[2], verts[0], verts[1], &u, &v, &t)

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list