[Bf-blender-cvs] [638b979] cycles_memory_experiments: Cycles: Add debug option to disable pre-aligned triangles storage

Sergey Sharybin noreply at git.blender.org
Fri Apr 17 20:36:06 CEST 2015


Commit: 638b979895b25e630c7138ac4304321c2544971f
Author: Sergey Sharybin
Date:   Fri Apr 17 23:31:41 2015 +0500
Branches: cycles_memory_experiments
https://developer.blender.org/rB638b979895b25e630c7138ac4304321c2544971f

Cycles: Add debug option to disable pre-aligned triangles storage

This option makes it so the BVH does not use pre-aligned storage for triangle
vertex coordinates, which was originally needed for faster coordinate lookup when
doing the triangle intersection test. This array gives up to a 10% performance
gain compared to fetching individual coordinates, but it also uses 12 floats per
BVH primitive, which might translate to quite a huge array in a complex scene.

The intention of this option is to investigate whether this is the right
direction for making Gooseberry files renderable on the local farm, which is not
totally great in terms of memory.

The current approach is not totally cheap, meaning even the case with the storage
enabled might be slower, but currently the difference is within 1%.

===================================================================

M	intern/cycles/blender/addon/properties.py
M	intern/cycles/blender/addon/ui.py
M	intern/cycles/blender/blender_sync.cpp
M	intern/cycles/bvh/bvh.cpp
M	intern/cycles/bvh/bvh_params.h
M	intern/cycles/kernel/geom/geom_triangle_intersect.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/render/mesh.cpp
M	intern/cycles/render/scene.h

===================================================================

diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 633ce1a..84744f9 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -456,6 +456,12 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
                 description="Use BVH spatial splits: longer builder time, faster render",
                 default=False,
                 )
+        cls.debug_use_triangle_storage = BoolProperty(
+                name="Use Triangle Storage",
+                description="use special storage with aligned triangle coordinates for faster "
+                            "intesection check in expense of higher mmeory usage",
+                default=True,
+                )
         cls.use_cache = BoolProperty(
                 name="Cache BVH",
                 description="Cache last built BVH to disk for faster re-render if no geometry changed",
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index 2631e26..1c70a1e 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -325,6 +325,7 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel):
 
         col.label(text="Acceleration structure:")
         col.prop(cscene, "debug_use_spatial_splits")
+        col.prop(cscene, "debug_use_triangle_storage")
 
 
 class CyclesRender_PT_layer_options(CyclesButtonsPanel, Panel):
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 266e170..4d4427e 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -431,6 +431,8 @@ SceneParams BlenderSync::get_scene_params(BL::Scene b_scene, bool background, bo
 		params.use_qbvh = false;
 	}
 
+	params.use_bvh_triangle_storage = RNA_boolean_get(&cscene, "debug_use_triangle_storage");
+
 	return params;
 }
 
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index d1c3fee..528febf 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -282,28 +282,34 @@ void BVH::pack_triangle(int idx, float4 woop[3])
 
 void BVH::pack_primitives()
 {
-	int nsize = TRI_NODE_SIZE;
+	const int nsize = TRI_NODE_SIZE;
+	const bool use_triangle_storage = params.use_triangle_storage;
 	size_t tidx_size = pack.prim_index.size();
 
 	pack.tri_woop.clear();
-	pack.tri_woop.resize(tidx_size * nsize);
+	if (use_triangle_storage) {
+		pack.tri_woop.resize(tidx_size * nsize);
+	}
+	else {
+		pack.tri_woop.resize(0);
+	}
 	pack.prim_visibility.clear();
 	pack.prim_visibility.resize(tidx_size);
 
 	for(unsigned int i = 0; i < tidx_size; i++) {
 		if(pack.prim_index[i] != -1) {
-			float4 woop[3];
-
-			if(pack.prim_type[i] & PRIMITIVE_TRIANGLE) {
-				pack_triangle(i, woop);
-			}
-			else {
-				/* Avoid use of uninitialized memory. */
-				memset(&woop, 0, sizeof(woop));
+			if(use_triangle_storage) {
+				float4 woop[3];
+				if(pack.prim_type[i] & PRIMITIVE_TRIANGLE) {
+					pack_triangle(i, woop);
+				}
+				else {
+					/* Avoid use of uninitialized memory. */
+					memset(&woop, 0, sizeof(woop));
+				}
+				memcpy(&pack.tri_woop[i * nsize], woop, sizeof(float4)*3);
 			}
 
-			memcpy(&pack.tri_woop[i * nsize], woop, sizeof(float4)*3);
-
 			int tob = pack.prim_object[i];
 			Object *ob = objects[tob];
 			pack.prim_visibility[i] = ob->visibility;
@@ -312,7 +318,9 @@ void BVH::pack_primitives()
 				pack.prim_visibility[i] |= PATH_RAY_CURVE;
 		}
 		else {
-			memset(&pack.tri_woop[i * nsize], 0, sizeof(float4)*3);
+			if(use_triangle_storage) {
+				memset(&pack.tri_woop[i * nsize], 0, sizeof(float4)*3);
+			}
 			pack.prim_visibility[i] = 0;
 		}
 	}
diff --git a/intern/cycles/bvh/bvh_params.h b/intern/cycles/bvh/bvh_params.h
index af8d8ee..892fd90 100644
--- a/intern/cycles/bvh/bvh_params.h
+++ b/intern/cycles/bvh/bvh_params.h
@@ -49,6 +49,9 @@ public:
 	/* QBVH */
 	bool use_qbvh;
 
+	/* Use pre-aligned triangle storage for faster lookup. */
+	bool use_triangle_storage;
+
 	/* fixed parameters */
 	enum {
 		MAX_DEPTH = 64,
@@ -73,6 +76,7 @@ public:
 		top_level = false;
 		use_cache = false;
 		use_qbvh = false;
+		use_triangle_storage = true;
 	}
 
 	/* SAH costs */
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 3990bae..ba1bac5 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -110,9 +110,19 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	const float Sz = isect_precalc->Sz;
 
 	/* Calculate vertices relative to ray origin. */
-	const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
-	             tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
-	             tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+	float4 tri_a, tri_b, tri_c;
+	if (kernel_data.bvh.use_tri_storage) {
+		tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
+		tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
+		tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+	}
+	else {
+		const int prim = kernel_tex_fetch(__prim_index, triAddr);
+		const float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+		tri_a = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x));
+		tri_b = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y));
+		tri_c = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z));
+	}
 	const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
 	const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
 	const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
@@ -203,9 +213,19 @@ ccl_device_inline void triangle_intersect_subsurface(
 	const float Sz = isect_precalc->Sz;
 
 	/* Calculate vertices relative to ray origin. */
-	const float4 tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0),
-	             tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1),
-	             tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+	float4 tri_a, tri_b, tri_c;
+	if (kernel_data.bvh.use_tri_storage) {
+		tri_a = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
+		tri_b = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
+		tri_c = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
+	}
+	else {
+		const int prim = kernel_tex_fetch(__prim_index, triAddr);
+		const float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+		tri_a = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x));
+		tri_b = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y));
+		tri_c = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z));
+	}
 	const float3 A = make_float3(tri_a.x - P.x, tri_a.y - P.y, tri_a.z - P.z);
 	const float3 B = make_float3(tri_b.x - P.x, tri_b.y - P.y, tri_b.z - P.z);
 	const float3 C = make_float3(tri_c.x - P.x, tri_c.y - P.y, tri_c.z - P.z);
@@ -318,9 +338,19 @@ ccl_device_inline float3 triangle_refine(KernelGlobals *kg,
 
 	P = P + D*t;
 
-	const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
-	             tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
-	             tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+	float4 tri_a, tri_b, tri_c;
+	if (kernel_data.bvh.use_tri_storage) {
+		tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
+		tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1);
+		tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+	}
+	else {
+		const int prim = kernel_tex_fetch(__prim_index, isect->prim);
+		const float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+		tri_a = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x));
+		tri_b = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y));
+		tri_c = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z));
+	}
 	float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
 	float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
 	float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
@@ -375,9 +405,19 @@ ccl_device_inline float3 triangle_refine_subsurface(KernelGlobals *kg,
 
 	P = P + D*t;
 
-	const float4 tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0),
-	             tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1),
-	             tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+	float4 tri_a, tri_b, tri_c;
+	if (kernel_data.bvh.use_tri_storage) {
+		tri_a = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
+		tri_b = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+1);
+		tri_c = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+2);
+	}
+	else {
+		const int prim = kernel_tex_fetch(__prim_index, isect->prim);
+		const float4 tri_vindex = kernel_tex_fetch(__tri_vindex, prim);
+		tri_a = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x));
+		tri_b = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y));
+		tri_c = kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z));
+	}
 	float3 edge1 = make_float3(tri_a.x - tri_c.x, tri_a.y - tri_c.y, tri_a.z - tri_c.z);
 	float3 edge2 = make_float3(tri_b.x - tri_c.x, tri_b.y - tri_c.y, tri_b.z - tri_c.z);
 	float3 tvec = make_float3(P.x - tri_c.x, P.y - tri_c.y, P.z - tri_c.z);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 238b4b0..bfaffa4 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -957,7 +957,8 @@ typedef struct KernelBVH {
 	int have_curves;
 	int have_instancing;
 	int use_qbvh;
-	int pad1, pad2;
+	int use_tri_storage;
+	int pad1;
 } KernelBVH;
 
 typedef enum CurveFlag {
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index e665fce..06c34b7 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -515,6 +515,7 @@ void Mesh::compute_bvh(SceneParams *params, Progress *progress, int n, int total
 			BVHParams bparams;
 			bparam

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list