[Bf-blender-cvs] [bf55afb] master: Cycles: Make spatial split BVH multi-threaded

Mon Apr 4 15:23:08 CEST 2016

Commit: bf55afbf266c681df6193f938e8651d647bf39ac
Author: Sergey Sharybin
Date:   Mon Apr 4 14:43:21 2016 +0200
Branches: master
https://developer.blender.org/rBbf55afbf266c681df6193f938e8651d647bf39ac

Cycles: Make spatial split BVH multi-threaded

The title actually covers it all, This commit exploits all the work
being done in previous changes to make it possible to build spatial
splits in threads.

Works quite nicely, but has a downside of some extra memory usage.
In practice it doesn't seem to be a huge problem and that we can
always look into later if it becomes a real showstopper.

In practice it shows some nice speedup:

- BMW27 scene takes 3 now (used to be 4)
- Agent shot takes 5 sec (used to be 80)

Such non-linear speedup is most likely coming from much less amount
of heap re-allocations. A a downside, there's a bit of extra memory
used by BVH arrays. From the tests amount of extra memory is below
0.001% so far, so it's not that bad at all.

Reviewers: brecht, juicyfruit, dingto, lukasstockner97

Differential Revision: https://developer.blender.org/D1820

===================================================================

M	intern/cycles/bvh/bvh_build.cpp
M	intern/cycles/bvh/bvh_build.h
M	intern/cycles/bvh/bvh_split.cpp
M	intern/cycles/bvh/bvh_split.h

===================================================================

diff --git a/intern/cycles/bvh/bvh_build.cpp b/intern/cycles/bvh/bvh_build.cpp
index 64fc6e0..5139a62 100644
--- a/intern/cycles/bvh/bvh_build.cpp
+++ b/intern/cycles/bvh/bvh_build.cpp
@@ -40,13 +40,49 @@ CCL_NAMESPACE_BEGIN
 
 class BVHBuildTask : public Task {
 public:
-	BVHBuildTask(BVHBuild *build, InnerNode *node, int child, BVHObjectBinning& range_, int level)
-	: range(range_)
+	BVHBuildTask(BVHBuild *build,
+	             InnerNode *node,
+	             int child,
+	             const BVHObjectBinning& range,
+	             int level)
+	: range_(range)
 	{
-		run = function_bind(&BVHBuild::thread_build_node, build, node, child, &range, level);
+		run = function_bind(&BVHBuild::thread_build_node,
+		                    build,
+		                    node,
+		                    child,
+		                    &range_,
+		                    level);
 	}
+private:
+	BVHObjectBinning range_;
+};
 
-	BVHObjectBinning range;
+class BVHSpatialSplitBuildTask : public Task {
+public:
+	BVHSpatialSplitBuildTask(BVHBuild *build,
+	                         InnerNode *node,
+	                         int child,
+	                         const BVHRange& range,
+	                         const vector<BVHReference>& references,
+	                         int level)
+	: range_(range),
+	  references_(references.begin() + range.start(),
+	              references.begin() + range.end())
+	{
+		range_.set_start(0);
+		run = function_bind(&BVHBuild::thread_build_spatial_split_node,
+		                    build,
+		                    node,
+		                    child,
+		                    &range_,
+		                    &references_,
+		                    level,
+		                    _1);
+	}
+private:
+	BVHRange range_;
+	vector<BVHReference> references_;
 };
 
 /* Constructor / Destructor */
@@ -231,7 +267,6 @@ BVHNode* BVHBuild::run()
 	}
 
 	spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
-
 	if(params.use_spatial_split) {
 		/* NOTE: The API here tries to be as much ready for multi-threaded build
 		 * as possible, but at the same time it tries not to introduce any
@@ -241,13 +276,14 @@ BVHNode* BVHBuild::run()
 		 * So we currently allocate single storage for now, which is only used by
 		 * the only thread working on the spatial BVH build.
 		 */
-		spatial_storage.resize(1);
+		spatial_storage.resize(TaskScheduler::num_threads() + 1);
 		size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1;
 		foreach(BVHSpatialStorage &storage, spatial_storage) {
 			storage.right_bounds.clear();
 			storage.right_bounds.resize(num_bins);
 		}
 	}
+	spatial_free_index = 0;
 
 	/* init progress updates */
 	double build_start_time;
@@ -264,11 +300,12 @@ BVHNode* BVHBuild::run()
 	BVHNode *rootnode;
 
 	if(params.use_spatial_split) {
-		/* singlethreaded spatial split build */
-		rootnode = build_node(root, 0);
+		/* Perform multithreaded spatial split build. */
+		rootnode = build_node(root, &references, 0, 0);
+		task_pool.wait_work();
 	}
 	else {
-		/* multithreaded binning build */
+		/* Perrform multithreaded binning build. */
 		BVHObjectBinning rootbin(root, (references.size())? &references[0]: NULL);
 		rootnode = build_node(rootbin, 0);
 		task_pool.wait_work();
@@ -320,7 +357,10 @@ void BVHBuild::progress_update()
 	progress_start_time = time_dt(); 
 }
 
-void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *range, int level)
+void BVHBuild::thread_build_node(InnerNode *inner,
+                                 int child,
+                                 BVHObjectBinning *range,
+                                 int level)
 {
 	if(progress.get_cancel())
 		return;
@@ -342,14 +382,33 @@ void BVHBuild::thread_build_node(InnerNode *inner, int child, BVHObjectBinning *
 	}
 }
 
-bool BVHBuild::range_within_max_leaf_size(const BVHRange& range) const
+void BVHBuild::thread_build_spatial_split_node(InnerNode *inner,
+                                               int child,
+                                               BVHRange *range,
+                                               vector<BVHReference> *references,
+                                               int level,
+                                               int thread_id)
+{
+	if(progress.get_cancel()) {
+		return;
+	}
+
+	/* build nodes */
+	BVHNode *node = build_node(*range, references, level, thread_id);
+
+	/* set child in inner node */
+	inner->children[child] = node;
+}
+
+bool BVHBuild::range_within_max_leaf_size(const BVHRange& range,
+                                          const vector<BVHReference>& references) const
 {
 	size_t size = range.size();
 	size_t max_leaf_size = max(params.max_triangle_leaf_size, params.max_curve_leaf_size);
 
 	if(size > max_leaf_size)
 		return false;
-	
+
 	size_t num_triangles = 0;
 	size_t num_curves = 0;
 	size_t num_motion_curves = 0;
@@ -381,8 +440,11 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
 	 * visibility tests, since object instances do not check visibility flag */
 	if(!(range.size() > 0 && params.top_level && level == 0)) {
 		/* make leaf node when threshold reached or SAH tells us */
-		if(params.small_enough_for_leaf(size, level) || (range_within_max_leaf_size(range) && leafSAH < splitSAH))
-			return create_leaf_node(range);
+		if((params.small_enough_for_leaf(size, level)) ||
+		   (range_within_max_leaf_size(range, references) && leafSAH < splitSAH))
+		{
+			return create_leaf_node(range, references);
+		}
 	}
 
 	/* perform split */
@@ -410,48 +472,86 @@ BVHNode* BVHBuild::build_node(const BVHObjectBinning& range, int level)
 	return inner;
 }
 
-/* single threaded spatial split builder */
-BVHNode* BVHBuild::build_node(const BVHRange& range, int level)
+/* multithreaded spatial split builder */
+BVHNode* BVHBuild::build_node(const BVHRange& range,
+                              vector<BVHReference> *references,
+                              int level,
+                              int thread_id)
 {
-	/* progress update */
+	/* Update progress.
+	 *
+	 * TODO(sergey): Currently it matches old behavior, but we can move it to the
+	 * task thread (which will mimic non=split builder) and save some CPU ticks
+	 * on checking cancel status.
+	 */
 	progress_update();
-	if(progress.get_cancel())
+	if(progress.get_cancel()) {
 		return NULL;
+	}
 
-	/* small enough or too deep => create leaf. */
+	/* Small enough or too deep => create leaf. */
 	if(!(range.size() > 0 && params.top_level && level == 0)) {
 		if(params.small_enough_for_leaf(range.size(), level)) {
 			progress_count += range.size();
-			return create_leaf_node(range);
+			return create_leaf_node(range, *references);
 		}
 	}
 
-	/* splitting test */
-	BVHMixedSplit split(this, &spatial_storage[0], range, level);
+	/* Perform splitting test. */
+	BVHSpatialStorage *storage = &spatial_storage[thread_id];
+	BVHMixedSplit split(this, storage, range, references, level);
 
 	if(!(range.size() > 0 && params.top_level && level == 0)) {
 		if(split.no_split) {
 			progress_count += range.size();
-			return create_leaf_node(range);
+			return create_leaf_node(range, *references);
 		}
 	}
-	
-	/* do split */
+
+	/* Do split. */
 	BVHRange left, right;
 	split.split(this, left, right, range);
 
 	progress_total += left.size() + right.size() - range.size();
-	size_t total = progress_total;
 
-	/* left node */
-	BVHNode *leftnode = build_node(left, level + 1);
+	/* Create inner node. */
+	InnerNode *inner;
 
-	/* right node (modify start for splits) */
-	right.set_start(right.start() + progress_total - total);
-	BVHNode *rightnode = build_node(right, level + 1);
+	if(range.size() < THREAD_TASK_SIZE) {
+		/* Local build. */
+
+		/* Build left node. */
+		vector<BVHReference> copy(references->begin() + right.start(),
+		                          references->begin() + right.end());
+		right.set_start(0);
+
+		BVHNode *leftnode = build_node(left, references, level + 1, thread_id);
+
+		/* Build right node. */
+		BVHNode *rightnode = build_node(right, &copy, level + 1, thread_id);
+
+		inner = new InnerNode(range.bounds(), leftnode, rightnode);
+	}
+	else {
+		/* Threaded build. */
+		inner = new InnerNode(range.bounds());
+		task_pool.push(new BVHSpatialSplitBuildTask(this,
+		                                            inner,
+		                                            0,
+		                                            left,
+		                                            *references,
+		                                            level + 1),
+		               true);
+		task_pool.push(new BVHSpatialSplitBuildTask(this,
+		                                            inner,
+		                                            1,
+		                                            right,
+		                                            *references,
+		                                            level + 1),
+		               true);
+	}
 
-	/* inner node */
-	return new InnerNode(range.bounds(), leftnode, rightnode);
+	return inner;
 }
 
 /* Create Nodes */
@@ -484,23 +584,8 @@ BVHNode *BVHBuild::create_object_leaf_nodes(const BVHReference *ref, int start,
 	}
 }
 
-BVHNode *BVHBuild::create_primitive_leaf_node(const int *p_type,
-                                              const int *p_index,
-                                              const int *p_object,
-                                              const BoundBox& bounds,
-                                              uint visibility,
-                                              int start,
-                                              int num)
-{
-	for(int i = 0; i < num; ++i) {
-		prim_type[start + i] = p_type[i];
-		prim_index[start + i] = p_index[i];
-		prim_object[start + i] = p_object[i];
-	}
-	return new LeafNode(bounds, visibility, start, start + num);
-}
-
-BVHNode* BVHBuild::create_leaf_node(const BVHRange& range)
+BVHNode* BVHBuild::create_leaf_node(const BVHRange& range,
+                                    const vector<BVHReference>& references)
 {
 	/* This is a bit overallocating here (considering leaf size into account),
 	 * but chunk-based re-allocation in vector makes it difficult to use small
@@ -522,6 +607,9 @@ BVH

@@ Diff output truncated at 10240 characters. @@