[Bf-blender-cvs] [d08e988] master: BLI_kdopbvh: switch from OMP to BLI_task.

Bastien Montagne noreply at git.blender.org
Mon Dec 28 23:20:05 CET 2015


Commit: d08e9883bdfc5011b1e5728545abd100d8553da2
Author: Bastien Montagne
Date:   Mon Dec 28 21:37:46 2015 +0100
Branches: master
https://developer.blender.org/rBd08e9883bdfc5011b1e5728545abd100d8553da2

BLI_kdopbvh: switch from OMP to BLI_task.

Gives the usual 10%-30% speedup on affected functions themselves (BLI_bvhtree_overlap() and
BLI_bvhtree_balance()), and about 2% speedup to overall cloth sim e.g. (measured from
main Cloth modifier func).

===================================================================

M	source/blender/blenlib/intern/BLI_kdopbvh.c

===================================================================

diff --git a/source/blender/blenlib/intern/BLI_kdopbvh.c b/source/blender/blenlib/intern/BLI_kdopbvh.c
index 18df2a5..fb808ca 100644
--- a/source/blender/blenlib/intern/BLI_kdopbvh.c
+++ b/source/blender/blenlib/intern/BLI_kdopbvh.c
@@ -49,6 +49,7 @@
 #include "BLI_kdopbvh.h"
 #include "BLI_math.h"
 #include "BLI_strict_flags.h"
+#include "BLI_task.h"
 
 #ifdef _OPENMP
 #include <omp.h>
@@ -740,6 +741,81 @@ static void split_leafs(BVHNode **leafs_array, int *nth, int partitions, int spl
 	}
 }
 
+typedef struct BVHDivNodesData {
+	BVHTree *tree;
+	BVHNode *branches_array;
+	BVHNode **leafs_array;
+
+	int tree_type;
+	int tree_offset;
+
+	BVHBuildHelper *data;
+
+	int depth;
+	int i;
+	int first_of_next_level;
+} BVHDivNodesData;
+
+static void non_recursive_bvh_div_nodes_task_cb(void *userdata, void *UNUSED(userdata_chunk), int j)
+{
+	BVHDivNodesData *data = userdata;
+
+	int k;
+	const int parent_level_index = j - data->i;
+	BVHNode *parent = data->branches_array + j;
+	int nth_positions[MAX_TREETYPE + 1];
+	char split_axis;
+
+	int parent_leafs_begin = implicit_leafs_index(data->data, data->depth, parent_level_index);
+	int parent_leafs_end   = implicit_leafs_index(data->data, data->depth, parent_level_index + 1);
+
+	/* This calculates the bounding box of this branch
+	 * and chooses the largest axis as the axis to divide leafs */
+	refit_kdop_hull(data->tree, parent, parent_leafs_begin, parent_leafs_end);
+	split_axis = get_largest_axis(parent->bv);
+
+	/* Save split axis (this can be used on raytracing to speedup the query time) */
+	parent->main_axis = split_axis / 2;
+
+	/* Split the childs along the split_axis, note: its not needed to sort the whole leafs array
+	 * Only to assure that the elements are partitioned on a way that each child takes the elements
+	 * it would take in case the whole array was sorted.
+	 * Split_leafs takes care of that "sort" problem. */
+	nth_positions[0] = parent_leafs_begin;
+	nth_positions[data->tree_type] = parent_leafs_end;
+	for (k = 1; k < data->tree_type; k++) {
+		const int child_index = j * data->tree_type + data->tree_offset + k;
+		const int child_level_index = child_index - data->first_of_next_level; /* child level index */
+		nth_positions[k] = implicit_leafs_index(data->data, data->depth + 1, child_level_index);
+	}
+
+	split_leafs(data->leafs_array, nth_positions, data->tree_type, split_axis);
+
+	/* Setup children and totnode counters
+	 * Not really needed but currently most of BVH code relies on having an explicit children structure */
+	for (k = 0; k < data->tree_type; k++) {
+		const int child_index = j * data->tree_type + data->tree_offset + k;
+		const int child_level_index = child_index - data->first_of_next_level; /* child level index */
+
+		const int child_leafs_begin = implicit_leafs_index(data->data, data->depth + 1, child_level_index);
+		const int child_leafs_end   = implicit_leafs_index(data->data, data->depth + 1, child_level_index + 1);
+
+		if (child_leafs_end - child_leafs_begin > 1) {
+			parent->children[k] = data->branches_array + child_index;
+			parent->children[k]->parent = parent;
+		}
+		else if (child_leafs_end - child_leafs_begin == 1) {
+			parent->children[k] = data->leafs_array[child_leafs_begin];
+			parent->children[k]->parent = parent;
+		}
+		else {
+			break;
+		}
+
+		parent->totnode = (char)(k + 1);
+	}
+}
+
 /**
  * This functions builds an optimal implicit tree from the given leafs.
  * Where optimal stands for:
@@ -787,6 +863,12 @@ static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array,
 
 	build_implicit_tree_helper(tree, &data);
 
+	BVHDivNodesData cb_data = {
+		.tree = tree, .branches_array = branches_array, .leafs_array = leafs_array,
+		.tree_type = tree_type, .tree_offset = tree_offset, .data = &data,
+	    .first_of_next_level = 0, .depth = 0, .i = 0,
+	};
+
 	/* Loop tree levels (log N) loops */
 	for (i = 1, depth = 1; i <= num_branches; i = i * tree_type + tree_offset, depth++) {
 		const int first_of_next_level = i * tree_type + tree_offset;
@@ -794,63 +876,16 @@ static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array,
 		int j;
 
 		/* Loop all branches on this level */
+		cb_data.first_of_next_level = first_of_next_level;
+		cb_data.i = i;
+		cb_data.depth = depth;
 
-#pragma omp parallel for private(j) schedule(static) if (num_leafs > KDOPBVH_OMP_LIMIT)
-		for (j = i; j < end_j; j++) {
-			int k;
-			const int parent_level_index = j - i;
-			BVHNode *parent = branches_array + j;
-			int nth_positions[MAX_TREETYPE + 1];
-			char split_axis;
-
-			int parent_leafs_begin = implicit_leafs_index(&data, depth, parent_level_index);
-			int parent_leafs_end   = implicit_leafs_index(&data, depth, parent_level_index + 1);
-
-			/* This calculates the bounding box of this branch
-			 * and chooses the largest axis as the axis to divide leafs */
-			refit_kdop_hull(tree, parent, parent_leafs_begin, parent_leafs_end);
-			split_axis = get_largest_axis(parent->bv);
-
-			/* Save split axis (this can be used on raytracing to speedup the query time) */
-			parent->main_axis = split_axis / 2;
-
-			/* Split the childs along the split_axis, note: its not needed to sort the whole leafs array
-			 * Only to assure that the elements are partitioned on a way that each child takes the elements
-			 * it would take in case the whole array was sorted.
-			 * Split_leafs takes care of that "sort" problem. */
-			nth_positions[0] = parent_leafs_begin;
-			nth_positions[tree_type] = parent_leafs_end;
-			for (k = 1; k < tree_type; k++) {
-				int child_index = j * tree_type + tree_offset + k;
-				int child_level_index = child_index - first_of_next_level; /* child level index */
-				nth_positions[k] = implicit_leafs_index(&data, depth + 1, child_level_index);
-			}
-
-			split_leafs(leafs_array, nth_positions, tree_type, split_axis);
-
-
-			/* Setup children and totnode counters
-			 * Not really needed but currently most of BVH code relies on having an explicit children structure */
-			for (k = 0; k < tree_type; k++) {
-				int child_index = j * tree_type + tree_offset + k;
-				int child_level_index = child_index - first_of_next_level; /* child level index */
-
-				int child_leafs_begin = implicit_leafs_index(&data, depth + 1, child_level_index);
-				int child_leafs_end   = implicit_leafs_index(&data, depth + 1, child_level_index + 1);
-
-				if (child_leafs_end - child_leafs_begin > 1) {
-					parent->children[k] = branches_array + child_index;
-					parent->children[k]->parent = parent;
-				}
-				else if (child_leafs_end - child_leafs_begin == 1) {
-					parent->children[k] = leafs_array[child_leafs_begin];
-					parent->children[k]->parent = parent;
-				}
-				else {
-					break;
-				}
-
-				parent->totnode = (char)(k + 1);
+		if (num_leafs > KDOPBVH_OMP_LIMIT) {
+			BLI_task_parallel_range_ex(i, end_j, &cb_data, NULL, 0, non_recursive_bvh_div_nodes_task_cb, 0, false);
+		}
+		else {
+			for (j = i; j < end_j; j++) {
+				non_recursive_bvh_div_nodes_task_cb(&cb_data, NULL, j);
 			}
 		}
 	}
@@ -1172,6 +1207,23 @@ int BLI_bvhtree_overlap_thread_num(const BVHTree *tree)
 	return (int)MIN2(tree->tree_type, tree->nodes[tree->totleaf]->totnode);
 }
 
+static void bvhtree_overlap_task_cb(void *userdata, void *UNUSED(userdata_chunk), int j)
+{
+	BVHOverlapData_Thread *data = &((BVHOverlapData_Thread *)userdata)[j];
+	BVHOverlapData_Shared *data_shared = data->shared;
+
+	if (data_shared->callback) {
+		tree_overlap_traverse_cb(
+		            data, data_shared->tree1->nodes[data_shared->tree1->totleaf]->children[j],
+		            data_shared->tree2->nodes[data_shared->tree2->totleaf]);
+	}
+	else {
+		tree_overlap_traverse(
+		            data, data_shared->tree1->nodes[data_shared->tree1->totleaf]->children[j],
+		            data_shared->tree2->nodes[data_shared->tree2->totleaf]);
+	}
+}
+
 BVHTreeOverlap *BLI_bvhtree_overlap(
         const BVHTree *tree1, const BVHTree *tree2, unsigned int *r_overlap_tot,
         /* optional callback to test the overlap before adding (must be thread-safe!) */
@@ -1220,13 +1272,12 @@ BVHTreeOverlap *BLI_bvhtree_overlap(
 		data[j].thread = j;
 	}
 
-#pragma omp parallel for private(j) schedule(static)  if (tree1->totleaf > KDOPBVH_OMP_LIMIT)
-	for (j = 0; j < thread_num; j++) {
-		if (callback) {
-			tree_overlap_traverse_cb(&data[j], tree1->nodes[tree1->totleaf]->children[j], tree2->nodes[tree2->totleaf]);
-		}
-		else {
-			tree_overlap_traverse(&data[j], tree1->nodes[tree1->totleaf]->children[j], tree2->nodes[tree2->totleaf]);
+	if (tree1->totleaf > KDOPBVH_OMP_LIMIT) {
+		BLI_task_parallel_range_ex(0, thread_num, data, NULL, 0, bvhtree_overlap_task_cb, 0, false);
+	}
+	else {
+		for (j = 0; j < thread_num; j++) {
+			bvhtree_overlap_task_cb(data, NULL, j);
 		}
 	}




More information about the Bf-blender-cvs mailing list