[Bf-blender-cvs] [31d907f] master: Cleanup: BLI_task - API changes.

Sat Jan 16 16:05:10 CET 2016

Commit: 31d907fa0ad1afa8c8a1829e16d3af394ab9e301
Author: Bastien Montagne
Date:   Sat Jan 16 15:59:37 2016 +0100
Branches: master
https://developer.blender.org/rB31d907fa0ad1afa8c8a1829e16d3af394ab9e301

Cleanup: BLI_task - API changes.

Based on usages so far:
- Split the callback worker func in two, 'basic' and 'extended' versions. The former goes back
  to the simplest version, while the latter keeps the 'userdata_chunk', and gets the thread_id too.
- Add use_threading to the simple BLI_task_parallel_range(); it turns out we need this pretty much systematically,
  and it allows getting rid of most usages of BLI_task_parallel_range_ex().
- Now BLI_task_parallel_range() expects the 'basic' version of the callback, while BLI_task_parallel_range_ex()
  expects the 'extended' version of the callback.

All in all, this should make common usage of BLI_task_parallel_range simpler (less verbose), and give the
advanced callback access to the thread id, which is mandatory in some (future) cases.

===================================================================

M	source/blender/blenkernel/intern/ocean.c
M	source/blender/blenlib/BLI_task.h
M	source/blender/blenlib/intern/BLI_kdopbvh.c
M	source/blender/blenlib/intern/math_statistics.c
M	source/blender/blenlib/intern/task.c
M	source/blender/editors/sculpt_paint/sculpt.c
M	source/blender/editors/sculpt_paint/sculpt_undo.c
M	source/blender/modifiers/intern/MOD_meshdeform.c
M	source/blender/modifiers/intern/MOD_ocean.c
M	source/blender/modifiers/intern/MOD_uvwarp.c
M	source/blender/modifiers/intern/MOD_weightvgproximity.c

===================================================================

diff --git a/source/blender/blenkernel/intern/ocean.c b/source/blender/blenkernel/intern/ocean.c
index 5a75505..c5179e5 100644
--- a/source/blender/blenkernel/intern/ocean.c
+++ b/source/blender/blenkernel/intern/ocean.c
@@ -502,7 +502,7 @@ typedef struct OceanSimulateData {
 	float chop_amount;
 } OceanSimulateData;
 
-static void ocean_compute_htilda(void *userdata, void *UNUSED(userdata_chunk), int i)
+static void ocean_compute_htilda(void *userdata, const int i)
 {
 	OceanSimulateData *osd = userdata;
 	const Ocean *o = osd->o;
@@ -748,7 +748,7 @@ void BKE_ocean_simulate(struct Ocean *o, float t, float scale, float chop_amount
 	 * This is not optimal in all cases, but remains reasonably simple and should be OK most of the time. */
 
 	/* compute a new htilda */
-	BLI_task_parallel_range(0, o->_M, &osd, ocean_compute_htilda);
+	BLI_task_parallel_range(0, o->_M, &osd, ocean_compute_htilda, o->_M > 16);
 
 	if (o->_do_disp_y) {
 		BLI_task_pool_push(pool, ocean_compute_displacement_y, NULL, false, TASK_PRIORITY_HIGH);
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h
index 7b9a3c5..63a0795 100644
--- a/source/blender/blenlib/BLI_task.h
+++ b/source/blender/blenlib/BLI_task.h
@@ -112,19 +112,20 @@ ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool);
 size_t BLI_task_pool_tasks_done(TaskPool *pool);
 
 /* Parallel for routines */
-typedef void (*TaskParallelRangeFunc)(void *userdata, void *userdata_chunk, int iter);
+typedef void (*TaskParallelRangeFunc)(void *userdata, const int iter);
+typedef void (*TaskParallelRangeFuncEx)(void *userdata, void *userdata_chunk, const int iter, const int thread_id);
 void BLI_task_parallel_range_ex(
         int start, int stop,
         void *userdata,
         void *userdata_chunk,
-        const size_t userdata_chunk_size,
-        TaskParallelRangeFunc func,
+        const size_t userdata_chunk_size, TaskParallelRangeFuncEx func_ex,
         const bool use_threading,
         const bool use_dynamic_scheduling);
 void BLI_task_parallel_range(
         int start, int stop,
         void *userdata,
-        TaskParallelRangeFunc func);
+        TaskParallelRangeFunc func,
+        const bool use_threading);
 
 #ifdef __cplusplus
 }
diff --git a/source/blender/blenlib/intern/BLI_kdopbvh.c b/source/blender/blenlib/intern/BLI_kdopbvh.c
index f1ed49f..c4bf2ae 100644
--- a/source/blender/blenlib/intern/BLI_kdopbvh.c
+++ b/source/blender/blenlib/intern/BLI_kdopbvh.c
@@ -750,7 +750,7 @@ typedef struct BVHDivNodesData {
 	int first_of_next_level;
 } BVHDivNodesData;
 
-static void non_recursive_bvh_div_nodes_task_cb(void *userdata, void *UNUSED(userdata_chunk), int j)
+static void non_recursive_bvh_div_nodes_task_cb(void *userdata, const int j)
 {
 	BVHDivNodesData *data = userdata;
 
@@ -873,9 +873,9 @@ static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array,
 		cb_data.i = i;
 		cb_data.depth = depth;
 
-		BLI_task_parallel_range_ex(
-		            i, end_j, &cb_data, NULL, 0, non_recursive_bvh_div_nodes_task_cb,
-		            num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD, false);
+		BLI_task_parallel_range(
+		            i, end_j, &cb_data, non_recursive_bvh_div_nodes_task_cb,
+		            num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD);
 	}
 }
 
@@ -1195,7 +1195,7 @@ int BLI_bvhtree_overlap_thread_num(const BVHTree *tree)
 	return (int)MIN2(tree->tree_type, tree->nodes[tree->totleaf]->totnode);
 }
 
-static void bvhtree_overlap_task_cb(void *userdata, void *UNUSED(userdata_chunk), int j)
+static void bvhtree_overlap_task_cb(void *userdata, const int j)
 {
 	BVHOverlapData_Thread *data = &((BVHOverlapData_Thread *)userdata)[j];
 	BVHOverlapData_Shared *data_shared = data->shared;
@@ -1260,9 +1260,9 @@ BVHTreeOverlap *BLI_bvhtree_overlap(
 		data[j].thread = j;
 	}
 
-	BLI_task_parallel_range_ex(
-	            0, thread_num, data, NULL, 0, bvhtree_overlap_task_cb,
-	            tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD, false);
+	BLI_task_parallel_range(
+	            0, thread_num, data, bvhtree_overlap_task_cb,
+	            tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD);
 	
 	for (j = 0; j < thread_num; j++)
 		total += BLI_stack_count(data[j].overlap);
diff --git a/source/blender/blenlib/intern/math_statistics.c b/source/blender/blenlib/intern/math_statistics.c
index 97c224e..fbd6563 100644
--- a/source/blender/blenlib/intern/math_statistics.c
+++ b/source/blender/blenlib/intern/math_statistics.c
@@ -46,7 +46,7 @@ typedef struct CovarianceData {
 	int nbr_cos_vn;
 } CovarianceData;
 
-static void covariance_m_vn_ex_task_cb(void *userdata, void *UNUSED(userdata_chunk), int a)
+static void covariance_m_vn_ex_task_cb(void *userdata, const int a)
 {
 	CovarianceData *data = userdata;
 	const float *cos_vn = data->cos_vn;
@@ -117,8 +117,8 @@ void BLI_covariance_m_vn_ex(
 	    .covfac = covfac, .n = n, .nbr_cos_vn = nbr_cos_vn,
 	};
 
-	BLI_task_parallel_range_ex(
-	            0, n * n, &data, NULL, 0, covariance_m_vn_ex_task_cb, (nbr_cos_vn * n * n) >= 10000, false);
+	BLI_task_parallel_range(
+	            0, n * n, &data, covariance_m_vn_ex_task_cb, (nbr_cos_vn * n * n) >= 10000);
 }
 
 /**
diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c
index 2be688a..f0edcc7 100644
--- a/source/blender/blenlib/intern/task.c
+++ b/source/blender/blenlib/intern/task.c
@@ -584,7 +584,9 @@ typedef struct ParallelRangeState {
 	void *userdata;
 	void *userdata_chunk;
 	size_t userdata_chunk_size;
+
 	TaskParallelRangeFunc func;
+	TaskParallelRangeFuncEx func_ex;
 
 	int iter;
 	int chunk_size;
@@ -610,23 +612,31 @@ BLI_INLINE bool parallel_range_next_iter_get(
 static void parallel_range_func(
         TaskPool * __restrict pool,
         void *UNUSED(taskdata),
-        int UNUSED(threadid))
+        int threadid)
 {
 	ParallelRangeState * __restrict state = BLI_task_pool_userdata(pool);
 	int iter, count;
 
-	const bool use_userdata_chunk = (state->userdata_chunk_size != 0) && (state->userdata_chunk != NULL);
+	const bool use_userdata_chunk = (state->func_ex != NULL) &&
+	                                (state->userdata_chunk_size != 0) && (state->userdata_chunk != NULL);
 	void *userdata_chunk = use_userdata_chunk ? MALLOCA(state->userdata_chunk_size) : NULL;
 
 	while (parallel_range_next_iter_get(state, &iter, &count)) {
 		int i;
 
-		if (use_userdata_chunk) {
-			memcpy(userdata_chunk, state->userdata_chunk, state->userdata_chunk_size);
-		}
+		if (state->func_ex) {
+			if (use_userdata_chunk) {
+				memcpy(userdata_chunk, state->userdata_chunk, state->userdata_chunk_size);
+			}
 
-		for (i = 0; i < count; ++i) {
-			state->func(state->userdata, userdata_chunk, iter + i);
+			for (i = 0; i < count; ++i) {
+				state->func_ex(state->userdata, userdata_chunk, iter + i, threadid);
+			}
+		}
+		else {
+			for (i = 0; i < count; ++i) {
+				state->func(state->userdata, iter + i);
+			}
 		}
 	}
 
@@ -642,18 +652,20 @@ static void parallel_range_func(
  * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data
  *                       (similar to OpenMP's firstprivate).
  * \param userdata_chunk_size Memory size of \a userdata_chunk.
- * \param func Callback function.
+ * \param func Callback function (simple version).
+ * \param func_ex Callback function (advanced version).
  * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop
  *                      (allows caller to use any kind of test to switch on parallelization or not).
  * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently),
  *                               otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently).
  */
-void BLI_task_parallel_range_ex(
+static void task_parallel_range_ex(
         int start, int stop,
         void *userdata,
         void *userdata_chunk,
         const size_t userdata_chunk_size,
         TaskParallelRangeFunc func,
+        TaskParallelRangeFuncEx func_ex,
         const bool use_threading,
         const bool use_dynamic_scheduling)
 {
@@ -666,25 +678,37 @@ void BLI_task_parallel_range_ex(
 		return;
 	}
 
-	BLI_assert(start <= stop);
+	BLI_assert(start < stop);
+	if (userdata_chunk_size != 0) {
+		BLI_assert(func_ex != NULL && func == NULL);
+		BLI_assert(userdata_chunk != NULL);
+	}
 
 	/* If it's not enough data to be crunched, don't bother with tasks at all,
 	 * do everything from the main thread.
 	 */
 	if (!use_threading) {
-		const bool use_userdata_chunk = (userdata_chunk_size != 0) && (userdata_chunk != NULL);
-		void *userdata_chunk_local = NULL;
+		if (func_ex) {
+			const bool use_userdata_chunk = (userdata_chunk_size != 0) && (userdata_chunk != NULL);
+			void *userdata_chunk_local = NULL;
 
-		if (use_userdata_chunk) {
-			userdata_chunk_local = MALLOCA(userdata_chunk_size);
-			memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size);
-		}
+			if (use_userdata_chunk) {
+				userdata_chunk_local = MALLOCA(userdata_chunk_size);
+				memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size);
+			}
 
-		for (i = start; i < stop; ++i) {
-			func(userdata, userdata_chunk_local, i);
+			for (i = start; i < stop; ++i) {
+				func_ex(userdata, userdata_chunk, i, 0);
+			}
+
+			MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size);
+		}
+		else {
+			for (i = start; i < stop; ++i) {
+				func(userdata, i);
+			}
 		}
 
-		MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size);
 		return;
 	}
 
@@ -705,6 +729,7 @@ void BLI_task_parallel_range_ex(
 	state.userdata_chunk = userdata_chunk;
 	state.userdata_chunk_size = userdata_chunk_size;
 	state.func = func;
+	state.func_ex = func_ex;
 	state.iter = start;
 	if (use_dynamic_scheduling) {
 		state.chunk_size = 32;
@@ -729,15 +754,46 @@ void BLI_task_parallel_range_ex(
 }
 
 /**
+ * This function allows to parallelized for loops in a similar way to OpenMP's 'parallel for' statement.
+ *
+ * \param start First index to process.
+ * \param stop Index to stop looping (excluded).
+ * \param userdata Common userdata passed to all instances of \a func.
+ * \param userdata_chunk Op

@@ Diff output truncated at 10240 characters. @@