[Bf-blender-cvs] [b36a0c4] master: Switch from OMP to BLI_task in BKE's part of Ocean simulation.

Bastien Montagne noreply at git.blender.org
Sun Dec 6 17:39:35 CET 2015


Commit: b36a0c44bb1ca12060cf724678ffe62bfa9d523f
Author: Bastien Montagne
Date:   Sun Dec 6 17:37:10 2015 +0100
Branches: master
https://developer.blender.org/rBb36a0c44bb1ca12060cf724678ffe62bfa9d523f

Switch from OMP to BLI_task in BKE's part of Ocean simulation.

Not much to say: this gives about an 8% to 9% speedup in ocean simulation.

===================================================================

M	source/blender/blenkernel/intern/ocean.c

===================================================================

diff --git a/source/blender/blenkernel/intern/ocean.c b/source/blender/blenkernel/intern/ocean.c
index 1a178fb..b1720d4 100644
--- a/source/blender/blenkernel/intern/ocean.c
+++ b/source/blender/blenkernel/intern/ocean.c
@@ -41,6 +41,7 @@
 #include "BLI_math.h"
 #include "BLI_path_util.h"
 #include "BLI_rand.h"
+#include "BLI_task.h"
 #include "BLI_threads.h"
 #include "BLI_utildefines.h"
 
@@ -494,231 +495,296 @@ void BKE_ocean_eval_ij(struct Ocean *oc, struct OceanResult *ocr, int i, int j)
 	BLI_rw_mutex_unlock(&oc->oceanmutex);
 }
 
-void BKE_ocean_simulate(struct Ocean *o, float t, float scale, float chop_amount)
+typedef struct OceanSimulateData {
+	Ocean *o;
+	float t;
+	float scale;
+	float chop_amount;
+} OceanSimulateData;
+
+static void ocean_compute_htilda_cb(void *userdata, void *UNUSED(userdata_chunk), int i)
+{
+	OceanSimulateData *osd = userdata;
+	const Ocean *o = osd->o;
+	const float scale = osd->scale;
+	const float t = osd->t;
+
+	int j;
+
+	/* note the <= _N/2 here, see the fftw doco about the mechanics of the complex->real fft storage */
+	for (j = 0; j <= o->_N / 2; ++j) {
+		fftw_complex exp_param1;
+		fftw_complex exp_param2;
+		fftw_complex conj_param;
+
+		init_complex(exp_param1, 0.0, omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
+		init_complex(exp_param2, 0.0, -omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
+		exp_complex(exp_param1, exp_param1);
+		exp_complex(exp_param2, exp_param2);
+		conj_complex(conj_param, o->_h0_minus[i * o->_N + j]);
+
+		mul_complex_c(exp_param1, o->_h0[i * o->_N + j], exp_param1);
+		mul_complex_c(exp_param2, conj_param, exp_param2);
+
+		add_comlex_c(o->_htilda[i * (1 + o->_N / 2) + j], exp_param1, exp_param2);
+		mul_complex_f(o->_fft_in[i * (1 + o->_N / 2) + j], o->_htilda[i * (1 + o->_N / 2) + j], scale);
+	}
+}
+
+static void ocean_compute_displacement_y(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+	OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+	const Ocean *o = osd->o;
+
+	fftw_execute(o->_disp_y_plan);
+}
+
+static void ocean_compute_displacement_x(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
 {
+	OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+	const Ocean *o = osd->o;
+	const float scale = osd->scale;
+	const float chop_amount = osd->chop_amount;
 	int i, j;
 
-	scale *= o->normalize_factor;
+	for (i = 0; i < o->_M; ++i) {
+		for (j = 0; j <= o->_N / 2; ++j) {
+			fftw_complex mul_param;
+			fftw_complex minus_i;
+
+			init_complex(minus_i, 0.0, -1.0);
+			init_complex(mul_param, -scale, 0);
+			mul_complex_f(mul_param, mul_param, chop_amount);
+			mul_complex_c(mul_param, mul_param, minus_i);
+			mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+			mul_complex_f(mul_param, mul_param,
+			              ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+			               0.0f :
+			               o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
+			init_complex(o->_fft_in_x[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+		}
+	}
+	fftw_execute(o->_disp_x_plan);
+}
 
-	BLI_rw_mutex_lock(&o->oceanmutex, THREAD_LOCK_WRITE);
+static void ocean_compute_displacement_z(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+	OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+	const Ocean *o = osd->o;
+	const float scale = osd->scale;
+	const float chop_amount = osd->chop_amount;
+	int i, j;
 
-	/* compute a new htilda */
-#pragma omp parallel for private(i, j)
 	for (i = 0; i < o->_M; ++i) {
-		/* note the <= _N/2 here, see the fftw doco about the mechanics of the complex->real fft storage */
 		for (j = 0; j <= o->_N / 2; ++j) {
-			fftw_complex exp_param1;
-			fftw_complex exp_param2;
-			fftw_complex conj_param;
+			fftw_complex mul_param;
+			fftw_complex minus_i;
+
+			init_complex(minus_i, 0.0, -1.0);
+			init_complex(mul_param, -scale, 0);
+			mul_complex_f(mul_param, mul_param, chop_amount);
+			mul_complex_c(mul_param, mul_param, minus_i);
+			mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+			mul_complex_f(mul_param, mul_param,
+			              ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+			               0.0f :
+			               o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
+			init_complex(o->_fft_in_z[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+		}
+	}
+	fftw_execute(o->_disp_z_plan);
+}
+
+static void ocean_compute_jacobian_jxx(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+	OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+	const Ocean *o = osd->o;
+	const float chop_amount = osd->chop_amount;
+	int i, j;
 
+	for (i = 0; i < o->_M; ++i) {
+		for (j = 0; j <= o->_N / 2; ++j) {
+			fftw_complex mul_param;
+
+			/* init_complex(mul_param, -scale, 0); */
+			init_complex(mul_param, -1, 0);
+
+			mul_complex_f(mul_param, mul_param, chop_amount);
+			mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+			mul_complex_f(mul_param, mul_param,
+			              ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+			               0.0f :
+			               o->_kx[i] * o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
+			init_complex(o->_fft_in_jxx[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+		}
+	}
+	fftw_execute(o->_Jxx_plan);
 
-			init_complex(exp_param1, 0.0, omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
-			init_complex(exp_param2, 0.0, -omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
-			exp_complex(exp_param1, exp_param1);
-			exp_complex(exp_param2, exp_param2);
-			conj_complex(conj_param, o->_h0_minus[i * o->_N + j]);
+	for (i = 0; i < o->_M; ++i) {
+		for (j = 0; j < o->_N; ++j) {
+			o->_Jxx[i * o->_N + j] += 1.0;
+		}
+	}
+}
 
-			mul_complex_c(exp_param1, o->_h0[i * o->_N + j], exp_param1);
-			mul_complex_c(exp_param2, conj_param, exp_param2);
+static void ocean_compute_jacobian_jzz(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+	OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+	const Ocean *o = osd->o;
+	const float chop_amount = osd->chop_amount;
+	int i, j;
 
-			add_comlex_c(o->_htilda[i * (1 + o->_N / 2) + j], exp_param1, exp_param2);
-			mul_complex_f(o->_fft_in[i * (1 + o->_N / 2) + j], o->_htilda[i * (1 + o->_N / 2) + j], scale);
+	for (i = 0; i < o->_M; ++i) {
+		for (j = 0; j <= o->_N / 2; ++j) {
+			fftw_complex mul_param;
+
+			/* init_complex(mul_param, -scale, 0); */
+			init_complex(mul_param, -1, 0);
+
+			mul_complex_f(mul_param, mul_param, chop_amount);
+			mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+			mul_complex_f(mul_param, mul_param,
+			              ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+			               0.0f :
+			               o->_kz[j] * o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
+			init_complex(o->_fft_in_jzz[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
 		}
 	}
+	fftw_execute(o->_Jzz_plan);
 
-#pragma omp parallel sections private(i, j)
-	{
+	for (i = 0; i < o->_M; ++i) {
+		for (j = 0; j < o->_N; ++j) {
+			o->_Jzz[i * o->_N + j] += 1.0;
+		}
+	}
+}
 
-#pragma omp section
-		{
-			if (o->_do_disp_y) {
-				/* y displacement */
-				fftw_execute(o->_disp_y_plan);
-			}
-		} /* section 1 */
-
-#pragma omp section
-		{
-			if (o->_do_chop) {
-				/* x displacement */
-				for (i = 0; i < o->_M; ++i) {
-					for (j = 0; j <= o->_N / 2; ++j) {
-						fftw_complex mul_param;
-						fftw_complex minus_i;
-
-						init_complex(minus_i, 0.0, -1.0);
-						init_complex(mul_param, -scale, 0);
-						mul_complex_f(mul_param, mul_param, chop_amount);
-						mul_complex_c(mul_param, mul_param, minus_i);
-						mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
-						mul_complex_f(mul_param, mul_param,
-						              ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
-						               0.0f :
-						               o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
-						init_complex(o->_fft_in_x[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
-					}
-				}
-				fftw_execute(o->_disp_x_plan);
-			}
-		} /* section 2 */
-
-#pragma omp section
-		{
-			if (o->_do_chop) {
-				/* z displacement */
-				for (i = 0; i < o->_M; ++i) {
-					for (j = 0; j <= o->_N / 2; ++j) {
-						fftw_complex mul_param;
-						fftw_complex minus_i;
-
-						init_complex(minus_i, 0.0, -1.0);
-						init_complex(mul_param, -scale, 0);
-						mul_complex_f(mul_param, mul_param, chop_amount);
-						mul_complex_c(mul_param, mul_param, minus_i);
-						mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
-						mul_complex_f(mul_param, mul_param,
-						              ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
-						               0.0f :
-						               o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
-						init_complex(o->_fft_in_z[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
-					}
-				}
-				fftw_execute(o->_disp_z_plan);
-			}
-		} /* section 3 */
-
-#pragma omp section
-		{
-			if (o->_do_jacobian) {
-				/* Jxx */
-				for (i = 0; i < o->_M; ++i) {
-					for (j = 0; j <= o->_N / 2; ++j) {
-						fftw_complex mul_param;
-
-						/* init_complex(mul_param, -scale, 0); */
-						init_complex(mul_param, -1, 0);
-
-						mul_complex_f(mul_param, mul_param, chop_amount);
-						mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
-						mul_complex_f(mul_param, mul_param,
-						              ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
-						               0.0f :
-						               o->_kx[i] * o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
-						init_complex(o->_fft_in_jxx[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
-					}
-				}
-				fftw_execute(o->_Jxx_plan);
+static void ocean_compute_jacobian_jxz(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+	OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+	const Ocean *o = osd->o;
+	const float chop_amount = osd->chop_amount;
+	int i, j;
 
-				for (i = 0; i < o->_M; ++i) {
-					for (j = 0; j < o->_N; ++j) {
-						o->_Jxx[i * o->_N + j] += 1.0;
-					}
-				}
-			}
-		} /* section 4 */
-
-#pragma omp section
-		{
-			if (o->_do_jacobian)

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list