[Bf-blender-cvs] [b36a0c4] master: Switch from OMP to BLI_task in BKE's part of Ocean simulation.
Bastien Montagne
noreply at git.blender.org
Sun Dec 6 17:39:35 CET 2015
Commit: b36a0c44bb1ca12060cf724678ffe62bfa9d523f
Author: Bastien Montagne
Date: Sun Dec 6 17:37:10 2015 +0100
Branches: master
https://developer.blender.org/rBb36a0c44bb1ca12060cf724678ffe62bfa9d523f
Switch from OMP to BLI_task in BKE's part of Ocean simulation.
Not much to say; this gives about an 8% to 9% speedup in ocean simulation.
===================================================================
M source/blender/blenkernel/intern/ocean.c
===================================================================
diff --git a/source/blender/blenkernel/intern/ocean.c b/source/blender/blenkernel/intern/ocean.c
index 1a178fb..b1720d4 100644
--- a/source/blender/blenkernel/intern/ocean.c
+++ b/source/blender/blenkernel/intern/ocean.c
@@ -41,6 +41,7 @@
#include "BLI_math.h"
#include "BLI_path_util.h"
#include "BLI_rand.h"
+#include "BLI_task.h"
#include "BLI_threads.h"
#include "BLI_utildefines.h"
@@ -494,231 +495,296 @@ void BKE_ocean_eval_ij(struct Ocean *oc, struct OceanResult *ocr, int i, int j)
BLI_rw_mutex_unlock(&oc->oceanmutex);
}
-void BKE_ocean_simulate(struct Ocean *o, float t, float scale, float chop_amount)
+typedef struct OceanSimulateData {
+ Ocean *o;
+ float t;
+ float scale;
+ float chop_amount;
+} OceanSimulateData;
+
+static void ocean_compute_htilda_cb(void *userdata, void *UNUSED(userdata_chunk), int i)
+{
+ OceanSimulateData *osd = userdata;
+ const Ocean *o = osd->o;
+ const float scale = osd->scale;
+ const float t = osd->t;
+
+ int j;
+
+ /* note the <= _N/2 here, see the fftw doco about the mechanics of the complex->real fft storage */
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex exp_param1;
+ fftw_complex exp_param2;
+ fftw_complex conj_param;
+
+ init_complex(exp_param1, 0.0, omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
+ init_complex(exp_param2, 0.0, -omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
+ exp_complex(exp_param1, exp_param1);
+ exp_complex(exp_param2, exp_param2);
+ conj_complex(conj_param, o->_h0_minus[i * o->_N + j]);
+
+ mul_complex_c(exp_param1, o->_h0[i * o->_N + j], exp_param1);
+ mul_complex_c(exp_param2, conj_param, exp_param2);
+
+ add_comlex_c(o->_htilda[i * (1 + o->_N / 2) + j], exp_param1, exp_param2);
+ mul_complex_f(o->_fft_in[i * (1 + o->_N / 2) + j], o->_htilda[i * (1 + o->_N / 2) + j], scale);
+ }
+}
+
+static void ocean_compute_displacement_y(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+
+ fftw_execute(o->_disp_y_plan);
+}
+
+static void ocean_compute_displacement_x(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float scale = osd->scale;
+ const float chop_amount = osd->chop_amount;
int i, j;
- scale *= o->normalize_factor;
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+ fftw_complex minus_i;
+
+ init_complex(minus_i, 0.0, -1.0);
+ init_complex(mul_param, -scale, 0);
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, minus_i);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_x[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_disp_x_plan);
+}
- BLI_rw_mutex_lock(&o->oceanmutex, THREAD_LOCK_WRITE);
+static void ocean_compute_displacement_z(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float scale = osd->scale;
+ const float chop_amount = osd->chop_amount;
+ int i, j;
- /* compute a new htilda */
-#pragma omp parallel for private(i, j)
for (i = 0; i < o->_M; ++i) {
- /* note the <= _N/2 here, see the fftw doco about the mechanics of the complex->real fft storage */
for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex exp_param1;
- fftw_complex exp_param2;
- fftw_complex conj_param;
+ fftw_complex mul_param;
+ fftw_complex minus_i;
+
+ init_complex(minus_i, 0.0, -1.0);
+ init_complex(mul_param, -scale, 0);
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, minus_i);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_z[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_disp_z_plan);
+}
+
+static void ocean_compute_jacobian_jxx(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float chop_amount = osd->chop_amount;
+ int i, j;
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+
+ /* init_complex(mul_param, -scale, 0); */
+ init_complex(mul_param, -1, 0);
+
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kx[i] * o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_jxx[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_Jxx_plan);
- init_complex(exp_param1, 0.0, omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
- init_complex(exp_param2, 0.0, -omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
- exp_complex(exp_param1, exp_param1);
- exp_complex(exp_param2, exp_param2);
- conj_complex(conj_param, o->_h0_minus[i * o->_N + j]);
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j < o->_N; ++j) {
+ o->_Jxx[i * o->_N + j] += 1.0;
+ }
+ }
+}
- mul_complex_c(exp_param1, o->_h0[i * o->_N + j], exp_param1);
- mul_complex_c(exp_param2, conj_param, exp_param2);
+static void ocean_compute_jacobian_jzz(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float chop_amount = osd->chop_amount;
+ int i, j;
- add_comlex_c(o->_htilda[i * (1 + o->_N / 2) + j], exp_param1, exp_param2);
- mul_complex_f(o->_fft_in[i * (1 + o->_N / 2) + j], o->_htilda[i * (1 + o->_N / 2) + j], scale);
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+
+ /* init_complex(mul_param, -scale, 0); */
+ init_complex(mul_param, -1, 0);
+
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kz[j] * o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_jzz[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
}
}
+ fftw_execute(o->_Jzz_plan);
-#pragma omp parallel sections private(i, j)
- {
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j < o->_N; ++j) {
+ o->_Jzz[i * o->_N + j] += 1.0;
+ }
+ }
+}
-#pragma omp section
- {
- if (o->_do_disp_y) {
- /* y displacement */
- fftw_execute(o->_disp_y_plan);
- }
- } /* section 1 */
-
-#pragma omp section
- {
- if (o->_do_chop) {
- /* x displacement */
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
- fftw_complex minus_i;
-
- init_complex(minus_i, 0.0, -1.0);
- init_complex(mul_param, -scale, 0);
- mul_complex_f(mul_param, mul_param, chop_amount);
- mul_complex_c(mul_param, mul_param, minus_i);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param,
- ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
- 0.0f :
- o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
- init_complex(o->_fft_in_x[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_disp_x_plan);
- }
- } /* section 2 */
-
-#pragma omp section
- {
- if (o->_do_chop) {
- /* z displacement */
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
- fftw_complex minus_i;
-
- init_complex(minus_i, 0.0, -1.0);
- init_complex(mul_param, -scale, 0);
- mul_complex_f(mul_param, mul_param, chop_amount);
- mul_complex_c(mul_param, mul_param, minus_i);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param,
- ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
- 0.0f :
- o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
- init_complex(o->_fft_in_z[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_disp_z_plan);
- }
- } /* section 3 */
-
-#pragma omp section
- {
- if (o->_do_jacobian) {
- /* Jxx */
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
-
- /* init_complex(mul_param, -scale, 0); */
- init_complex(mul_param, -1, 0);
-
- mul_complex_f(mul_param, mul_param, chop_amount);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param,
- ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
- 0.0f :
- o->_kx[i] * o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
- init_complex(o->_fft_in_jxx[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_Jxx_plan);
+static void ocean_compute_jacobian_jxz(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float chop_amount = osd->chop_amount;
+ int i, j;
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j < o->_N; ++j) {
- o->_Jxx[i * o->_N + j] += 1.0;
- }
- }
- }
- } /* section 4 */
-
-#pragma omp section
- {
- if (o->_do_jacobian)
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list