[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [50416] branches/soc-2011-tomato/source/ blender/editors/space_sequencer/sequencer_scopes.c: Sequencer: multi-threaded histogram calculation

Wed Sep 5 13:43:11 CEST 2012

Revision: 50416
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=50416
Author:   nazgul
Date:     2012-09-05 11:43:11 +0000 (Wed, 05 Sep 2012)
Log Message:
-----------
Sequencer: multi-threaded histogram calculation

This gives some percentage of speedup, which compensates for the slowdown
caused by converting the image buffer into display space.

Used OpenMP for this. Still feel skeptical about this, but discussed it
with Brecht and we decided this approach could actually be used, since it
seems all the platforms have got their OpenMP issues solved.

Waveform and vector scopes are still single-threaded since they're
a bit trickier to make multi-threaded and are probably not so commonly
used.

Modified Paths:
--------------
    branches/soc-2011-tomato/source/blender/editors/space_sequencer/sequencer_scopes.c

Modified: branches/soc-2011-tomato/source/blender/editors/space_sequencer/sequencer_scopes.c
===================================================================

--- branches/soc-2011-tomato/source/blender/editors/space_sequencer/sequencer_scopes.c	2012-09-05 11:43:05 UTC (rev 50415)
+++ branches/soc-2011-tomato/source/blender/editors/space_sequencer/sequencer_scopes.c	2012-09-05 11:43:11 UTC (rev 50416)
@@ -460,13 +460,29 @@
 
 	memset(bins, 0, sizeof(bins));
 
+	#pragma omp parallel for shared(bins, src, ibuf) private(x, y) if (ibuf->y >= 256)
 	for (y = 0; y < ibuf->y; y++) {
+		unsigned int cur_bins[3][512];
+
+		memset(cur_bins, 0, sizeof(cur_bins));
+
 		for (x = 0; x < ibuf->x; x++) {
-			bins[0][*src++]++;
-			bins[1][*src++]++;
-			bins[2][*src++]++;
-			src++;
+			unsigned char *pixel = src + (y * ibuf->x + x) * 4;
+
+			cur_bins[0][pixel[0]]++;
+			cur_bins[1][pixel[1]]++;
+			cur_bins[2][pixel[2]]++;
 		}
+
+		#pragma omp critical
+		{
+			int i;
+			for (i = 0; i < 512; i++) {
+				bins[0][i] += cur_bins[0][i];
+				bins[1][i] += cur_bins[1][i];
+				bins[2][i] += cur_bins[2][i];
+			}
+		}
 	}
 
 	n = 0;
@@ -490,7 +506,7 @@
 	return rval;
 }
 
-static int get_bin_float(float f)
+BLI_INLINE int get_bin_float(float f)
 {
 	if (f < -0.25f) {
 		return 0;
@@ -512,13 +528,29 @@
 
 	memset(bins, 0, sizeof(bins));
 
+	#pragma omp parallel for shared(bins, src, ibuf) private(x, y) if (ibuf->y >= 256)
 	for (y = 0; y < ibuf->y; y++) {
+		unsigned int cur_bins[3][512];
+
+		memset(cur_bins, 0, sizeof(cur_bins));
+
 		for (x = 0; x < ibuf->x; x++) {
-			bins[0][get_bin_float(*src++)]++;
-			bins[1][get_bin_float(*src++)]++;
-			bins[2][get_bin_float(*src++)]++;
-			src++;
+			float *pixel = src + (y * ibuf->x + x) * 4;
+
+			cur_bins[0][get_bin_float(pixel[0])]++;
+			cur_bins[1][get_bin_float(pixel[1])]++;
+			cur_bins[2][get_bin_float(pixel[2])]++;
 		}
+
+		#pragma omp critical
+		{
+			int i;
+			for (i = 0; i < 512; i++) {
+				bins[0][i] += cur_bins[0][i];
+				bins[1][i] += cur_bins[1][i];
+				bins[2][i] += cur_bins[2][i];
+			}
+		}
 	}
 
 	draw_histogram_marker(rval, get_bin_float(0.0));