[Bf-blender-cvs] [774e034] master: Make scopes update multi-threaded

Sergey Sharybin noreply at git.blender.org
Thu Jun 11 23:39:38 CEST 2015


Commit: 774e034d40f9494a2523076992fc19e534ecfb17
Author: Sergey Sharybin
Date:   Tue May 19 20:18:26 2015 +0500
Branches: master
https://developer.blender.org/rB774e034d40f9494a2523076992fc19e534ecfb17

Make scopes update multi-threaded

This commits makes scopes evaluation multithreaded by using OpenMP
for the outer loop of pixel processor. it also makes all the changes
needed for keeping performance as high as possible by keeping data
local to thread for as long as it's possible.

Reviewers: campbellbarton

Differential Revision: https://developer.blender.org/D1334

===================================================================

M	source/blender/blenkernel/intern/colortools.c

===================================================================

diff --git a/source/blender/blenkernel/intern/colortools.c b/source/blender/blenkernel/intern/colortools.c
index 96488d0..0af809e 100644
--- a/source/blender/blenkernel/intern/colortools.c
+++ b/source/blender/blenkernel/intern/colortools.c
@@ -43,6 +43,7 @@
 #include "BLI_blenlib.h"
 #include "BLI_math.h"
 #include "BLI_utildefines.h"
+#include "BLI_threads.h"
 
 #include "BKE_colortools.h"
 #include "BKE_curve.h"
@@ -52,6 +53,10 @@
 #include "IMB_colormanagement.h"
 #include "IMB_imbuf_types.h"
 
+#ifdef _OPENMP
+#  include <omp.h>
+#endif
+
 /* ********************************* color curve ********************* */
 
 /* ***************** operations on full struct ************* */
@@ -1029,11 +1034,25 @@ void BKE_histogram_update_sample_line(Histogram *hist, ImBuf *ibuf, const ColorM
 void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *view_settings,
                    const ColorManagedDisplaySettings *display_settings)
 {
+#ifdef _OPENMP
+	const int num_threads = BLI_system_thread_count();
+#else
+	const int num_threads = 1;
+#endif
 	int a, y;
 	unsigned int nl, na, nr, ng, nb;
 	double divl, diva, divr, divg, divb;
 	unsigned char *display_buffer;
-	unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a;
+	unsigned int bin_lum[256] = {0},
+	             bin_r[256] = {0},
+	             bin_g[256] = {0},
+	             bin_b[256] = {0},
+	             bin_a[256] = {0};
+	unsigned int bin_lum_t[BLENDER_MAX_THREADS][256] = {{0}},
+	             bin_r_t[BLENDER_MAX_THREADS][256] = {{0}},
+	             bin_g_t[BLENDER_MAX_THREADS][256] = {{0}},
+	             bin_b_t[BLENDER_MAX_THREADS][256] = {{0}},
+	             bin_a_t[BLENDER_MAX_THREADS][256] = {{0}};
 	int ycc_mode = -1;
 	const bool is_float = (ibuf->rect_float != NULL);
 	void *cache_handle = NULL;
@@ -1068,13 +1087,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
 			break;
 	}
 
-	/* temp table to count pix value for histogram */
-	bin_r     = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-	bin_g     = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-	bin_b     = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-	bin_a = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-	bin_lum   = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins");
-
 	/* convert to number of lines with logarithmic scale */
 	scopes->sample_lines = (scopes->accuracy * 0.01f) * (scopes->accuracy * 0.01f) * ibuf->y;
 	
@@ -1114,9 +1126,19 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
 		                                                             &cache_handle);
 	}
 
+	/* Keep number of threads in sync with the merge parts below. */
+#pragma omp parallel for private(y) schedule(static) num_threads(num_threads) if(ibuf->y > 256)
 	for (y = 0; y < ibuf->y; y++) {
+#ifdef _OPENMP
+		const int thread_idx = omp_get_thread_num();
+#else
+		const int thread_idx = 0;
+#endif
 		const float *rf = NULL;
-		unsigned char *rc = NULL;
+		const unsigned char *rc = NULL;
+		const bool do_sample_line = (y % rows_per_sample_line) == 0;
+		float min[3] = { FLT_MAX,  FLT_MAX,  FLT_MAX},
+		      max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
 		int x, c;
 		if (is_float)
 			rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;
@@ -1140,27 +1162,27 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
 			/* check for min max */
 			if (ycc_mode == -1) {
 				for (c = 0; c < 3; c++) {
-					if (rgba[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = rgba[c];
-					if (rgba[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = rgba[c];
+					if (rgba[c] < min[c]) min[c] = rgba[c];
+					if (rgba[c] > max[c]) max[c] = rgba[c];
 				}
 			}
 			else {
 				rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
 				for (c = 0; c < 3; c++) {
 					ycc[c] *= INV_255;
-					if (ycc[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = ycc[c];
-					if (ycc[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = ycc[c];
+					if (ycc[c] < min[c]) min[c] = ycc[c];
+					if (ycc[c] > max[c]) max[c] = ycc[c];
 				}
 			}
 			/* increment count for histo*/
-			bin_lum[get_bin_float(luma)] += 1;
-			bin_r[get_bin_float(rgba[0])] += 1;
-			bin_g[get_bin_float(rgba[1])] += 1;
-			bin_b[get_bin_float(rgba[2])] += 1;
-			bin_a[get_bin_float(rgba[3])] += 1;
+			bin_lum_t[thread_idx][get_bin_float(luma)] += 1;
+			bin_r_t[thread_idx][get_bin_float(rgba[0])] += 1;
+			bin_g_t[thread_idx][get_bin_float(rgba[1])] += 1;
+			bin_b_t[thread_idx][get_bin_float(rgba[2])] += 1;
+			bin_a_t[thread_idx][get_bin_float(rgba[3])] += 1;
 
 			/* save sample if needed */
-			if (y % rows_per_sample_line == 0) {
+			if (do_sample_line) {
 				const float fx = (float)x / (float)ibuf->x;
 				const int savedlines = y / rows_per_sample_line;
 				const int idx = 2 * (ibuf->x * savedlines + x);
@@ -1170,16 +1192,46 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
 			rf += ibuf->channels;
 			rc += ibuf->channels;
 		}
+#pragma omp critical
+		{
+			for (c = 0; c < 3; c++) {
+				if (min[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = min[c];
+				if (max[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = max[c];
+			}
+		}
+	}
+
+#ifdef _OPENMP
+	if (ibuf->y > 256) {
+		for (a = 0; a < num_threads; a++) {
+			int b;
+			for (b = 0; b < 256; b++) {
+				bin_lum[b] += bin_lum_t[a][b];
+				bin_r[b] += bin_r_t[a][b];
+				bin_g[b] += bin_g_t[a][b];
+				bin_b[b] += bin_b_t[a][b];
+				bin_a[b] += bin_a_t[a][b];
+			}
+		}
+	}
+	else
+#endif
+	{
+		memcpy(bin_lum, bin_lum_t[0], sizeof(bin_lum));
+		memcpy(bin_r, bin_r_t[0], sizeof(bin_r));
+		memcpy(bin_g, bin_g_t[0], sizeof(bin_g));
+		memcpy(bin_b, bin_b_t[0], sizeof(bin_b));
+		memcpy(bin_a, bin_a_t[0], sizeof(bin_a));
 	}
 
 	/* test for nicer distribution even - non standard, leave it out for a while */
 #if 0
-	for (x = 0; x < 256; x++) {
-		bin_lum[x] = sqrt (bin_lum[x]);
-		bin_r[x] = sqrt(bin_r[x]);
-		bin_g[x] = sqrt(bin_g[x]);
-		bin_b[x] = sqrt(bin_b[x]);
-		bin_a[x] = sqrt(bin_a[x]);
+	for (a = 0; a < 256; a++) {
+		bin_lum[a] = sqrt (bin_lum[a]);
+		bin_r[a] = sqrt(bin_r[a]);
+		bin_g[a] = sqrt(bin_g[a]);
+		bin_b[a] = sqrt(bin_b[a]);
+		bin_a[a] = sqrt(bin_a[a]);
 	}
 #endif
 	
@@ -1205,11 +1257,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
 		scopes->hist.data_b[a] = bin_b[a] * divb;
 		scopes->hist.data_a[a] = bin_a[a] * diva;
 	}
-	MEM_freeN(bin_lum);
-	MEM_freeN(bin_r);
-	MEM_freeN(bin_g);
-	MEM_freeN(bin_b);
-	MEM_freeN(bin_a);
 
 	if (cm_processor)
 		IMB_colormanagement_processor_free(cm_processor);




More information about the Bf-blender-cvs mailing list