[Bf-blender-cvs] [9b38d8d] compositor-2016: Image viewer scopes update: OMP->BLI_task.

Bastien Montagne noreply at git.blender.org
Wed Jun 8 21:50:17 CEST 2016


Commit: 9b38d8d181441b8126bdbba13b78865542830d13
Author: Bastien Montagne
Date:   Thu May 26 14:30:14 2016 +0200
Branches: compositor-2016
https://developer.blender.org/rB9b38d8d181441b8126bdbba13b78865542830d13

Image viewer scopes update: OMP->BLI_task.

Makes the scopes update over 50% faster (from 4.5ms to 2.2ms here with an SD shot)!
Probably mostly due to smarter use of thread-local data (which avoids any lock,
whereas the OMP code had a rather stupid critical section for minmax)...

===================================================================

M	source/blender/blenkernel/intern/colortools.c

===================================================================

diff --git a/source/blender/blenkernel/intern/colortools.c b/source/blender/blenkernel/intern/colortools.c
index bac59c8..c1f1f01 100644
--- a/source/blender/blenkernel/intern/colortools.c
+++ b/source/blender/blenkernel/intern/colortools.c
@@ -43,6 +43,7 @@
 #include "BLI_blenlib.h"
 #include "BLI_math.h"
 #include "BLI_utildefines.h"
+#include "BLI_task.h"
 #include "BLI_threads.h"
 
 #include "BKE_colortools.h"
@@ -53,10 +54,6 @@
 #include "IMB_colormanagement.h"
 #include "IMB_imbuf_types.h"
 
-#ifdef _OPENMP
-#  include <omp.h>
-#endif
-
 /* ********************************* color curve ********************* */
 
 /* ***************** operations on full struct ************* */
@@ -1089,31 +1086,170 @@ void BKE_histogram_update_sample_line(Histogram *hist, ImBuf *ibuf, const ColorM
 }
 
 /* if view_settings, it also applies this to byte buffers */
+typedef struct ScopesUpdateData {  /* Data shared by scopes_update_cb() and scopes_update_finalize() (their `userdata`). */
+	Scopes *scopes;  /* Scopes being updated: receives sample lines (callback) and minmax (finalize). */
+	const ImBuf *ibuf;  /* Source image: size, channel count and float rect are read from it. */
+	struct ColormanageProcessor *cm_processor;  /* Applied to float pixels that have 3 or 4 channels. */
+	const unsigned char *display_buffer;  /* Byte pixels, read only when ibuf has no float rect. */
+	const int ycc_mode;  /* -1: min/max are tracked on RGB; any other value is passed to rgb_to_ycc(). */
+
+	unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a;  /* Histogram totals that finalize adds each chunk's bins into. */
+} ScopesUpdateData;
+
+typedef struct ScopesUpdateDataChunk {  /* Per-chunk accumulators (`userdata_chunk`): filled by scopes_update_cb(), merged by scopes_update_finalize(). */
+	unsigned int bin_lum[256];  /* Luminance histogram counts. */
+	unsigned int bin_r[256];  /* Red histogram counts. */
+	unsigned int bin_g[256];  /* Green histogram counts. */
+	unsigned int bin_b[256];  /* Blue histogram counts. */
+	unsigned int bin_a[256];  /* Alpha histogram counts. */
+	float min[3], max[3];  /* Running per-component min/max (RGB, or scaled YCC); NOTE(review): must be seeded by whoever creates the chunk, not visible here. */
+} ScopesUpdateDataChunk;
+
+static void scopes_update_cb(void *userdata, void *userdata_chunk, const int y, const int UNUSED(threadid))
+{  /* Handles image row `y`: adds its pixels to the chunk's histogram bins and min/max, and stores a sample line if due. */
+	const ScopesUpdateData *data = userdata;  /* Shared data. */
+
+	Scopes *scopes = data->scopes;
+	const ImBuf *ibuf = data->ibuf;
+	struct ColormanageProcessor *cm_processor = data->cm_processor;
+	const unsigned char *display_buffer = data->display_buffer;
+	const int ycc_mode = data->ycc_mode;
+
+	ScopesUpdateDataChunk *data_chunk = userdata_chunk;  /* Accumulators this call writes to. */
+	unsigned int *bin_lum = data_chunk->bin_lum;
+	unsigned int *bin_r = data_chunk->bin_r;
+	unsigned int *bin_g = data_chunk->bin_g;
+	unsigned int *bin_b = data_chunk->bin_b;
+	unsigned int *bin_a = data_chunk->bin_a;
+	float *min = data_chunk->min;
+	float *max = data_chunk->max;
+
+	const float *rf = NULL;  /* Cursor into the float pixels (float images only). */
+	const unsigned char *rc = NULL;  /* Cursor into the byte display buffer (non-float images only). */
+	const int rows_per_sample_line = ibuf->y / scopes->sample_lines;  /* NOTE(review): assumes sample_lines is non-zero and <= ibuf->y; confirm the caller guarantees it. */
+	const int savedlines = y / rows_per_sample_line;  /* Index of the sample line this row belongs to. */
+	const bool do_sample_line = (savedlines < scopes->sample_lines) && (y % rows_per_sample_line) == 0;  /* Only the first row of each group is sampled. */
+	const bool is_float = (ibuf->rect_float != NULL);
+
+	if (is_float)
+		rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;  /* Start of row y. */
+	else {
+		rc = display_buffer + ((size_t)y) * ibuf->x * ibuf->channels;  /* Start of row y. */
+	}
+
+	for (int x = 0; x < ibuf->x; x++) {
+		float rgba[4], ycc[3], luma;
+
+		if (is_float) {
+			switch (ibuf->channels) {  /* Expand the pixel to RGBA whatever the channel count. */
+				case 4:
+					copy_v4_v4(rgba, rf);
+					IMB_colormanagement_processor_apply_v4(cm_processor, rgba);
+					break;
+				case 3:
+					copy_v3_v3(rgba, rf);
+					IMB_colormanagement_processor_apply_v3(cm_processor, rgba);
+					rgba[3] = 1.0f;
+					break;
+				case 2:  /* First channel replicated to RGB, second used as alpha. */
+					copy_v3_fl(rgba, rf[0]);
+					rgba[3] = rf[1];
+					break;
+				case 1:  /* Single channel replicated to RGB, alpha set to 1. */
+					copy_v3_fl(rgba, rf[0]);
+					rgba[3] = 1.0f;
+					break;
+				default:
+					BLI_assert(0);  /* NOTE(review): rgba is left unset on this path. */
+			}
+		}
+		else {
+			for (int c = 4; c--;)  /* NOTE(review): reads 4 bytes per pixel while the stride below is ibuf->channels; confirm they always agree. */
+				rgba[c] = rc[c] * INV_255;
+		}
+
+		/* we still need luma for histogram */
+		luma = IMB_colormanagement_get_luminance(rgba);
+
+		/* check for min max */
+		if (ycc_mode == -1) {
+			minmax_v3v3_v3(min, max, rgba);
+		}
+		else {
+			rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
+			mul_v3_fl(ycc, INV_255);
+			minmax_v3v3_v3(min, max, ycc);
+		}
+		/* increment count for histogram */
+		bin_lum[get_bin_float(luma)]++;
+		bin_r[get_bin_float(rgba[0])]++;
+		bin_g[get_bin_float(rgba[1])]++;
+		bin_b[get_bin_float(rgba[2])]++;
+		bin_a[get_bin_float(rgba[3])]++;
+
+		/* save sample if needed */
+		if (do_sample_line) {
+			const float fx = (float)x / (float)ibuf->x;  /* Horizontal position as a fraction of the image width. */
+			const int idx = 2 * (ibuf->x * savedlines + x);  /* Two slots per sampled pixel. */
+			save_sample_line(scopes, idx, fx, rgba, ycc);  /* NOTE(review): ycc is only written when ycc_mode != -1; confirm it is not read otherwise. */
+		}
+
+		rf += ibuf->channels;  /* NOTE(review): both cursors advance although one of them is NULL. */
+		rc += ibuf->channels;
+	}
+}
+
+static void scopes_update_finalize(void *userdata, void *userdata_chunk)
+{  /* Merges one chunk's histogram bins and min/max into the totals; presumably not run concurrently on the same totals (TODO confirm). */
+	const ScopesUpdateData *data = userdata;  /* Holds the destination bins and scopes. */
+	const ScopesUpdateDataChunk *data_chunk = userdata_chunk;  /* Accumulated chunk results to merge. */
+
+	unsigned int *bin_lum = data->bin_lum;
+	unsigned int *bin_r = data->bin_r;
+	unsigned int *bin_g = data->bin_g;
+	unsigned int *bin_b = data->bin_b;
+	unsigned int *bin_a = data->bin_a;
+	const unsigned int *bin_lum_c = data_chunk->bin_lum;
+	const unsigned int *bin_r_c = data_chunk->bin_r;
+	const unsigned int *bin_g_c = data_chunk->bin_g;
+	const unsigned int *bin_b_c = data_chunk->bin_b;
+	const unsigned int *bin_a_c = data_chunk->bin_a;
+
+	float (*minmax)[2] = data->scopes->minmax;  /* Per component: [0] is the minimum, [1] the maximum. */
+	const float *min = data_chunk->min;
+	const float *max = data_chunk->max;
+
+	for (int b = 256; b--;) {  /* Add the chunk's counts to the totals, bin by bin. */
+		bin_lum[b] += bin_lum_c[b];
+		bin_r[b] += bin_r_c[b];
+		bin_g[b] += bin_g_c[b];
+		bin_b[b] += bin_b_c[b];
+		bin_a[b] += bin_a_c[b];
+	}
+
+	for (int c = 3; c--;) {  /* Widen the stored range only where the chunk's extremes exceed it. */
+		if (min[c] < minmax[c][0])
+			minmax[c][0] = min[c];
+		if (max[c] > minmax[c][1])
+			minmax[c][1] = max[c];
+	}
+}
+
 void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *view_settings,
                    const ColorManagedDisplaySettings *display_settings)
 {
-#ifdef _OPENMP
-	const int num_threads = BLI_system_thread_count();
-#endif
-	int a, y;
+	int a;
 	unsigned int nl, na, nr, ng, nb;
 	double divl, diva, divr, divg, divb;
-	unsigned char *display_buffer;
+	const unsigned char *display_buffer = NULL;
 	unsigned int bin_lum[256] = {0},
 	             bin_r[256] = {0},
 	             bin_g[256] = {0},
 	             bin_b[256] = {0},
 	             bin_a[256] = {0};
-	unsigned int bin_lum_t[BLENDER_MAX_THREADS][256] = {{0}},
-	             bin_r_t[BLENDER_MAX_THREADS][256] = {{0}},
-	             bin_g_t[BLENDER_MAX_THREADS][256] = {{0}},
-	             bin_b_t[BLENDER_MAX_THREADS][256] = {{0}},
-	             bin_a_t[BLENDER_MAX_THREADS][256] = {{0}};
 	int ycc_mode = -1;
-	const bool is_float = (ibuf->rect_float != NULL);
 	void *cache_handle = NULL;
 	struct ColormanageProcessor *cm_processor = NULL;
-	int rows_per_sample_line;
 
 	if (ibuf->rect == NULL && ibuf->rect_float == NULL) return;
 
@@ -1151,7 +1287,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
 		scopes->sample_lines = ibuf->y;
 
 	/* scan the image */
-	rows_per_sample_line = ibuf->y / scopes->sample_lines;
 	for (a = 0; a < 3; a++) {
 		scopes->minmax[a][0] = 25500.0f;
 		scopes->minmax[a][1] = -25500.0f;
@@ -1177,129 +1312,21 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
 		cm_processor = IMB_colormanagement_display_processor_new(view_settings, display_settings);
 	}
 	else {
-		display_buffer = (unsigned char *)IMB_display_buffer_acquire(ibuf,
-		                                                             view_settings,
-		                                                             display_settings,
-		                                                             &cache_handle);
+		display_buffer = (const unsigned char *)IMB_display_buffer_acquire(
+		                                            ibuf, view_settings, display_settings, &cache_handle);
 	}
 
 	/* Keep number of threads in sync with the merge parts below. */
-#pragma omp parallel for private(y) schedule(static) num_threads(num_threads) if (ibuf->y > 256)
-	for (y = 0; y < ibuf->y; y++) {
-#ifdef _OPENMP
-		const int thread_idx = omp_get_thread_num();
-#else
-		const int thread_idx = 0;
-#endif
-		const float *rf = NULL;
-		const unsigned char *rc = NULL;
-		const int savedlines = y / rows_per_sample_line;
-		const bool do_sample_line = (savedlines < scopes->sample_lines) && (y % rows_per_sample_line) == 0;
-		float min[3] = { FLT_MAX,  FLT_MAX,  FLT_MAX},
-		      max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
-		int x, c;
-		if (is_float)
-			rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;
-		else {
-			rc = display_buffer + ((size_t)y) * ibuf->x * ibuf->channels;
-		}
-		for (x = 0; x < ibuf->x; x++) {
-			float rgba[4], ycc[3], luma;
-			if (is_float) {
-
-				switch (ibuf->channels) {
-					case 4:
-						copy_v4_v4(rgba, rf);
-						IMB_colormanagement_processor_apply_v4(cm_processor, rgba);
-						break;
-					case 3:
-						copy_v3_v3(rgba, rf);
-						IMB_colormanagement_processor_apply_v3(cm_processor, rgba);
-						rgba[3] = 1.0f;
-						break;
-					case 2:
-						copy_v3_fl(rgba, rf[0]);
-						rgba[3] = rf[1];
-						break;
-					case 1:
-						copy_v3_fl(rgba, rf[0]);
-						rgba[3] = 1.0f;
-						break;
-					default:
-						BLI_assert(0);
-				}
-			}
-			else {
-				for (c = 0; c < 4; c++)
-					rgba[c] = rc[c] * INV_255;
-			}
-
-			/* we still need luma for histogram */
-			luma = IMB_colormanagement_get_luminance(rgba);
-
-			/* check for min max */
-			if (ycc_mode == -1) {
-				for (c = 0; c < 3; c++) {
-					if (rgba[c] < min[c]) min[c] = rgba[c];
-					if (rgba[c] > max[c]) max[c] = rgba[c];
-				}
-			}
-			else {
-				rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
-				for (c = 0; c < 3; c++) {
-					ycc[c] *= INV_255;
-					if (ycc[c] < min[c]) min[c] = ycc[c];
-					if (ycc[c] > max[c]) max[c] = ycc[c];
-				}
-			}
-			/* increment count for histo*/
-			bin_lum_t[thread_idx][get_bin_float(luma)] += 1;
-			bin_r_t[thread_idx][get_bin_float(rgba[0])] += 1;
-			bin_g_t[thread_idx][get_bin_float(rgba[1])] += 1;
-			bin_b_t[thread_idx][get_bin_float(rgba[2])] += 1;
-			bin_a_t[thread_idx][get_bin_float(rgba[3])] += 1;
-
-			/* save sample if needed */
-			if (do_sample_line) {
-				const float fx = (float)x / (float)ibuf->x;
-				const int idx = 2 * (ibuf->x * savedlines + x);
-				save_sample_line(scopes, idx, fx, rgba, ycc);
-			}
-
-			rf += ibuf->channels;
-			rc += ibuf->channels;
-		}
-#pragma omp critical
-		{
-			for (c = 0; c < 3; c++) {
-				if (min[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = min[c];
-				if (max[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = max[c];
-			}
-		}
-	}
-
-#ifdef _OPENMP
-	if (ibuf->y > 256) {
-		for (a = 0; a < num_threads; a++) {
-			int b;
-			for (b =

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list