[Bf-blender-cvs] [64aef25] master: Use generic task scheduler for threaded image processor

Sergey Sharybin noreply at git.blender.org
Wed Dec 25 15:36:55 CET 2013


Commit: 64aef25b8359d196b1246da4b202c2c27e54efb9
Author: Sergey Sharybin
Date:   Wed Dec 25 20:32:13 2013 +0600
https://developer.blender.org/rB64aef25b8359d196b1246da4b202c2c27e54efb9

Use generic task scheduler for threaded image processor

This allows scheduling tasks of a smaller size without incurring
thread overhead or having to worry about splitting tasks into
smaller pieces.

This simplifies the code in color management, which was manually
splitting the task into smaller chunks to keep memory usage low.

Further optimization is possible by avoiding malloc calls from
threads, but that is how it has worked for ages already,
and it will be optimized in a separate patch.

===================================================================

M	source/blender/imbuf/intern/colormanagement.c
M	source/blender/imbuf/intern/imageprocess.c

===================================================================

diff --git a/source/blender/imbuf/intern/colormanagement.c b/source/blender/imbuf/intern/colormanagement.c
index cc8a453..6ca3e1a 100644
--- a/source/blender/imbuf/intern/colormanagement.c
+++ b/source/blender/imbuf/intern/colormanagement.c
@@ -1318,13 +1318,11 @@ static void display_buffer_init_handle(void *handle_v, int start_line, int tot_l
 	handle->float_colorspace = init_data->float_colorspace;
 }
 
-static void display_buffer_apply_get_linear_buffer(DisplayBufferThread *handle, int start_scanline, int num_scanlines,
+static void display_buffer_apply_get_linear_buffer(DisplayBufferThread *handle, int height,
                                                    float *linear_buffer, bool *is_straight_alpha)
 {
 	int channels = handle->channels;
 	int width = handle->width;
-	int height = num_scanlines;
-	int scanline_offset = channels * start_scanline * width;
 
 	int buffer_size = channels * width * height;
 
@@ -1342,7 +1340,7 @@ static void display_buffer_apply_get_linear_buffer(DisplayBufferThread *handle,
 		int i;
 
 		/* first convert byte buffer to float, keep in image space */
-		for (i = 0, fp = linear_buffer, cp = byte_buffer + scanline_offset;
+		for (i = 0, fp = linear_buffer, cp = byte_buffer;
 		     i < width * height;
 		     i++, fp += channels, cp += channels)
 		{
@@ -1375,7 +1373,7 @@ static void display_buffer_apply_get_linear_buffer(DisplayBufferThread *handle,
 		const char *from_colorspace = handle->float_colorspace;
 		const char *to_colorspace = global_role_scene_linear;
 
-		memcpy(linear_buffer, handle->buffer + scanline_offset, buffer_size * sizeof(float));
+		memcpy(linear_buffer, handle->buffer, buffer_size * sizeof(float));
 
 		if (!is_data && !is_data_display) {
 			IMB_colormanagement_transform(linear_buffer, width, height, channels,
@@ -1391,7 +1389,7 @@ static void display_buffer_apply_get_linear_buffer(DisplayBufferThread *handle,
 		 * using duplicated buffer here
 		 */
 
-		memcpy(linear_buffer, handle->buffer + scanline_offset, buffer_size * sizeof(float));
+		memcpy(linear_buffer, handle->buffer, buffer_size * sizeof(float));
 
 		*is_straight_alpha = false;
 	}
@@ -1421,69 +1419,50 @@ static void *do_display_buffer_apply_thread(void *handle_v)
 		}
 	}
 	else {
-#define SCANLINE_BLOCK_SIZE 64
-		/* TODO(sergey): Instead of nasty scanline-blocking in per-scanline-block thread we might
-		 *               better to use generic task scheduler, but that would need extra testing
-		 *               before deploying into production.
-		 */
-
-		int scanlines = (height + SCANLINE_BLOCK_SIZE - 1) / SCANLINE_BLOCK_SIZE;
-		int i;
-		float *linear_buffer = MEM_mallocN(channels * width * SCANLINE_BLOCK_SIZE * sizeof(float),
+		bool is_straight_alpha, predivide;
+		float *linear_buffer = MEM_mallocN(channels * width * height * sizeof(float),
 		                                   "color conversion linear buffer");
 
-		for (i = 0; i < scanlines; i ++) {
-			int start_scanline = i * SCANLINE_BLOCK_SIZE;
-			int num_scanlines = (i == scanlines - 1) ?
-			                    (height - SCANLINE_BLOCK_SIZE * i) :
-			                    SCANLINE_BLOCK_SIZE;
-			int scanline_offset = channels * start_scanline * width;
-			int scanline_offset4 = 4 * start_scanline * width;
-			bool is_straight_alpha, predivide;
-
-			display_buffer_apply_get_linear_buffer(handle, start_scanline, num_scanlines,
-			                                       linear_buffer, &is_straight_alpha);
-			predivide = is_straight_alpha == false;
-
-			if (is_data) {
-				/* special case for data buffers - no color space conversions,
-				 * only generate byte buffers
-				 */
-			}
-			else {
-				/* apply processor */
-				IMB_colormanagement_processor_apply(cm_processor, linear_buffer, width, num_scanlines, channels,
-				                                    predivide);
-			}
+		display_buffer_apply_get_linear_buffer(handle, height, linear_buffer, &is_straight_alpha);
 
-			/* copy result to output buffers */
-			if (display_buffer_byte) {
-				/* do conversion */
-				IMB_buffer_byte_from_float(display_buffer_byte + scanline_offset4, linear_buffer,
-				                           channels, dither, IB_PROFILE_SRGB, IB_PROFILE_SRGB,
-				                           predivide, width, num_scanlines, width, width);
-			}
+		predivide = is_straight_alpha == false;
+
+		if (is_data) {
+			/* special case for data buffers - no color space conversions,
+			 * only generate byte buffers
+			 */
+		}
+		else {
+			/* apply processor */
+			IMB_colormanagement_processor_apply(cm_processor, linear_buffer, width, height, channels,
+			                                    predivide);
+		}
 
-			if (display_buffer) {
-				memcpy(display_buffer + scanline_offset, linear_buffer, width * num_scanlines * channels * sizeof(float));
+		/* copy result to output buffers */
+		if (display_buffer_byte) {
+			/* do conversion */
+			IMB_buffer_byte_from_float(display_buffer_byte, linear_buffer,
+			                           channels, dither, IB_PROFILE_SRGB, IB_PROFILE_SRGB,
+			                           predivide, width, height, width, width);
+		}
 
-				if (is_straight_alpha && channels == 4) {
-					int i;
-					float *fp;
+		if (display_buffer) {
+			memcpy(display_buffer, linear_buffer, width * height * channels * sizeof(float));
 
-					for (i = 0, fp = display_buffer;
-					     i < width * num_scanlines;
-					     i++, fp += channels)
-					{
-						straight_to_premul_v4(fp);
-					}
+			if (is_straight_alpha && channels == 4) {
+				int i;
+				float *fp;
+
+				for (i = 0, fp = display_buffer;
+				     i < width * height;
+				     i++, fp += channels)
+				{
+					straight_to_premul_v4(fp);
 				}
 			}
 		}
 
 		MEM_freeN(linear_buffer);
-
-#undef SCANLINE_BLOCK_SIZE
 	}
 
 	return NULL;
diff --git a/source/blender/imbuf/intern/imageprocess.c b/source/blender/imbuf/intern/imageprocess.c
index 71d5f51..e0a6e03 100644
--- a/source/blender/imbuf/intern/imageprocess.c
+++ b/source/blender/imbuf/intern/imageprocess.c
@@ -41,7 +41,7 @@
 #include "MEM_guardedalloc.h"
 
 #include "BLI_utildefines.h"
-#include "BLI_threads.h"
+#include "BLI_task.h"
 #include "BLI_listbase.h"
 #include "BLI_math.h"
 
@@ -288,48 +288,54 @@ void nearest_interpolation(ImBuf *in, ImBuf *out, float x, float y, int xout, in
 
 /*********************** Threaded image processing *************************/
 
+static void processor_apply_func(TaskPool *pool, void *taskdata, int UNUSED(threadid))
+{
+	void (*do_thread) (void *) = (void (*) (void *)) BLI_task_pool_userdata(pool);
+	do_thread(taskdata);
+}
+
 void IMB_processor_apply_threaded(int buffer_lines, int handle_size, void *init_customdata,
                                   void (init_handle) (void *handle, int start_line, int tot_line,
                                                       void *customdata),
                                   void *(do_thread) (void *))
 {
-	void *handles;
-	ListBase threads;
+	const int lines_per_task = 64;
+
+	TaskScheduler *task_scheduler = BLI_task_scheduler_get();
+	TaskPool *task_pool;
 
-	int i, tot_thread = BLI_system_thread_count();
-	int start_line, tot_line;
+	void *handles;
+	int total_tasks = (buffer_lines + lines_per_task - 1) / lines_per_task;
+	int i, start_line;
 
-	handles = MEM_callocN(handle_size * tot_thread, "processor apply threaded handles");
+	task_pool = BLI_task_pool_create(task_scheduler, do_thread);
 
-	if (tot_thread > 1)
-		BLI_init_threads(&threads, do_thread, tot_thread);
+	handles = MEM_callocN(handle_size * total_tasks, "processor apply threaded handles");
 
 	start_line = 0;
-	tot_line = ((float)(buffer_lines / tot_thread)) + 0.5f;
 
-	for (i = 0; i < tot_thread; i++) {
-		int cur_tot_line;
+	for (i = 0; i < total_tasks; i++) {
+		int lines_per_current_task;
 		void *handle = ((char *) handles) + handle_size * i;
 
-		if (i < tot_thread - 1)
-			cur_tot_line = tot_line;
+		if (i < total_tasks - 1)
+			lines_per_current_task = lines_per_task;
 		else
-			cur_tot_line = buffer_lines - start_line;
+			lines_per_current_task = buffer_lines - start_line;
 
-		init_handle(handle, start_line, cur_tot_line, init_customdata);
+		init_handle(handle, start_line, lines_per_current_task, init_customdata);
 
-		if (tot_thread > 1)
-			BLI_insert_thread(&threads, handle);
+		BLI_task_pool_push(task_pool, processor_apply_func, handle, false, TASK_PRIORITY_LOW);
 
-		start_line += tot_line;
+		start_line += lines_per_task;
 	}
 
-	if (tot_thread > 1)
-		BLI_end_threads(&threads);
-	else
-		do_thread(handles);
+	/* work and wait until tasks are done */
+	BLI_task_pool_work_and_wait(task_pool);
 
+	/* Free memory. */
 	MEM_freeN(handles);
+	BLI_task_pool_free(task_pool);
 }
 
 /* Alpha-under */




More information about the Bf-blender-cvs mailing list