[Bf-blender-cvs] [7b30a3e98de] master: Performance: Use parallel range for ImBuf scanline processor.

Jeroen Bakker noreply at git.blender.org
Fri Jun 11 15:56:30 CEST 2021


Commit: 7b30a3e98def6f9f158da0e315b7077655acfd20
Author: Jeroen Bakker
Date:   Fri Jun 11 15:55:09 2021 +0200
Branches: master
https://developer.blender.org/rB7b30a3e98def6f9f158da0e315b7077655acfd20

Performance: Use parallel range for ImBuf scanline processor.

The scanline processor used its own heuristic, which didn't scale well
on machines with multiple cores. Instead of using our own code, this
patch leaves it to TBB to determine how to split the scanlines over the
available threads.

Performance of IMB_transform before this change was 0.002123s; with
this change it is 0.001601s. This change increases performance in other
areas as well, including color management conversions.

Reviewed By: zeddb

Differential Revision: https://developer.blender.org/D11578

===================================================================

M	source/blender/blenkernel/intern/image_gen.c
M	source/blender/imbuf/IMB_imbuf.h
M	source/blender/imbuf/intern/colormanagement.c
M	source/blender/imbuf/intern/divers.c
M	source/blender/imbuf/intern/imageprocess.c
M	source/blender/imbuf/intern/rectop.c

===================================================================

diff --git a/source/blender/blenkernel/intern/image_gen.c b/source/blender/blenkernel/intern/image_gen.c
index ceb13c4955e..1a0cc8c2924 100644
--- a/source/blender/blenkernel/intern/image_gen.c
+++ b/source/blender/blenkernel/intern/image_gen.c
@@ -69,10 +69,11 @@ static void image_buf_fill_color_slice(
   }
 }
 
-static void image_buf_fill_color_thread_do(void *data_v, int start_scanline, int num_scanlines)
+static void image_buf_fill_color_thread_do(void *data_v, int scanline)
 {
   FillColorThreadData *data = (FillColorThreadData *)data_v;
-  size_t offset = ((size_t)start_scanline) * data->width * 4;
+  const int num_scanlines = 1;
+  size_t offset = ((size_t)scanline) * data->width * 4;
   unsigned char *rect = (data->rect != NULL) ? (data->rect + offset) : NULL;
   float *rect_float = (data->rect_float != NULL) ? (data->rect_float + offset) : NULL;
   image_buf_fill_color_slice(rect, rect_float, data->width, num_scanlines, data->color);
@@ -197,13 +198,14 @@ typedef struct FillCheckerThreadData {
   int width;
 } FillCheckerThreadData;
 
-static void image_buf_fill_checker_thread_do(void *data_v, int start_scanline, int num_scanlines)
+static void image_buf_fill_checker_thread_do(void *data_v, int scanline)
 {
   FillCheckerThreadData *data = (FillCheckerThreadData *)data_v;
-  size_t offset = ((size_t)start_scanline) * data->width * 4;
+  size_t offset = ((size_t)scanline) * data->width * 4;
+  const int num_scanlines = 1;
   unsigned char *rect = (data->rect != NULL) ? (data->rect + offset) : NULL;
   float *rect_float = (data->rect_float != NULL) ? (data->rect_float + offset) : NULL;
-  image_buf_fill_checker_slice(rect, rect_float, data->width, num_scanlines, start_scanline);
+  image_buf_fill_checker_slice(rect, rect_float, data->width, num_scanlines, scanline);
 }
 
 void BKE_image_buf_fill_checker(unsigned char *rect, float *rect_float, int width, int height)
@@ -444,16 +446,15 @@ typedef struct FillCheckerColorThreadData {
   int width, height;
 } FillCheckerColorThreadData;
 
-static void checker_board_color_prepare_thread_do(void *data_v,
-                                                  int start_scanline,
-                                                  int num_scanlines)
+static void checker_board_color_prepare_thread_do(void *data_v, int scanline)
 {
   FillCheckerColorThreadData *data = (FillCheckerColorThreadData *)data_v;
-  size_t offset = ((size_t)data->width) * start_scanline * 4;
+  const int num_scanlines = 1;
+  size_t offset = ((size_t)data->width) * scanline * 4;
   unsigned char *rect = (data->rect != NULL) ? (data->rect + offset) : NULL;
   float *rect_float = (data->rect_float != NULL) ? (data->rect_float + offset) : NULL;
   checker_board_color_prepare_slice(
-      rect, rect_float, data->width, num_scanlines, start_scanline, data->height);
+      rect, rect_float, data->width, num_scanlines, scanline, data->height);
 }
 
 void BKE_image_buf_fill_checker_color(unsigned char *rect,
diff --git a/source/blender/imbuf/IMB_imbuf.h b/source/blender/imbuf/IMB_imbuf.h
index 651c69726ff..69a80d6e0d3 100644
--- a/source/blender/imbuf/IMB_imbuf.h
+++ b/source/blender/imbuf/IMB_imbuf.h
@@ -733,7 +733,7 @@ void IMB_processor_apply_threaded(
     void(init_handle)(void *handle, int start_line, int tot_line, void *customdata),
     void *(do_thread)(void *));
 
-typedef void (*ScanlineThreadFunc)(void *custom_data, int start_scanline, int num_scanlines);
+typedef void (*ScanlineThreadFunc)(void *custom_data, int scanline);
 void IMB_processor_apply_threaded_scanlines(int total_scanlines,
                                             ScanlineThreadFunc do_thread,
                                             void *custom_data);
diff --git a/source/blender/imbuf/intern/colormanagement.c b/source/blender/imbuf/intern/colormanagement.c
index 68d0b516828..71e513fb405 100644
--- a/source/blender/imbuf/intern/colormanagement.c
+++ b/source/blender/imbuf/intern/colormanagement.c
@@ -3539,12 +3539,11 @@ typedef struct PartialThreadData {
   int xmin, ymin, xmax;
 } PartialThreadData;
 
-static void partial_buffer_update_rect_thread_do(void *data_v,
-                                                 int start_scanline,
-                                                 int num_scanlines)
+static void partial_buffer_update_rect_thread_do(void *data_v, int scanline)
 {
   PartialThreadData *data = (PartialThreadData *)data_v;
-  int ymin = data->ymin + start_scanline;
+  int ymin = data->ymin + scanline;
+  const int num_scanlines = 1;
   partial_buffer_update_rect(data->ibuf,
                              data->display_buffer,
                              data->linear_buffer,
diff --git a/source/blender/imbuf/intern/divers.c b/source/blender/imbuf/intern/divers.c
index 5f580449e12..47712456014 100644
--- a/source/blender/imbuf/intern/divers.c
+++ b/source/blender/imbuf/intern/divers.c
@@ -536,13 +536,12 @@ typedef struct FloatToFloatThreadData {
   int stride_from;
 } FloatToFloatThreadData;
 
-static void imb_buffer_float_from_float_thread_do(void *data_v,
-                                                  int start_scanline,
-                                                  int num_scanlines)
+static void imb_buffer_float_from_float_thread_do(void *data_v, int scanline)
 {
+  const int num_scanlines = 1;
   FloatToFloatThreadData *data = (FloatToFloatThreadData *)data_v;
-  size_t offset_from = ((size_t)start_scanline) * data->stride_from * data->channels_from;
-  size_t offset_to = ((size_t)start_scanline) * data->stride_to * data->channels_from;
+  size_t offset_from = ((size_t)scanline) * data->stride_from * data->channels_from;
+  size_t offset_to = ((size_t)scanline) * data->stride_to * data->channels_from;
   IMB_buffer_float_from_float(data->rect_to + offset_to,
                               data->rect_from + offset_from,
                               data->channels_from,
diff --git a/source/blender/imbuf/intern/imageprocess.c b/source/blender/imbuf/intern/imageprocess.c
index 4320f30884b..a9b6e2bbb88 100644
--- a/source/blender/imbuf/intern/imageprocess.c
+++ b/source/blender/imbuf/intern/imageprocess.c
@@ -412,42 +412,34 @@ static void imb_transform_calc_add_y(const float transform_matrix[3][3],
 typedef void (*InterpolationColorFunction)(
     struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v);
 BLI_INLINE void imb_transform_scanlines(const TransformUserData *user_data,
-                                        int start_scanline,
-                                        int num_scanlines,
+                                        int scanline,
                                         InterpolationColorFunction interpolation)
 {
   const int width = user_data->dst->x;
 
-  float next_line_start_uv[2];
-  madd_v2_v2v2fl(next_line_start_uv, user_data->start_uv, user_data->add_y, start_scanline);
+  float uv[2];
+  madd_v2_v2v2fl(uv, user_data->start_uv, user_data->add_y, scanline);
 
   unsigned char *outI = NULL;
   float *outF = NULL;
-  pixel_from_buffer(user_data->dst, &outI, &outF, 0, start_scanline);
-
-  for (int yi = start_scanline; yi < start_scanline + num_scanlines; yi++) {
-    float uv[2];
-    copy_v2_v2(uv, next_line_start_uv);
-    add_v2_v2(next_line_start_uv, user_data->add_y);
-    for (int xi = 0; xi < width; xi++) {
-      if (uv[0] >= user_data->src_crop.xmin && uv[0] < user_data->src_crop.xmax &&
-          uv[1] >= user_data->src_crop.ymin && uv[1] < user_data->src_crop.ymax) {
-        interpolation(user_data->src, outI, outF, uv[0], uv[1]);
-      }
-      add_v2_v2(uv, user_data->add_x);
-      if (outI) {
-        outI += 4;
-      }
-      if (outF) {
-        outF += 4;
-      }
+  pixel_from_buffer(user_data->dst, &outI, &outF, 0, scanline);
+
+  for (int xi = 0; xi < width; xi++) {
+    if (uv[0] >= user_data->src_crop.xmin && uv[0] < user_data->src_crop.xmax &&
+        uv[1] >= user_data->src_crop.ymin && uv[1] < user_data->src_crop.ymax) {
+      interpolation(user_data->src, outI, outF, uv[0], uv[1]);
+    }
+    add_v2_v2(uv, user_data->add_x);
+    if (outI) {
+      outI += 4;
+    }
+    if (outF) {
+      outF += 4;
     }
   }
 }
 
-static void imb_transform_nearest_scanlines(void *custom_data,
-                                            int start_scanline,
-                                            int num_scanlines)
+static void imb_transform_nearest_scanlines(void *custom_data, int scanline)
 {
   const TransformUserData *user_data = custom_data;
   InterpolationColorFunction interpolation = NULL;
@@ -457,12 +449,10 @@ static void imb_transform_nearest_scanlines(void *custom_data,
   else {
     interpolation = nearest_interpolation_color_char;
   }
-  imb_transform_scanlines(user_data, start_scanline, num_scanlines, interpolation);
+  imb_transform_scanlines(user_data, scanline, interpolation);
 }
 
-static void imb_transform_bilinear_scanlines(void *custom_data,
-                                             int start_scanline,
-                                             int num_scanlines)
+static void imb_transform_bilinear_scanlines(void *custom_data, int scanline)
 {
   const TransformUserData *user_data = custom_data;
   InterpolationColorFunction interpolation = NULL;
@@ -472,7 +462,7 @@ static void imb_transform_bilinear_scanlines(void *custom_data,
   else if (user_data->dst->rect) {
     interpolation = bilinear_interpolation_color_char;
   }
-  imb_transform_scanlines(user_data, start_scanline, num_scanlines, interpolation);
+  imb_transform_scanlines(user_data, scanline, interpolation);
 }
 
 static ScanlineThreadFunc imb_transform_scanline_func(const eIMBInterpolationFilterMode filter)
@@ -568,41 +558,28 @@ void IMB_processor_apply_threaded(
 typedef struct ScanlineGlobalData {
   void *custom_data;
   ScanlineThreadFunc do_thread;
-  int scanlines_per_task;
-  int total_scanlines;
 } ScanlineGlobalData;
 
-static void processor_apply_scanline_func(TaskPool *__restrict pool, void *taskdata)
+static void processor_apply_parallel(void *__restrict userdata,
+                                     const int scanline,
+                                     const TaskParallelTLS *__restrict UNUSED(tls))
 {
-  ScanlineGlobalData *data = BLI_task_pool

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list