[Bf-blender-cvs] [247b869] master: Compositor: implement OpenCL backend for gaussian blur

Sergey Sharybin noreply at git.blender.org
Mon Oct 6 16:36:21 CEST 2014


Commit: 247b869967812891f6b77585184a3b09f3f16a18
Author: Sergey Sharybin
Date:   Mon Oct 6 14:59:26 2014 +0200
Branches: master
https://developer.blender.org/rB247b869967812891f6b77585184a3b09f3f16a18

Compositor: implement OpenCL backend for gaussian blur

Pretty much straightforward change which gives around 30%
speedup on my laptop and around 2x speedup on desktop in
the BI (which uses gts580). Tested with huge blurs (like
10% of blur) which was rather common during Caminandes.

For now OpenCL is only limited for blur size more than
100 pixels.

This is a bit experimental still, feedback is welcome.

Reviewers: jbakker, lukastoenne

Subscribers: ton

Differential Revision: https://developer.blender.org/D576

===================================================================

M	source/blender/compositor/nodes/COM_BlurNode.cpp
M	source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
M	source/blender/compositor/operations/COM_GaussianXBlurOperation.h
M	source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
M	source/blender/compositor/operations/COM_GaussianYBlurOperation.h
M	source/blender/compositor/operations/COM_OpenCLKernels.cl

===================================================================

diff --git a/source/blender/compositor/nodes/COM_BlurNode.cpp b/source/blender/compositor/nodes/COM_BlurNode.cpp
index 76e52c1..f3d0c33 100644
--- a/source/blender/compositor/nodes/COM_BlurNode.cpp
+++ b/source/blender/compositor/nodes/COM_BlurNode.cpp
@@ -105,6 +105,7 @@ void BlurNode::convertToOperations(NodeConverter &converter, const CompositorCon
 		GaussianXBlurOperation *operationx = new GaussianXBlurOperation();
 		operationx->setData(data);
 		operationx->setQuality(quality);
+		operationx->checkOpenCL();
 		
 		converter.addOperation(operationx);
 		converter.mapInputSocket(getInputSocket(1), operationx->getInputSocket(1));
@@ -112,6 +113,7 @@ void BlurNode::convertToOperations(NodeConverter &converter, const CompositorCon
 		GaussianYBlurOperation *operationy = new GaussianYBlurOperation();
 		operationy->setData(data);
 		operationy->setQuality(quality);
+		operationy->checkOpenCL();
 
 		converter.addOperation(operationy);
 		converter.mapInputSocket(getInputSocket(1), operationy->getInputSocket(1));
diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
index 0aefba3..0838d28 100644
--- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
@@ -21,6 +21,7 @@
  */
 
 #include "COM_GaussianXBlurOperation.h"
+#include "COM_OpenCLDevice.h"
 #include "BLI_math.h"
 #include "MEM_guardedalloc.h"
 
@@ -124,6 +125,32 @@ void GaussianXBlurOperation::executePixel(float output[4], int x, int y, void *d
 	mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
 }
 
+void GaussianXBlurOperation::executeOpenCL(OpenCLDevice *device,
+                                           MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
+                                           MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
+                                           list<cl_kernel> *clKernelsToCleanUp)
+{
+	cl_kernel gaussianXBlurOperationKernel = device->COM_clCreateKernel("gaussianXBlurOperationKernel", NULL);
+	cl_int filter_size = this->m_filtersize;
+
+	cl_mem gausstab = clCreateBuffer(device->getContext(),
+	                                 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+	                                 sizeof(float) * (this->m_filtersize * 2 + 1),
+	                                 this->m_gausstab,
+	                                 NULL);
+
+	device->COM_clAttachMemoryBufferToKernelParameter(gaussianXBlurOperationKernel, 0, 1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram);
+	device->COM_clAttachOutputMemoryBufferToKernelParameter(gaussianXBlurOperationKernel, 2, clOutputBuffer);
+	device->COM_clAttachMemoryBufferOffsetToKernelParameter(gaussianXBlurOperationKernel, 3, outputMemoryBuffer);
+	clSetKernelArg(gaussianXBlurOperationKernel, 4, sizeof(cl_int), &filter_size);
+	device->COM_clAttachSizeToKernelParameter(gaussianXBlurOperationKernel, 5, this);
+	clSetKernelArg(gaussianXBlurOperationKernel, 6, sizeof(cl_mem), &gausstab);
+
+	device->COM_clEnqueueRange(gaussianXBlurOperationKernel, outputMemoryBuffer, 7, this);
+
+	clReleaseMemObject(gausstab);
+}
+
 void GaussianXBlurOperation::deinitExecution()
 {
 	BlurBaseOperation::deinitExecution();
diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
index e391320..d7ae8b1 100644
--- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
+++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
@@ -40,7 +40,12 @@ public:
 	 * @brief the inner loop of this program
 	 */
 	void executePixel(float output[4], int x, int y, void *data);
-	
+
+	void executeOpenCL(OpenCLDevice *device,
+	                   MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
+	                   MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
+	                   list<cl_kernel> *clKernelsToCleanUp);
+
 	/**
 	 * @brief initialize the execution
 	 */
@@ -53,5 +58,9 @@ public:
 	
 	void *initializeTileData(rcti *rect);
 	bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
+
+	void checkOpenCL() {
+		this->setOpenCL(m_data.sizex >= 128);
+	}
 };
 #endif
diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
index a05a1ab..6172f95 100644
--- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
@@ -21,6 +21,7 @@
  */
 
 #include "COM_GaussianYBlurOperation.h"
+#include "COM_OpenCLDevice.h"
 #include "BLI_math.h"
 #include "MEM_guardedalloc.h"
 
@@ -126,6 +127,32 @@ void GaussianYBlurOperation::executePixel(float output[4], int x, int y, void *d
 	mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
 }
 
+void GaussianYBlurOperation::executeOpenCL(OpenCLDevice *device,
+                                           MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
+                                           MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
+                                           list<cl_kernel> *clKernelsToCleanUp)
+{
+	cl_kernel gaussianYBlurOperationKernel = device->COM_clCreateKernel("gaussianYBlurOperationKernel", NULL);
+	cl_int filter_size = this->m_filtersize;
+
+	cl_mem gausstab = clCreateBuffer(device->getContext(),
+	                                 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+	                                 sizeof(float) * (this->m_filtersize * 2 + 1),
+	                                 this->m_gausstab,
+	                                 NULL);
+
+	device->COM_clAttachMemoryBufferToKernelParameter(gaussianYBlurOperationKernel, 0, 1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram);
+	device->COM_clAttachOutputMemoryBufferToKernelParameter(gaussianYBlurOperationKernel, 2, clOutputBuffer);
+	device->COM_clAttachMemoryBufferOffsetToKernelParameter(gaussianYBlurOperationKernel, 3, outputMemoryBuffer);
+	clSetKernelArg(gaussianYBlurOperationKernel, 4, sizeof(cl_int), &filter_size);
+	device->COM_clAttachSizeToKernelParameter(gaussianYBlurOperationKernel, 5, this);
+	clSetKernelArg(gaussianYBlurOperationKernel, 6, sizeof(cl_mem), &gausstab);
+
+	device->COM_clEnqueueRange(gaussianYBlurOperationKernel, outputMemoryBuffer, 7, this);
+
+	clReleaseMemObject(gausstab);
+}
+
 void GaussianYBlurOperation::deinitExecution()
 {
 	BlurBaseOperation::deinitExecution();
diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
index 22b6562..4b5751c 100644
--- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
+++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
@@ -40,7 +40,12 @@ public:
 	 * the inner loop of this program
 	 */
 	void executePixel(float output[4], int x, int y, void *data);
-	
+
+	void executeOpenCL(OpenCLDevice *device,
+	                   MemoryBuffer *outputMemoryBuffer, cl_mem clOutputBuffer,
+	                   MemoryBuffer **inputMemoryBuffers, list<cl_mem> *clMemToCleanUp,
+	                   list<cl_kernel> *clKernelsToCleanUp);
+
 	/**
 	 * @brief initialize the execution
 	 */
@@ -53,5 +58,9 @@ public:
 	
 	void *initializeTileData(rcti *rect);
 	bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation *readOperation, rcti *output);
+
+	void checkOpenCL() {
+		this->setOpenCL(m_data.sizex >= 128);
+	}
 };
 #endif
diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl b/source/blender/compositor/operations/COM_OpenCLKernels.cl
index 00b3825..1b965eb 100644
--- a/source/blender/compositor/operations/COM_OpenCLKernels.cl
+++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl
@@ -250,3 +250,66 @@ __kernel void directionalBlurKernel(__read_only image2d_t inputImage,  __write_o
 
 	write_imagef(output, coords, col);
 }
+
+// KERNEL --- GAUSSIAN BLUR ---
+__kernel void gaussianXBlurOperationKernel(__read_only image2d_t inputImage,
+                                           int2 offsetInput,
+                                           __write_only image2d_t output,
+                                           int2 offsetOutput,
+                                           int filter_size,
+                                           int2 dimension,
+                                           __global float *gausstab,
+                                           int2 offset)
+{
+	float4 color = {0.0f, 0.0f, 0.0f, 0.0f};
+	int2 coords = {get_global_id(0), get_global_id(1)};
+	coords += offset;
+	const int2 realCoordinate = coords + offsetOutput;
+	int2 inputCoordinate = realCoordinate - offsetInput;
+	float weight = 0.0f;
+
+	int xmin = max(realCoordinate.x - filter_size,     0) - offsetInput.x;
+	int xmax = min(realCoordinate.x + filter_size + 1, dimension.x) - offsetInput.x;
+
+	for (int nx = xmin, i = max(filter_size - realCoordinate.x, 0); nx < xmax; ++nx, ++i) {
+		float w = gausstab[i];
+		inputCoordinate.x = nx;
+		color += read_imagef(inputImage, SAMPLER_NEAREST, inputCoordinate) * w;
+		weight += w;
+	}
+
+	color *= (1.0f / weight);
+
+	write_imagef(output, coords, color);
+}
+
+__kernel void gaussianYBlurOperationKernel(__read_only image2d_t inputImage,
+                                           int2 offsetInput,
+                                           __write_only image2d_t output,
+                                           int2 offsetOutput,
+                                           int filter_size,
+                                           int2 dimension,
+                                           __global float *gausstab,
+                                           int2 offset)
+{
+	float4 color = {0.0f, 0.0f, 0.0f, 0.0f};
+	int2 coords = {get_global_id(0), get_global_id(1)};
+	coords += offset;
+	const int2 realCoordinate = coords + offsetOutput;
+	int2 inputCoordinate = realCoordinate - offsetInput;
+	float weight = 0.0f;
+
+	int ymin = max(realCoordinate.y 

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list