[Bf-blender-cvs] [69c7522] master: Fix T40379: world MIS causing too much CUDA memory usage.

Brecht Van Lommel noreply at git.blender.org
Tue May 27 15:12:21 CEST 2014


Commit: 69c7522b2463245ef16ebcf2806645c78e83b4df
Author: Brecht Van Lommel
Date:   Tue May 27 13:20:07 2014 +0200
https://developer.blender.org/rB69c7522b2463245ef16ebcf2806645c78e83b4df

Fix T40379: world MIS causing too much CUDA memory usage.

The kernel for baking the world texture was the same as the one used for
baking. Now that's separate which allows the kernel to reserve much less
memory.

===================================================================

M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_opencl.cpp
M	intern/cycles/kernel/kernel.cl
M	intern/cycles/kernel/kernel.cpp
M	intern/cycles/kernel/kernel.cu
M	intern/cycles/kernel/kernel_avx.cpp
M	intern/cycles/kernel/kernel_bake.h
M	intern/cycles/kernel/kernel_sse2.cpp
M	intern/cycles/kernel/kernel_sse3.cpp
M	intern/cycles/kernel/kernel_sse41.cpp

===================================================================

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index b19f5e2..48d1c18 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -746,7 +746,12 @@ public:
 		CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
 
 		/* get kernel function */
-		cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"));
+		if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
+			cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
+		}
+		else {
+			cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"));
+		}
 
 		/* do tasks in smaller chunks, so we can cancel it */
 		const int shader_chunk_size = 65536;
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 694ec9d..f841dab 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -321,6 +321,7 @@ public:
 	cl_kernel ckFilmConvertByteKernel;
 	cl_kernel ckFilmConvertHalfFloatKernel;
 	cl_kernel ckShaderKernel;
+	cl_kernel ckBakeKernel;
 	cl_int ciErr;
 
 	typedef map<string, device_vector<uchar>*> ConstMemMap;
@@ -443,6 +444,7 @@ public:
 		ckFilmConvertByteKernel = NULL;
 		ckFilmConvertHalfFloatKernel = NULL;
 		ckShaderKernel = NULL;
+		ckBakeKernel = NULL;
 		null_mem = 0;
 		device_initialized = false;
 
@@ -791,6 +793,10 @@ public:
 		if(opencl_error(ciErr))
 			return false;
 
+		ckBakeKernel = clCreateKernel(cpProgram, "kernel_ocl_bake", &ciErr);
+		if(opencl_error(ciErr))
+			return false;
+
 		return true;
 	}
 
@@ -1054,19 +1060,26 @@ public:
 		/* sample arguments */
 		cl_uint narg = 0;
 
-		opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_data), (void*)&d_data));
-		opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_input), (void*)&d_input));
-		opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_output), (void*)&d_output));
+		cl_kernel kernel;
+
+		if(task.shader_eval_type >= SHADER_EVAL_BAKE)
+			kernel = ckBakeKernel;
+		else
+			kernel = ckShaderKernel;
+
+		opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_data), (void*)&d_data));
+		opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_input), (void*)&d_input));
+		opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_output), (void*)&d_output));
 
 #define KERNEL_TEX(type, ttype, name) \
-	set_kernel_arg_mem(ckShaderKernel, &narg, #name);
+	set_kernel_arg_mem(kernel, &narg, #name);
 #include "kernel_textures.h"
 
-		opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
-		opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
-		opencl_assert(clSetKernelArg(ckShaderKernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
+		opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_eval_type), (void*)&d_shader_eval_type));
+		opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_x), (void*)&d_shader_x));
+		opencl_assert(clSetKernelArg(kernel, narg++, sizeof(d_shader_w), (void*)&d_shader_w));
 
-		enqueue_kernel(ckShaderKernel, task.shader_w, 1);
+		enqueue_kernel(kernel, task.shader_w, 1);
 	}
 
 	void thread_run(DeviceTask *task)
diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl
index 1dc0793..d7d3438 100644
--- a/intern/cycles/kernel/kernel.cl
+++ b/intern/cycles/kernel/kernel.cl
@@ -131,3 +131,28 @@ __kernel void kernel_ocl_shader(
 		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, x);
 }
 
+__kernel void kernel_ocl_bake(
+	ccl_constant KernelData *data,
+	ccl_global uint4 *input,
+	ccl_global float4 *output,
+
+#define KERNEL_TEX(type, ttype, name) \
+	ccl_global type *name,
+#include "kernel_textures.h"
+
+	int type, int sx, int sw)
+{
+	KernelGlobals kglobals, *kg = &kglobals;
+
+	kg->data = data;
+
+#define KERNEL_TEX(type, ttype, name) \
+	kg->name = name;
+#include "kernel_textures.h"
+
+	int x = sx + get_global_id(0);
+
+	if(x < sx + sw)
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, x);
+}
+
diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp
index a0b6b8e..a535659 100644
--- a/intern/cycles/kernel/kernel.cpp
+++ b/intern/cycles/kernel/kernel.cpp
@@ -122,7 +122,10 @@ void kernel_cpu_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float *bu
 
 void kernel_cpu_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
 {
-	kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	if(type >= SHADER_EVAL_BAKE)
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	else
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu
index b9b41f7..bb20819 100644
--- a/intern/cycles/kernel/kernel.cu
+++ b/intern/cycles/kernel/kernel.cu
@@ -153,5 +153,14 @@ kernel_cuda_shader(uint4 *input, float4 *output, int type, int sx)
 	kernel_shader_evaluate(NULL, input, output, (ShaderEvalType)type, x);
 }
 
+extern "C" __global__ void
+CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
+kernel_cuda_bake(uint4 *input, float4 *output, int type, int sx)
+{
+	int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
+
+	kernel_bake_evaluate(NULL, input, output, (ShaderEvalType)type, x);
+}
+
 #endif
 
diff --git a/intern/cycles/kernel/kernel_avx.cpp b/intern/cycles/kernel/kernel_avx.cpp
index c572fcd..7d354de 100644
--- a/intern/cycles/kernel/kernel_avx.cpp
+++ b/intern/cycles/kernel/kernel_avx.cpp
@@ -69,7 +69,10 @@ void kernel_cpu_avx_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, float
 
 void kernel_cpu_avx_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
 {
-	kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	if(type >= SHADER_EVAL_BAKE)
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	else
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 6e73551..c3ae2b6 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -356,11 +356,6 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
 
 ccl_device void kernel_shader_evaluate(KernelGlobals *kg, ccl_global uint4 *input, ccl_global float4 *output, ShaderEvalType type, int i)
 {
-	if(type >= SHADER_EVAL_BAKE) {
-		kernel_bake_evaluate(kg, input, output, type, i);
-		return;
-	}
-
 	ShaderData sd;
 	uint4 in = input[i];
 	float3 out;
diff --git a/intern/cycles/kernel/kernel_sse2.cpp b/intern/cycles/kernel/kernel_sse2.cpp
index 455cac0..3b5faea 100644
--- a/intern/cycles/kernel/kernel_sse2.cpp
+++ b/intern/cycles/kernel/kernel_sse2.cpp
@@ -66,7 +66,10 @@ void kernel_cpu_sse2_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
 
 void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
 {
-	kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	if(type >= SHADER_EVAL_BAKE)
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	else
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse3.cpp b/intern/cycles/kernel/kernel_sse3.cpp
index 29aca52..3b18b16 100644
--- a/intern/cycles/kernel/kernel_sse3.cpp
+++ b/intern/cycles/kernel/kernel_sse3.cpp
@@ -68,7 +68,10 @@ void kernel_cpu_sse3_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, floa
 
 void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
 {
-	kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	if(type >= SHADER_EVAL_BAKE)
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	else
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_sse41.cpp b/intern/cycles/kernel/kernel_sse41.cpp
index 0ece67e..a3731d7 100644
--- a/intern/cycles/kernel/kernel_sse41.cpp
+++ b/intern/cycles/kernel/kernel_sse41.cpp
@@ -69,7 +69,10 @@ void kernel_cpu_sse41_convert_to_half_float(KernelGlobals *kg, uchar4 *rgba, flo
 
 void kernel_cpu_sse41_shader(KernelGlobals *kg, uint4 *input, float4 *output, int type, int i)
 {
-	kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	if(type >= SHADER_EVAL_BAKE)
+		kernel_bake_evaluate(kg, input, output, (ShaderEvalType)type, i);
+	else
+		kernel_shader_evaluate(kg, input, output, (ShaderEvalType)type, i);
 }
 
 CCL_NAMESPACE_END




More information about the Bf-blender-cvs mailing list