[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [36795] branches/cycles/intern/cycles: Cycles: some steps to getting OpenCL backend to compile.

Brecht Van Lommel brechtvanlommel at pandora.be
Fri May 20 14:26:01 CEST 2011


Revision: 36795
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=36795
Author:   blendix
Date:     2011-05-20 12:26:01 +0000 (Fri, 20 May 2011)
Log Message:
-----------
Cycles: some steps to getting OpenCL backend to compile.

Modified Paths:
--------------
    branches/cycles/intern/cycles/device/device_opencl.cpp
    branches/cycles/intern/cycles/kernel/CMakeLists.txt
    branches/cycles/intern/cycles/kernel/kernel.cl
    branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h
    branches/cycles/intern/cycles/kernel/kernel_compat_opencl.h
    branches/cycles/intern/cycles/kernel/kernel_globals.h
    branches/cycles/intern/cycles/kernel/kernel_light.h
    branches/cycles/intern/cycles/kernel/kernel_triangle.h
    branches/cycles/intern/cycles/kernel/kernel_types.h
    branches/cycles/intern/cycles/kernel/svm/bsdf_ashikhmin_velvet.h
    branches/cycles/intern/cycles/kernel/svm/bsdf_diffuse.h
    branches/cycles/intern/cycles/kernel/svm/bsdf_microfacet.h
    branches/cycles/intern/cycles/kernel/svm/bsdf_ward.h
    branches/cycles/intern/cycles/kernel/svm/bsdf_westin.h
    branches/cycles/intern/cycles/kernel/svm/svm_blend.h
    branches/cycles/intern/cycles/kernel/svm/svm_displace.h
    branches/cycles/intern/cycles/kernel/svm/svm_distorted_noise.h
    branches/cycles/intern/cycles/kernel/svm/svm_image.h
    branches/cycles/intern/cycles/kernel/svm/svm_mix.h
    branches/cycles/intern/cycles/kernel/svm/svm_sky.h
    branches/cycles/intern/cycles/kernel/svm/svm_texture.h
    branches/cycles/intern/cycles/kernel/svm/svm_types.h
    branches/cycles/intern/cycles/util/util_color.h
    branches/cycles/intern/cycles/util/util_math.h

Added Paths:
-----------
    branches/cycles/intern/cycles/kernel/kernel_textures.h

Modified: branches/cycles/intern/cycles/device/device_opencl.cpp
===================================================================
--- branches/cycles/intern/cycles/device/device_opencl.cpp	2011-05-20 11:15:44 UTC (rev 36794)
+++ branches/cycles/intern/cycles/device/device_opencl.cpp	2011-05-20 12:26:01 UTC (rev 36795)
@@ -55,6 +55,7 @@
 	cl_int ciErr;
 	map<string, device_vector<uchar>*> const_mem_map;
 	map<string, device_memory*> mem_map;
+	device_ptr null_mem;
 
 	const char *opencl_error_string(cl_int err)
 	{
@@ -125,10 +126,10 @@
 		ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
 		opencl_assert(ciErr);
 
-		ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL);
+		ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &cdDevice, NULL);
 		opencl_assert(ciErr);
 
-		cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL /*clLogMessagesToStdoutAPPLE */, NULL, &ciErr);
+		cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
 		opencl_assert(ciErr);
 
 		cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ciErr);
@@ -137,11 +138,17 @@
 		/* compile kernel */
 		string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
 		size_t source_len = source.size();
-		string build_options = "-I ../kernel -I ../util -Werror -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END="; //" + path_get("kernel") + " -Werror";
-		//printf("path %s\n", path_get("kernel").c_str());
 
-		//clUnloadCompiler();
+		string build_options = "";
 
+		//string csource = "../blender/intern/cycles";
+		//build_options += "-I " + csource + "/kernel -I " + csource + "/util";
+
+		build_options += " -I " + path_get("kernel"); /* todo: escape path */
+
+		build_options += " -Werror";
+		build_options += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END=";
+
 		cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, &source_len, &ciErr);
 
 		opencl_assert(ciErr);
@@ -170,10 +177,15 @@
 		opencl_assert(ciErr);
 		ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr);
 		opencl_assert(ciErr);
+
+		null_mem = (device_ptr)clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
 	}
 
 	~OpenCLDevice()
 	{
+
+		clReleaseMemObject(CL_MEM_PTR(null_mem));
+
 		map<string, device_vector<uchar>*>::iterator mt;
 		for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
 			mem_free(*(mt->second));
@@ -261,6 +273,7 @@
 	void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
 	{
 		mem_alloc(mem, MEM_READ_ONLY);
+		mem_copy_to(mem);
 		mem_map[name] = &mem;
 	}
 
@@ -295,6 +308,11 @@
 		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
 		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
 		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state);
+
+#define KERNEL_TEX(type, ttype, name) \
+	ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
+#include "kernel_textures.h"
+
 		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_pass), (void*)&d_pass);
 		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x);
 		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
@@ -314,11 +332,21 @@
 
 	cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
 	{
-		device_memory *mem = mem_map[name];
-		cl_mem ptr = CL_MEM_PTR(mem->device_pointer);
-		cl_int size = mem->data_width;
-		cl_int err = 0;
+		cl_mem ptr;
+		cl_int size, err = 0;
+
+		if(mem_map.find(name) != mem_map.end()) {
+			device_memory *mem = mem_map[name];
 		
+			ptr = CL_MEM_PTR(mem->device_pointer);
+			size = mem->data_width;
+		}
+		else {
+			/* work around NULL not working, even though the spec says otherwise */
+			ptr = CL_MEM_PTR(null_mem);
+			size = 1;
+		}
+		
 		err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
 		opencl_assert(err);
 		err |= clSetKernelArg(kernel, (*narg)++, sizeof(size), (void*)&size);
@@ -347,9 +375,11 @@
 		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
 		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
 		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
-		ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_R");
-		ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_G");
-		ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_B");
+
+#define KERNEL_TEX(type, ttype, name) \
+	ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
+#include "kernel_textures.h"
+
 		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_pass), (void*)&d_pass);
 		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution);
 		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);

Modified: branches/cycles/intern/cycles/kernel/CMakeLists.txt
===================================================================
--- branches/cycles/intern/cycles/kernel/CMakeLists.txt	2011-05-20 11:15:44 UTC (rev 36794)
+++ branches/cycles/intern/cycles/kernel/CMakeLists.txt	2011-05-20 12:26:01 UTC (rev 36795)
@@ -25,8 +25,11 @@
 	kernel_qbvh.h
 	kernel_random.h
 	kernel_shader.h
+	kernel_textures.h
 	kernel_triangle.h
-	kernel_types.h
+	kernel_types.h)
+
+SET(svm_headers
 	svm/bsdf.h
 	svm/bsdf_ashikhmin_velvet.h
 	svm/bsdf_diffuse.h
@@ -78,7 +81,7 @@
 ENDIF()
 
 IF(WITH_CYCLES_CUDA)
-	SET(cuda_sources kernel.cu ${headers})
+	SET(cuda_sources kernel.cu ${headers} ${svm_headers})
 	SET(cuda_cubins)
 
 	FOREACH(arch ${CYCLES_CUDA_ARCH})
@@ -106,9 +109,23 @@
 
 INCLUDE_DIRECTORIES(. ../util osl svm)
 
-ADD_LIBRARY(cycles_kernel ${sources} ${headers})
+ADD_LIBRARY(cycles_kernel ${sources} ${headers} ${svm_headers})
 
 IF(WITH_CYCLES_CUDA)
 	ADD_DEPENDENCIES(cycles_kernel cycles_kernel_cuda)
 ENDIF()
 
+# OPENCL kernel
+
+IF(WITH_CYCLES_OPENCL)
+	SET(util_headers
+		../util/util_color.h
+		../util/util_math.h
+		../util/util_transform.h
+		../util/util_types.h)
+
+	INSTALL(FILES kernel.cl ${headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
+	INSTALL(FILES ${svm_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel/svm)
+	INSTALL(FILES ${util_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
+ENDIF()
+

Modified: branches/cycles/intern/cycles/kernel/kernel.cl
===================================================================
--- branches/cycles/intern/cycles/kernel/kernel.cl	2011-05-20 11:15:44 UTC (rev 36794)
+++ branches/cycles/intern/cycles/kernel/kernel.cl	2011-05-20 12:26:01 UTC (rev 36795)
@@ -23,72 +23,62 @@
 #include "kernel_types.h"
 #include "kernel_globals.h"
 
-typedef struct KernelGlobals {
-	__constant KernelData *data;
-
-	__global float *__response_curve_R;
-	int __response_curve_R_width;
-
-	__global float *__response_curve_G;
-	int __response_curve_G_width;
-
-	__global float *__response_curve_B;
-	int __response_curve_B_width;
-} KernelGlobals;
-
 #include "kernel_film.h"
-//#include "kernel_path.h"
+#include "kernel_path.h"
 //#include "kernel_displace.h"
 
-__kernel void kernel_ocl_path_trace(__constant KernelData *data, __global float4 *buffer, __global uint *rng_state, int pass, int sx, int sy, int sw, int sh)
+__kernel void kernel_ocl_path_trace(
+	__constant KernelData *data,
+	__global float4 *buffer,
+	__global uint *rng_state,
+
+#define KERNEL_TEX(type, ttype, name) \
+	__global type *name, \
+	int name##_width,
+#include "kernel_textures.h"
+
+	int pass,
+	int sx, int sy, int sw, int sh)
 {
 	KernelGlobals kglobals, *kg = &kglobals;
+
 	kg->data = data;
 
-	int x = get_global_id(0);
-	int y = get_global_id(1);
+#define KERNEL_TEX(type, ttype, name) \
+	kg->name = name; \
+	kg->name##_width = name##_width;
+#include "kernel_textures.h"
+
+	int x = sx + get_global_id(0);
+	int y = sy + get_global_id(1);
 	int w = kernel_data.cam.width;
 
-	if(x < sx + sw && y < sy + sh) {
-		if(pass == 0) {
-			buffer[x + w*y].x = 0.5f;
-			buffer[x + w*y].y = 0.5f;
-			buffer[x + w*y].z = 0.5f;
-		}
-		else {
-			buffer[x + w*y].x += 0.5f;
-			buffer[x + w*y].y += 0.5f;
-			buffer[x + w*y].z += 0.5f;
-		}
-		
-		//= make_float3(1.0f, 0.9f, 0.0f);
-		//kernel_path_trace(buffer, rng_state, pass, x, y);
-	}
+	if(x < sx + sw && y < sy + sh)
+		kernel_path_trace(kg, buffer, rng_state, pass, x, y);
 }
 
 __kernel void kernel_ocl_tonemap(
 	__constant KernelData *data,
 	__global uchar4 *rgba,
 	__global float4 *buffer,
-	__global float *__response_curve_R,
-	int __response_curve_R_width,
-	__global float *__response_curve_G,
-	int __response_curve_G_width,
-	__global float *__response_curve_B,
-	int __response_curve_B_width,
+
+#define KERNEL_TEX(type, ttype, name) \
+	__global type *name, \
+	int name##_width,
+#include "kernel_textures.h"
+
 	int pass, int resolution,
 	int sx, int sy, int sw, int sh)
 {
 	KernelGlobals kglobals, *kg = &kglobals;
 
 	kg->data = data;
-	kg->__response_curve_R = __response_curve_R;
-	kg->__response_curve_R_width = __response_curve_R_width;
-	kg->__response_curve_G = __response_curve_G;
-	kg->__response_curve_G_width = __response_curve_G_width;
-	kg->__response_curve_B = __response_curve_B;
-	kg->__response_curve_B_width = __response_curve_B_width;
 
+#define KERNEL_TEX(type, ttype, name) \
+	kg->name = name; \
+	kg->name##_width = name##_width;
+#include "kernel_textures.h"
+
 	int x = sx + get_global_id(0);
 	int y = sy + get_global_id(1);
 
@@ -96,10 +86,10 @@
 		kernel_film_tonemap(kg, rgba, buffer, pass, resolution, x, y);
 }
 
-__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
+/*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
 {
 	int x = sx + get_global_id(0);
 
 	kernel_displace(input, offset, x);
-}
+}*/
 

Modified: branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h
===================================================================
--- branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h	2011-05-20 11:15:44 UTC (rev 36794)
+++ branches/cycles/intern/cycles/kernel/kernel_compat_cuda.h	2011-05-20 12:26:01 UTC (rev 36795)
@@ -35,7 +35,7 @@
 #define __device_inline  __device__ __inline__
 #define __global
 #define __shared __shared__
-#define __constant __constant__
+#define __constant
 
 /* No assert supported for CUDA */
 


@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list