[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [42771] trunk/blender/intern/cycles: Cycles: some tweaks for apple opencl with ATI cards, to get it working up to

Brecht Van Lommel brechtvanlommel at pandora.be
Tue Dec 20 18:37:05 CET 2011


Revision: 42771
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=42771
Author:   blendix
Date:     2011-12-20 17:36:56 +0000 (Tue, 20 Dec 2011)
Log Message:
-----------
Cycles: some tweaks for Apple OpenCL with ATI cards, to get it working up to
the level of ambient occlusion rendering; shaders still fail. Fixes found with
much help from Jens and Dalai.

Modified Paths:
--------------
    trunk/blender/intern/cycles/device/device.cpp
    trunk/blender/intern/cycles/device/device.h
    trunk/blender/intern/cycles/device/device_opencl.cpp
    trunk/blender/intern/cycles/kernel/CMakeLists.txt
    trunk/blender/intern/cycles/kernel/kernel_compat_opencl.h

Modified: trunk/blender/intern/cycles/device/device.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device.cpp	2011-12-20 17:24:20 UTC (rev 42770)
+++ trunk/blender/intern/cycles/device/device.cpp	2011-12-20 17:36:56 UTC (rev 42771)
@@ -24,6 +24,7 @@
 
 #include "util_cuda.h"
 #include "util_debug.h"
+#include "util_foreach.h"
 #include "util_math.h"
 #include "util_opencl.h"
 #include "util_opengl.h"
@@ -41,9 +42,33 @@
 {
 }
 
-void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
+void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
 {
+	int num;
+
 	if(type == DISPLACE) {
+		num = (displace_w + max_size - 1)/max_size;
+	}
+	else {
+		max_size = max(1, max_size/w);
+		num = (h + max_size - 1)/max_size;
+	}
+
+	split(tasks, num);
+}
+
+void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num)
+{
+	list<DeviceTask> tasks;
+	split(tasks, num);
+
+	foreach(DeviceTask& task, tasks)
+		queue.push(task);
+}
+
+void DeviceTask::split(list<DeviceTask>& tasks, int num)
+{
+	if(type == DISPLACE) {
 		num = min(displace_w, num);
 
 		for(int i = 0; i < num; i++) {
@@ -55,7 +80,7 @@
 			task.displace_x = tx;
 			task.displace_w = tw;
 
-			tasks.push(task);
+			tasks.push_back(task);
 		}
 	}
 	else {
@@ -70,7 +95,7 @@
 			task.y = ty;
 			task.h = th;
 
-			tasks.push(task);
+			tasks.push_back(task);
 		}
 	}
 }

Modified: trunk/blender/intern/cycles/device/device.h
===================================================================
--- trunk/blender/intern/cycles/device/device.h	2011-12-20 17:24:20 UTC (rev 42770)
+++ trunk/blender/intern/cycles/device/device.h	2011-12-20 17:36:56 UTC (rev 42771)
@@ -23,6 +23,7 @@
 
 #include "device_memory.h"
 
+#include "util_list.h"
 #include "util_string.h"
 #include "util_thread.h"
 #include "util_types.h"
@@ -67,7 +68,10 @@
 	int displace_x, displace_w;
 
 	DeviceTask(Type type = PATH_TRACE);
+
+	void split(list<DeviceTask>& tasks, int num);
 	void split(ThreadQueue<DeviceTask>& tasks, int num);
+	void split_max_size(list<DeviceTask>& tasks, int max_size);
 };
 
 /* Device */

Modified: trunk/blender/intern/cycles/device/device_opencl.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device_opencl.cpp	2011-12-20 17:24:20 UTC (rev 42770)
+++ trunk/blender/intern/cycles/device/device_opencl.cpp	2011-12-20 17:36:56 UTC (rev 42771)
@@ -25,6 +25,7 @@
 #include "device.h"
 #include "device_intern.h"
 
+#include "util_foreach.h"
 #include "util_map.h"
 #include "util_math.h"
 #include "util_md5.h"
@@ -52,6 +53,7 @@
 	map<string, device_memory*> mem_map;
 	device_ptr null_mem;
 	bool device_initialized;
+	string platform_name;
 
 	const char *opencl_error_string(cl_int err)
 	{
@@ -175,6 +177,10 @@
 		if(opencl_error(ciErr))
 			return;
 
+		char name[256];
+		clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
+		platform_name = name;
+
 		cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
 		if(opencl_error(ciErr))
 			return;
@@ -191,7 +197,7 @@
 	{
 		char version[256];
 
-		int major, minor, req_major = 1, req_minor = 0;
+		int major, minor, req_major = 1, req_minor = 1;
 
 		clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
 
@@ -277,15 +283,12 @@
 	{
 		string build_options = " -cl-fast-relaxed-math ";
 		
-		/* Full Shading only on NVIDIA cards at the moment */
-		char vendor[256];
+		/* full shading only on NVIDIA cards at the moment */
+		if(platform_name == "NVIDIA CUDA")
+			build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
+		if(platform_name == "Apple")
+			build_options += " -D__CL_NO_FLOAT3__ ";
 
-		clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL);
-		string name = vendor;
-		
-		if(name == "NVIDIA CUDA")
-			build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ ";
-
 		return build_options;
 	}
 
@@ -657,12 +660,24 @@
 		opencl_assert(clFinish(cqCommandQueue));
 	}
 
-	void task_add(DeviceTask& task)
+	void task_add(DeviceTask& maintask)
 	{
-		if(task.type == DeviceTask::TONEMAP)
-			tonemap(task);
-		else if(task.type == DeviceTask::PATH_TRACE)
-			path_trace(task);
+		list<DeviceTask> tasks;
+
+		/* arbitrary limit to work around apple ATI opencl issue */
+		if(platform_name == "Apple")
+			maintask.split_max_size(tasks, 76800);
+		else
+			tasks.push_back(maintask);
+
+		DeviceTask task;
+
+		foreach(DeviceTask& task, tasks) {
+			if(task.type == DeviceTask::TONEMAP)
+				tonemap(task);
+			else if(task.type == DeviceTask::PATH_TRACE)
+				path_trace(task);
+		}
 	}
 
 	void task_wait()

Modified: trunk/blender/intern/cycles/kernel/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/kernel/CMakeLists.txt	2011-12-20 17:24:20 UTC (rev 42770)
+++ trunk/blender/intern/cycles/kernel/CMakeLists.txt	2011-12-20 17:36:56 UTC (rev 42771)
@@ -143,7 +143,7 @@
 #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
 #add_custom_command(
 #	OUTPUT ${KERNEL_PREPROCESSED}
-#	COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${KERNEL_PREPROCESSED}
+#	COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
 #	DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
 #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
 #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)

Modified: trunk/blender/intern/cycles/kernel/kernel_compat_opencl.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_compat_opencl.h	2011-12-20 17:24:20 UTC (rev 42770)
+++ trunk/blender/intern/cycles/kernel/kernel_compat_opencl.h	2011-12-20 17:36:56 UTC (rev 42771)
@@ -25,12 +25,21 @@
 /* no namespaces in opencl */
 #define CCL_NAMESPACE_BEGIN
 #define CCL_NAMESPACE_END
-#define WITH_OPENCL
 
+#ifdef __CL_NO_FLOAT3__
+#define float3 float4
+#endif
+
+#ifdef __CL_NOINLINE__
+#define __noinline __attribute__((noinline))
+#else
+#define __noinline
+#endif
+
 /* in opencl all functions are device functions, so leave this empty */
 #define __device
-#define __device_inline
-#define __device_noinline
+#define __device_inline __device
+#define __device_noinline  __device __noinline
 
 /* no assert in opencl */
 #define kernel_assert(cond)
@@ -68,7 +77,11 @@
 #endif
 
 #define make_float2(x, y) ((float2)(x, y))
+#ifdef __CL_NO_FLOAT3__
+#define make_float3(x, y, z) ((float4)(x, y, z, 0.0))
+#else
 #define make_float3(x, y, z) ((float3)(x, y, z))
+#endif
 #define make_float4(x, y, z, w) ((float4)(x, y, z, w))
 #define make_int2(x, y) ((int2)(x, y))
 #define make_int3(x, y, z) ((int3)(x, y, z))




More information about the Bf-blender-cvs mailing list