[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [41882] trunk/blender/intern/cycles: Fix #29259: cycles issues on certain processors.

Brecht Van Lommel brechtvanlommel at pandora.be
Tue Nov 15 16:13:39 CET 2011


Revision: 41882
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=41882
Author:   blendix
Date:     2011-11-15 15:13:38 +0000 (Tue, 15 Nov 2011)
Log Message:
-----------
Fix #29259: cycles issues on certain processors. Now two versions of the kernel
are compiled, one SSE optimized and the other not, and it will choose between
them at runtime.

Modified Paths:
--------------
    trunk/blender/intern/cycles/CMakeLists.txt
    trunk/blender/intern/cycles/SConscript
    trunk/blender/intern/cycles/device/device_cpu.cpp
    trunk/blender/intern/cycles/kernel/CMakeLists.txt
    trunk/blender/intern/cycles/kernel/kernel.h
    trunk/blender/intern/cycles/util/util_system.cpp
    trunk/blender/intern/cycles/util/util_system.h

Added Paths:
-----------
    trunk/blender/intern/cycles/kernel/kernel_optimized.cpp

Modified: trunk/blender/intern/cycles/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/CMakeLists.txt	2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/CMakeLists.txt	2011-11-15 15:13:38 UTC (rev 41882)
@@ -9,32 +9,19 @@
 # Build Flags
 
 if(WITH_RAYOPTIMIZATION AND SUPPORT_SSE_BUILD)
-	set(GCC_OPTIM_FLAGS "-ffast-math -msse -msse2 -msse3")
-endif()
+	set(WITH_CYCLES_OPTIMIZED_KERNEL ON)
 
-if(APPLE)
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
-	set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
-endif()
-
-if(WIN32)
-	if(MSVC)
-		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast")
-		set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+	if(WIN32 AND MSVC)
+		set(CYCLES_OPTIMIZED_KERNEL_FLAGS "/Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast")
 	elseif(CMAKE_COMPILER_IS_GNUCC)
-		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
-		set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+		set(CYCLES_OPTIMIZED_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3")
 	endif()
 endif()
 
-if(UNIX AND NOT APPLE)
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
-	set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
-endif()
+# for OSL, not needed yet
+# set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+# set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
 
-# not needed yet, is for open shading language
-set(RTTI_DISABLE_FLAGS "")
-
 # Definitions and Includes
 
 add_definitions(${BOOST_DEFINITIONS} ${OPENIMAGEIO_DEFINITIONS})
@@ -42,6 +29,10 @@
 add_definitions(-DCCL_NAMESPACE_BEGIN=namespace\ ccl\ {)
 add_definitions(-DCCL_NAMESPACE_END=})
 
+if(WITH_CYCLES_OPTIMIZED_KERNEL)
+	add_definitions(-DWITH_OPTIMIZED_KERNEL)
+endif()
+
 if(WITH_CYCLES_NETWORK)
   add_definitions(-DWITH_NETWORK)
 endif()

Modified: trunk/blender/intern/cycles/SConscript
===================================================================
--- trunk/blender/intern/cycles/SConscript	2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/SConscript	2011-11-15 15:13:38 UTC (rev 41882)
@@ -10,11 +10,10 @@
 
 sources.remove(path.join('util', 'util_view.cpp'))
 sources.remove(path.join('render', 'film_response.cpp'))
+sources.remove(path.join('kernel', 'kernel_optimized.cpp'))
 
 incs = [] 
 defs = []
-ccflags = []
-cxxflags = []
 
 defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {')
 defs.append('CCL_NAMESPACE_END=}')
@@ -23,14 +22,6 @@
 defs.append('WITH_MULTI')
 defs.append('WITH_CUDA')
 
-if env['OURPLATFORM'] in ('win32-mingw'):
-    if env['WITH_BF_RAYOPTIMIZATION']:
-        cxxflags.append('-ffast-math -msse -msse2 -msse3'.split())
-        ccflags.append('-ffast-math -msse -msse2 -msse3'.split())
-    # not needed yet, is for open shading language
-    # cxxflags.append('-fno-rtti'.split())
-    # defs.append('BOOST_NO_RTTI BOOST_NO_TYPEID'.split())
-
 incs.extend('. bvh render device kernel kernel/osl kernel/svm util subd'.split())
 incs.extend('#intern/guardedalloc #source/blender/makesrna #source/blender/makesdna'.split())
 incs.extend('#source/blender/blenloader ../../source/blender/makesrna/intern'.split())
@@ -39,5 +30,20 @@
 incs.append(cycles['BF_BOOST_INC'])
 incs.append(cycles['BF_PYTHON_INC'])
 
-cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], compileflags=[None], cc_compileflags=ccflags, cxx_compileflags=cxxflags)
+# optimized kernel: compiled as its own library so only it gets the SSE flags
+if env['WITH_BF_RAYOPTIMIZATION']:
+    optim_cxxflags = []
 
+    if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'):
+        optim_cxxflags.append('/Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast'.split())
+    else:
+        optim_cxxflags.append('-ffast-math -msse -msse2 -msse3'.split())
+    
+    defs.append('WITH_OPTIMIZED_KERNEL')  # main library needs it too: device_cpu.cpp guards the optimized calls with it
+    optim_defs = defs[:]
+    optim_sources = [path.join('kernel', 'kernel_optimized.cpp')]
+    cycles_optim = cycles.Clone()
+    cycles_optim.BlenderLib('bf_intern_cycles_optimized', optim_sources, incs, optim_defs, libtype=['intern'], priority=[0], compileflags=[None], cxx_compileflags=optim_cxxflags)
+
+cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], compileflags=[None])
+

Modified: trunk/blender/intern/cycles/device/device_cpu.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device_cpu.cpp	2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/device/device_cpu.cpp	2011-11-15 15:13:38 UTC (rev 41882)
@@ -48,6 +48,9 @@
 	{
 		kg = kernel_globals_create();
 
+		/* do now to avoid thread issues */
+		system_cpu_support_optimized();
+
 		if(threads_num == 0)
 			threads_num = system_cpu_thread_count();
 
@@ -155,14 +158,28 @@
 			OSLShader::thread_init(kg);
 #endif
 
-		for(int y = task.y; y < task.y + task.h; y++) {
-			for(int x = task.x; x < task.x + task.w; x++)
-				kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
+#ifdef WITH_OPTIMIZED_KERNEL
+		if(system_cpu_support_optimized()) {
+			for(int y = task.y; y < task.y + task.h; y++) {
+				for(int x = task.x; x < task.x + task.w; x++)
+					kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
 
-			if(tasks.worker_cancel())
-				break;
+				if(tasks.worker_cancel())
+					break;
+			}
 		}
+		else
+#endif
+		{
+			for(int y = task.y; y < task.y + task.h; y++) {
+				for(int x = task.x; x < task.x + task.w; x++)
+					kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
 
+				if(tasks.worker_cancel())
+					break;
+			}
+		}
+
 #ifdef WITH_OSL
 		if(kernel_osl_use(kg))
 			OSLShader::thread_free(kg);
@@ -171,10 +188,19 @@
 
 	void thread_tonemap(DeviceTask& task)
 	{
-		for(int y = task.y; y < task.y + task.h; y++) {
-			for(int x = task.x; x < task.x + task.w; x++)
-				kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
+#ifdef WITH_OPTIMIZED_KERNEL
+		if(system_cpu_support_optimized()) {
+			for(int y = task.y; y < task.y + task.h; y++)
+				for(int x = task.x; x < task.x + task.w; x++)
+					kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
 		}
+		else
+#endif
+		{
+			for(int y = task.y; y < task.y + task.h; y++)
+				for(int x = task.x; x < task.x + task.w; x++)
+					kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
+		}
 	}
 
 	void thread_displace(DeviceTask& task)
@@ -184,13 +210,26 @@
 			OSLShader::thread_init(kg);
 #endif
 
-		for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
-			kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
+#ifdef WITH_OPTIMIZED_KERNEL
+		if(system_cpu_support_optimized()) {
+			for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
+				kernel_cpu_optimized_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
 
-			if(tasks.worker_cancel())
-				break;
+				if(tasks.worker_cancel())
+					break;
+			}
 		}
+		else
+#endif
+		{
+			for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
+				kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
 
+				if(tasks.worker_cancel())
+					break;
+			}
+		}
+
 #ifdef WITH_OSL
 		if(kernel_osl_use(kg))
 			OSLShader::thread_free(kg);

Modified: trunk/blender/intern/cycles/kernel/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/kernel/CMakeLists.txt	2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/kernel/CMakeLists.txt	2011-11-15 15:13:38 UTC (rev 41882)
@@ -8,6 +8,7 @@
 
 set(SRC
 	kernel.cpp
+	kernel_optimized.cpp
 	kernel.cl
 	kernel.cu
 )
@@ -123,11 +124,15 @@
 
 add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_SVM_HEADERS})
 
+if(WITH_CYCLES_OPTIMIZED_KERNEL)  # flags are quoted below: the variable is only set for MSVC and GCC and may expand to nothing
+	set_source_files_properties(kernel_optimized.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_OPTIMIZED_KERNEL_FLAGS}")
+endif()
+
 if(WITH_CYCLES_CUDA)
 	add_dependencies(cycles_kernel cycles_kernel_cuda)
 endif()
 
-# OPENCL kernel
+# OpenCL kernel
 
 #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
 #add_custom_command(
@@ -142,3 +147,4 @@
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
+

Modified: trunk/blender/intern/cycles/kernel/kernel.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel.h	2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/kernel/kernel.h	2011-11-15 15:13:38 UTC (rev 41882)
@@ -38,9 +38,14 @@
 
 void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y);
 void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y);
-
 void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
 
+#ifdef WITH_OPTIMIZED_KERNEL
+void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y);
+void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y);
+void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
+#endif
+
 CCL_NAMESPACE_END
 
 #endif /* __KERNEL_H__ */

Copied: trunk/blender/intern/cycles/kernel/kernel_optimized.cpp (from rev 41876, trunk/blender/intern/cycles/kernel/kernel.cpp)
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_optimized.cpp	                        (rev 0)
+++ trunk/blender/intern/cycles/kernel/kernel_optimized.cpp	2011-11-15 15:13:38 UTC (rev 41882)
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list