[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [41882] trunk/blender/intern/cycles: Fix #29259: cycles issues on certain processors.
Brecht Van Lommel
brechtvanlommel at pandora.be
Tue Nov 15 16:13:39 CET 2011
Revision: 41882
http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=41882
Author: blendix
Date: 2011-11-15 15:13:38 +0000 (Tue, 15 Nov 2011)
Log Message:
-----------
Fix #29259: cycles issues on certain processors. Two versions of the kernel
are now compiled, one SSE optimized and one not, and the CPU device chooses
between them at runtime.
Modified Paths:
--------------
trunk/blender/intern/cycles/CMakeLists.txt
trunk/blender/intern/cycles/SConscript
trunk/blender/intern/cycles/device/device_cpu.cpp
trunk/blender/intern/cycles/kernel/CMakeLists.txt
trunk/blender/intern/cycles/kernel/kernel.h
trunk/blender/intern/cycles/util/util_system.cpp
trunk/blender/intern/cycles/util/util_system.h
Added Paths:
-----------
trunk/blender/intern/cycles/kernel/kernel_optimized.cpp
Modified: trunk/blender/intern/cycles/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/CMakeLists.txt 2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/CMakeLists.txt 2011-11-15 15:13:38 UTC (rev 41882)
@@ -9,32 +9,19 @@
# Build Flags
if(WITH_RAYOPTIMIZATION AND SUPPORT_SSE_BUILD)
- set(GCC_OPTIM_FLAGS "-ffast-math -msse -msse2 -msse3")
-endif()
+ set(WITH_CYCLES_OPTIMIZED_KERNEL ON)
-if(APPLE)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
- set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
-endif()
-
-if(WIN32)
- if(MSVC)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast")
- set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+ if(WIN32 AND MSVC)
+ set(CYCLES_OPTIMIZED_KERNEL_FLAGS "/Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast")
elseif(CMAKE_COMPILER_IS_GNUCC)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
- set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+ set(CYCLES_OPTIMIZED_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -DGOGOGO")
endif()
endif()
-if(UNIX AND NOT APPLE)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_OPTIM_FLAGS}")
- set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
-endif()
+# for OSL, not needed yet
+# set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
+# set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
-# not needed yet, is for open shading language
-set(RTTI_DISABLE_FLAGS "")
-
# Definitions and Includes
add_definitions(${BOOST_DEFINITIONS} ${OPENIMAGEIO_DEFINITIONS})
@@ -42,6 +29,10 @@
add_definitions(-DCCL_NAMESPACE_BEGIN=namespace\ ccl\ {)
add_definitions(-DCCL_NAMESPACE_END=})
+if(WITH_CYCLES_OPTIMIZED_KERNEL)
+ add_definitions(-DWITH_OPTIMIZED_KERNEL)
+endif()
+
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
Modified: trunk/blender/intern/cycles/SConscript
===================================================================
--- trunk/blender/intern/cycles/SConscript 2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/SConscript 2011-11-15 15:13:38 UTC (rev 41882)
@@ -10,11 +10,10 @@
sources.remove(path.join('util', 'util_view.cpp'))
sources.remove(path.join('render', 'film_response.cpp'))
+sources.remove(path.join('kernel', 'kernel_optimized.cpp'))
incs = []
defs = []
-ccflags = []
-cxxflags = []
defs.append('CCL_NAMESPACE_BEGIN=namespace ccl {')
defs.append('CCL_NAMESPACE_END=}')
@@ -23,14 +22,6 @@
defs.append('WITH_MULTI')
defs.append('WITH_CUDA')
-if env['OURPLATFORM'] in ('win32-mingw'):
- if env['WITH_BF_RAYOPTIMIZATION']:
- cxxflags.append('-ffast-math -msse -msse2 -msse3'.split())
- ccflags.append('-ffast-math -msse -msse2 -msse3'.split())
- # not needed yet, is for open shading language
- # cxxflags.append('-fno-rtti'.split())
- # defs.append('BOOST_NO_RTTI BOOST_NO_TYPEID'.split())
-
incs.extend('. bvh render device kernel kernel/osl kernel/svm util subd'.split())
incs.extend('#intern/guardedalloc #source/blender/makesrna #source/blender/makesdna'.split())
incs.extend('#source/blender/blenloader ../../source/blender/makesrna/intern'.split())
@@ -39,5 +30,20 @@
incs.append(cycles['BF_BOOST_INC'])
incs.append(cycles['BF_PYTHON_INC'])
-cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], compileflags=[None], cc_compileflags=ccflags, cxx_compileflags=cxxflags)
+# optimized kernel
+if env['WITH_BF_RAYOPTIMIZATION']:
+ optim_cxxflags = []
+ if env['OURPLATFORM'] in ('win32-vc', 'win64-vc'):
+ optim_cxxflags.append('/Ox /Ot /arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /EHsc /fp:fast'.split())
+ else:
+ optim_cxxflags.append('-ffast-math -msse -msse2 -msse3'.split())
+
+ optim_defs = defs + ['WITH_OPTIMIZED_KERNEL']
+ optim_sources = [path.join('kernel', 'kernel_optimized.cpp')]
+
+ cycles_optim = cycles.Clone()
+ cycles_optim.BlenderLib('bf_intern_cycles_optimized', optim_sources, incs, optim_defs, libtype=['intern'], priority=[0], compileflags=[None], cxx_compileflags=optim_cxxflags)
+
+cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], compileflags=[None])
+
Modified: trunk/blender/intern/cycles/device/device_cpu.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device_cpu.cpp 2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/device/device_cpu.cpp 2011-11-15 15:13:38 UTC (rev 41882)
@@ -48,6 +48,9 @@
{
kg = kernel_globals_create();
+ /* do now to avoid thread issues */
+ system_cpu_support_optimized();
+
if(threads_num == 0)
threads_num = system_cpu_thread_count();
@@ -155,14 +158,28 @@
OSLShader::thread_init(kg);
#endif
- for(int y = task.y; y < task.y + task.h; y++) {
- for(int x = task.x; x < task.x + task.w; x++)
- kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
+#ifdef WITH_OPTIMIZED_KERNEL
+ if(system_cpu_support_optimized()) {
+ for(int y = task.y; y < task.y + task.h; y++) {
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
- if(tasks.worker_cancel())
- break;
+ if(tasks.worker_cancel())
+ break;
+ }
}
+ else
+#endif
+ {
+ for(int y = task.y; y < task.y + task.h; y++) {
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
+ if(tasks.worker_cancel())
+ break;
+ }
+ }
+
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_free(kg);
@@ -171,10 +188,19 @@
void thread_tonemap(DeviceTask& task)
{
- for(int y = task.y; y < task.y + task.h; y++) {
- for(int x = task.x; x < task.x + task.w; x++)
- kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
+#ifdef WITH_OPTIMIZED_KERNEL
+ if(system_cpu_support_optimized()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
}
+ else
+#endif
+ {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
+ }
}
void thread_displace(DeviceTask& task)
@@ -184,13 +210,26 @@
OSLShader::thread_init(kg);
#endif
- for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
- kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
+#ifdef WITH_OPTIMIZED_KERNEL
+ if(system_cpu_support_optimized()) {
+ for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
+ kernel_cpu_optimized_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
- if(tasks.worker_cancel())
- break;
+ if(tasks.worker_cancel())
+ break;
+ }
}
+ else
+#endif
+ {
+ for(int x = task.displace_x; x < task.displace_x + task.displace_w; x++) {
+ kernel_cpu_displace(kg, (uint4*)task.displace_input, (float3*)task.displace_offset, x);
+ if(tasks.worker_cancel())
+ break;
+ }
+ }
+
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_free(kg);
Modified: trunk/blender/intern/cycles/kernel/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/kernel/CMakeLists.txt 2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/kernel/CMakeLists.txt 2011-11-15 15:13:38 UTC (rev 41882)
@@ -8,6 +8,7 @@
set(SRC
kernel.cpp
+ kernel_optimized.cpp
kernel.cl
kernel.cu
)
@@ -123,11 +124,15 @@
add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_SVM_HEADERS})
+if(WITH_CYCLES_OPTIMIZED_KERNEL)
+ SET_SOURCE_FILES_PROPERTIES(kernel_optimized.cpp PROPERTIES COMPILE_FLAGS ${CYCLES_OPTIMIZED_KERNEL_FLAGS})
+endif()
+
if(WITH_CYCLES_CUDA)
add_dependencies(cycles_kernel cycles_kernel_cuda)
endif()
-# OPENCL kernel
+# OpenCL kernel
#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
#add_custom_command(
@@ -142,3 +147,4 @@
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
+
Modified: trunk/blender/intern/cycles/kernel/kernel.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel.h 2011-11-15 14:58:14 UTC (rev 41881)
+++ trunk/blender/intern/cycles/kernel/kernel.h 2011-11-15 15:13:38 UTC (rev 41882)
@@ -38,9 +38,14 @@
void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y);
void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y);
-
void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
+#ifdef WITH_OPTIMIZED_KERNEL
+void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y);
+void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y);
+void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
+#endif
+
CCL_NAMESPACE_END
#endif /* __KERNEL_H__ */
Copied: trunk/blender/intern/cycles/kernel/kernel_optimized.cpp (from rev 41876, trunk/blender/intern/cycles/kernel/kernel.cpp)
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_optimized.cpp (rev 0)
+++ trunk/blender/intern/cycles/kernel/kernel_optimized.cpp 2011-11-15 15:13:38 UTC (rev 41882)
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2011, Blender Foundation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list