[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [54299] trunk/blender/intern/cycles: Fix cycles intersection issue with overlapping faces on windows 32 bit and CPU
Brecht Van Lommel
brechtvanlommel at pandora.be
Mon Feb 4 17:12:38 CET 2013
Revision: 54299
http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=54299
Author: blendix
Date: 2013-02-04 16:12:37 +0000 (Mon, 04 Feb 2013)
Log Message:
-----------
Fix cycles intersection issue with overlapping faces on windows 32 bit and CPU
without SSE3 support, due to 80 bit precision float register being used for one
bounding box but not the one next to it.
Modified Paths:
--------------
trunk/blender/intern/cycles/CMakeLists.txt
trunk/blender/intern/cycles/SConscript
trunk/blender/intern/cycles/device/device_cpu.cpp
trunk/blender/intern/cycles/kernel/CMakeLists.txt
trunk/blender/intern/cycles/kernel/kernel.h
trunk/blender/intern/cycles/kernel/kernel_bvh.h
trunk/blender/intern/cycles/util/util_system.cpp
trunk/blender/intern/cycles/util/util_system.h
Added Paths:
-----------
trunk/blender/intern/cycles/kernel/kernel_sse2.cpp
trunk/blender/intern/cycles/kernel/kernel_sse3.cpp
Removed Paths:
-------------
trunk/blender/intern/cycles/kernel/kernel_optimized.cpp
Modified: trunk/blender/intern/cycles/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/CMakeLists.txt 2013-02-04 16:12:27 UTC (rev 54298)
+++ trunk/blender/intern/cycles/CMakeLists.txt 2013-02-04 16:12:37 UTC (rev 54299)
@@ -13,10 +13,12 @@
endif()
if(WIN32 AND MSVC)
- set(CYCLES_OPTIMIZED_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc")
+ set(CYCLES_SSE2_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc")
+ set(CYCLES_SSE3_KERNEL_FLAGS "/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /fp:fast -D_CRT_SECURE_NO_WARNINGS /EHsc")
elseif(CMAKE_COMPILER_IS_GNUCC)
- set(CYCLES_OPTIMIZED_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mfpmath=sse")
+ set(CYCLES_SSE2_KERNEL_FLAGS "-ffast-math -msse -msse2 -mfpmath=sse")
+ set(CYCLES_SSE3_KERNEL_FLAGS "-ffast-math -msse -msse2 -msse3 -mfpmath=sse")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math")
endif()
Modified: trunk/blender/intern/cycles/SConscript
===================================================================
--- trunk/blender/intern/cycles/SConscript 2013-02-04 16:12:27 UTC (rev 54298)
+++ trunk/blender/intern/cycles/SConscript 2013-02-04 16:12:37 UTC (rev 54299)
@@ -36,7 +36,8 @@
sources.remove(path.join('util', 'util_view.cpp'))
sources.remove(path.join('render', 'film_response.cpp'))
-sources.remove(path.join('kernel', 'kernel_optimized.cpp'))
+sources.remove(path.join('kernel', 'kernel_sse2.cpp'))
+sources.remove(path.join('kernel', 'kernel_sse3.cpp'))
incs = []
defs = []
@@ -73,22 +74,30 @@
# optimized kernel
if env['WITH_BF_RAYOPTIMIZATION']:
- optim_cxxflags = Split(env['CXXFLAGS'])
+ sse2_cxxflags = Split(env['CXXFLAGS'])
+ sse3_cxxflags = Split(env['CXXFLAGS'])
if env['OURPLATFORM'] == 'win32-vc':
- optim_cxxflags.append('/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split())
+ sse2_cxxflags.append('/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split())
+ sse3_cxxflags.append('/arch:SSE2 -D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split())
elif env['OURPLATFORM'] == 'win64-vc':
- optim_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split())
+ sse2_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split())
+ sse3_cxxflags.append('-D_CRT_SECURE_NO_WARNINGS /fp:fast /EHsc'.split())
else:
- optim_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mfpmath=sse'.split())
+ sse2_cxxflags.append('-ffast-math -msse -msse2 -mfpmath=sse'.split())
+ sse3_cxxflags.append('-ffast-math -msse -msse2 -msse3 -mfpmath=sse'.split())
defs.append('WITH_OPTIMIZED_KERNEL')
optim_defs = defs[:]
- optim_sources = [path.join('kernel', 'kernel_optimized.cpp')]
- cycles_optim = cycles.Clone()
- cycles_optim.BlenderLib('bf_intern_cycles_optimized', optim_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=optim_cxxflags)
+ cycles_sse3 = cycles.Clone()
+ sse3_sources = [path.join('kernel', 'kernel_sse3.cpp')]
+ cycles_sse3.BlenderLib('bf_intern_cycles_sse3', sse3_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse3_cxxflags)
+ cycles_sse2 = cycles.Clone()
+ sse2_sources = [path.join('kernel', 'kernel_sse2.cpp')]
+ cycles_sse2.BlenderLib('bf_intern_cycles_sse2', sse2_sources, incs, optim_defs, libtype=['intern'], priority=[10], cxx_compileflags=sse2_cxxflags)
+
cycles.BlenderLib('bf_intern_cycles', sources, incs, defs, libtype=['intern'], priority=[0], cxx_compileflags=cxxflags)
if env['WITH_BF_CYCLES_OSL']:
Modified: trunk/blender/intern/cycles/device/device_cpu.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device_cpu.cpp 2013-02-04 16:12:27 UTC (rev 54298)
+++ trunk/blender/intern/cycles/device/device_cpu.cpp 2013-02-04 16:12:37 UTC (rev 54299)
@@ -58,7 +58,8 @@
#endif
/* do now to avoid thread issues */
- system_cpu_support_optimized();
+ system_cpu_support_sse2();
+ system_cpu_support_sse3();
}
~CPUDevice()
@@ -170,7 +171,7 @@
int end_sample = tile.start_sample + tile.num_samples;
#ifdef WITH_OPTIMIZED_KERNEL
- if(system_cpu_support_optimized()) {
+ if(system_cpu_support_sse2()) {
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.cancelled()) {
if(task.need_finish_queue == false)
@@ -179,7 +180,7 @@
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
- kernel_cpu_optimized_path_trace(&kg, render_buffer, rng_state,
+ kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
@@ -189,6 +190,25 @@
task.update_progress(tile);
}
}
+ else if(system_cpu_support_sse3()) {
+ for(int sample = start_sample; sample < end_sample; sample++) {
+ if (task.get_cancel() || task_pool.cancelled()) {
+ if(task.need_finish_queue == false)
+ break;
+ }
+
+ for(int y = tile.y; y < tile.y + tile.h; y++) {
+ for(int x = tile.x; x < tile.x + tile.w; x++) {
+ kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
+ sample, x, y, tile.offset, tile.stride);
+ }
+ }
+
+ tile.sample = sample + 1;
+
+ task.update_progress(tile);
+ }
+ }
else
#endif
{
@@ -227,12 +247,18 @@
void thread_tonemap(DeviceTask& task)
{
#ifdef WITH_OPTIMIZED_KERNEL
- if(system_cpu_support_optimized()) {
+ if(system_cpu_support_sse2()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
- kernel_cpu_optimized_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
+ kernel_cpu_sse2_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, task.resolution, x, y, task.offset, task.stride);
}
+ else if(system_cpu_support_sse3()) {
+ for(int y = task.y; y < task.y + task.h; y++)
+ for(int x = task.x; x < task.x + task.w; x++)
+ kernel_cpu_sse3_tonemap(&kernel_globals, (uchar4*)task.rgba, (float*)task.buffer,
+ task.sample, task.resolution, x, y, task.offset, task.stride);
+ }
else
#endif
{
@@ -252,14 +278,22 @@
#endif
#ifdef WITH_OPTIMIZED_KERNEL
- if(system_cpu_support_optimized()) {
+ if(system_cpu_support_sse2()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
- kernel_cpu_optimized_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+ kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
}
}
+ else if(system_cpu_support_sse3()) {
+ for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
+ kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
+
+ if(task_pool.cancelled())
+ break;
+ }
+ }
else
#endif
{
Modified: trunk/blender/intern/cycles/kernel/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/kernel/CMakeLists.txt 2013-02-04 16:12:27 UTC (rev 54298)
+++ trunk/blender/intern/cycles/kernel/CMakeLists.txt 2013-02-04 16:12:37 UTC (rev 54299)
@@ -12,7 +12,8 @@
set(SRC
kernel.cpp
- kernel_optimized.cpp
+ kernel_sse2.cpp
+ kernel_sse3.cpp
kernel.cl
kernel.cu
)
@@ -149,7 +150,8 @@
add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS})
if(WITH_CYCLES_OPTIMIZED_KERNEL)
- set_source_files_properties(kernel_optimized.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_OPTIMIZED_KERNEL_FLAGS}")
+ set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
+ set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
endif()
if(WITH_CYCLES_CUDA)
Modified: trunk/blender/intern/cycles/kernel/kernel.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel.h 2013-02-04 16:12:27 UTC (rev 54298)
+++ trunk/blender/intern/cycles/kernel/kernel.h 2013-02-04 16:12:37 UTC (rev 54299)
@@ -44,12 +44,19 @@
int type, int i);
#ifdef WITH_OPTIMIZED_KERNEL
-void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
+void kernel_cpu_sse2_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
int sample, int x, int y, int offset, int stride);
-void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer,
+void kernel_cpu_sse2_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer,
int sample, int resolution, int x, int y, int offset, int stride);
-void kernel_cpu_optimized_shader(KernelGlobals *kg, uint4 *input, float4 *output,
+void kernel_cpu_sse2_shader(KernelGlobals *kg, uint4 *input, float4 *output,
int type, int i);
+
+void kernel_cpu_sse3_path_trace(KernelGlobals *kg, float *buffer, unsigned int *rng_state,
+ int sample, int x, int y, int offset, int stride);
+void kernel_cpu_sse3_tonemap(KernelGlobals *kg, uchar4 *rgba, float *buffer,
+ int sample, int resolution, int x, int y, int offset, int stride);
+void kernel_cpu_sse3_shader(KernelGlobals *kg, uint4 *input, float4 *output,
+ int type, int i);
#endif
CCL_NAMESPACE_END
Modified: trunk/blender/intern/cycles/kernel/kernel_bvh.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_bvh.h 2013-02-04 16:12:27 UTC (rev 54298)
+++ trunk/blender/intern/cycles/kernel/kernel_bvh.h 2013-02-04 16:12:37 UTC (rev 54299)
@@ -126,21 +126,21 @@
/* intersect ray against child nodes */
float3 ood = P * idir;
- float c0lox = n0xy.x * idir.x - ood.x;
- float c0hix = n0xy.y * idir.x - ood.x;
- float c0loy = n0xy.z * idir.y - ood.y;
- float c0hiy = n0xy.w * idir.y - ood.y;
- float c0loz = nz.x * idir.z - ood.z;
- float c0hiz = nz.y * idir.z - ood.z;
+ NO_EXTENDED_PRECISION float c0lox = n0xy.x * idir.x - ood.x;
+ NO_EXTENDED_PRECISION float c0hix = n0xy.y * idir.x - ood.x;
+ NO_EXTENDED_PRECISION float c0loy = n0xy.z * idir.y - ood.y;
+ NO_EXTENDED_PRECISION float c0hiy = n0xy.w * idir.y - ood.y;
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list