[Bf-blender-cvs] [05aefab] experimental-build: Cycles Experimental Build v4

Lukas Stockner noreply at git.blender.org
Tue Dec 20 16:16:09 CET 2016


Commit: 05aefabb7b9c16c6774c5d0f758b2d76b59f21b9
Author: Lukas Stockner
Date:   Tue Dec 20 16:00:56 2016 +0100
Branches: experimental-build
https://developer.blender.org/rB05aefabb7b9c16c6774c5d0f758b2d76b59f21b9

Cycles Experimental Build v4

Generally the same as last time, but now the denoising black spot bug on Windows should be fixed.

Also, this version includes skipping attributes for nodes that are optimized away (D2285) and an operator for saving the current viewport rendering state as a Render Result.

===================================================================

M	CMakeLists.txt
M	intern/cycles/CMakeLists.txt
M	intern/cycles/app/CMakeLists.txt
A	intern/cycles/app/cycles_denoising.cpp
A	intern/cycles/app/cycles_denoising.h
A	intern/cycles/app/cycles_dithering.cpp
M	intern/cycles/app/cycles_standalone.cpp
A	intern/cycles/app/cycles_standalone.h
M	intern/cycles/blender/CMakeLists.txt
M	intern/cycles/blender/addon/__init__.py
M	intern/cycles/blender/addon/engine.py
M	intern/cycles/blender/addon/properties.py
M	intern/cycles/blender/addon/ui.py
M	intern/cycles/blender/blender_object.cpp
M	intern/cycles/blender/blender_python.cpp
M	intern/cycles/blender/blender_session.cpp
M	intern/cycles/blender/blender_session.h
M	intern/cycles/blender/blender_shader.cpp
M	intern/cycles/blender/blender_sync.cpp
M	intern/cycles/blender/blender_sync.h
M	intern/cycles/blender/blender_util.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_task.cpp
M	intern/cycles/device/device_task.h
M	intern/cycles/device/opencl/opencl_mega.cpp
M	intern/cycles/device/opencl/opencl_split.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/bvh/bvh.h
M	intern/cycles/kernel/bvh/bvh_shadow_all.h
M	intern/cycles/kernel/bvh/qbvh_shadow_all.h
M	intern/cycles/kernel/closure/bsdf.h
M	intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
M	intern/cycles/kernel/closure/bsdf_diffuse.h
M	intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
A	intern/cycles/kernel/closure/bsdf_disney_diffuse.h
A	intern/cycles/kernel/closure/bsdf_disney_sheen.h
M	intern/cycles/kernel/closure/bsdf_microfacet.h
M	intern/cycles/kernel/closure/bsdf_microfacet_multi.h
M	intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
M	intern/cycles/kernel/closure/bsdf_oren_nayar.h
M	intern/cycles/kernel/closure/bsdf_phong_ramp.h
M	intern/cycles/kernel/closure/bsdf_toon.h
M	intern/cycles/kernel/closure/bsdf_util.h
M	intern/cycles/kernel/closure/bssrdf.h
A	intern/cycles/kernel/filter/filter.h
A	intern/cycles/kernel/filter/filter_features.h
A	intern/cycles/kernel/filter/filter_features_sse.h
A	intern/cycles/kernel/filter/filter_final_pass_impl.h
A	intern/cycles/kernel/filter/filter_nlm.h
A	intern/cycles/kernel/filter/filter_prefilter.h
A	intern/cycles/kernel/filter/filter_wlr.h
A	intern/cycles/kernel/filter/filter_wlr_cuda.h
A	intern/cycles/kernel/filter/filter_wlr_sse.h
M	intern/cycles/kernel/geom/geom_object.h
M	intern/cycles/kernel/geom/geom_triangle.h
M	intern/cycles/kernel/kernel.h
M	intern/cycles/kernel/kernel_accumulate.h
M	intern/cycles/kernel/kernel_bake.h
M	intern/cycles/kernel/kernel_compat_cpu.h
M	intern/cycles/kernel/kernel_compat_cuda.h
M	intern/cycles/kernel/kernel_emission.h
M	intern/cycles/kernel/kernel_light.h
M	intern/cycles/kernel/kernel_passes.h
M	intern/cycles/kernel/kernel_path.h
M	intern/cycles/kernel/kernel_path_branched.h
M	intern/cycles/kernel/kernel_path_state.h
M	intern/cycles/kernel/kernel_path_surface.h
M	intern/cycles/kernel/kernel_path_volume.h
M	intern/cycles/kernel/kernel_projection.h
M	intern/cycles/kernel/kernel_random.h
M	intern/cycles/kernel/kernel_shader.h
M	intern/cycles/kernel/kernel_shadow.h
M	intern/cycles/kernel/kernel_subsurface.h
M	intern/cycles/kernel/kernel_textures.h
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/kernel_volume.h
M	intern/cycles/kernel/kernels/cpu/kernel_avx.cpp
M	intern/cycles/kernel/kernels/cpu/kernel_avx2.cpp
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/kernel/kernels/cpu/kernel_sse2.cpp
M	intern/cycles/kernel/kernels/cpu/kernel_sse3.cpp
M	intern/cycles/kernel/kernels/cpu/kernel_sse41.cpp
M	intern/cycles/kernel/kernels/cuda/kernel.cu
M	intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
M	intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
M	intern/cycles/kernel/osl/osl_bssrdf.cpp
M	intern/cycles/kernel/osl/osl_closures.cpp
M	intern/cycles/kernel/osl/osl_closures.h
M	intern/cycles/kernel/osl/osl_services.cpp
M	intern/cycles/kernel/shaders/CMakeLists.txt
A	intern/cycles/kernel/shaders/node_disney_bsdf.osl
A	intern/cycles/kernel/shaders/node_ies_light.osl
M	intern/cycles/kernel/shaders/stdosl.h
M	intern/cycles/kernel/split/kernel_background_buffer_update.h
M	intern/cycles/kernel/split/kernel_data_init.h
M	intern/cycles/kernel/split/kernel_direct_lighting.h
M	intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M	intern/cycles/kernel/split/kernel_lamp_emission.h
M	intern/cycles/kernel/split/kernel_next_iteration_setup.h
M	intern/cycles/kernel/svm/svm.h
M	intern/cycles/kernel/svm/svm_closure.h
M	intern/cycles/kernel/svm/svm_displace.h
M	intern/cycles/kernel/svm/svm_geometry.h
A	intern/cycles/kernel/svm/svm_ies.h
M	intern/cycles/kernel/svm/svm_image.h
M	intern/cycles/kernel/svm/svm_types.h
M	intern/cycles/render/CMakeLists.txt
M	intern/cycles/render/buffers.cpp
M	intern/cycles/render/buffers.h
A	intern/cycles/render/denoising.cpp
A	intern/cycles/render/denoising.h
M	intern/cycles/render/film.cpp
M	intern/cycles/render/film.h
M	intern/cycles/render/graph.cpp
M	intern/cycles/render/graph.h
M	intern/cycles/render/image.cpp
M	intern/cycles/render/image.h
M	intern/cycles/render/integrator.cpp
M	intern/cycles/render/integrator.h
M	intern/cycles/render/light.cpp
M	intern/cycles/render/light.h
M	intern/cycles/render/mesh.cpp
M	intern/cycles/render/nodes.cpp
M	intern/cycles/render/nodes.h
M	intern/cycles/render/object.cpp
M	intern/cycles/render/object.h
M	intern/cycles/render/osl.cpp
M	intern/cycles/render/scene.h
M	intern/cycles/render/session.cpp
M	intern/cycles/render/session.h
M	intern/cycles/render/sobol.cpp
M	intern/cycles/render/sobol.h
M	intern/cycles/render/svm.cpp
M	intern/cycles/render/tile.cpp
M	intern/cycles/render/tile.h
M	intern/cycles/util/CMakeLists.txt
M	intern/cycles/util/util_atomic.h
M	intern/cycles/util/util_debug.cpp
M	intern/cycles/util/util_debug.h
M	intern/cycles/util/util_math.h
A	intern/cycles/util/util_math_matrix.h
M	intern/cycles/util/util_simd.h
M	intern/cycles/util/util_system.cpp
M	intern/cycles/util/util_system.h
M	intern/cycles/util/util_texture.h
M	intern/cycles/util/util_types.h
M	intern/opencolorio/fallback_impl.cc
M	intern/opencolorio/gpu_shader_display_transform.glsl
M	intern/opencolorio/ocio_capi.cc
M	intern/opencolorio/ocio_capi.h
M	intern/opencolorio/ocio_impl.h
M	intern/opencolorio/ocio_impl_glsl.cc
M	release/scripts/startup/bl_ui/space_image.py
M	release/scripts/startup/nodeitems_builtins.py
M	source/blender/blenkernel/BKE_node.h
M	source/blender/blenkernel/BKE_scene.h
M	source/blender/blenkernel/intern/colortools.c
M	source/blender/blenkernel/intern/node.c
M	source/blender/blenkernel/intern/scene.c
M	source/blender/blenloader/intern/readfile.c
M	source/blender/blenloader/intern/versioning_270.c
M	source/blender/editors/interface/interface_templates.c
M	source/blender/editors/render/render_intern.h
M	source/blender/editors/render/render_internal.c
M	source/blender/editors/render/render_opengl.c
M	source/blender/editors/render/render_ops.c
M	source/blender/editors/render/render_preview.c
M	source/blender/editors/render/render_view.c
M	source/blender/editors/space_image/image_buttons.c
M	source/blender/editors/space_image/image_intern.h
M	source/blender/editors/space_image/image_ops.c
M	source/blender/editors/space_image/space_image.c
M	source/blender/editors/space_node/drawnode.c
M	source/blender/gpu/shaders/gpu_shader_material.glsl
M	source/blender/imbuf/intern/colormanagement.c
M	source/blender/makesdna/DNA_color_types.h
M	source/blender/makesdna/DNA_node_types.h
M	source/blender/makesdna/DNA_scene_types.h
M	source/blender/makesrna/RNA_access.h
M	source/blender/makesrna/intern/rna_color.c
M	source/blender/makesrna/intern/rna_nodetree.c
M	source/blender/makesrna/intern/rna_render.c
M	source/blender/makesrna/intern/rna_scene.c
M	source/blender/nodes/CMakeLists.txt
M	source/blender/nodes/NOD_shader.h
M	source/blender/nodes/NOD_static_types.h
A	source/blender/nodes/shader/nodes/node_shader_bsdf_disney.c
A	source/blender/nodes/shader/nodes/node_shader_ies_light.c
M	source/blender/render/extern/include/RE_engine.h
M	source/blender/render/extern/include/RE_pipeline.h
M	source/blender/render/intern/include/render_result.h
M	source/blender/render/intern/source/envmap.c
M	source/blender/render/intern/source/external_engine.c
M	source/blender/render/intern/source/pipeline.c
M	source/blender/render/intern/source/render_result.c
M	source/blender/windowmanager/WM_api.h
M	source/blenderplayer/bad_level_call_stubs/stubs.c

===================================================================

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8055f4f..1ab5f8a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -409,10 +409,14 @@ mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
 unset(PLATFORM_DEFAULT)
 option(WITH_CYCLES_LOGGING	"Build Cycles with logging support" ON)
 option(WITH_CYCLES_DEBUG	"Build Cycles with extra debug capabilities" OFF)
+option(WITH_CYCLES_DEBUG_FILTER	"Build Cycles with extra debug capabilities in the denoising filter" OFF)
 option(WITH_CYCLES_NATIVE_ONLY	"Build Cycles with native kernel only (which fits current CPU, use for development only)" OFF)
+option(WITH_CYCLES_DEBUG_FPE    "Build Cycles with floating point exceptions enabled for easier debugging of numerical issues (only for CPU rendering)" OFF)
 mark_as_advanced(WITH_CYCLES_LOGGING)
 mark_as_advanced(WITH_CYCLES_DEBUG)
+mark_as_advanced(WITH_CYCLES_DEBUG_FILTER)
 mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
+mark_as_advanced(WITH_CYCLES_DEBUG_FPE)
 
 option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
 mark_as_advanced(WITH_CUDA_DYNLOAD)
@@ -757,6 +761,9 @@ if(WITH_CYCLES)
 	if(WITH_CYCLES_OSL)
 		set(WITH_LLVM ON CACHE BOOL "" FORCE)
 	endif()
+	if(WITH_CYCLES_DEBUG_FPE AND APPLE)
+		message(FATAL_ERROR "WITH_CYCLES_DEBUG_FPE is not supported on OSX!")
+	endif()
 else()
 	set(WITH_CYCLES_OSL OFF)
 endif()
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 97854a8..ea7d137 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -109,6 +109,10 @@ if(CXX_HAS_AVX2)
 	add_definitions(-DWITH_KERNEL_AVX2)
 endif()
 
+if(WITH_CYCLES_DEBUG_FPE)
+	add_definitions(-DWITH_CYCLES_DEBUG_FPE)
+endif()
+
 if(WITH_CYCLES_OSL)
 	if(WIN32 AND MSVC)
 		set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
@@ -200,6 +204,10 @@ if(WITH_CYCLES_DEBUG)
 	add_definitions(-DWITH_CYCLES_DEBUG)
 endif()
 
+if(WITH_CYCLES_DEBUG_FILTER)
+	add_definitions(-DWITH_CYCLES_DEBUG_FILTER)
+endif()
+
 include_directories(
 	SYSTEM
 	${BOOST_INCLUDE_DIR}
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index 8cd499b..ff72bd7 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -107,8 +107,11 @@ endmacro()
 if(WITH_CYCLES_STANDALONE)
 	set(SRC
 		cycles_standalone.cpp
+		cycles_standalone.h
 		cycles_xml.cpp
 		cycles_xml.h
+		cycles_denoising.cpp
+		cycles_denoising.h
 	)
 	add_executable(cycles ${SRC})
 	cycles_target_link_libraries(cycles)
diff --git a/intern/cycles/app/cycles_denoising.cpp b/intern/cycles/app/cycles_denoising.cpp
new file mode 100644
index 0000000..a5c0130
--- /dev/null
+++ b/intern/cycles/app/cycles_denoising.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cycles_denoising.h"
+
+#include "denoising.h"
+
+#include "util_image.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Expands the single "%Xd" file pattern (X = one digit, the zero-padded width) over options.frame_range and hands the names to denoise_standalone(); returns false on a malformed pattern. With frame_range.y < frame_range.x the list is passed on empty. */
+bool cycles_denoising_session()
+{
+	vector<string> frames;
+	if(options.frame_range.y >= options.frame_range.x) {
+		/* Check the path count before indexing (filepaths[0] of an empty list is undefined behavior); an empty pattern then fails the test below. */
+		const string pattern = (options.filepaths.size() == 1)? options.filepaths[0]: string();
+		const size_t pos = pattern.find("%");
+		if(pos == string::npos || pattern.size() <= pos+3 || !isdigit((unsigned char)pattern[pos+1]) || pattern[pos+2] != 'd') {
+			printf("ERROR: When using the frame range option, specify the image file as a single filename including %%Xd, where X is the length of the frame numbers.\n");
+			delete options.session;
+			return false;
+		}
+		/* Build the "%0Xd" format from the pad digit and substitute every frame number for the "%Xd" placeholder. */
+		const char pad_length = pattern[pos+1];
+		for(int frame = options.frame_range.x; frame <= options.frame_range.y; frame++) {
+			string name = pattern.substr(0, pos);
+			name += string_printf(string_printf("%%0%cd", pad_length).c_str(), frame);
+			name += pattern.substr(pos+3);
+			frames.push_back(name);
+		}
+	}
+	return denoise_standalone(options.session_params, frames, options.denoise_frame);
+}
+
+CCL_NAMESPACE_END
\ No newline at end of file
diff --git a/intern/cycles/render/sobol.h b/intern/cycles/app/cycles_denoising.h
similarity index 68%
copy from intern/cycles/render/sobol.h
copy to intern/cycles/app/cycles_denoising.h
index 574f148..ef18e17 100644
--- a/intern/cycles/render/sobol.h
+++ b/intern/cycles/app/cycles_denoising.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2013 Blender Foundation
+ * Copyright 2016 Blender Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,19 +14,15 @@
  * limitations under the License.
  */
 
-#ifndef __SOBOL_H__
-#define __SOBOL_H__
+ #include "cycles_standalone.h"
 
-#include "util_types.h"
+#ifndef __CYCLES_DENOISING_H__
+#define __CYCLES_DENOISING_H__
 
 CCL_NAMESPACE_BEGIN
 
-#define SOBOL_BITS 32
-#define SOBOL_MAX_DIMENSIONS 21201
-
-void sobol_generate_direction_vectors(uint vectors[][SOBOL_BITS], int dimensions);
+bool cycles_denoising_session();
 
 CCL_NAMESPACE_END
 
-#endif /* __SOBOL_H__ */
-
+#endif /* __CYCLES_DENOISING_H__ */
diff --git a/intern/cycles/app/cycles_dithering.cpp b/intern/cycles/app/cycles_dithering.cpp
new file mode 100644
index 0000000..e1ccef5
--- /dev/null
+++ b/intern/cycles/app/cycles_dithering.cpp
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This code implements a matrix optimization based on simulated annealing
+ * that minimizes the energy function described in the paper "Blue-noise Dithered Sampling".
+ *
+ * Dimensionality and size of the matrix are hardcoded as #defines below.
+ * It includes both an easily readable scalar implementation and an SSE4.1-optimized code path (which only supports DIM=2 currently).
+ *
+ * For full speed, compile with: g++ -o cycles_dithering cycles_dithering.cpp -O3 -march=native --std=c++11
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <math.h>
+#include <random>
+
+std::mt19937 rng(time(0)); /* Random generator, seeded once from the current time. */
+/* Global state shared by the functions below.
+ * MAT() addresses mat as DIM consecutive planes of SIZE*SIZE floats.
+ * NOTE(review): starting_temp and num_iter are presumably the initial annealing temperature and
+ * the iteration count; they are not used in the part of the file visible here — confirm. */
+float *mat;
+float starting_temp;
+uint64_t num_iter;
+
+/* Note: SSE code paths are hardcoded for DIM = 2. */
+#define DIM 2
+
+/* Note: The random swapping code is only designed for SIZE=2048 or less.
+ * The three values below have to be changed together: MAT() and WRAP() rely on
+ * SIZE == 1 << SIZEBITS and SIZEMASK == SIZE - 1. */
+#define SIZE 128
+#define SIZEMASK 0x7f
+#define SIZEBITS 7
+/* Value d of pixel (x, y): row y starts at y << SIZEBITS inside a plane, plane d starts at d << (2*SIZEBITS). */
+#define MAT(x, y, d) mat[(((y)<<SIZEBITS)+(x)) + ((d)<<(SIZEBITS*2))]
+/* The SSE code is compiled in only for DIM == 2 and when the compiler defines __SSE4_1__. */
+#if (DIM == 2) && __SSE4_1__
+#define USE_SSE_CODE
+#endif
+
+/* To speed up the process, only nearby pixels are considered.
+ * For pixels that are further away, the weight is nearly zero anyways. */
+#define WINDOW 10
+
+/* Maps a coordinate back into the matrix by masking it with SIZEMASK. */
+#define WRAP(x) ((x) & SIZEMASK)
+
+/* Fairly rough approximation of the exponential function.
+ *
+ * Evaluates (1 + x/256)^256: the eight squarings below raise the base to the
+ * power 2^8 = 256.
+ * NOTE(review): for x < -256 the base becomes negative and the even power turns it
+ * positive again, so the result grows instead of decaying towards zero. This looks
+ * usable only for x >= -256 — confirm that all callers stay inside that range. */
+inline float approx_exp(float x)
+{
+	x = (1.0f + x * (1.0f / 256.0f));
+	x *= x;
+	x *= x;
+	x *= x;
+	x *= x;
+	x *= x;
+	x *= x;
+	x *= x;
+	x *= x;
+	return x;
+}
+
+/* Decent approximation of the square root function.
+ *
+ * With SSE available this computes x * rsqrt(x) from the hardware reciprocal
+ * square root estimate, otherwise it falls back to sqrtf().
+ * Zero is handled explicitly: rsqrt(0) is +inf and inf * 0 is NaN, which would
+ * otherwise leak into the energy whenever two pixels hold identical values. */
+inline float approx_sqrt(float x)
+{
+#ifdef __SSE__
+	if(x == 0.0f) {
+		return 0.0f;
+	}
+	return _mm_cvtss_f32(_mm_mul_ss(_mm_rsqrt_ss(_mm_set_ss(x)), _mm_set_ss(x)));
+#else
+	return sqrtf(x);
+#endif
+}
+
+/* Energy contribution of the pixel pair p = (px, py) and q = (qx, qy).
+ *
+ * Evaluates approx_exp(-(dx*dx + dy*dy)/4.41 - n^(DIM/2)), where (dx, dy) = p - q is
+ * taken before q is wrapped and n is the Euclidean distance between the DIM matrix
+ * values of the two pixels. A pixel paired with itself yields 0.
+ * Only q is passed through WRAP(); p indexes the matrix unchanged. */
+inline float energy_pq(int px, int py, int qx, int qy)
+{
+	if(px == qx && py == qy) return 0.0f;
+	int dx = px-qx;
+	int dy = py-qy;
+	qx = WRAP(qx);
+	qy = WRAP(qy);
+	float dist = -(dx*dx+dy*dy)*(1.0f/4.41f);
+	float d_p = 0.0f; /* Sum of the squared per-dimension differences, i.e. n*n. */
+	for(int d = 0; d < DIM; d++) {
+		float d_c = MAT(px, py, d) - MAT(qx, qy, d);
+		d_p += d_c*d_c;
+	}
+	if(DIM == 2)
+		d_p = approx_sqrt(d_p); /* (n*n)^(1/2) equals n^(DIM/2) for DIM == 2. */
+	else
+		d_p = powf(d_p, DIM * 0.25f); /* (n*n)^(DIM/4) equals n^(DIM/2). */
+	return approx_exp(dist - d_p);
+}
+
+/* Returns all the energy in that specific pixel, by looping over the window around the pixel.
+ *
+ * Adds up energy_pq() between (x, y) and every pixel whose offset lies in
+ * [-WINDOW, WINDOW] on both axes; the zero offset contributes 0 (see energy_pq()).
+ * Neighbor coordinates are passed on unwrapped, energy_pq() wraps them itself. */
+float pixel_energy_scalar(int x, int y)
+{
+	float energy = 0.0f;
+	for(int dy = -WINDOW; dy <= WINDOW; dy++) {
+		for(int dx = -WINDOW; dx <= WINDOW; dx++) {
+			energy += energy_pq(x, y, x+dx, y+dy);
+		}
+	}
+	return energy;
+}
+
+#ifdef USE_SSE_CODE
+inline __m128 approx_exp_sse(__m128 x)
+{ /* Four-lane counterpart of approx_exp(): evaluates (1 + x/256)^256 in every lane,
+   * the eight squarings below raising the base to the power 2^8 = 256. */
+	x = _mm_add_ps(_mm_set1_ps(1.0f), _mm_mul_ps(x, _mm_set1_ps(1.0f / 256.0f)));
+	x = _mm_mul_ps(x, x);
+	x = _mm_mul_ps(x, x);
+	x = _mm_mul_ps(x, x);
+	x = _mm_mul_ps(x, x);
+	x = _mm_mul_ps(x, x);
+	x = _mm_mul_ps(x, x);
+	x = _mm_mul_ps(x, x);
+	x = _mm_mul_ps(x, x);
+	return x;
+}
+
+/* Four-lane approximate square root: x * rsqrt(x) in every lane.
+ *
+ * Lanes that hold exactly zero are masked to 0 afterwards: rsqrt(0) is +inf and
+ * inf * 0 is NaN, which would otherwise poison the summed energy whenever two
+ * pixels hold identical values. */
+inline __m128 approx_sqrt_sse(__m128 x)
+{
+	const __m128 nonzero = _mm_cmpneq_ps(x, _mm_setzero_ps());
+	return _mm_and_ps(_mm_mul_ps(_mm_rsqrt_ps(x), x), nonzero);
+}
+/* Energy of one center pixel against four consecutive matrix entries at once (DIM == 2 only).
+ *
+ * center: center[0] and center[1] are compared against the four floats loaded from row and
+ *         from row + SIZE*SIZE respectively (the offset MAT() uses for the second plane).
+ * dx4, dy4: per-lane pixel offsets used for the spatial term.
+ * Returns, per lane, approx_exp_sse(-(dx*dx + dy*dy)/4.41 - approx_sqrt_sse(d1*d1 + d2*d2)).
+ * Unlike energy_pq(), nothing here zeroes the term of a pixel paired with itself.
+ * NOTE(review): the unaligned loads read four consecutive floats without any wrapping —
+ * presumably the caller makes sure that is valid at the matrix edge; confirm. */
+inline __m128 energy_pq_sse(__m128 *center, float *row, __m128 dx4, __m128 dy4)
+{
+	__m128 dist = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(dx4, dx4), _mm_mul_ps(dy4, dy4)), _mm_set1_ps(-1.0f / 4.41f));
+	__m128 d1 = _mm_sub_ps(center[0], _mm_loadu_ps(row)), d2 = _mm_sub_ps(center[1], _mm_loadu_ps(row + SIZE*SIZE));
+	__m128 dist2 = _mm_add_ps(_mm_mul_ps(d1, d1), _mm_mul_ps(d2, d2));
+	return approx_exp_sse(_mm_sub_ps(dist, approx_sqrt_sse(dist2)));
+}
+
+/* Same as above, but processes 4 pixels at a time. */
+float pixel_energy_sse(int x, int y)
+{
+	__m128 energy = _mm_setzero_ps();
+	__m128 center_pixel[2] = {_mm_set1_ps(MAT(x, y, 0)), _mm_set1_ps(MAT(x, y, 1))};
+	for(int dy = -WINDOW; dy <= WINDOW; dy++) {
+		int wy = WRAP(y+dy);
+		__m128 dy4 = _mm_set1_ps(dy);
+		for(int dx = -WINDOW; dx <= WINDOW; dx += 4) {
+			__m128 dx4 = _mm_add_ps(_mm_set1_ps(dx), _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f));
+			__m128 active = _mm_cmple_ps(dx4, _mm_set1_ps(WINDOW)

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list