[Bf-blender-cvs] [d96ac3f] experimental-build: Another experimental Cycles build, second version
Lukas Stockner
noreply at git.blender.org
Mon Oct 3 18:08:17 CEST 2016
Commit: d96ac3ff3357c56d2460806441ffe2a0f5b5bcec
Author: Lukas Stockner
Date: Mon Oct 3 03:59:21 2016 +0200
Branches: experimental-build
https://developer.blender.org/rBd96ac3ff3357c56d2460806441ffe2a0f5b5bcec
Another experimental Cycles build, second version
This time with some CUDA fixes.
Contains:
- Cycles Denoising - NOTE: The Windows Black Square bug might still be present; the fix is not as easy as it seemed...
- Probabilistic Light Sampling Termination (D2217)
- Better progress estimation system for Cycles (D2214)
- Red Highlighting for invalid Shader Node connections (D2190)
- IES Textures for Cycles Lights (D1543)
- Blue-Noise Dithered Sampling (D2149)
- Microjittered sampling (P366)
- Light Group Render Passes - contain only light from lamps and/or meshes that are in the specified group (unsubmitted, just a quick test)
- Cycles Shadow Catcher (D1788)
It should be obvious from the name of this branch, but these changes are all highly experimental right now - especially when combined together. Don't expect to be able to use this for production work :)
===================================================================
M CMakeLists.txt
M intern/cycles/CMakeLists.txt
M intern/cycles/app/CMakeLists.txt
A intern/cycles/app/cycles_denoising.cpp
A intern/cycles/app/cycles_denoising.h
A intern/cycles/app/cycles_dithering.cpp
M intern/cycles/app/cycles_standalone.cpp
A intern/cycles/app/cycles_standalone.h
M intern/cycles/blender/addon/__init__.py
M intern/cycles/blender/addon/engine.py
M intern/cycles/blender/addon/properties.py
M intern/cycles/blender/addon/ui.py
M intern/cycles/blender/blender_object.cpp
M intern/cycles/blender/blender_python.cpp
M intern/cycles/blender/blender_session.cpp
M intern/cycles/blender/blender_session.h
M intern/cycles/blender/blender_shader.cpp
M intern/cycles/blender/blender_sync.cpp
M intern/cycles/blender/blender_sync.h
M intern/cycles/blender/blender_util.h
M intern/cycles/device/device.h
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/device/device_multi.cpp
M intern/cycles/device/device_network.cpp
M intern/cycles/device/device_opencl.cpp
M intern/cycles/device/device_task.cpp
M intern/cycles/device/device_task.h
M intern/cycles/kernel/CMakeLists.txt
M intern/cycles/kernel/bvh/bvh.h
M intern/cycles/kernel/bvh/bvh_shadow_all.h
M intern/cycles/kernel/bvh/qbvh_shadow_all.h
M intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
M intern/cycles/kernel/closure/bsdf_diffuse.h
M intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
M intern/cycles/kernel/closure/bsdf_microfacet.h
M intern/cycles/kernel/closure/bsdf_microfacet_multi.h
M intern/cycles/kernel/closure/bsdf_oren_nayar.h
M intern/cycles/kernel/closure/bsdf_phong_ramp.h
M intern/cycles/kernel/closure/bsdf_toon.h
M intern/cycles/kernel/closure/bssrdf.h
M intern/cycles/kernel/geom/geom_object.h
M intern/cycles/kernel/geom/geom_triangle.h
M intern/cycles/kernel/kernel_accumulate.h
M intern/cycles/kernel/kernel_bake.h
M intern/cycles/kernel/kernel_compat_cpu.h
M intern/cycles/kernel/kernel_compat_cuda.h
M intern/cycles/kernel/kernel_emission.h
A intern/cycles/kernel/kernel_filter.h
A intern/cycles/kernel/kernel_filter_old.h
A intern/cycles/kernel/kernel_filter_pre.h
A intern/cycles/kernel/kernel_filter_util.h
M intern/cycles/kernel/kernel_light.h
M intern/cycles/kernel/kernel_passes.h
M intern/cycles/kernel/kernel_path.h
M intern/cycles/kernel/kernel_path_branched.h
M intern/cycles/kernel/kernel_path_state.h
M intern/cycles/kernel/kernel_path_surface.h
M intern/cycles/kernel/kernel_path_volume.h
M intern/cycles/kernel/kernel_projection.h
M intern/cycles/kernel/kernel_random.h
M intern/cycles/kernel/kernel_shader.h
M intern/cycles/kernel/kernel_shadow.h
M intern/cycles/kernel/kernel_textures.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernel_volume.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
M intern/cycles/kernel/kernels/opencl/kernel_direct_lighting.cl
M intern/cycles/kernel/kernels/opencl/kernel_next_iteration_setup.cl
M intern/cycles/kernel/osl/osl_services.cpp
M intern/cycles/kernel/shaders/CMakeLists.txt
A intern/cycles/kernel/shaders/node_ies_light.osl
M intern/cycles/kernel/split/kernel_background_buffer_update.h
M intern/cycles/kernel/split/kernel_data_init.h
M intern/cycles/kernel/split/kernel_direct_lighting.h
M intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M intern/cycles/kernel/split/kernel_lamp_emission.h
M intern/cycles/kernel/split/kernel_next_iteration_setup.h
M intern/cycles/kernel/svm/svm.h
M intern/cycles/kernel/svm/svm_closure.h
M intern/cycles/kernel/svm/svm_displace.h
A intern/cycles/kernel/svm/svm_ies.h
M intern/cycles/kernel/svm/svm_image.h
M intern/cycles/kernel/svm/svm_types.h
M intern/cycles/render/CMakeLists.txt
M intern/cycles/render/bake.cpp
M intern/cycles/render/bake.h
M intern/cycles/render/buffers.cpp
M intern/cycles/render/buffers.h
A intern/cycles/render/denoising.cpp
A intern/cycles/render/denoising.h
M intern/cycles/render/film.cpp
M intern/cycles/render/film.h
M intern/cycles/render/image.cpp
M intern/cycles/render/image.h
M intern/cycles/render/integrator.cpp
M intern/cycles/render/integrator.h
M intern/cycles/render/light.cpp
M intern/cycles/render/light.h
M intern/cycles/render/mesh.cpp
M intern/cycles/render/nodes.cpp
M intern/cycles/render/nodes.h
M intern/cycles/render/object.cpp
M intern/cycles/render/object.h
M intern/cycles/render/scene.h
M intern/cycles/render/session.cpp
M intern/cycles/render/session.h
M intern/cycles/render/sobol.cpp
M intern/cycles/render/sobol.h
M intern/cycles/render/tile.cpp
M intern/cycles/render/tile.h
M intern/cycles/util/CMakeLists.txt
M intern/cycles/util/util_debug.cpp
M intern/cycles/util/util_debug.h
M intern/cycles/util/util_math.h
A intern/cycles/util/util_math_matrix.h
M intern/cycles/util/util_progress.h
M intern/cycles/util/util_simd.h
M intern/cycles/util/util_system.cpp
M intern/cycles/util/util_system.h
M intern/cycles/util/util_texture.h
M intern/cycles/util/util_time.h
M intern/cycles/util/util_types.h
M intern/opencolorio/fallback_impl.cc
M intern/opencolorio/gpu_shader_display_transform.glsl
M intern/opencolorio/ocio_capi.cc
M intern/opencolorio/ocio_capi.h
M intern/opencolorio/ocio_impl.h
M intern/opencolorio/ocio_impl_glsl.cc
M release/scripts/startup/bl_ui/space_image.py
M release/scripts/startup/nodeitems_builtins.py
M source/blender/blenkernel/BKE_node.h
M source/blender/blenkernel/BKE_scene.h
M source/blender/blenkernel/intern/colortools.c
M source/blender/blenkernel/intern/node.c
M source/blender/blenkernel/intern/scene.c
M source/blender/blenloader/intern/readfile.c
M source/blender/blenloader/intern/versioning_270.c
M source/blender/editors/interface/interface_templates.c
M source/blender/editors/render/render_intern.h
M source/blender/editors/render/render_internal.c
M source/blender/editors/space_image/image_buttons.c
M source/blender/editors/space_image/image_intern.h
M source/blender/editors/space_image/image_ops.c
M source/blender/editors/space_image/space_image.c
M source/blender/editors/space_node/drawnode.c
M source/blender/imbuf/intern/colormanagement.c
M source/blender/makesdna/DNA_color_types.h
M source/blender/makesdna/DNA_node_types.h
M source/blender/makesdna/DNA_scene_types.h
M source/blender/makesrna/RNA_access.h
M source/blender/makesrna/intern/rna_color.c
M source/blender/makesrna/intern/rna_nodetree.c
M source/blender/makesrna/intern/rna_render.c
M source/blender/makesrna/intern/rna_scene.c
M source/blender/nodes/CMakeLists.txt
M source/blender/nodes/NOD_shader.h
M source/blender/nodes/NOD_static_types.h
A source/blender/nodes/shader/nodes/node_shader_ies_light.c
M source/blender/render/extern/include/RE_engine.h
M source/blender/render/extern/include/RE_pipeline.h
M source/blender/render/intern/include/render_result.h
M source/blender/render/intern/source/external_engine.c
M source/blender/render/intern/source/pipeline.c
M source/blender/render/intern/source/render_result.c
M source/blender/windowmanager/WM_api.h
M source/blenderplayer/bad_level_call_stubs/stubs.c
===================================================================
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e061fab..06db9fc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -409,10 +409,14 @@ mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
unset(PLATFORM_DEFAULT)
option(WITH_CYCLES_LOGGING "Build Cycles with logging support" ON)
option(WITH_CYCLES_DEBUG "Build Cycles with extra debug capabilities" OFF)
+option(WITH_CYCLES_DEBUG_FILTER "Build Cycles with extra debug capabilities in the denoising filter" OFF)
option(WITH_CYCLES_NATIVE_ONLY "Build Cycles with native kernel only (which fits current CPU, use for development only)" OFF)
+option(WITH_CYCLES_DEBUG_FPE "Build Cycles with floating point exceptions enabled for easier debugging of numerical issues (only for CPU rendering)" OFF)
mark_as_advanced(WITH_CYCLES_LOGGING)
mark_as_advanced(WITH_CYCLES_DEBUG)
+mark_as_advanced(WITH_CYCLES_DEBUG_FILTER)
mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
+mark_as_advanced(WITH_CYCLES_DEBUG_FPE)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
mark_as_advanced(WITH_CUDA_DYNLOAD)
@@ -733,6 +737,9 @@ if(WITH_CYCLES)
if(WITH_CYCLES_OSL)
set(WITH_LLVM ON CACHE BOOL "" FORCE)
endif()
+ if(WITH_CYCLES_DEBUG_FPE AND APPLE)
+ message(FATAL_ERROR "WITH_CYCLES_DEBUG_FPE is not supported on OSX!")
+ endif()
else()
set(WITH_CYCLES_OSL OFF)
endif()
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 97854a8..ea7d137 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -109,6 +109,10 @@ if(CXX_HAS_AVX2)
add_definitions(-DWITH_KERNEL_AVX2)
endif()
+if(WITH_CYCLES_DEBUG_FPE)
+ add_definitions(-DWITH_CYCLES_DEBUG_FPE)
+endif()
+
if(WITH_CYCLES_OSL)
if(WIN32 AND MSVC)
set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
@@ -200,6 +204,10 @@ if(WITH_CYCLES_DEBUG)
add_definitions(-DWITH_CYCLES_DEBUG)
endif()
+if(WITH_CYCLES_DEBUG_FILTER)
+ add_definitions(-DWITH_CYCLES_DEBUG_FILTER)
+endif()
+
include_directories(
SYSTEM
${BOOST_INCLUDE_DIR}
diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt
index 8cd499b..ff72bd7 100644
--- a/intern/cycles/app/CMakeLists.txt
+++ b/intern/cycles/app/CMakeLists.txt
@@ -107,8 +107,11 @@ endmacro()
if(WITH_CYCLES_STANDALONE)
set(SRC
cycles_standalone.cpp
+ cycles_standalone.h
cycles_xml.cpp
cycles_xml.h
+ cycles_denoising.cpp
+ cycles_denoising.h
)
add_executable(cycles ${SRC})
cycles_target_link_libraries(cycles)
diff --git a/intern/cycles/app/cycles_denoising.cpp b/intern/cycles/app/cycles_denoising.cpp
new file mode 100644
index 0000000..a5c0130
--- /dev/null
+++ b/intern/cycles/app/cycles_denoising.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cycles_denoising.h"
+
+#include "denoising.h"
+
+#include "util_image.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Entry point of the standalone denoising mode.
+ *
+ * When a valid frame range is set (frame_range.y >= frame_range.x), the single
+ * file path in options.filepaths is treated as a pattern containing "%Xd",
+ * X being one digit that gives the zero-padded width of the frame number.
+ * The pattern is expanded into one file name per frame of the range and the
+ * resulting list is handed to denoise_standalone() together with
+ * options.session_params and options.denoise_frame.
+ *
+ * Returns false (after printing an error and deleting options.session) when
+ * the pattern is invalid, otherwise the result of denoise_standalone(). */
+bool cycles_denoising_session()
+{
+	vector<string> frames;
+	if(options.frame_range.y >= options.frame_range.x) {
+		/* Only read the path after making sure that exactly one was given:
+		 * indexing an empty vector is undefined behavior. An empty pattern
+		 * fails the checks below and therefore reports the same error. */
+		string pattern = (options.filepaths.size() == 1)? options.filepaths[0]: string();
+		size_t pos = pattern.find("%");
+		/* Expect "%Xd" (X being a single digit) followed by at least one more
+		 * character. The cast keeps isdigit() well-defined for characters
+		 * with a negative value. */
+		if(pos == string::npos ||
+		   pattern.size() <= pos+3 ||
+		   !isdigit((unsigned char)pattern[pos+1]) ||
+		   pattern[pos+2] != 'd')
+		{
+			printf("ERROR: When using the frame range option, specify the image file as a single filename including %%Xd, where X is the length of the frame numbers.\n");
+			delete options.session;
+			return false;
+		}
+
+		/* Build the per-frame format "%0Xd" and splice the formatted frame
+		 * number in place of the three pattern characters. */
+		char pad_length = pattern[pos+1];
+		for(int frame = options.frame_range.x; frame <= options.frame_range.y; frame++) {
+			string name = pattern.substr(0, pos);
+			name += string_printf(string_printf("%%0%cd", pad_length).c_str(), frame);
+			name += pattern.substr(pos+3);
+			frames.push_back(name);
+		}
+	}
+
+	/* NOTE(review): without a frame range, frames stays empty and
+	 * options.filepaths is not forwarded - confirm that denoise_standalone()
+	 * is meant to be called like this. */
+	return denoise_standalone(options.session_params, frames, options.denoise_frame);
+}
+
+CCL_NAMESPACE_END
\ No newline at end of file
diff --git a/intern/cycles/render/sobol.h b/intern/cycles/app/cycles_denoising.h
similarity index 68%
copy from intern/cycles/render/sobol.h
copy to intern/cycles/app/cycles_denoising.h
index 574f148..ef18e17 100644
--- a/intern/cycles/render/sobol.h
+++ b/intern/cycles/app/cycles_denoising.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2011-2013 Blender Foundation
+ * Copyright 2016 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,19 +14,15 @@
* limitations under the License.
*/
-#ifndef __SOBOL_H__
-#define __SOBOL_H__
+ #include "cycles_standalone.h"
-#include "util_types.h"
+#ifndef __CYCLES_DENOISING_H__
+#define __CYCLES_DENOISING_H__
CCL_NAMESPACE_BEGIN
-#define SOBOL_BITS 32
-#define SOBOL_MAX_DIMENSIONS 21201
-
-void sobol_generate_direction_vectors(uint vectors[][SOBOL_BITS], int dimensions);
+bool cycles_denoising_session();
CCL_NAMESPACE_END
-#endif /* __SOBOL_H__ */
-
+#endif /* __CYCLES_DENOISING_H__ */
diff --git a/intern/cycles/app/cycles_dithering.cpp b/intern/cycles/app/cycles_dithering.cpp
new file mode 100644
index 0000000..e1ccef5
--- /dev/null
+++ b/intern/cycles/app/cycles_dithering.cpp
@@ -0,0 +1,308 @@
+/*
+ * Copyright 2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This code implements a matrix optimization based on simulated annealing
+ * that minimizes the energy function described in the paper "Blue-noise Dithered Sampling".
+ *
+ * Dimensionality and Size of the matrix are hardcoded as #defines below.
+ * It includes both an easily readable scalar implementation and an SSE4.1-optimized code path (which currently only supports DIM=2).
+ *
+ * For full speed, compile with: g++ -o cycles_dithering cycles_dithering.cpp -O3 -march=native --std=c++11
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <math.h>
+#include <random>
+
+std::mt19937 rng(time(0)); /* Global Mersenne Twister generator, seeded from the current time. */
+
+float *mat; /* Matrix storage; elements are addressed through the MAT() macro below. */
+float starting_temp; /* Presumably the initial annealing temperature - its use lies outside this excerpt. */
+uint64_t num_iter; /* Presumably the number of annealing iterations - its use lies outside this excerpt. */
+
+/* Note: SSE code paths are hardcoded for DIM = 2. */
+#define DIM 2
+
+/* Note: The random swapping code is only designed for SIZE=2048 or less.
+ *
+ * SIZE is the edge length of the square matrix. SIZEMASK (= SIZE - 1) and
+ * SIZEBITS (= log2(SIZE)) are written out by hand and have to be kept in sync
+ * whenever SIZE is changed.
+ *
+ * MAT(x, y, d) addresses dimension d of the entry at column x, row y: every
+ * dimension occupies its own contiguous SIZE*SIZE plane, stored row by row. */
+#define SIZE 128
+#define SIZEMASK 0x7f
+#define SIZEBITS 7
+
+#define MAT(x, y, d) mat[(((y)<<SIZEBITS)+(x)) + ((d)<<(SIZEBITS*2))]
+
+#if (DIM == 2) && __SSE4_1__
+#define USE_SSE_CODE
+#endif
+
+/* To speed up the process, only nearby pixels are considered.
+ * For pixels that are further away, the weight is nearly zero anyway.
+ * WINDOW is the radius of that neighborhood in pixels. */
+#define WINDOW 10
+
+/* Wraps a coordinate into [0, SIZE) by masking with SIZEMASK. */
+#define WRAP(x) ((x) & SIZEMASK)
+
+/* Fairly rough approximation of the exponential function.
+ *
+ * Evaluates (1 + x/256)^256, which approaches exp(x) as long as |x| is small
+ * compared to 256. The 256th power is computed by squaring eight times
+ * (2^8 = 256). For x < -256 the base turns negative and the result is no
+ * longer a meaningful approximation. */
+inline float approx_exp(float x)
+{
+ x = (1.0f + x * (1.0f / 256.0f));
+ x *= x;
+ x *= x;
+ x *= x;
+ x *= x;
+ x *= x;
+ x *= x;
+ x *= x;
+ x *= x;
+ return x;
+}
+
+/* Decent approximation of the square root function.
+ *
+ * When __SSE__ is defined, sqrt(x) is computed as x * rsqrt(x) using the
+ * approximate reciprocal square root intrinsic; otherwise this falls back to
+ * sqrtf().
+ *
+ * NOTE(review): for x == 0 the SSE path presumably evaluates 0 * inf = NaN
+ * instead of 0 - confirm that callers never pass an exact zero. */
+inline float approx_sqrt(float x)
+{
+#ifdef __SSE__
+ return _mm_cvtss_f32(_mm_mul_ss(_mm_rsqrt_ss(_mm_set_ss(x)), _mm_set_ss(x)));
+#else
+ return sqrtf(x);
+#endif
+}
+
+/* Energy contribution of the pixel pair p = (px, py), q = (qx, qy).
+ *
+ * Evaluates exp(-|p-q|^2 / 4.41 - |M(p)-M(q)|^(DIM/2)) with the approximate
+ * helpers above, where M() is the DIM-dimensional matrix value of a pixel.
+ * A pixel paired with itself contributes zero.
+ *
+ * The spatial distance is taken from the coordinates as passed in; only q is
+ * wrapped afterwards for the matrix lookup, so q may lie outside the matrix.
+ * p is looked up unwrapped - presumably callers always pass it inside
+ * [0, SIZE). */
+inline float energy_pq(int px, int py, int qx, int qy)
+{
+ if(px == qx && py == qy) return 0.0f;
+ int dx = px-qx;
+ int dy = py-qy;
+ qx = WRAP(qx);
+ qy = WRAP(qy);
+ float dist = -(dx*dx+dy*dy)*(1.0f/4.41f); /* Negated squared distance, scaled by 1/4.41 (4.41 = 2.1^2). */
+ float d_p = 0.0f;
+ for(int d = 0; d < DIM; d++) {
+ float d_c = MAT(px, py, d) - MAT(qx, qy, d);
+ d_p += d_c*d_c;
+ }
+ if(DIM == 2)
+ d_p = approx_sqrt(d_p); /* DIM/2 == 1: plain Euclidean distance of the values. */
+ else
+ d_p = powf(d_p, DIM * 0.25f); /* (d^2)^(DIM/4) == d^(DIM/2). */
+ return approx_exp(dist - d_p);
+}
+
+/* Returns all the energy in that specific pixel, by looping over the window around the pixel.
+ *
+ * Sums energy_pq() over the (2*WINDOW+1)^2 neighborhood centered on (x, y).
+ * Neighbor coordinates are passed on unwrapped; energy_pq() wraps them for the
+ * lookup and returns zero for the center pixel itself. */
+float pixel_energy_scalar(int x, int y)
+{
+ float energy = 0.0f;
+ for(int dy = -WINDOW; dy <= WINDOW; dy++) {
+ for(int dx = -WINDOW; dx <= WINDOW; dx++) {
+ energy += energy_pq(x, y, x+dx, y+dy);
+ }
+ }
+ return energy;
+}
+
+#ifdef USE_SSE_CODE
+inline __m128 approx_exp_sse(__m128 x) /* Four-lane SSE version of approx_exp(): (1 + x/256)^256 in every lane. */
+{
+ x = _mm_add_ps(_mm_set1_ps(1.0f), _mm_mul_ps(x, _mm_set1_ps(1.0f / 256.0f)));
+ x = _mm_mul_ps(x, x); /* Eight squarings in total: 2^8 = 256. */
+ x = _mm_mul_ps(x, x);
+ x = _mm_mul_ps(x, x);
+ x = _mm_mul_ps(x, x);
+ x = _mm_mul_ps(x, x);
+ x = _mm_mul_ps(x, x);
+ x = _mm_mul_ps(x, x);
+ x = _mm_mul_ps(x, x);
+ return x;
+}
+/* Four-lane SSE version of approx_sqrt(): x * rsqrt(x) in every lane.
+ * NOTE(review): a lane holding exactly 0 presumably yields NaN (0 * inf) -
+ * confirm that callers mask out such lanes. */
+inline __m128 approx_sqrt_sse(__m128 x)
+{
+ return _mm_mul_ps(_mm_rsqrt_ps(x), x);
+}
+/* Four-lane SSE version of energy_pq() for DIM = 2.
+ *
+ * center:  two vectors with the dimension-0 and dimension-1 value of the
+ *          center pixel.
+ * row:     points at four consecutive dimension-0 matrix entries (read with an
+ *          unaligned load); the matching dimension-1 entries are read
+ *          SIZE*SIZE floats further on.
+ * dx4/dy4: per-lane pixel offsets used for the spatial distance term.
+ *
+ * Returns exp(-(dx^2+dy^2) / 4.41 - sqrt(d1^2 + d2^2)) per lane, using the
+ * approximate helpers. Unlike energy_pq(), this neither special-cases the
+ * center pixel nor wraps coordinates - presumably the caller takes care of
+ * both. */
+inline __m128 energy_pq_sse(__m128 *center, float *row, __m128 dx4, __m128 dy4)
+{
+ __m128 dist = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(dx4, dx4), _mm_mul_ps(dy4, dy4)), _mm_set1_ps(-1.0f / 4.41f));
+ __m128 d1 = _mm_sub_ps(center[0], _mm_loadu_ps(row)), d2 = _mm_sub_ps(center[1], _mm_loadu_ps(row + SIZE*SIZE));
+ __m128 dist2 = _mm_add_ps(_mm_mul_ps(d1, d1), _mm_mul_ps(d2, d2));
+ return approx_exp_sse(_mm_sub_ps(dist, approx_sqrt_sse(dist2)));
+}
+
+/* Same as above, but processes 4 pixels at a time. */
+float pixel_energy_sse(int x, int y)
+{
+ __m128 energy = _mm_setzero_ps();
+ __m128 center_pixel[2] = {_mm_set1_ps(MAT(x, y, 0)), _mm_set1_ps(MAT(x, y, 1))};
+ for(int dy = -WINDOW; dy <= WINDOW; dy++) {
+ int wy = WRAP(y+dy);
+ __m128 dy4 = _mm_set1_ps(dy);
+ for(int dx = -WINDOW; dx <= WINDOW; dx += 4) {
+ __m128 dx4 = _mm_add_ps(_mm_set1_ps(dx), _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f));
+ __m128 active = _mm_cmple_ps(dx4, _mm_set1_ps(WINDOW)
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list