[Bf-blender-cvs] [c5e9fab] soc-2016-cycles_denoising: Merge remote-tracking branch 'origin/master' into soc-2016-cycles_denoising
Lukas Stockner
noreply at git.blender.org
Sun Aug 21 06:18:06 CEST 2016
Commit: c5e9fabe19c8721e6c4541ee0f75bdd82accca13
Author: Lukas Stockner
Date: Sun Aug 21 05:38:48 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBc5e9fabe19c8721e6c4541ee0f75bdd82accca13
Merge remote-tracking branch 'origin/master' into soc-2016-cycles_denoising
This was an extremely hacky merge with a lot of rebasing and git tricks involved; I hope it works as it's supposed to.
===================================================================
===================================================================
diff --cc intern/cycles/device/device_cuda.cpp
index b30c4e1,76e5249..2ce8d64
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@@ -1450,41 -1240,30 +1495,41 @@@ public
void thread_run(DeviceTask *task)
{
- if(task->type == DeviceTask::PATH_TRACE) {
+ if(task->type == DeviceTask::RENDER) {
RenderTile tile;
-
+
bool branched = task->integrator_branched;
/* Upload Bindless Mapping */
load_bindless_mapping();
-
+
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
- int start_sample = tile.start_sample;
- int end_sample = tile.start_sample + tile.num_samples;
+ if(tile.task == RenderTile::PATH_TRACE) {
+ int start_sample = tile.start_sample;
+ int end_sample = tile.start_sample + tile.num_samples;
- for(int sample = start_sample; sample < end_sample; sample++) {
- if(task->get_cancel()) {
- if(task->need_finish_queue == false)
- break;
- }
+ for(int sample = start_sample; sample < end_sample; sample++) {
+ if(task->get_cancel()) {
+ if(task->need_finish_queue == false)
+ break;
+ }
- path_trace(tile, sample, branched);
+ path_trace(tile, sample, branched);
- tile.sample = sample + 1;
+ tile.sample = sample + 1;
- task->update_progress(&tile);
+ task->update_progress(&tile);
+ }
+
+ if(tile.buffers->params.overscan) { /* TODO(lukas) Works, but seems hacky? */
+ denoise(tile, end_sample);
+ }
+ }
+ else if(tile.task == RenderTile::DENOISE) {
+ int sample = tile.start_sample + tile.num_samples;
+ denoise(tile, sample);
+ tile.sample = sample;
}
task->release_tile(tile);
diff --cc intern/cycles/kernel/CMakeLists.txt
index 0c44530,9317bfb..79a071e
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@@ -175,7 -176,7 +179,8 @@@ set(SRC_UTIL_HEADER
../util/util_half.h
../util/util_math.h
../util/util_math_fast.h
+ ../util/util_math_matrix.h
+ ../util/util_static_assert.h
../util/util_transform.h
../util/util_texture.h
../util/util_types.h
@@@ -245,13 -246,15 +250,21 @@@ if(WITH_CYCLES_CUDA_BINARIES
set(cuda_debug_flags "")
endif()
+ if(WITH_CYCLES_DEBUG_FILTER)
+ set(cuda_filter_debug_flags "-DWITH_CYCLES_DEBUG_FILTER")
+ else()
+ set(cuda_filter_debug_flags "")
+ endif()
+
- set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
+ set(cuda_nvcc_command ${CUDA_NVCC_EXECUTABLE})
+ set(cuda_nvcc_version ${CUDA_VERSION})
+
+ if(DEFINED CUDA_NVCC8_EXECUTABLE AND ((${arch} STREQUAL "sm_60") OR (${arch} STREQUAL "sm_61")))
+ set(cuda_nvcc_command ${CUDA_NVCC8_EXECUTABLE})
+ set(cuda_nvcc_version "80")
+ endif()
+
+ set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}")
set(cuda_math_flags "--use_fast_math")
add_custom_command(
diff --cc intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
index f1a2665,7e0f5a7..a5ba2cb
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_velvet.h
@@@ -35,12 -35,20 +35,19 @@@
CCL_NAMESPACE_BEGIN
- ccl_device int bsdf_ashikhmin_velvet_setup(ShaderClosure *sc)
+ typedef ccl_addr_space struct VelvetBsdf {
+ SHADER_CLOSURE_BASE;
+
+ float sigma;
+ float invsigma2;
- float3 N;
+ } VelvetBsdf;
+
+ ccl_device int bsdf_ashikhmin_velvet_setup(VelvetBsdf *bsdf)
{
- float sigma = fmaxf(sc->data0, 0.01f);
- sc->data0 = 1.0f/(sigma * sigma); /* m_invsigma2 */
+ float sigma = fmaxf(bsdf->sigma, 0.01f);
+ bsdf->invsigma2 = 1.0f/(sigma * sigma);
- sc->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID;
+ bsdf->type = CLOSURE_BSDF_ASHIKHMIN_VELVET_ID;
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
diff --cc intern/cycles/kernel/closure/bsdf_diffuse.h
index 4b29bb0,dcd187f..ec6f1f2
--- a/intern/cycles/kernel/closure/bsdf_diffuse.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse.h
@@@ -35,11 -35,16 +35,15 @@@
CCL_NAMESPACE_BEGIN
+ typedef ccl_addr_space struct DiffuseBsdf {
+ SHADER_CLOSURE_BASE;
- float3 N;
+ } DiffuseBsdf;
+
/* DIFFUSE */
- ccl_device int bsdf_diffuse_setup(ShaderClosure *sc)
+ ccl_device int bsdf_diffuse_setup(DiffuseBsdf *bsdf)
{
- sc->type = CLOSURE_BSDF_DIFFUSE_ID;
+ bsdf->type = CLOSURE_BSDF_DIFFUSE_ID;
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
diff --cc intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
index e0287e7,2d982a9..24f40af
--- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h
@@@ -35,7 -35,16 +35,15 @@@
CCL_NAMESPACE_BEGIN
- ccl_device float3 bsdf_diffuse_ramp_get_color(const ShaderClosure *sc, const float3 colors[8], float pos)
+ #ifdef __OSL__
+
+ typedef ccl_addr_space struct DiffuseRampBsdf {
+ SHADER_CLOSURE_BASE;
+
- float3 N;
+ float3 *colors;
+ } DiffuseRampBsdf;
+
+ ccl_device float3 bsdf_diffuse_ramp_get_color(const float3 colors[8], float pos)
{
int MAXCOLORS = 8;
diff --cc intern/cycles/kernel/closure/bsdf_microfacet.h
index e7ec183,9da73f6..207661f
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@@ -35,6 -35,19 +35,18 @@@
CCL_NAMESPACE_BEGIN
+ typedef ccl_addr_space struct MicrofacetExtra {
+ float3 color;
+ } MicrofacetExtra;
+
+ typedef ccl_addr_space struct MicrofacetBsdf {
+ SHADER_CLOSURE_BASE;
+
+ float alpha_x, alpha_y, ior;
+ MicrofacetExtra *extra;
+ float3 T;
- float3 N;
+ } MicrofacetBsdf;
+
/* Beckmann and GGX microfacet importance sampling. */
ccl_device_inline void microfacet_beckmann_sample_slopes(
diff --cc intern/cycles/kernel/closure/bsdf_oren_nayar.h
index 61b7cb1,cb342a0..6b770fc
--- a/intern/cycles/kernel/closure/bsdf_oren_nayar.h
+++ b/intern/cycles/kernel/closure/bsdf_oren_nayar.h
@@@ -19,8 -19,18 +19,17 @@@
CCL_NAMESPACE_BEGIN
+ typedef ccl_addr_space struct OrenNayarBsdf {
+ SHADER_CLOSURE_BASE;
+
- float3 N;
+ float roughness;
+ float a;
+ float b;
+ } OrenNayarBsdf;
+
ccl_device float3 bsdf_oren_nayar_get_intensity(const ShaderClosure *sc, float3 n, float3 v, float3 l)
{
+ const OrenNayarBsdf *bsdf = (const OrenNayarBsdf*)sc;
float nl = max(dot(n, l), 0.0f);
float nv = max(dot(n, v), 0.0f);
float t = dot(l, v) - nl * nv;
diff --cc intern/cycles/kernel/closure/bsdf_phong_ramp.h
index 1ab15ee,e152a87..420f947
--- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h
+++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h
@@@ -35,7 -35,17 +35,16 @@@
CCL_NAMESPACE_BEGIN
- ccl_device float3 bsdf_phong_ramp_get_color(const ShaderClosure *sc, const float3 colors[8], float pos)
+ #ifdef __OSL__
+
+ typedef ccl_addr_space struct PhongRampBsdf {
+ SHADER_CLOSURE_BASE;
+
- float3 N;
+ float exponent;
+ float3 *colors;
+ } PhongRampBsdf;
+
+ ccl_device float3 bsdf_phong_ramp_get_color(const float3 colors[8], float pos)
{
int MAXCOLORS = 8;
diff --cc intern/cycles/kernel/closure/bsdf_toon.h
index e5b6ab9,28e775b..d8b6d8d
--- a/intern/cycles/kernel/closure/bsdf_toon.h
+++ b/intern/cycles/kernel/closure/bsdf_toon.h
@@@ -35,13 -35,21 +35,20 @@@
CCL_NAMESPACE_BEGIN
+ typedef ccl_addr_space struct ToonBsdf {
+ SHADER_CLOSURE_BASE;
+
- float3 N;
+ float size;
+ float smooth;
+ } ToonBsdf;
+
/* DIFFUSE TOON */
- ccl_device int bsdf_diffuse_toon_setup(ShaderClosure *sc)
+ ccl_device int bsdf_diffuse_toon_setup(ToonBsdf *bsdf)
{
- sc->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
- sc->data0 = saturate(sc->data0);
- sc->data1 = saturate(sc->data1);
+ bsdf->type = CLOSURE_BSDF_DIFFUSE_TOON_ID;
+ bsdf->size = saturate(bsdf->size);
+ bsdf->smooth = saturate(bsdf->smooth);
return SD_BSDF|SD_BSDF_HAS_EVAL;
}
diff --cc intern/cycles/kernel/closure/bssrdf.h
index c24720c,35c9576..8012a4c
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@@ -19,6 -19,17 +19,16 @@@
CCL_NAMESPACE_BEGIN
+ typedef ccl_addr_space struct Bssrdf {
+ SHADER_CLOSURE_BASE;
+
+ float radius;
+ float sharpness;
+ float d;
+ float texture_blur;
+ float albedo;
- float3 N;
+ } Bssrdf;
+
/* Planar Truncated Gaussian
*
* Note how this is different from the typical gaussian, this one integrates
diff --cc intern/cycles/kernel/kernel_passes.h
index 39642a6,20cf3fa..8010165
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@@ -120,92 -60,6 +120,98 @@@ ccl_device_inline void kernel_write_pas
#endif // __SPLIT_KERNEL__ && __WORK_STEALING__
}
+ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg, ccl_global float *buffer,
+ int sample, float2 shadow_info)
+{
+ if(kernel_data.film.pass_denoising == 0)
+ return;
+
+ if(sample & 1) buffer += 3;
+ buffer += kernel_data.film.pass_denoising + 14;
+
+ if(sample < 2) {
+ buffer[0] = shadow_info.x; /* Unoccluded lighting */
+ buffer[1] = shadow_info.y; /* Occluded lighting */
+ buffer[2] = 0.0f; /* Sample variance */
+ }
+ else {
+ float old_shadowing = buffer[1] / max(buffer[0], 1e-7f);
+ buffer[0] += shadow_info.x;
+ buffer[1] += shadow_info.y;
+ float new_shadowing = buffer[1] / max(buffer[0], 1e-7f);
+ float cur_shadowing = shadow_info.y / max(shadow_info.x, 1e-7f);
+ buffer[2] += (cur_shadowing - old_shadowing) * (cur_shadowing - new_shadowing);
+ }
+}
+
+ccl_device_inline bool kernel_write_denoising_passes(KernelGlobals *kg, ccl_global float *buffer,
+ ccl_addr_space PathState *state, ShaderData *sd, int sample, float3 world_albedo)
+{
+ if(kernel_data.film.pass_denoising == 0)
+ return false;
+ buffer += kernel_data.film.pass_denoising;
+
+ if(state->flag & PATH_RAY_DENOISING_PASS_DONE)
+ return false;
+
+ /* Can also be called if the ray misses the scene, sd is NULL in that case. */
+ if(sd) {
+ state->path_length += ccl_fetch(sd, ray_length);
+
+ float3 normal = make_float3(0.0f, 0.0f, 0.0f);
+ float3 albedo = make_float3(0.0f, 0.0f, 0.0f);
+ float sum_weight = 0.0f, max_weight = 0.0f;
+ int max_weight_closure = -1;
+
+ /* Average normal and albedo, determi
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list