[Bf-blender-cvs] [07462ddf6d] soc-2016-cycles_denoising: Cycles Denoising: Remove useless functions
Lukas Stockner
noreply at git.blender.org
Wed Feb 1 05:19:06 CET 2017
Commit: 07462ddf6d991065097d9435a6d97ce5e576b43a
Author: Lukas Stockner
Date: Sun Jan 15 18:59:44 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB07462ddf6d991065097d9435a6d97ce5e576b43a
Cycles Denoising: Remove useless functions
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/filter/filter_features.h
M intern/cycles/kernel/filter/filter_final_pass_impl.h
M intern/cycles/kernel/filter/filter_nlm_cpu.h
M intern/cycles/kernel/filter/filter_nlm_gpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 2de0eee900..0bea511173 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -137,7 +137,6 @@ public:
KernelFunctions<void(*)(KernelGlobals*, int, float**, int, int, int, int, int*, int*, int*, int*, float*, float*, int*)> filter_get_feature_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)> filter_combine_halves_kernel;
KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, void*, int*)> filter_construct_transform_kernel;
- KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)> filter_reconstruct_kernel;
KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)> filter_divide_combined_kernel;
KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
@@ -167,7 +166,6 @@ public:
filter_get_feature_kernel(KERNEL_FUNCTIONS(filter_get_feature)),
filter_combine_halves_kernel(KERNEL_FUNCTIONS(filter_combine_halves)),
filter_construct_transform_kernel(KERNEL_FUNCTIONS(filter_construct_transform)),
- filter_reconstruct_kernel(KERNEL_FUNCTIONS(filter_reconstruct)),
filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
filter_nlm_calc_difference_kernel(KERNEL_FUNCTIONS(filter_nlm_calc_difference)),
filter_nlm_blur_kernel(KERNEL_FUNCTIONS(filter_nlm_blur)),
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 4659b1dc01..cf2838bcf3 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -908,7 +908,7 @@ public:
cuda_push_context();
CUfunction cuFilterDivideShadow, cuFilterGetFeature, cuFilterCombineHalves;
- CUfunction cuFilterConstructTransform, cuFilterReconstruct, cuFilterDivideCombined;
+ CUfunction cuFilterConstructTransform, cuFilterDivideCombined;
CUdeviceptr d_buffers = cuda_device_ptr(rtile.buffer);
cuda_assert(cuModuleGetFunction(&cuFilterDivideShadow, cuModule, "kernel_cuda_filter_divide_shadow"));
@@ -916,7 +916,6 @@ public:
cuda_assert(cuModuleGetFunction(&cuFilterCombineHalves, cuModule, "kernel_cuda_filter_combine_halves"));
cuda_assert(cuModuleGetFunction(&cuFilterConstructTransform, cuModule, "kernel_cuda_filter_construct_transform"));
- cuda_assert(cuModuleGetFunction(&cuFilterReconstruct, cuModule, "kernel_cuda_filter_reconstruct"));
cuda_assert(cuModuleGetFunction(&cuFilterDivideCombined, cuModule, "kernel_cuda_filter_divide_combined"));
cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
@@ -926,7 +925,6 @@ public:
bool l1 = false;
if(getenv("CYCLES_DENOISE_PREFER_L1")) l1 = true;
cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, l1? CU_FUNC_CACHE_PREFER_L1: CU_FUNC_CACHE_PREFER_SHARED));
- cuda_assert(cuFuncSetCacheConfig(cuFilterReconstruct, l1? CU_FUNC_CACHE_PREFER_L1: CU_FUNC_CACHE_PREFER_SHARED));
cuda_assert(cuFuncSetCacheConfig(cuFilterDivideCombined, l1? CU_FUNC_CACHE_PREFER_L1: CU_FUNC_CACHE_PREFER_SHARED));
if(have_error())
@@ -943,7 +941,7 @@ public:
min(filter_area.y + filter_area.w + hw, buffer_area.y + buffer_area.w));
int threads_per_block;
- cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilterReconstruct));
+ cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilterConstructTransform));
int xthreads = (int)sqrt((float)threads_per_block);
int ythreads = (int)sqrt((float)threads_per_block);
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index 1a0bba1e5a..2ac0194fc3 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -148,11 +148,6 @@ ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, i
return make_float3(ccl_get_feature(16), ccl_get_feature(18), ccl_get_feature(20));
}
-ccl_device_inline float3 filter_get_pixel_variance3(float ccl_readonly_ptr buffer, int pass_stride)
-{
- return make_float3(ccl_get_feature(17), ccl_get_feature(19), ccl_get_feature(21));
-}
-
ccl_device_inline float filter_get_pixel_variance(float ccl_readonly_ptr buffer, int pass_stride)
{
return average(make_float3(ccl_get_feature(17), ccl_get_feature(19), ccl_get_feature(21)));
diff --git a/intern/cycles/kernel/filter/filter_final_pass_impl.h b/intern/cycles/kernel/filter/filter_final_pass_impl.h
index 41d56dc510..e489b9869d 100644
--- a/intern/cycles/kernel/filter/filter_final_pass_impl.h
+++ b/intern/cycles/kernel/filter/filter_final_pass_impl.h
@@ -93,164 +93,6 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int storage_ofs, int
combined_buffer[2] = final_color.z;
}
-ccl_device void kernel_filter_reconstruct(KernelGlobals *kg, int sample, float ccl_readonly_ptr buffer, int x, int y, int offset, int stride, float *buffers, int filtered_passes, int2 color_passes, STORAGE_TYPE *storage, float *weight_cache, float ccl_readonly_ptr transform, int transform_stride, int4 filter_area, int4 rect)
-{
-#if 0
- int buffer_w = align_up(rect.z - rect.x, 4);
- int buffer_h = (rect.w - rect.y);
- int pass_stride = buffer_h * buffer_w * kernel_data.film.num_frames;
- color_passes *= pass_stride;
- int num_frames = kernel_data.film.num_frames;
- int prev_frames = kernel_data.film.prev_frames;
-
- int2 low = make_int2(max(rect.x, x - kernel_data.integrator.half_window),
- max(rect.y, y - kernel_data.integrator.half_window));
- int2 high = make_int2(min(rect.z, x + kernel_data.integrator.half_window + 1),
- min(rect.w, y + kernel_data.integrator.half_window + 1));
-
- float ccl_readonly_ptr pixel_buffer;
- float ccl_readonly_ptr center_buffer = buffer + (y - rect.y) * buffer_w + (x - rect.x);
- int3 pixel;
-
- float3 center_color = filter_get_pixel_color(center_buffer + color_passes.x, pass_stride);
- float sqrt_center_variance = sqrtf(filter_get_pixel_variance(center_buffer + color_passes.x, pass_stride));
-
- /* NFOR weighting directly writes to the design row, so it doesn't need the feature vector and always uses full rank. */
-# ifdef __KERNEL_CUDA__
- /* On GPUs, store the feature vector in shared memory for faster access. */
- __shared__ float shared_features[DENOISE_FEATURES*CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH];
- float *features = shared_features + DENOISE_FEATURES*(threadIdx.y*blockDim.x + threadIdx.x);
-# else
- float features[DENOISE_FEATURES];
-# endif
- const int rank = storage->rank;
- const int matrix_size = rank+1;
-
- float feature_means[DENOISE_FEATURES];
- filter_get_features(make_int3(x, y, 0), center_buffer, feature_means, NULL, pass_stride);
-
- /* Essentially, this function is just a first-order regression solver.
- * We model the pixel color as a linear function of the feature vectors.
- * So, we search the parameters S that minimize W*(X*S - y), where:
- * - X is the design matrix containing all the feature vectors
- * - y is the vector containing all the pixel colors
- * - W is the diagonal matrix containing all pixel weights
- * Since this is just regular least-squares, the solution is given by:
- * S = inv(Xt*W*X)*Xt*W*y */
-
- float XtWX[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)], design_row[DENOISE_FEATURES+1];
- float3 solution[(DENOISE_FEATURES+1)];
-
- math_trimatrix_zero(XtWX, matrix_size);
- math_vec3_zero(solution, matrix_size);
- /* Construct Xt*W*X matrix and Xt*W*y vector (and fill weight cache, if used). */
- FOR_PIXEL_WINDOW {
- float3 color = filter_get_pixel_color(pixel_buffer + color_passes.x, pass_stride);
- float variance = filter_get_pixel_variance(pixel_buffer + color_passes.x, pass_stride);
- if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) {
-#ifdef WEIGHT_CACHING_CUDA
- if(cache_idx < CUDA_WEIGHT_CACHE_SIZE) weight_cache[cache_idx] = 0.0f;
-#elif defined(WEIGHT_CACHING_CPU)
- weight_cache[cache_idx] = 0.0f;
-#endif
- continue;
- }
-
- filter_get_design_row_transform(pixel, pixel_buffer, feature_means, pass_stride, features, rank, design_row, transform, transform_stride);
- float weight = nlm_weight(x, y, pixel.x, pixel.y, center_buffer + color_passes.y, pixel_buffer + color_passes.y, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, rect);
-
- if(weight < 1e-5f) {
-#ifdef WEIGHT_CACHING_CUDA
- if(cache_idx < CUDA_WEIGHT_CACHE_SIZE) weight_cache[cache_idx] = 0.0f;
-#elif defined(WEIGHT_CACHING_CPU)
- weight_cache[cache_idx] = 0.0f;
-#endif
- continue;
- }
- weight /= max(1.0f, variance);
- weight_cache[cache_idx] = weight;
-
- math_trimatrix_add_gramian(XtWX, matrix_size, design_row, weight);
- math_vec3_add(solution, matrix_size, design_row, weight * color);
- } END_FOR_PIXEL_WINDOW
-
- math_trimatrix_vec3_solve(XtWX, solution, matrix_size);
-
- if(kernel_data.integrator.use_gradients) {
- FOR_PIXEL_WINDOW {
- float weight;
- float3 color;
-#if defined(WEIGHTING_CACHING_CPU) || defined(WEIGHTING_CACHING_CUDA)
-# ifdef WEIGHTING_CACHING_CUDA
- if(cache_idx < CUDA_WEIGHT_CACHE_SIZE)
-# endif
- {
- weight = weight_cache[cache_idx];
- if(weight == 0.0f) continue;
- color = filter_get_pixel_color(pixel_buffer + color_passes.x, pass_stride);
- filter_get_design_row_transform(pixel, pixel_buffer, feature_means, pass_stride, features, rank, design_row, transform, transform_stride);
- }
-# ifdef WEIGHTING_CACHING_CUDA
- else
-# endif
-#endif
-#ifndef WEIGHTING_CACHING_CPU
- {
- color = filter_get_pixel_color(pixel_buffer + color_passes.x, pass_stride);
- float variance = filter_get_pixel_variance(pixel_buffer + color_passes.x, pass_stride);
- if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) continue;
-
- filter_get_design_row_transform(pixel,
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list