[Bf-blender-cvs] [07462ddf6d] soc-2016-cycles_denoising: Cycles Denoising: Remove useless functions

Wed Feb 1 05:19:06 CET 2017

Commit: 07462ddf6d991065097d9435a6d97ce5e576b43a
Author: Lukas Stockner
Date:   Sun Jan 15 18:59:44 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB07462ddf6d991065097d9435a6d97ce5e576b43a

Cycles Denoising: Remove useless functions

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/kernel/filter/filter_features.h
M	intern/cycles/kernel/filter/filter_final_pass_impl.h
M	intern/cycles/kernel/filter/filter_nlm_cpu.h
M	intern/cycles/kernel/filter/filter_nlm_gpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/kernel/kernels/cuda/kernel.cu

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 2de0eee900..0bea511173 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -137,7 +137,6 @@ public:
 	KernelFunctions<void(*)(KernelGlobals*, int, float**, int, int, int, int, int*, int*, int*, int*, float*, float*, int*)>       filter_get_feature_kernel;
 	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int)>                                                  filter_combine_halves_kernel;
 	KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, void*, int*)>                                      filter_construct_transform_kernel;
-	KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)>      filter_reconstruct_kernel;
 	KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)>                                         filter_divide_combined_kernel;
 
 	KernelFunctions<void(*)(int, int, float*, float*, float*, int*, int, int, float, float)> filter_nlm_calc_difference_kernel;
@@ -167,7 +166,6 @@ public:
 	  filter_get_feature_kernel(KERNEL_FUNCTIONS(filter_get_feature)),
 	  filter_combine_halves_kernel(KERNEL_FUNCTIONS(filter_combine_halves)),
 	  filter_construct_transform_kernel(KERNEL_FUNCTIONS(filter_construct_transform)),
-	  filter_reconstruct_kernel(KERNEL_FUNCTIONS(filter_reconstruct)),
 	  filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
 	  filter_nlm_calc_difference_kernel(KERNEL_FUNCTIONS(filter_nlm_calc_difference)),
 	  filter_nlm_blur_kernel(KERNEL_FUNCTIONS(filter_nlm_blur)),
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 4659b1dc01..cf2838bcf3 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -908,7 +908,7 @@ public:
 		cuda_push_context();
 
 		CUfunction cuFilterDivideShadow, cuFilterGetFeature, cuFilterCombineHalves;
-		CUfunction cuFilterConstructTransform, cuFilterReconstruct, cuFilterDivideCombined;
+		CUfunction cuFilterConstructTransform, cuFilterDivideCombined;
 		CUdeviceptr d_buffers = cuda_device_ptr(rtile.buffer);
 
 		cuda_assert(cuModuleGetFunction(&cuFilterDivideShadow, cuModule, "kernel_cuda_filter_divide_shadow"));
@@ -916,7 +916,6 @@ public:
 		cuda_assert(cuModuleGetFunction(&cuFilterCombineHalves, cuModule, "kernel_cuda_filter_combine_halves"));
 
 		cuda_assert(cuModuleGetFunction(&cuFilterConstructTransform, cuModule, "kernel_cuda_filter_construct_transform"));
-		cuda_assert(cuModuleGetFunction(&cuFilterReconstruct, cuModule, "kernel_cuda_filter_reconstruct"));
 		cuda_assert(cuModuleGetFunction(&cuFilterDivideCombined, cuModule, "kernel_cuda_filter_divide_combined"));
 
 		cuda_assert(cuFuncSetCacheConfig(cuFilterDivideShadow, CU_FUNC_CACHE_PREFER_L1));
@@ -926,7 +925,6 @@ public:
 		bool l1 = false;
 		if(getenv("CYCLES_DENOISE_PREFER_L1")) l1 = true;
 		cuda_assert(cuFuncSetCacheConfig(cuFilterConstructTransform, l1? CU_FUNC_CACHE_PREFER_L1: CU_FUNC_CACHE_PREFER_SHARED));
-		cuda_assert(cuFuncSetCacheConfig(cuFilterReconstruct, l1? CU_FUNC_CACHE_PREFER_L1: CU_FUNC_CACHE_PREFER_SHARED));
 		cuda_assert(cuFuncSetCacheConfig(cuFilterDivideCombined, l1? CU_FUNC_CACHE_PREFER_L1: CU_FUNC_CACHE_PREFER_SHARED));
 
 		if(have_error())
@@ -943,7 +941,7 @@ public:
 		                      min(filter_area.y + filter_area.w + hw, buffer_area.y + buffer_area.w));
 
 		int threads_per_block;
-		cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilterReconstruct));
+		cuda_assert(cuFuncGetAttribute(&threads_per_block, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, cuFilterConstructTransform));
 
 		int xthreads = (int)sqrt((float)threads_per_block);
 		int ythreads = (int)sqrt((float)threads_per_block);
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index 1a0bba1e5a..2ac0194fc3 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -148,11 +148,6 @@ ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, i
 	return make_float3(ccl_get_feature(16), ccl_get_feature(18), ccl_get_feature(20));
 }
 
-ccl_device_inline float3 filter_get_pixel_variance3(float ccl_readonly_ptr buffer, int pass_stride)
-{
-	return make_float3(ccl_get_feature(17), ccl_get_feature(19), ccl_get_feature(21));
-}
-
 ccl_device_inline float filter_get_pixel_variance(float ccl_readonly_ptr buffer, int pass_stride)
 {
 	return average(make_float3(ccl_get_feature(17), ccl_get_feature(19), ccl_get_feature(21)));
diff --git a/intern/cycles/kernel/filter/filter_final_pass_impl.h b/intern/cycles/kernel/filter/filter_final_pass_impl.h
index 41d56dc510..e489b9869d 100644
--- a/intern/cycles/kernel/filter/filter_final_pass_impl.h
+++ b/intern/cycles/kernel/filter/filter_final_pass_impl.h
@@ -93,164 +93,6 @@ ccl_device_inline void kernel_filter_finalize(int x, int y, int storage_ofs, int
 	combined_buffer[2] = final_color.z;
 }
 
-ccl_device void kernel_filter_reconstruct(KernelGlobals *kg, int sample, float ccl_readonly_ptr buffer, int x, int y, int offset, int stride, float *buffers, int filtered_passes, int2 color_passes, STORAGE_TYPE *storage, float *weight_cache, float ccl_readonly_ptr transform, int transform_stride, int4 filter_area, int4 rect)
-{
-#if 0
-	int buffer_w = align_up(rect.z - rect.x, 4);
-	int buffer_h = (rect.w - rect.y);
-	int pass_stride = buffer_h * buffer_w * kernel_data.film.num_frames;
-	color_passes *= pass_stride;
-	int num_frames = kernel_data.film.num_frames;
-	int prev_frames = kernel_data.film.prev_frames;
-
-	int2 low  = make_int2(max(rect.x, x - kernel_data.integrator.half_window),
-	                      max(rect.y, y - kernel_data.integrator.half_window));
-	int2 high = make_int2(min(rect.z, x + kernel_data.integrator.half_window + 1),
-	                      min(rect.w, y + kernel_data.integrator.half_window + 1));
-
-	float ccl_readonly_ptr pixel_buffer;
-	float ccl_readonly_ptr center_buffer = buffer + (y - rect.y) * buffer_w + (x - rect.x);
-	int3 pixel;
-
-	float3 center_color  = filter_get_pixel_color(center_buffer + color_passes.x, pass_stride);
-	float sqrt_center_variance = sqrtf(filter_get_pixel_variance(center_buffer + color_passes.x, pass_stride));
-
-	/* NFOR weighting directly writes to the design row, so it doesn't need the feature vector and always uses full rank. */
-#  ifdef __KERNEL_CUDA__
-	/* On GPUs, store the feature vector in shared memory for faster access. */
-	__shared__ float shared_features[DENOISE_FEATURES*CUDA_THREADS_BLOCK_WIDTH*CUDA_THREADS_BLOCK_WIDTH];
-	float *features = shared_features + DENOISE_FEATURES*(threadIdx.y*blockDim.x + threadIdx.x);
-#  else
-	float features[DENOISE_FEATURES];
-#  endif
-	const int rank = storage->rank;
-	const int matrix_size = rank+1;
-
-	float feature_means[DENOISE_FEATURES];
-	filter_get_features(make_int3(x, y, 0), center_buffer, feature_means, NULL, pass_stride);
-
-	/* Essentially, this function is just a first-order regression solver.
-	 * We model the pixel color as a linear function of the feature vectors.
-	 * So, we search the parameters S that minimize W*(X*S - y), where:
-	 * - X is the design matrix containing all the feature vectors
-	 * - y is the vector containing all the pixel colors
-	 * - W is the diagonal matrix containing all pixel weights
-	 * Since this is just regular least-squares, the solution is given by:
-	 * S = inv(Xt*W*X)*Xt*W*y */
-
-	float XtWX[(DENOISE_FEATURES+1)*(DENOISE_FEATURES+1)], design_row[DENOISE_FEATURES+1];
-	float3 solution[(DENOISE_FEATURES+1)];
-
-	math_trimatrix_zero(XtWX, matrix_size);
-	math_vec3_zero(solution, matrix_size);
-	/* Construct Xt*W*X matrix and Xt*W*y vector (and fill weight cache, if used). */
-	FOR_PIXEL_WINDOW {
-		float3 color = filter_get_pixel_color(pixel_buffer + color_passes.x, pass_stride);
-		float variance = filter_get_pixel_variance(pixel_buffer + color_passes.x, pass_stride);
-		if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) {
-#ifdef WEIGHT_CACHING_CUDA
-			if(cache_idx < CUDA_WEIGHT_CACHE_SIZE) weight_cache[cache_idx] = 0.0f;
-#elif defined(WEIGHT_CACHING_CPU)
-			weight_cache[cache_idx] = 0.0f;
-#endif
-			continue;
-		}
-
-		filter_get_design_row_transform(pixel, pixel_buffer, feature_means, pass_stride, features, rank, design_row, transform, transform_stride);
-		float weight = nlm_weight(x, y, pixel.x, pixel.y, center_buffer + color_passes.y, pixel_buffer + color_passes.y, pass_stride, 1.0f, kernel_data.integrator.weighting_adjust, 4, rect);
-
-		if(weight < 1e-5f) {
-#ifdef WEIGHT_CACHING_CUDA
-			if(cache_idx < CUDA_WEIGHT_CACHE_SIZE) weight_cache[cache_idx] = 0.0f;
-#elif defined(WEIGHT_CACHING_CPU)
-			weight_cache[cache_idx] = 0.0f;
-#endif
-			continue;
-		}
-		weight /= max(1.0f, variance);
-		weight_cache[cache_idx] = weight;
-
-		math_trimatrix_add_gramian(XtWX, matrix_size, design_row, weight);
-		math_vec3_add(solution, matrix_size, design_row, weight * color);
-	} END_FOR_PIXEL_WINDOW
-
-	math_trimatrix_vec3_solve(XtWX, solution, matrix_size);
-
-	if(kernel_data.integrator.use_gradients) {
-		FOR_PIXEL_WINDOW {
-			float weight;
-			float3 color;
-#if defined(WEIGHTING_CACHING_CPU) || defined(WEIGHTING_CACHING_CUDA)
-#  ifdef WEIGHTING_CACHING_CUDA
-			if(cache_idx < CUDA_WEIGHT_CACHE_SIZE)
-#  endif
-			{
-				weight = weight_cache[cache_idx];
-				if(weight == 0.0f) continue;
-				color = filter_get_pixel_color(pixel_buffer + color_passes.x, pass_stride);
-				filter_get_design_row_transform(pixel, pixel_buffer, feature_means, pass_stride, features, rank, design_row, transform, transform_stride);
-			}
-#  ifdef WEIGHTING_CACHING_CUDA
-			else
-#  endif
-#endif
-#ifndef WEIGHTING_CACHING_CPU
-			{
-				color = filter_get_pixel_color(pixel_buffer + color_passes.x, pass_stride);
-				float variance = filter_get_pixel_variance(pixel_buffer + color_passes.x, pass_stride);
-				if(filter_firefly_rejection(color, variance, center_color, sqrt_center_variance)) continue;
-
-				filter_get_design_row_transform(pixel, 

@@ Diff output truncated at 10240 characters. @@