[Bf-blender-cvs] [a9da879efa] soc-2016-cycles_denoising: Cycles Denoising: Remove hidden NLM-only option

Lukas Stockner noreply at git.blender.org
Thu Jan 12 05:14:08 CET 2017


Commit: a9da879efa690be65ed3ea45a598be45525e2ff6
Author: Lukas Stockner
Date:   Wed Jan 11 06:26:19 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBa9da879efa690be65ed3ea45a598be45525e2ff6

Cycles Denoising: Remove hidden NLM-only option

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/kernel/filter/filter_nlm.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 4e713e8469..c8203fe802 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -142,7 +142,6 @@ public:
 	KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)>      filter_final_pass_wlr_kernel;
 	KernelFunctions<void(*)(KernelGlobals*, int, float*, int, int, int, int, float*, void*, float*, int*, int*)>      filter_final_pass_nlm_kernel;
 	KernelFunctions<void(*)(KernelGlobals*, int, int, int, float*, int, int)>                                         filter_divide_combined_kernel;
-	KernelFunctions<void(*)(int, int, float**, float**, float**, float**, int*, int, int, float, float)>              filter_non_local_means_3_kernel;
 
 #define KERNEL_FUNCTIONS(name) \
 	      KERNEL_NAME_EVAL(cpu, name), \
@@ -166,8 +165,7 @@ public:
 	  filter_estimate_wlr_params_kernel(KERNEL_FUNCTIONS(filter_estimate_wlr_params)),
 	  filter_final_pass_wlr_kernel(KERNEL_FUNCTIONS(filter_final_pass_wlr)),
 	  filter_final_pass_nlm_kernel(KERNEL_FUNCTIONS(filter_final_pass_nlm)),
-	  filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined)),
-	  filter_non_local_means_3_kernel(KERNEL_FUNCTIONS(filter_non_local_means_3))
+	  filter_divide_combined_kernel(KERNEL_FUNCTIONS(filter_divide_combined))
 	{
 #ifdef WITH_OSL
 		kernel_globals.osl = &osl_globals;
@@ -455,7 +453,6 @@ public:
 
 	void denoise_run(KernelGlobals *kg, int sample, float *filter_buffer, int4 filter_area, int4 rect, int offset, int stride, float *buffers)
 	{
-		bool only_nlm_filter = getenv("ONLY_NLM_FILTER");
 		bool use_gradients = kg->__data.integrator.use_gradients;
 		bool nlm_weights = kg->__data.integrator.use_nlm_weights;
 
@@ -466,28 +463,7 @@ public:
 		int w = align_up(rect.z - rect.x, 4), h = (rect.w - rect.y);
 		int pass_stride = w*h;
 
-		if(only_nlm_filter) {
-			float *img[3] = {filter_buffer + 16*pass_stride, filter_buffer + 18*pass_stride, filter_buffer + 20*pass_stride};
-			float *var[3] = {filter_buffer + 17*pass_stride, filter_buffer + 19*pass_stride, filter_buffer + 21*pass_stride};
-			float *out[3] = {filter_buffer +  0*pass_stride, filter_buffer +  1*pass_stride, filter_buffer +  2*pass_stride};
-			for(int y = rect.y; y < rect.w; y++) {
-				for(int x = rect.x; x < rect.z; x++) {
-					filter_non_local_means_3_kernel()(x, y, img, img, var, out, &rect.x, 10, 4, 1, 0.04f);
-				}
-			}
-			for(int y = 0; y < filter_area.w; y++) {
-				int py = y + filter_area.y;
-				for(int x = 0; x < filter_area.z; x++) {
-					int px = x + filter_area.x;
-					int i = (py - rect.y)*w + (px - rect.x);
-					float *loc_buf = buffers + (offset + py*stride + px)*kg->__data.film.pass_stride;
-					loc_buf[0] = sample*filter_buffer[0*pass_stride + i];
-					loc_buf[1] = sample*filter_buffer[1*pass_stride + i];
-					loc_buf[2] = sample*filter_buffer[2*pass_stride + i];
-				}
-			}
-		}
-		else if(nlm_weights) {
+		if(nlm_weights) {
 			for(int y = 0; y < filter_area.w; y++) {
 				for(int x = 0; x < filter_area.z; x++) {
 					filter_construct_transform_kernel()(kg, sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + y*filter_area.z + x, &rect.x);
diff --git a/intern/cycles/kernel/filter/filter_nlm.h b/intern/cycles/kernel/filter/filter_nlm.h
index 59e9fed2f2..8888660370 100644
--- a/intern/cycles/kernel/filter/filter_nlm.h
+++ b/intern/cycles/kernel/filter/filter_nlm.h
@@ -66,74 +66,4 @@ ccl_device float nlm_weight(int px, int py, int qx, int qy, float ccl_readonly_p
 	return fast_expf(-max(0.0f, dI));
 }
 
-ccl_device void kernel_filter_non_local_means_3(int x, int y, float ccl_readonly_ptr noisyImage[3], float ccl_readonly_ptr weightImage[3], float ccl_readonly_ptr variance[3], float *filteredImage[3], int4 rect, int r, int f, float a, float k_2)
-{
-	int2 low  = make_int2(max(rect.x, x - r),
-	                      max(rect.y, y - r));
-	int2 high = make_int2(min(rect.z, x + r + 1),
-	                      min(rect.w, y + r + 1));
-
-	float sum_image[3] = {0.0f}, sum_weight = 0.0f;
-
-	int w = align_up(rect.z - rect.x, 4);
-	int p_idx = (y-rect.y)*w + (x - rect.x);
-	int q_idx = (low.y-rect.y)*w + (low.x-rect.x);
-#ifdef __KERNEL_SSE41__
-	__m128 a_sse = _mm_set1_ps(a), k_2_sse = _mm_set1_ps(k_2);
-#endif
-	/* Loop over the q's, center pixels of all relevant patches. */
-	for(int qy = low.y; qy < high.y; qy++) {
-		for(int qx = low.x; qx < high.x; qx++, q_idx++) {
-			int2  low_dPatch = make_int2(max(max(rect.x - qx, rect.x - x),  -f), max(max(rect.y - qy, rect.y - y),  -f));
-			int2 high_dPatch = make_int2(min(min(rect.z - qx, rect.z - x), f+1), min(min(rect.w - qy, rect.w - y), f+1));
-			/* Loop over the pixels in the patch.
-			 * Note that the patch must be small enough to be fully inside the rect, both at p and q.
-			 * Do avoid doing all the coordinate calculations twice, the code here computes both weights at once. */
-#ifdef __KERNEL_SSE41__
-			__m128 dI_sse = _mm_setzero_ps();
-			__m128 highX_sse = _mm_set1_ps(high_dPatch.x);
-			for(int k = 0; k < 3; k++) {
-				int dIdx = low_dPatch.x + low_dPatch.y*w;
-				for(int dy = low_dPatch.y; dy < high_dPatch.y; dy++) {
-					int dx;
-					for(dx = low_dPatch.x; dx < high_dPatch.x; dx+=4, dIdx+=4) {
-						__m128 diff = _mm_sub_ps(_mm_loadu_ps(weightImage[k] + p_idx + dIdx), _mm_loadu_ps(weightImage[k] + q_idx + dIdx));
-						__m128 pvar = _mm_loadu_ps(variance[k] + p_idx + dIdx);
-						__m128 qvar = _mm_loadu_ps(variance[k] + q_idx + dIdx);
-						__m128 d = _mm_mul_ps(_mm_sub_ps(_mm_mul_ps(diff, diff), _mm_mul_ps(a_sse, _mm_add_ps(pvar, _mm_min_ps(pvar, qvar)))), _mm_rcp_ps(_mm_add_ps(_mm_set1_ps(1e-7f), _mm_mul_ps(k_2_sse, _mm_add_ps(pvar, qvar)))));
-						dI_sse = _mm_add_ps(dI_sse, _mm_mask_ps(d, _mm_cmplt_ps(_mm_add_ps(_mm_set1_ps(dx), _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f)), highX_sse)));
-					}
-					dIdx += w-(dx - low_dPatch.x);
-				}
-			}
-			float dI = _mm_hsum_ss(dI_sse);
-#else
-			float dI = 0.0f;
-			for(int k = 0; k < 3; k++) {
-				int dIdx = low_dPatch.x + low_dPatch.y*w;
-				for(int dy = low_dPatch.y; dy < high_dPatch.y; dy++) {
-					for(int dx = low_dPatch.x; dx < high_dPatch.x; dx++, dIdx++) {
-						float diff = weightImage[k][p_idx+dIdx] - weightImage[k][q_idx+dIdx];
-						dI += (diff*diff - a*(variance[k][p_idx+dIdx] + min(variance[k][p_idx+dIdx], variance[k][q_idx+dIdx]))) * (1.0f / (1e-7f + k_2*(variance[k][p_idx+dIdx] + variance[k][q_idx+dIdx])));
-					}
-					dIdx += w-(high_dPatch.x - low_dPatch.x);
-				}
-			}
-#endif
-			dI *= 1.0f / (3.0f * (high_dPatch.x - low_dPatch.x) * (high_dPatch.y - low_dPatch.y));
-
-			float wI = fast_expf(-max(0.0f, dI));
-			sum_image[0] += wI*noisyImage[0][q_idx];
-			sum_image[1] += wI*noisyImage[1][q_idx];
-			sum_image[2] += wI*noisyImage[2][q_idx];
-			sum_weight += wI;
-		}
-		q_idx += w-(high.x-low.x);
-	}
-
-	filteredImage[0][p_idx] = sum_image[0] / sum_weight;
-	filteredImage[1][p_idx] = sum_image[1] / sum_weight;
-	filteredImage[2][p_idx] = sum_image[2] / sum_weight;
-}
-
 CCL_NAMESPACE_END
\ No newline at end of file
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 4274ef271b..52a05ddf3b 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -88,15 +88,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_non_local_means)(int x, int y,
                                                        int r, int f,
                                                        float a, float k_2);
 
-void KERNEL_FUNCTION_FULL_NAME(filter_non_local_means_3)(int x, int y,
-                                                         float *noisyImage[3],
-                                                         float *weightImage[3],
-                                                         float *variance[3],
-                                                         float *filteredImage[3],
-                                                         int* rect,
-                                                         int r, int f,
-                                                         float a, float k_2);
-
 void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
                                                       float *mean,
                                                       float *variance,
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index 30e3bec3a7..56aa7e52f8 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -216,26 +216,6 @@ void KERNEL_FUNCTION_FULL_NAME(filter_non_local_means)(int x, int y,
 #endif
 }
 
-void KERNEL_FUNCTION_FULL_NAME(filter_non_local_means_3)(int x, int y,
-                                                         float *noisyImage[3],
-                                                         float *weightImage[3],
-                                                         float *variance[3],
-                                                         float *filteredImage[3],
-                                                         int* filter_rect,
-                                                         int r, int f,
-                                                         float a, float k_2)
-{
-#ifdef KERNEL_STUB
-	STUB_ASSERT(KERNEL_ARCH, filter_non_local_means_3);
-#else
-	int4 rect = make_int4(filter_rect[0], filter_rect[1], filter_rect[2], filter_rect[3]);
-	kernel_filter_non_local_means_3(x, y,
-	                                (float ccl_readonly_ptr*) noisyImage,
-	                                (float ccl_readonly_ptr*) weightImage,
-	                                (float ccl_readonly_ptr*) variance,
-	                                filteredImage, rect, r, f, a, k_2);
-#endif
-}
 
 void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
                                                       float *mean,




More information about the Bf-blender-cvs mailing list