[Bf-blender-cvs] [ffd605c2fa] soc-2016-cycles_denoising: Cycles Denoising: Change memory layout of noisy color and variance information

Lukas Stockner noreply at git.blender.org
Wed Feb 1 05:19:07 CET 2017


Commit: ffd605c2fa4b75540a1647d53d898f6cffb75f8e
Author: Lukas Stockner
Date:   Sun Jan 15 20:58:53 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBffd605c2fa4b75540a1647d53d898f6cffb75f8e

Cycles Denoising: Change memory layout of noisy color and variance information

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/kernel/filter/filter_features.h
M	intern/cycles/kernel/filter/filter_final_pass_impl.h
M	intern/cycles/kernel/filter/filter_nlm_cpu.h
M	intern/cycles/kernel/filter/filter_nlm_gpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/kernel/kernels/cuda/kernel.cu

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 0bea511173..4e6fb119b2 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -145,8 +145,8 @@ public:
 	KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)>       filter_nlm_update_output_kernel;
 	KernelFunctions<void(*)(float*, float*, int*, int)>                                      filter_nlm_normalize_kernel;
 
-	KernelFunctions<void(*)(int, int, float*, float*, int, void*, float*, float3*, int*, int*, int, int, int)>  filter_nlm_construct_gramian_kernel;
-	KernelFunctions<void(*)(int, int, int, int, int, float*, void*, float*, float3*, int*, int)>                filter_finalize_kernel;
+	KernelFunctions<void(*)(int, int, float*, float*, int, int,  void*, float*, float3*, int*, int*, int, int, int)>  filter_nlm_construct_gramian_kernel;
+	KernelFunctions<void(*)(int, int, int, int, int, float*, void*, float*, float3*, int*, int)>                      filter_finalize_kernel;
 
 #define KERNEL_FUNCTIONS(name) \
 	      KERNEL_NAME_EVAL(cpu, name), \
@@ -427,11 +427,11 @@ public:
 				if(cross_denoise) {
 					int mean_from[]      = {20, 21, 22, 26, 27, 28};
 					int variance_from[]  = {23, 24, 25, 29, 30, 31};
-					int offset_to[]      = {16, 18, 20, 22, 24, 26};
+					int offset_to[]      = {16, 17, 18, 22, 23, 24};
 					for(int i = 0; i < 6; i++) {
 						for(int y = rect.y; y < rect.w; y++) {
 							for(int x = rect.x; x < rect.z; x++) {
-								filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+1), &rect.x);
+								filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+3), &rect.x);
 							}
 						}
 					}
@@ -439,11 +439,11 @@ public:
 				else {
 					int mean_from[]      = {20, 21, 22};
 					int variance_from[]  = {23, 24, 25};
-					int offset_to[]      = {16, 18, 20};
+					int offset_to[]      = {16, 17, 18};
 					for(int i = 0; i < 3; i++) {
 						for(int y = rect.y; y < rect.w; y++) {
 							for(int x = rect.x; x < rect.z; x++) {
-								filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+1), &rect.x);
+								filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+3), &rect.x);
 							}
 						}
 					}
@@ -452,15 +452,15 @@ public:
 #ifdef WITH_CYCLES_DEBUG_FILTER
 			{
 				float *temp1 = new float[pass_stride], *temp2 = new float[pass_stride], *temp3 = new float[3*pass_stride], *out = new float[3*pass_stride];
-				non_local_means(rect, PASSPTR(16), PASSPTR(16), out, PASSPTR(17), temp1, temp2, temp3, 8, 4, 1, 0.5f, 2*pass_stride, pass_stride);
+				non_local_means(rect, PASSPTR(16), PASSPTR(16), out, PASSPTR(19), temp1, temp2, temp3, 8, 4, 1, 0.5f, pass_stride, pass_stride);
 				debug.add_pass("input0Filtered", out);
 				debug.add_pass("input1Filtered", out+pass_stride);
 				debug.add_pass("input2Filtered", out+2*pass_stride);
 				debug.add_pass("input0Unfiltered", PASSPTR(16));
-				debug.add_pass("input1Unfiltered", PASSPTR(18));
-				debug.add_pass("input2Unfiltered", PASSPTR(20));
-				debug.add_pass("input0Variance", PASSPTR(17));
-				debug.add_pass("input1Variance", PASSPTR(19));
+				debug.add_pass("input1Unfiltered", PASSPTR(17));
+				debug.add_pass("input2Unfiltered", PASSPTR(18));
+				debug.add_pass("input0Variance", PASSPTR(19));
+				debug.add_pass("input1Variance", PASSPTR(20));
 				debug.add_pass("input2Variance", PASSPTR(21));
 				delete[] temp1;
 				delete[] temp2;
@@ -502,7 +502,7 @@ public:
 			float a = 1.0f;
 			float k_2 = kg->__data.integrator.weighting_adjust;
 			float *weight = filter_buffer + 16*pass_stride;
-			float *variance = filter_buffer + 17*pass_stride;
+			float *variance = filter_buffer + 19*pass_stride;
 			float *difference = new float[pass_stride];
 			float *blurDifference = new float[pass_stride];
 			int local_filter_rect[4] = {filter_area.x-rect.x, filter_area.y-rect.y, filter_area.z, filter_area.w};
@@ -511,11 +511,11 @@ public:
 				int dx = i % (2*hw+1) - hw;
 
 				int local_rect[4] = {max(0, -dx), max(0, -dy), rect.z-rect.x - max(0, dx), rect.w-rect.y - max(0, dy)};
-				filter_nlm_calc_difference_kernel()(dx, dy, weight, variance, difference, local_rect, w, 2*pass_stride, a, k_2);
+				filter_nlm_calc_difference_kernel()(dx, dy, weight, variance, difference, local_rect, w, pass_stride, a, k_2);
 				filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
 				filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f);
 				filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
-				filter_nlm_construct_gramian_kernel()(dx, dy, blurDifference, filter_buffer, 0*pass_stride, storage, XtWX, XtWY, local_rect, local_filter_rect, w, h, 4);
+				filter_nlm_construct_gramian_kernel()(dx, dy, blurDifference, filter_buffer, 16, 19, storage, XtWX, XtWY, local_rect, local_filter_rect, w, h, 4);
 			}
 			delete[] difference;
 			delete[] blurDifference;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index cf2838bcf3..35633109b0 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1056,10 +1056,11 @@ public:
 			{
 				int mean_from[]      = {20, 21, 22};
 				int variance_from[]  = {23, 24, 25};
-				int offset_to[]      = {16, 18, 20};
+				int mean_to[]        = {16, 17, 18};
+				int variance_to[]    = {19, 20, 21};
 				for(int i = 0; i < 3; i++) {
-					CUdeviceptr d_mean = CUDA_PTR_ADD(d_denoise_buffer, offset_to[i]*pass_stride);
-					CUdeviceptr d_variance = CUDA_PTR_ADD(d_denoise_buffer, (offset_to[i]+1)*pass_stride);
+					CUdeviceptr d_mean = CUDA_PTR_ADD(d_denoise_buffer, mean_to[i]*pass_stride);
+					CUdeviceptr d_variance = CUDA_PTR_ADD(d_denoise_buffer, variance_to[i]*pass_stride);
 
 					void *get_feature_args[] = {&sample, &d_buffer, &mean_from[i], &variance_from[i],
 					                            &buffer_area,
@@ -1118,10 +1119,11 @@ public:
 		int f = 4;
 		float a = 1.0f;
 		float k_2 = kernel_globals.integrator.weighting_adjust;
-		int color_pass = 0;
+		int color_pass = 16;
+		int variance_pass = 19;
 
 		CUdeviceptr color_buffer = CUDA_PTR_ADD(d_denoise_buffers, 16*pass_stride);
-		CUdeviceptr variance_buffer = CUDA_PTR_ADD(d_denoise_buffers, 17*pass_stride);
+		CUdeviceptr variance_buffer = CUDA_PTR_ADD(d_denoise_buffers, 19*pass_stride);
 		CUdeviceptr d_difference, d_blurDifference, d_XtWX, d_XtWY;
 		cuda_assert(cuMemAlloc(&d_difference, pass_stride*sizeof(float)));
 		cuda_assert(cuMemAlloc(&d_blurDifference, pass_stride*sizeof(float)));
@@ -1134,7 +1136,7 @@ public:
 		void *calc_difference_args[] = {&dx, &dy, &color_buffer, &variance_buffer, &d_difference, &local_rect, &w, &a, &k_2};
 		void *blur_args[] = {&d_difference, &d_blurDifference, &local_rect, &w, &f};
 		void *calc_weight_args[] = {&d_blurDifference, &d_difference, &local_rect, &w, &f};
-		void *construct_gramian_args[] = {&dx, &dy, &d_blurDifference, &d_denoise_buffers, &color_pass, &d_storage, &d_transforms, &d_XtWX, &d_XtWY, &local_rect, &local_filter_rect, &w, &h, &f};
+		void *construct_gramian_args[] = {&dx, &dy, &d_blurDifference, &d_denoise_buffers, &color_pass, &variance_pass, &d_storage, &d_transforms, &d_XtWX, &d_XtWY, &local_rect, &local_filter_rect, &w, &h, &f};
 
 		for(int i = 0; i < (2*hw+1)*(2*hw+1); i++) {
 			dy = i / (2*hw+1) - hw;
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index 2ac0194fc3..967d0e9a3b 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -143,14 +143,14 @@ ccl_device_inline void filter_calculate_scale(float *scale)
 	scale[7] = 1.0f/max(sqrtf(scale[7]), 0.01f); //AlbedoB
 }
 
-ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, int pass_stride)
+ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, int channel, int pass_stride)
 {
-	return make_float3(ccl_get_feature(16), ccl_get_feature(18), ccl_get_feature(20));
+	return make_float3(ccl_get_feature(channel), ccl_get_feature(channel+1), ccl_get_feature(channel+2));
 }
 
-ccl_device_inline float filter_get_pixel_variance(float ccl_readonly_ptr buffer, int pass_stride)
+ccl_device_inline float filter_get_pixel_variance(float ccl_readonly_ptr buffer, int channel, int pass_stride)
 {
-	return average(make_float3(ccl_get_feature(17), ccl_get_feature(19), ccl_get_feature(21)));
+	return average(make_float3(ccl_get_feature(channel), ccl_get_feature(channel+1), ccl_get_feature(channel+2)));
 }
 
 ccl_device_inline bool filter_firefly_rejection(float3 pixel_color, float pixel_variance, float3 center_color, float sqrt_center_variance)
diff --git a/intern/cycles/kernel/filter/filter_final_pass_impl.h b/intern/cycles/kernel/filter/filter_final_pass_impl.h
index e489b9869d..470528c361 100644
--- a/intern/cycles/kernel/filter/filter_final_pass_impl.h
+++ b/intern/cycles/kernel/filter/filter_final_pass_impl.h
@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
 #define STORAGE_TYPE FilterStorage
 #endif
 
-ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int storage_ofs, int storage_size, int dx, int dy, int w, int h, float ccl_readonly_ptr buffer, int color_pass, STORAGE_TYPE *storage, float weight, float ccl_readonly_ptr transform, float *XtWX, float3 *XtWY)
+ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int storage_ofs, int storage_size, int dx, int dy, int w, int h, float ccl_readonly_ptr buffer, int color_pass, int variance_pass, STORAGE_TYPE *storage, float weight, float ccl_readonly_ptr transform, float *XtWX, float3 *XtWY)
 {
 	const int pass_stride = w*h;
 
@@ -44,11 +44,11 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int storage


@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list