[Bf-blender-cvs] [ffd605c2fa] soc-2016-cycles_denoising: Cycles Denoising: Change memory layout of noisy color and variance information
Lukas Stockner
noreply at git.blender.org
Wed Feb 1 05:19:07 CET 2017
Commit: ffd605c2fa4b75540a1647d53d898f6cffb75f8e
Author: Lukas Stockner
Date: Sun Jan 15 20:58:53 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rBffd605c2fa4b75540a1647d53d898f6cffb75f8e
Cycles Denoising: Change memory layout of noisy color and variance information
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/filter/filter_features.h
M intern/cycles/kernel/filter/filter_final_pass_impl.h
M intern/cycles/kernel/filter/filter_nlm_cpu.h
M intern/cycles/kernel/filter/filter_nlm_gpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 0bea511173..4e6fb119b2 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -145,8 +145,8 @@ public:
KernelFunctions<void(*)(int, int, float*, float*, float*, float*, int*, int, int)> filter_nlm_update_output_kernel;
KernelFunctions<void(*)(float*, float*, int*, int)> filter_nlm_normalize_kernel;
- KernelFunctions<void(*)(int, int, float*, float*, int, void*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
- KernelFunctions<void(*)(int, int, int, int, int, float*, void*, float*, float3*, int*, int)> filter_finalize_kernel;
+ KernelFunctions<void(*)(int, int, float*, float*, int, int, void*, float*, float3*, int*, int*, int, int, int)> filter_nlm_construct_gramian_kernel;
+ KernelFunctions<void(*)(int, int, int, int, int, float*, void*, float*, float3*, int*, int)> filter_finalize_kernel;
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), \
@@ -427,11 +427,11 @@ public:
if(cross_denoise) {
int mean_from[] = {20, 21, 22, 26, 27, 28};
int variance_from[] = {23, 24, 25, 29, 30, 31};
- int offset_to[] = {16, 18, 20, 22, 24, 26};
+ int offset_to[] = {16, 17, 18, 22, 23, 24};
for(int i = 0; i < 6; i++) {
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+1), &rect.x);
+ filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+3), &rect.x);
}
}
}
@@ -439,11 +439,11 @@ public:
else {
int mean_from[] = {20, 21, 22};
int variance_from[] = {23, 24, 25};
- int offset_to[] = {16, 18, 20};
+ int offset_to[] = {16, 17, 18};
for(int i = 0; i < 3; i++) {
for(int y = rect.y; y < rect.w; y++) {
for(int x = rect.x; x < rect.z; x++) {
- filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+1), &rect.x);
+ filter_get_feature_kernel()(kg, sample, buffer, mean_from[i], variance_from[i], x, y, tile_x, tile_y, offsets, strides, PASSPTR(offset_to[i]), PASSPTR(offset_to[i]+3), &rect.x);
}
}
}
@@ -452,15 +452,15 @@ public:
#ifdef WITH_CYCLES_DEBUG_FILTER
{
float *temp1 = new float[pass_stride], *temp2 = new float[pass_stride], *temp3 = new float[3*pass_stride], *out = new float[3*pass_stride];
- non_local_means(rect, PASSPTR(16), PASSPTR(16), out, PASSPTR(17), temp1, temp2, temp3, 8, 4, 1, 0.5f, 2*pass_stride, pass_stride);
+ non_local_means(rect, PASSPTR(16), PASSPTR(16), out, PASSPTR(19), temp1, temp2, temp3, 8, 4, 1, 0.5f, pass_stride, pass_stride);
debug.add_pass("input0Filtered", out);
debug.add_pass("input1Filtered", out+pass_stride);
debug.add_pass("input2Filtered", out+2*pass_stride);
debug.add_pass("input0Unfiltered", PASSPTR(16));
- debug.add_pass("input1Unfiltered", PASSPTR(18));
- debug.add_pass("input2Unfiltered", PASSPTR(20));
- debug.add_pass("input0Variance", PASSPTR(17));
- debug.add_pass("input1Variance", PASSPTR(19));
+ debug.add_pass("input1Unfiltered", PASSPTR(17));
+ debug.add_pass("input2Unfiltered", PASSPTR(18));
+ debug.add_pass("input0Variance", PASSPTR(19));
+ debug.add_pass("input1Variance", PASSPTR(20));
debug.add_pass("input2Variance", PASSPTR(21));
delete[] temp1;
delete[] temp2;
@@ -502,7 +502,7 @@ public:
float a = 1.0f;
float k_2 = kg->__data.integrator.weighting_adjust;
float *weight = filter_buffer + 16*pass_stride;
- float *variance = filter_buffer + 17*pass_stride;
+ float *variance = filter_buffer + 19*pass_stride;
float *difference = new float[pass_stride];
float *blurDifference = new float[pass_stride];
int local_filter_rect[4] = {filter_area.x-rect.x, filter_area.y-rect.y, filter_area.z, filter_area.w};
@@ -511,11 +511,11 @@ public:
int dx = i % (2*hw+1) - hw;
int local_rect[4] = {max(0, -dx), max(0, -dy), rect.z-rect.x - max(0, dx), rect.w-rect.y - max(0, dy)};
- filter_nlm_calc_difference_kernel()(dx, dy, weight, variance, difference, local_rect, w, 2*pass_stride, a, k_2);
+ filter_nlm_calc_difference_kernel()(dx, dy, weight, variance, difference, local_rect, w, pass_stride, a, k_2);
filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
filter_nlm_calc_weight_kernel()(blurDifference, difference, local_rect, w, f);
filter_nlm_blur_kernel()(difference, blurDifference, local_rect, w, f);
- filter_nlm_construct_gramian_kernel()(dx, dy, blurDifference, filter_buffer, 0*pass_stride, storage, XtWX, XtWY, local_rect, local_filter_rect, w, h, 4);
+ filter_nlm_construct_gramian_kernel()(dx, dy, blurDifference, filter_buffer, 16, 19, storage, XtWX, XtWY, local_rect, local_filter_rect, w, h, 4);
}
delete[] difference;
delete[] blurDifference;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index cf2838bcf3..35633109b0 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1056,10 +1056,11 @@ public:
{
int mean_from[] = {20, 21, 22};
int variance_from[] = {23, 24, 25};
- int offset_to[] = {16, 18, 20};
+ int mean_to[] = {16, 17, 18};
+ int variance_to[] = {19, 20, 21};
for(int i = 0; i < 3; i++) {
- CUdeviceptr d_mean = CUDA_PTR_ADD(d_denoise_buffer, offset_to[i]*pass_stride);
- CUdeviceptr d_variance = CUDA_PTR_ADD(d_denoise_buffer, (offset_to[i]+1)*pass_stride);
+ CUdeviceptr d_mean = CUDA_PTR_ADD(d_denoise_buffer, mean_to[i]*pass_stride);
+ CUdeviceptr d_variance = CUDA_PTR_ADD(d_denoise_buffer, variance_to[i]*pass_stride);
void *get_feature_args[] = {&sample, &d_buffer, &mean_from[i], &variance_from[i],
&buffer_area,
@@ -1118,10 +1119,11 @@ public:
int f = 4;
float a = 1.0f;
float k_2 = kernel_globals.integrator.weighting_adjust;
- int color_pass = 0;
+ int color_pass = 16;
+ int variance_pass = 19;
CUdeviceptr color_buffer = CUDA_PTR_ADD(d_denoise_buffers, 16*pass_stride);
- CUdeviceptr variance_buffer = CUDA_PTR_ADD(d_denoise_buffers, 17*pass_stride);
+ CUdeviceptr variance_buffer = CUDA_PTR_ADD(d_denoise_buffers, 19*pass_stride);
CUdeviceptr d_difference, d_blurDifference, d_XtWX, d_XtWY;
cuda_assert(cuMemAlloc(&d_difference, pass_stride*sizeof(float)));
cuda_assert(cuMemAlloc(&d_blurDifference, pass_stride*sizeof(float)));
@@ -1134,7 +1136,7 @@ public:
void *calc_difference_args[] = {&dx, &dy, &color_buffer, &variance_buffer, &d_difference, &local_rect, &w, &a, &k_2};
void *blur_args[] = {&d_difference, &d_blurDifference, &local_rect, &w, &f};
void *calc_weight_args[] = {&d_blurDifference, &d_difference, &local_rect, &w, &f};
- void *construct_gramian_args[] = {&dx, &dy, &d_blurDifference, &d_denoise_buffers, &color_pass, &d_storage, &d_transforms, &d_XtWX, &d_XtWY, &local_rect, &local_filter_rect, &w, &h, &f};
+ void *construct_gramian_args[] = {&dx, &dy, &d_blurDifference, &d_denoise_buffers, &color_pass, &variance_pass, &d_storage, &d_transforms, &d_XtWX, &d_XtWY, &local_rect, &local_filter_rect, &w, &h, &f};
for(int i = 0; i < (2*hw+1)*(2*hw+1); i++) {
dy = i / (2*hw+1) - hw;
diff --git a/intern/cycles/kernel/filter/filter_features.h b/intern/cycles/kernel/filter/filter_features.h
index 2ac0194fc3..967d0e9a3b 100644
--- a/intern/cycles/kernel/filter/filter_features.h
+++ b/intern/cycles/kernel/filter/filter_features.h
@@ -143,14 +143,14 @@ ccl_device_inline void filter_calculate_scale(float *scale)
scale[7] = 1.0f/max(sqrtf(scale[7]), 0.01f); //AlbedoB
}
-ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, int pass_stride)
+ccl_device_inline float3 filter_get_pixel_color(float ccl_readonly_ptr buffer, int channel, int pass_stride)
{
- return make_float3(ccl_get_feature(16), ccl_get_feature(18), ccl_get_feature(20));
+ return make_float3(ccl_get_feature(channel), ccl_get_feature(channel+1), ccl_get_feature(channel+2));
}
-ccl_device_inline float filter_get_pixel_variance(float ccl_readonly_ptr buffer, int pass_stride)
+ccl_device_inline float filter_get_pixel_variance(float ccl_readonly_ptr buffer, int channel, int pass_stride)
{
- return average(make_float3(ccl_get_feature(17), ccl_get_feature(19), ccl_get_feature(21)));
+ return average(make_float3(ccl_get_feature(channel), ccl_get_feature(channel+1), ccl_get_feature(channel+2)));
}
ccl_device_inline bool filter_firefly_rejection(float3 pixel_color, float pixel_variance, float3 center_color, float sqrt_center_variance)
diff --git a/intern/cycles/kernel/filter/filter_final_pass_impl.h b/intern/cycles/kernel/filter/filter_final_pass_impl.h
index e489b9869d..470528c361 100644
--- a/intern/cycles/kernel/filter/filter_final_pass_impl.h
+++ b/intern/cycles/kernel/filter/filter_final_pass_impl.h
@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
#define STORAGE_TYPE FilterStorage
#endif
-ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int storage_ofs, int storage_size, int dx, int dy, int w, int h, float ccl_readonly_ptr buffer, int color_pass, STORAGE_TYPE *storage, float weight, float ccl_readonly_ptr transform, float *XtWX, float3 *XtWY)
+ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int storage_ofs, int storage_size, int dx, int dy, int w, int h, float ccl_readonly_ptr buffer, int color_pass, int variance_pass, STORAGE_TYPE *storage, float weight, float ccl_readonly_ptr transform, float *XtWX, float3 *XtWY)
{
const int pass_stride = w*h;
@@ -44,11 +44,11 @@ ccl_device_inline void kernel_filter_construct_gramian(int x, int y, int storage
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list