[Bf-blender-cvs] [2214c391062] cycles-x: Cycles X: Switch sample count pass from float to uint
Sergey Sharybin
noreply at git.blender.org
Wed May 12 16:10:46 CEST 2021
Commit: 2214c3910623b6c34652248589e64e75aff0043e
Author: Sergey Sharybin
Date: Mon May 10 15:04:45 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB2214c3910623b6c34652248589e64e75aff0043e
Cycles X: Switch sample count pass from float to uint
This way we will never run into rounding issues when relying on a
per-pixel sample count value.
There is some code duplication between the film conversion and the
pass accessor. Ideally we will de-duplicate the per-pixel processing
logic, but it is better if that happens as a separate refactor.
===================================================================
M intern/cycles/integrator/denoiser_oidn.cpp
M intern/cycles/kernel/device/cuda/kernel.cu
M intern/cycles/kernel/kernel_accumulate.h
M intern/cycles/kernel/kernel_adaptive_sampling.h
M intern/cycles/kernel/kernel_film.h
M intern/cycles/render/pass_accessor.cpp
===================================================================
diff --git a/intern/cycles/integrator/denoiser_oidn.cpp b/intern/cycles/integrator/denoiser_oidn.cpp
index 5097e703dc2..b494edd86b4 100644
--- a/intern/cycles/integrator/denoiser_oidn.cpp
+++ b/intern/cycles/integrator/denoiser_oidn.cpp
@@ -143,7 +143,7 @@ static void oidn_add_pass_if_needed(oidn::FilterRef *oidn_filter,
float pixel_scale = scale;
if (pass_sample_count != PASS_UNUSED) {
- pixel_scale = 1.0f / buffer_pixel[pass_sample_count];
+ pixel_scale = 1.0f / __float_as_uint(buffer_pixel[pass_sample_count]);
}
scaled_row[x * 3 + 0] = pass_pixel[0] * pixel_scale;
diff --git a/intern/cycles/kernel/device/cuda/kernel.cu b/intern/cycles/kernel/device/cuda/kernel.cu
index c38f17b714b..d753932e01b 100644
--- a/intern/cycles/kernel/device/cuda/kernel.cu
+++ b/intern/cycles/kernel/device/cuda/kernel.cu
@@ -475,7 +475,7 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_KERNEL_BLOCK_NUM_THREADS,
pixel_scale = 1.0f / num_samples;
}
else {
- pixel_scale = 1.0f / buffer[pass_sample_count];
+ pixel_scale = 1.0f / __float_as_uint(buffer[pass_sample_count]);
}
if (num_inputs > 0) {
@@ -535,7 +535,7 @@ extern "C" __global__ void CUDA_LAUNCH_BOUNDS(CUDA_KERNEL_BLOCK_NUM_THREADS,
pixel_scale = num_samples;
}
else {
- pixel_scale = buffer[pass_sample_count];
+ pixel_scale = __float_as_uint(buffer[pass_sample_count]);
}
buffer[0] = in[0] * pixel_scale;
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 4d893ce57f8..49719f0f681 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -332,7 +332,7 @@ ccl_device_inline int kernel_accum_sample(INTEGRATOR_STATE_CONST_ARGS,
ccl_global float *buffer = kernel_accum_pixel_render_buffer(INTEGRATOR_STATE_PASS,
render_buffer);
- return (int)atomic_add_and_fetch_float(buffer + kernel_data.film.pass_sample_count, 1.0f) - 1;
+ return atomic_fetch_and_add_uint32((uint *)(buffer) + kernel_data.film.pass_sample_count, 1);
}
ccl_device void kernel_accum_adaptive_buffer(INTEGRATOR_STATE_CONST_ARGS,
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
index ae87855c81d..61ae0c60cb0 100644
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -68,7 +68,7 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(const KernelGlobals *
const float4 I = *((ccl_global float4 *)buffer);
- const float sample = buffer[kernel_data.film.pass_sample_count];
+ const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
/* The per pixel error as seen in section 2.1 of
* "A hierarchical automatic stopping condition for Monte Carlo global illumination"
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index 09876146031..5d5f55565c7 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -47,7 +47,14 @@ ccl_device float4 film_get_pass_result(const KernelGlobals *kg, ccl_global float
}
else if (display_pass_components == 1) {
ccl_global const float *in = (ccl_global float *)(buffer + display_pass_offset);
- pass_result = make_float4(*in, *in, *in, 0.0f);
+ if (kernel_data.film.pass_sample_count != PASS_UNUSED &&
+ kernel_data.film.pass_sample_count == display_pass_offset) {
+ const float value = __float_as_uint(*in);
+ pass_result = make_float4(value, value, value, 0.0f);
+ }
+ else {
+ pass_result = make_float4(*in, *in, *in, 0.0f);
+ }
}
return pass_result;
@@ -84,7 +91,7 @@ ccl_device void kernel_film_convert_to_half_float(const KernelGlobals *kg,
* meaningful result (rather than becoming uniform buffer filled with 1). */
if (kernel_data.film.pass_sample_count != PASS_UNUSED &&
kernel_data.film.pass_sample_count != kernel_data.film.display_pass_offset) {
- sample_scale = 1.0f / buffer[kernel_data.film.pass_sample_count];
+ sample_scale = 1.0f / __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
}
rgba_in *= sample_scale;
}
diff --git a/intern/cycles/render/pass_accessor.cpp b/intern/cycles/render/pass_accessor.cpp
index 1e93e6e61d6..56818165998 100644
--- a/intern/cycles/render/pass_accessor.cpp
+++ b/intern/cycles/render/pass_accessor.cpp
@@ -41,7 +41,7 @@ class Scaler {
{
/* Special trick to only scale the samples count pass with the sample scale. Otherwise the pass
* becomes a uniform 1.0. */
- if (sample_count_pass_ == pass_buffer) {
+ if (sample_count_pass_ == reinterpret_cast<const uint *>(pass_buffer)) {
sample_count_pass_ = nullptr;
}
@@ -86,15 +86,15 @@ class Scaler {
}
protected:
- const float *get_sample_count_pass(const PassAccessor *pass_accessor,
- const RenderBuffers *render_buffers)
+ const uint *get_sample_count_pass(const PassAccessor *pass_accessor,
+ const RenderBuffers *render_buffers)
{
const int pass_sample_count = pass_accessor->get_pass_offset(PASS_SAMPLE_COUNT);
if (pass_sample_count == PASS_UNUSED) {
return nullptr;
}
- return render_buffers->buffer.data() + pass_sample_count;
+ return reinterpret_cast<const uint *>(render_buffers->buffer.data()) + pass_sample_count;
}
const Pass *pass_;
@@ -103,7 +103,7 @@ class Scaler {
const float num_samples_inv_ = 1.0f;
const float exposure_ = 1.0f;
- const float *sample_count_pass_ = nullptr;
+ const uint *sample_count_pass_ = nullptr;
float scale_ = 0.0f;
float scale_exposure_ = 0.0f;
@@ -248,6 +248,16 @@ bool PassAccessor::get_render_tile_pixels(RenderBuffers *render_buffers, float *
pixels[0] = saturate(1.0f - f * scaler.scale_exposure(i));
}
}
+ else if (type == PASS_SAMPLE_COUNT) {
+ /* TODO(sergey): Consider normalizing into the [0..1] range, so that it is possible to see
+ * meaningful value when adaptive sampler stopped rendering image way before the maximum
+ * number of samples was reached (for examples when number of samples is set to 0 in
+ * viewport). */
+ for (int i = 0; i < size; i++, in += pass_stride, pixels++) {
+ const float f = *in;
+ pixels[0] = __float_as_uint(f) * scaler.scale(i);
+ }
+ }
#ifdef WITH_CYCLES_DEBUG
else if (type == PASS_BVH_TRAVERSED_NODES || type == PASS_BVH_TRAVERSED_INSTANCES ||
type == PASS_BVH_INTERSECTIONS || type == PASS_RAY_BOUNCES) {
More information about the Bf-blender-cvs
mailing list