[Bf-blender-cvs] [faa2a03968a] cycles-x: Fix misaligned address error in adaptive sampling
Brecht Van Lommel
noreply at git.blender.org
Mon Jul 26 19:18:14 CEST 2021
Commit: faa2a03968af5999648a8c2593a4ab9a7e055643
Author: Brecht Van Lommel
Date: Mon Jul 26 19:08:36 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBfaa2a03968af5999648a8c2593a4ab9a7e055643
Fix misaligned address error in adaptive sampling
Due to recent alignment changes, we can no longer assume that a float4 is
aligned in the render buffer in the convergence check.
===================================================================
M intern/cycles/kernel/kernel_adaptive_sampling.h
M intern/cycles/kernel/kernel_write_passes.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_adaptive_sampling.h b/intern/cycles/kernel/kernel_adaptive_sampling.h
index 8471058e178..993cfea951e 100644
--- a/intern/cycles/kernel/kernel_adaptive_sampling.h
+++ b/intern/cycles/kernel/kernel_adaptive_sampling.h
@@ -16,6 +16,8 @@
#pragma once
+#include "kernel/kernel_write_passes.h"
+
CCL_NAMESPACE_BEGIN
/* Check whether the pixel has converged and should not be sampled anymore. */
@@ -32,9 +34,8 @@ ccl_device_forceinline bool kernel_need_sample_pixel(INTEGRATOR_STATE_CONST_ARGS
kernel_data.film.pass_stride;
ccl_global float *buffer = render_buffer + render_buffer_offset;
- ccl_global float4 *aux = (ccl_global float4 *)(buffer +
- kernel_data.film.pass_adaptive_aux_buffer);
- return (*aux).w == 0.0f;
+ const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
+ return buffer[aux_w_offset] == 0.0f;
}
/* Determines whether to continue sampling a given pixel or if it has sufficiently converged. */
@@ -57,7 +58,7 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(const KernelGlobals *
/* TODO(Stefan): Is this better in linear, sRGB or something else? */
- const float4 A = *(ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
+ const float4 A = kernel_read_pass_float4(buffer + kernel_data.film.pass_adaptive_aux_buffer);
if (!reset && A.w != 0.0f) {
/* If the pixel was considered converged, its state will not change in this kernmel. Early
* output before doing any math.
@@ -66,7 +67,7 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(const KernelGlobals *
return true;
}
- const float4 I = *(ccl_global float4 *)(buffer + kernel_data.film.pass_combined);
+ const float4 I = kernel_read_pass_float4(buffer + kernel_data.film.pass_combined);
const float sample = __float_as_uint(buffer[kernel_data.film.pass_sample_count]);
@@ -77,7 +78,8 @@ ccl_device bool kernel_adaptive_sampling_convergence_check(const KernelGlobals *
(sample * 0.0001f + sqrtf(I.x + I.y + I.z));
const bool did_converge = (error < threshold * sample);
- buffer[kernel_data.film.pass_adaptive_aux_buffer + 3] = did_converge;
+ const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
+ buffer[aux_w_offset] = did_converge;
return did_converge;
}
@@ -99,20 +101,19 @@ ccl_device void kernel_adaptive_sampling_filter_x(const KernelGlobals *kg,
for (int x = start_x; x < start_x + width; ++x) {
int index = offset + x + y * stride;
ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
- ccl_global float4 *aux = (ccl_global float4 *)(buffer +
- kernel_data.film.pass_adaptive_aux_buffer);
- if ((*aux).w == 0.0f) {
+ const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
+
+ if (buffer[aux_w_offset] == 0.0f) {
if (x > start_x && !prev) {
index = index - 1;
buffer = render_buffer + index * kernel_data.film.pass_stride;
- aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
- (*aux).w = 0.0f;
+ buffer[aux_w_offset] = 0.0f;
}
prev = true;
}
else {
if (prev) {
- (*aux).w = 0.0f;
+ buffer[aux_w_offset] = 0.0f;
}
prev = false;
}
@@ -133,20 +134,19 @@ ccl_device void kernel_adaptive_sampling_filter_y(const KernelGlobals *kg,
for (int y = start_y; y < start_y + height; ++y) {
int index = offset + x + y * stride;
ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
- ccl_global float4 *aux = (ccl_global float4 *)(buffer +
- kernel_data.film.pass_adaptive_aux_buffer);
- if ((*aux).w == 0.0f) {
+ const uint aux_w_offset = kernel_data.film.pass_adaptive_aux_buffer + 3;
+
+ if (buffer[aux_w_offset] == 0.0f) {
if (y > start_y && !prev) {
index = index - stride;
buffer = render_buffer + index * kernel_data.film.pass_stride;
- aux = (ccl_global float4 *)(buffer + kernel_data.film.pass_adaptive_aux_buffer);
- (*aux).w = 0.0f;
+ buffer[aux_w_offset] = 0.0f;
}
prev = true;
}
else {
if (prev) {
- (*aux).w = 0.0f;
+ buffer[aux_w_offset] = 0.0f;
}
prev = false;
}
diff --git a/intern/cycles/kernel/kernel_write_passes.h b/intern/cycles/kernel/kernel_write_passes.h
index f6fb6fed0ef..9d379495629 100644
--- a/intern/cycles/kernel/kernel_write_passes.h
+++ b/intern/cycles/kernel/kernel_write_passes.h
@@ -24,11 +24,10 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline void kernel_write_pass_float(ccl_global float *ccl_restrict buffer, float value)
{
- ccl_global float *buf = buffer;
#ifdef __ATOMIC_PASS_WRITE__
- atomic_add_and_fetch_float(buf, value);
+ atomic_add_and_fetch_float(buffer, value);
#else
- *buf += value;
+ *buffer += value;
#endif
}
@@ -44,10 +43,9 @@ ccl_device_inline void kernel_write_pass_float3(ccl_global float *ccl_restrict b
atomic_add_and_fetch_float(buf_y, value.y);
atomic_add_and_fetch_float(buf_z, value.z);
#else
- ccl_global float *buf = (ccl_global float *)buffer;
- buf[0] += value.x;
- buf[1] += value.y;
- buf[2] += value.z;
+ buffer[0] += value.x;
+ buffer[1] += value.y;
+ buffer[2] += value.z;
#endif
}
@@ -65,12 +63,26 @@ ccl_device_inline void kernel_write_pass_float4(ccl_global float *ccl_restrict b
atomic_add_and_fetch_float(buf_z, value.z);
atomic_add_and_fetch_float(buf_w, value.w);
#else
- ccl_global float *buf = (ccl_global float *)buffer;
- buf[0] += value.x;
- buf[1] += value.y;
- buf[2] += value.z;
- buf[3] += value.w;
+ buffer[0] += value.x;
+ buffer[1] += value.y;
+ buffer[2] += value.z;
+ buffer[3] += value.w;
#endif
}
+ccl_device_inline float kernel_read_pass_float(ccl_global float *ccl_restrict buffer)
+{
+ return *buffer;
+}
+
+ccl_device_inline float3 kernel_read_pass_float3(ccl_global float *ccl_restrict buffer)
+{
+ return make_float3(buffer[0], buffer[1], buffer[2]);
+}
+
+ccl_device_inline float4 kernel_read_pass_float4(ccl_global float *ccl_restrict buffer)
+{
+ return make_float4(buffer[0], buffer[1], buffer[2], buffer[3]);
+}
+
CCL_NAMESPACE_END
More information about the Bf-blender-cvs
mailing list