[Bf-blender-cvs] [6eca3ca715f] cycles-x: Cycles X: remove all alignment requirements in render passes storage
Brecht Van Lommel
noreply at git.blender.org
Fri Jul 23 19:29:01 CEST 2021
Commit: 6eca3ca715f74b6399da04b32b96dad27501f2d1
Author: Brecht Van Lommel
Date: Thu Jul 15 16:55:09 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB6eca3ca715f74b6399da04b32b96dad27501f2d1
Cycles X: remove all alignment requirements in render passes storage
This saves memory, simplifies the code and has no apparent performance
impact. Whatever historical reasons there were for the alignment
requirements, I don't think they apply anymore.
Differential Revision: https://developer.blender.org/D12015
===================================================================
M intern/cycles/blender/addon/engine.py
M intern/cycles/integrator/pass_accessor.cpp
M intern/cycles/kernel/kernel_accumulate.h
M intern/cycles/kernel/kernel_film.h
M intern/cycles/kernel/kernel_passes.h
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/kernel_write_passes.h
M intern/cycles/kernel/svm/svm_aov.h
M intern/cycles/render/buffers.cpp
M intern/cycles/render/film.cpp
M intern/cycles/render/nodes.cpp
M intern/cycles/render/nodes.h
M intern/cycles/render/pass.cpp
M intern/cycles/render/pass.h
M intern/cycles/render/svm.cpp
===================================================================
diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py
index 19e85bd8438..df6bb2dc982 100644
--- a/intern/cycles/blender/addon/engine.py
+++ b/intern/cycles/blender/addon/engine.py
@@ -242,7 +242,7 @@ def list_render_passes(scene, srl):
if aov.type == 'VALUE':
yield (aov.name, "X", 'VALUE')
else:
- yield (aov.name, "RGBA", 'COLOR')
+ yield (aov.name, "RGB", 'COLOR')
def register_passes(engine, scene, view_layer):
diff --git a/intern/cycles/integrator/pass_accessor.cpp b/intern/cycles/integrator/pass_accessor.cpp
index ab715ad5ad9..9d38f0137f5 100644
--- a/intern/cycles/integrator/pass_accessor.cpp
+++ b/intern/cycles/integrator/pass_accessor.cpp
@@ -63,18 +63,7 @@ PassAccessor::Destination::Destination(const PassType pass_type, half4 *pixels)
PassAccessor::Destination::Destination(const PassType pass_type)
{
const PassInfo pass_info = Pass::get_info(pass_type);
-
- if (pass_info.divide_type != PASS_NONE) {
- /* Divide is used for colors, which has 3 destination components.
- * The passes which use division are stored as aligned float4 internally, and there is no
- * implementation of divide_even_color for float4. So we force it here.
- * The rest of the aligned float3 passes should be fine, because they have float4
- * implementation. */
- num_components = 3;
- }
- else {
- num_components = pass_info.num_components;
- }
+ num_components = pass_info.num_components;
}
/* --------------------------------------------------------------------
@@ -160,10 +149,10 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
const PassMode mode = pass_access_info_.mode;
const PassInfo pass_info = Pass::get_info(type);
- if (destination.num_components == 1) {
- DCHECK_LE(pass_info.num_components, destination.num_components)
- << "Number of components mismatch for " << pass_type_as_string(type);
+ DCHECK_LE(pass_info.num_components, destination.num_components)
+ << "Number of components mismatch for " << pass_type_as_string(type);
+ if (pass_info.num_components == 1) {
if (mode == PassMode::DENOISED) {
/* Denoised passes store their final pixels, no need in special calculation. */
get_pass_float(render_buffers, buffer_params, destination);
@@ -184,16 +173,7 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
get_pass_float(render_buffers, buffer_params, destination);
}
}
- else if (destination.num_components == 3) {
- if (pass_info.is_aligned) {
- DCHECK_LE(pass_info.num_components, 4)
- << "Number of components mismatch for pass " << pass_type_as_string(type);
- }
- else {
- DCHECK_LE(pass_info.num_components, 3)
- << "Number of components mismatch for pass " << pass_type_as_string(type);
- }
-
+ else if (pass_info.num_components == 3) {
if (mode == PassMode::DENOISED) {
/* Denoised passes store their final pixels, no need in special calculation. */
get_pass_float3(render_buffers, buffer_params, destination);
@@ -210,10 +190,7 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
get_pass_float3(render_buffers, buffer_params, destination);
}
}
- else if (destination.num_components == 4) {
- DCHECK_EQ(pass_info.num_components, 4)
- << "Number of components mismatch for pass " << pass_type_as_string(type);
-
+ else if (pass_info.num_components == 4) {
if (type == PASS_SHADOW_CATCHER_MATTE && pass_access_info_.use_approximate_shadow_catcher) {
/* Denoised matte with shadow needs to do calculation (will use denoised shadow catcher pass
* to approximate shadow with). */
@@ -314,18 +291,23 @@ bool PassAccessor::set_render_tile_pixels(RenderBuffers *render_buffers, const S
return false;
}
+ const PassType type = pass_access_info_.type;
+ const PassInfo pass_info = Pass::get_info(type);
+
const BufferParams &buffer_params = render_buffers->params;
float *buffer_data = render_buffers->buffer.data();
- const int pass_stride = buffer_params.pass_stride;
const int size = buffer_params.width * buffer_params.height;
- const int num_components = source.num_components;
+
+ const int out_stride = buffer_params.pass_stride;
+ const int in_stride = source.num_components;
+ const int num_components_to_copy = min(source.num_components, pass_info.num_components);
float *out = buffer_data + pass_access_info_.offset;
- const float *in = source.pixels + source.offset * num_components;
+ const float *in = source.pixels + source.offset * in_stride;
- for (int i = 0; i < size; i++, out += pass_stride, in += num_components) {
- memcpy(out, in, sizeof(float) * num_components);
+ for (int i = 0; i < size; i++, out += out_stride, in += in_stride) {
+ memcpy(out, in, sizeof(float) * num_components_to_copy);
}
return true;
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index 98875d4fe43..e04e1378346 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -331,8 +331,7 @@ ccl_device_inline void kernel_accum_emission_or_background_pass(INTEGRATOR_STATE
const float3 denoising_feature_throughput = INTEGRATOR_STATE(path,
denoising_feature_throughput);
const float3 denoising_albedo = denoising_feature_throughput * contribution;
- kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_albedo,
- denoising_albedo);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_albedo, denoising_albedo);
}
}
# endif /* __DENOISING_FEATURES__ */
diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h
index ec18a141116..65966e0fef6 100644
--- a/intern/cycles/kernel/kernel_film.h
+++ b/intern/cycles/kernel/kernel_film.h
@@ -282,7 +282,7 @@ ccl_device_inline void film_get_pass_pixel_combined(const KernelFilmConvert *ccl
* Shadow catcher.
*/
-ccl_device_inline float4
+ccl_device_inline float3
film_calculate_shadow_catcher_denoised(const KernelFilmConvert *ccl_restrict kfilm_convert,
ccl_global const float *ccl_restrict buffer)
{
@@ -295,10 +295,10 @@ film_calculate_shadow_catcher_denoised(const KernelFilmConvert *ccl_restrict kfi
const float3 pixel = make_float3(in_catcher[0], in_catcher[1], in_catcher[2]) * scale_exposure;
- return make_float4(pixel.x, pixel.y, pixel.z, 1.0f);
+ return pixel;
}
-ccl_device_inline float4
+ccl_device_inline float3
film_calculate_shadow_catcher(const KernelFilmConvert *ccl_restrict kfilm_convert,
ccl_global const float *ccl_restrict buffer)
{
@@ -318,7 +318,7 @@ film_calculate_shadow_catcher(const KernelFilmConvert *ccl_restrict kfilm_conver
* needed, so return one. */
const float num_samples = in_catcher[3];
if (num_samples == 0.0f) {
- return one_float4();
+ return one_float3();
}
/* NOTE: It is possible that the Shadow Catcher pass is requested as an output without actual
@@ -356,7 +356,7 @@ film_calculate_shadow_catcher(const KernelFilmConvert *ccl_restrict kfilm_conver
* during the division. */
const float3 pixel = (1.0f - alpha) * one_float3() + alpha * shadow_catcher;
- return make_float4(pixel.x, pixel.y, pixel.z, 1.0f);
+ return pixel;
}
ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
@@ -378,14 +378,13 @@ ccl_device_inline float4 film_calculate_shadow_catcher_matte_with_shadow(
ccl_global const float *in_matte = buffer + kfilm_convert->pass_shadow_catcher_matte;
- const float4 shadow_catcher = film_calculate_shadow_catcher(kfilm_convert, buffer);
+ const float3 shadow_catcher = film_calculate_shadow_catcher(kfilm_convert, buffer);
const float3 color_matte = make_float3(in_matte[0], in_matte[1], in_matte[2]) * scale_exposure;
const float transparency = in_matte[3] * scale;
const float alpha = saturate(1.0f - transparency);
- const float alpha_matte = (1.0f - alpha) * (1.0f - average(float4_to_float3(shadow_catcher))) +
- alpha;
+ const float alpha_matte = (1.0f - alpha) * (1.0f - average(shadow_catcher)) + alpha;
if (kfilm_convert->use_approximate_shadow_catcher_background) {
kernel_assert(kfilm_convert->pass_background != PASS_UNUSED);
@@ -406,13 +405,13 @@ ccl_device_inline void film_get_pass_pixel_shadow_catcher(
ccl_global const float *ccl_restrict buffer,
float *ccl_restrict pixel)
{
- const float4 pixel_value = film_calculate_shadow_catcher(kfilm_convert, buffer);
+ const float3 pixel_value = film_calculate_shadow_catcher(kfilm_convert, buffer);
pixel[0] = pixel_value.x;
pixel[1] = pixel_value.y;
pixel[2] = pixel_value.z;
if (kfilm_convert->num_components == 4) {
- pixel[3] = pixel_value.w;
+ pixel[3] = 1.0f;
}
}
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index 568886a2104..325ea07218b 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -103,8 +103,7 @@ ccl_device_forceinline void kernel_write_denoising_features(
normal = transform_direction(&worldtocamera, normal);
const float3 denoising_normal = ensure_finite3(normal);
- kernel_write_pass_float3_unaligned(buffer + kernel_data.film.pass_denoising_normal,
- denoising_normal);
+ kernel_write_pass_float3(buffer + kernel_data.film.pass_denoising_normal, denoising_normal);
}
if (kernel_data.film.pass_denoising_albedo != PASS_UNUSED) {
@@ -112,8 +111,7 @@ ccl_device_forceinline void kernel_write_denoising_features(
denoising_feature_throughput);
const float3 denoising_albedo = ensure_finite3(denoising_feature_throughput *
diffuse_albedo);
- kernel
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list