[Bf-blender-cvs] [3fa86f4b280] master: Merge branch 'blender-v3.0-release'
Brecht Van Lommel
noreply at git.blender.org
Wed Nov 10 20:22:53 CET 2021
Commit: 3fa86f4b280cbc6ccc18993c089b94dda45afa34
Author: Brecht Van Lommel
Date: Wed Nov 10 20:19:09 2021 +0100
Branches: master
https://developer.blender.org/rB3fa86f4b280cbc6ccc18993c089b94dda45afa34
Merge branch 'blender-v3.0-release'
===================================================================
===================================================================
diff --cc intern/cycles/kernel/device/gpu/kernel.h
index e954178ec63,844bbf90f67..d63cd0e8262
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@@ -523,94 -456,227 +523,115 @@@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THR
* Film.
*/
-/* Common implementation for float destination. */
-template<typename Processor>
-ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *kfilm_convert,
- float *pixels,
- float *render_buffer,
- int num_pixels,
- int width,
- int offset,
- int stride,
- int dst_offset,
- int dst_stride,
- const Processor &processor)
-{
- const int render_pixel_index = ccl_gpu_global_id_x();
- if (render_pixel_index >= num_pixels) {
- return;
- }
-
- const int x = render_pixel_index % width;
- const int y = render_pixel_index / width;
-
- ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert->pass_stride +
- y * stride * kfilm_convert->pass_stride;
-
- ccl_global float *pixel = pixels +
- (render_pixel_index + dst_offset) * kfilm_convert->pixel_stride;
-
- processor(kfilm_convert, buffer, pixel);
-}
-
+ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
+ const int rgba_offset,
+ const int rgba_stride,
+ const int x,
+ const int y,
+ const half4 half_pixel)
+ {
+ /* Work around HIP issue with half float display, see T92972. */
+ #ifdef __KERNEL_HIP__
+ ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
+ out[0] = half_pixel.x;
+ out[1] = half_pixel.y;
+ out[2] = half_pixel.z;
+ out[3] = half_pixel.w;
+ #else
+ ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
+ *out = half_pixel;
+ #endif
+ }
+
-/* Common implementation for half4 destination and 4-channel input pass. */
-template<typename Processor>
-ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
- const KernelFilmConvert *kfilm_convert,
- uchar4 *rgba,
- float *render_buffer,
- int num_pixels,
- int width,
- int offset,
- int stride,
- int rgba_offset,
- int rgba_stride,
- const Processor &processor)
-{
- const int render_pixel_index = ccl_gpu_global_id_x();
- if (render_pixel_index >= num_pixels) {
- return;
- }
-
- const int x = render_pixel_index % width;
- const int y = render_pixel_index / width;
-
- ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert->pass_stride +
- y * stride * kfilm_convert->pass_stride;
-
- float pixel[4];
- processor(kfilm_convert, buffer, pixel);
-
- film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
-
- const half4 half_pixel = float4_to_half4_display(
- make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
- kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel);
-}
-
-/* Common implementation for half4 destination and 3-channel input pass. */
-template<typename Processor>
-ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgb(
- const KernelFilmConvert *kfilm_convert,
- uchar4 *rgba,
- float *render_buffer,
- int num_pixels,
- int width,
- int offset,
- int stride,
- int rgba_offset,
- int rgba_stride,
- const Processor &processor)
-{
- kernel_gpu_film_convert_half_rgba_common_rgba(
- kfilm_convert,
- rgba,
- render_buffer,
- num_pixels,
- width,
- offset,
- stride,
- rgba_offset,
- rgba_stride,
- [&processor](const KernelFilmConvert *kfilm_convert,
- ccl_global const float *buffer,
- float *pixel_rgba) {
- processor(kfilm_convert, buffer, pixel_rgba);
- pixel_rgba[3] = 1.0f;
- });
-}
-
-/* Common implementation for half4 destination and single channel input pass. */
-template<typename Processor>
-ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_value(
- const KernelFilmConvert *kfilm_convert,
- uchar4 *rgba,
- float *render_buffer,
- int num_pixels,
- int width,
- int offset,
- int stride,
- int rgba_offset,
- int rgba_stride,
- const Processor &processor)
-{
- kernel_gpu_film_convert_half_rgba_common_rgba(
- kfilm_convert,
- rgba,
- render_buffer,
- num_pixels,
- width,
- offset,
- stride,
- rgba_offset,
- rgba_stride,
- [&processor](const KernelFilmConvert *kfilm_convert,
- ccl_global const float *buffer,
- float *pixel_rgba) {
- float value;
- processor(kfilm_convert, buffer, &value);
-
- pixel_rgba[0] = value;
- pixel_rgba[1] = value;
- pixel_rgba[2] = value;
- pixel_rgba[3] = 1.0f;
- });
-}
-
-#define KERNEL_FILM_CONVERT_PROC(name) \
- ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) name
-
-#define KERNEL_FILM_CONVERT_DEFINE(variant, channels) \
- KERNEL_FILM_CONVERT_PROC(kernel_gpu_film_convert_##variant) \
- (const KernelFilmConvert kfilm_convert, \
- float *pixels, \
- float *render_buffer, \
- int num_pixels, \
- int width, \
- int offset, \
- int stride, \
- int rgba_offset, \
- int rgba_stride) \
+#define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \
+ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
+ ccl_gpu_kernel_signature(film_convert_##variant, \
+ const KernelFilmConvert kfilm_convert, \
+ ccl_global float *pixels, \
+ ccl_global float *render_buffer, \
+ int num_pixels, \
+ int width, \
+ int offset, \
+ int stride, \
+ int rgba_offset, \
+ int rgba_stride) \
{ \
- kernel_gpu_film_convert_common(&kfilm_convert, \
- pixels, \
- render_buffer, \
- num_pixels, \
- width, \
- offset, \
- stride, \
- rgba_offset, \
- rgba_stride, \
- film_get_pass_pixel_##variant); \
+ const int render_pixel_index = ccl_gpu_global_id_x(); \
+ if (render_pixel_index >= num_pixels) { \
+ return; \
+ } \
+\
+ const int x = render_pixel_index % width; \
+ const int y = render_pixel_index / width; \
+\
+ ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \
+ y * stride * kfilm_convert.pass_stride; \
+\
+ ccl_global float *pixel = pixels + \
+ (render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
+\
+ film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \
} \
- KERNEL_FILM_CONVERT_PROC(kernel_gpu_film_convert_##variant##_half_rgba) \
- (const KernelFilmConvert kfilm_convert, \
- uchar4 *rgba, \
- float *render_buffer, \
- int num_pixels, \
- int width, \
- int offset, \
- int stride, \
- int rgba_offset, \
- int rgba_stride) \
+\
+ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
+ ccl_gpu_kernel_signature(film_convert_##variant##_half_rgba, \
+ const KernelFilmConvert kfilm_convert, \
+ ccl_global uchar4 *rgba, \
+ ccl_global float *render_buffer, \
+ int num_pixels, \
+ int width, \
+ int offset, \
+ int stride, \
+ int rgba_offset, \
+ int rgba_stride) \
{ \
- kernel_gpu_film_convert_half_rgba_common_##channels(&kfilm_convert, \
- rgba, \
- render_buffer, \
- num_pixels, \
- width, \
- offset, \
- stride, \
- rgba_offset, \
- rgba_stride, \
- film_get_pass_pixel_##variant); \
- }
-
-KERNEL_FILM_CONVERT_DEFINE(depth, value)
-KERNEL_FILM_CONVERT_DEFINE(mist, value)
-KERNEL_FILM_CONVERT_DEFINE(sample_count, value)
-KERNEL_FILM_CONVERT_DEFINE(float, value)
-
-KERNEL_FILM_CONVERT_DEFINE(light_path, rgb)
-KERNEL_FILM_CONVERT_DEFINE(float3, rgb)
-
-KERNEL_FILM_CONVERT_DEFINE(motion, rgba)
-KERNEL_FILM_CONVERT_DEFINE(cryptomatte, rgba)
-KERNEL_FILM_CONVERT_DEFINE(shadow_catcher, rgba)
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list