[Bf-blender-cvs] [3fa86f4b280] master: Merge branch 'blender-v3.0-release'

Wed Nov 10 20:22:53 CET 2021

Commit: 3fa86f4b280cbc6ccc18993c089b94dda45afa34
Author: Brecht Van Lommel
Date:   Wed Nov 10 20:19:09 2021 +0100
Branches: master
https://developer.blender.org/rB3fa86f4b280cbc6ccc18993c089b94dda45afa34

Merge branch 'blender-v3.0-release'

===================================================================



===================================================================

diff --cc intern/cycles/kernel/device/gpu/kernel.h
index e954178ec63,844bbf90f67..d63cd0e8262

--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@@ -523,94 -456,227 +523,115 @@@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THR
   * Film.
   */
  
 -/* Common implementation for float destination. */
 -template<typename Processor>
 -ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *kfilm_convert,
 -                                                      float *pixels,
 -                                                      float *render_buffer,
 -                                                      int num_pixels,
 -                                                      int width,
 -                                                      int offset,
 -                                                      int stride,
 -                                                      int dst_offset,
 -                                                      int dst_stride,
 -                                                      const Processor &processor)
 -{
 -  const int render_pixel_index = ccl_gpu_global_id_x();
 -  if (render_pixel_index >= num_pixels) {
 -    return;
 -  }
 -
 -  const int x = render_pixel_index % width;
 -  const int y = render_pixel_index / width;
 -
 -  ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert->pass_stride +
 -                                   y * stride * kfilm_convert->pass_stride;
 -
 -  ccl_global float *pixel = pixels +
 -                            (render_pixel_index + dst_offset) * kfilm_convert->pixel_stride;
 -
 -  processor(kfilm_convert, buffer, pixel);
 -}
 -
+ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
+                                                           const int rgba_offset,
+                                                           const int rgba_stride,
+                                                           const int x,
+                                                           const int y,
+                                                           const half4 half_pixel)
+ {
+   /* Work around HIP issue with half float display, see T92972. */
+ #ifdef __KERNEL_HIP__
+   ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
+   out[0] = half_pixel.x;
+   out[1] = half_pixel.y;
+   out[2] = half_pixel.z;
+   out[3] = half_pixel.w;
+ #else
+   ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
+   *out = half_pixel;
+ #endif
+ }
+ 
 -/* Common implementation for half4 destination and 4-channel input pass. */
 -template<typename Processor>
 -ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
 -    const KernelFilmConvert *kfilm_convert,
 -    uchar4 *rgba,
 -    float *render_buffer,
 -    int num_pixels,
 -    int width,
 -    int offset,
 -    int stride,
 -    int rgba_offset,
 -    int rgba_stride,
 -    const Processor &processor)
 -{
 -  const int render_pixel_index = ccl_gpu_global_id_x();
 -  if (render_pixel_index >= num_pixels) {
 -    return;
 -  }
 -
 -  const int x = render_pixel_index % width;
 -  const int y = render_pixel_index / width;
 -
 -  ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert->pass_stride +
 -                                   y * stride * kfilm_convert->pass_stride;
 -
 -  float pixel[4];
 -  processor(kfilm_convert, buffer, pixel);
 -
 -  film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
 -
 -  const half4 half_pixel = float4_to_half4_display(
 -      make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
 -  kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel);
 -}
 -
 -/* Common implementation for half4 destination and 3-channel input pass. */
 -template<typename Processor>
 -ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgb(
 -    const KernelFilmConvert *kfilm_convert,
 -    uchar4 *rgba,
 -    float *render_buffer,
 -    int num_pixels,
 -    int width,
 -    int offset,
 -    int stride,
 -    int rgba_offset,
 -    int rgba_stride,
 -    const Processor &processor)
 -{
 -  kernel_gpu_film_convert_half_rgba_common_rgba(
 -      kfilm_convert,
 -      rgba,
 -      render_buffer,
 -      num_pixels,
 -      width,
 -      offset,
 -      stride,
 -      rgba_offset,
 -      rgba_stride,
 -      [&processor](const KernelFilmConvert *kfilm_convert,
 -                   ccl_global const float *buffer,
 -                   float *pixel_rgba) {
 -        processor(kfilm_convert, buffer, pixel_rgba);
 -        pixel_rgba[3] = 1.0f;
 -      });
 -}
 -
 -/* Common implementation for half4 destination and single channel input pass. */
 -template<typename Processor>
 -ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_value(
 -    const KernelFilmConvert *kfilm_convert,
 -    uchar4 *rgba,
 -    float *render_buffer,
 -    int num_pixels,
 -    int width,
 -    int offset,
 -    int stride,
 -    int rgba_offset,
 -    int rgba_stride,
 -    const Processor &processor)
 -{
 -  kernel_gpu_film_convert_half_rgba_common_rgba(
 -      kfilm_convert,
 -      rgba,
 -      render_buffer,
 -      num_pixels,
 -      width,
 -      offset,
 -      stride,
 -      rgba_offset,
 -      rgba_stride,
 -      [&processor](const KernelFilmConvert *kfilm_convert,
 -                   ccl_global const float *buffer,
 -                   float *pixel_rgba) {
 -        float value;
 -        processor(kfilm_convert, buffer, &value);
 -
 -        pixel_rgba[0] = value;
 -        pixel_rgba[1] = value;
 -        pixel_rgba[2] = value;
 -        pixel_rgba[3] = 1.0f;
 -      });
 -}
 -
 -#define KERNEL_FILM_CONVERT_PROC(name) \
 -  ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) name
 -
 -#define KERNEL_FILM_CONVERT_DEFINE(variant, channels) \
 -  KERNEL_FILM_CONVERT_PROC(kernel_gpu_film_convert_##variant) \
 -  (const KernelFilmConvert kfilm_convert, \
 -   float *pixels, \
 -   float *render_buffer, \
 -   int num_pixels, \
 -   int width, \
 -   int offset, \
 -   int stride, \
 -   int rgba_offset, \
 -   int rgba_stride) \
 +#define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \
 +  ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
 +      ccl_gpu_kernel_signature(film_convert_##variant, \
 +                               const KernelFilmConvert kfilm_convert, \
 +                               ccl_global float *pixels, \
 +                               ccl_global float *render_buffer, \
 +                               int num_pixels, \
 +                               int width, \
 +                               int offset, \
 +                               int stride, \
 +                               int rgba_offset, \
 +                               int rgba_stride) \
    { \
 -    kernel_gpu_film_convert_common(&kfilm_convert, \
 -                                   pixels, \
 -                                   render_buffer, \
 -                                   num_pixels, \
 -                                   width, \
 -                                   offset, \
 -                                   stride, \
 -                                   rgba_offset, \
 -                                   rgba_stride, \
 -                                   film_get_pass_pixel_##variant); \
 +    const int render_pixel_index = ccl_gpu_global_id_x(); \
 +    if (render_pixel_index >= num_pixels) { \
 +      return; \
 +    } \
 +\
 +    const int x = render_pixel_index % width; \
 +    const int y = render_pixel_index / width; \
 +\
 +    ccl_global const float *buffer = render_buffer + offset + x * kfilm_convert.pass_stride + \
 +                                     y * stride * kfilm_convert.pass_stride; \
 +\
 +    ccl_global float *pixel = pixels + \
 +                              (render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \
 +\
 +    film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \
    } \
 -  KERNEL_FILM_CONVERT_PROC(kernel_gpu_film_convert_##variant##_half_rgba) \
 -  (const KernelFilmConvert kfilm_convert, \
 -   uchar4 *rgba, \
 -   float *render_buffer, \
 -   int num_pixels, \
 -   int width, \
 -   int offset, \
 -   int stride, \
 -   int rgba_offset, \
 -   int rgba_stride) \
 +\
 +  ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
 +      ccl_gpu_kernel_signature(film_convert_##variant##_half_rgba, \
 +                               const KernelFilmConvert kfilm_convert, \
 +                               ccl_global uchar4 *rgba, \
 +                               ccl_global float *render_buffer, \
 +                               int num_pixels, \
 +                               int width, \
 +                               int offset, \
 +                               int stride, \
 +                               int rgba_offset, \
 +                               int rgba_stride) \
    { \
 -    kernel_gpu_film_convert_half_rgba_common_##channels(&kfilm_convert, \
 -                                                        rgba, \
 -                                                        render_buffer, \
 -                                                        num_pixels, \
 -                                                        width, \
 -                                                        offset, \
 -                                                        stride, \
 -                                                        rgba_offset, \
 -                                                        rgba_stride, \
 -                                                        film_get_pass_pixel_##variant); \
 -  }
 -
 -KERNEL_FILM_CONVERT_DEFINE(depth, value)
 -KERNEL_FILM_CONVERT_DEFINE(mist, value)
 -KERNEL_FILM_CONVERT_DEFINE(sample_count, value)
 -KERNEL_FILM_CONVERT_DEFINE(float, value)
 -
 -KERNEL_FILM_CONVERT_DEFINE(light_path, rgb)
 -KERNEL_FILM_CONVERT_DEFINE(float3, rgb)
 -
 -KERNEL_FILM_CONVERT_DEFINE(motion, rgba)
 -KERNEL_FILM_CONVERT_DEFINE(cryptomatte, rgba)
 -KERNEL_FILM_CONVERT_DEFINE(shadow_catcher, rgba)

@@ Diff output truncated at 10240 characters. @@