[Bf-blender-cvs] [4e56e738a8f] master: Cycles: optimize CPU texture sampler interpolation

Ethan-Hall noreply at git.blender.org
Wed Mar 23 20:06:27 CET 2022


Commit: 4e56e738a8f35228873d6e84d9e9f8b0e7a74a59
Author: Ethan-Hall
Date:   Wed Mar 23 15:45:32 2022 +0100
Branches: master
https://developer.blender.org/rB4e56e738a8f35228873d6e84d9e9f8b0e7a74a59

Cycles: optimize CPU texture sampler interpolation

Use templates so that the CPU texture sampler interpolates in float for
single-component data types, instead of using float4 for all types.

Differential Revision: https://developer.blender.org/D14424

===================================================================

M	intern/cycles/kernel/device/cpu/image.h
M	intern/cycles/util/types_float4.h
M	intern/cycles/util/types_float4_impl.h

===================================================================

diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h
index c0b6846e5b0..94eeaed7698 100644
--- a/intern/cycles/kernel/device/cpu/image.h
+++ b/intern/cycles/kernel/device/cpu/image.h
@@ -31,7 +31,18 @@ ccl_device_inline float frac(float x, int *ix)
   return x - (float)i;
 }
 
-template<typename T> struct TextureInterpolator {
+template<typename TexT, typename OutT = float4> struct TextureInterpolator {
+  template<typename ZeroT> static ccl_always_inline ZeroT zero();
+
+  template<> static ccl_always_inline float zero()
+  {
+    return 0.0f;
+  }
+
+  template<> static ccl_always_inline float4 zero()
+  {
+    return zero_float4();
+  }
 
   static ccl_always_inline float4 read(float4 r)
   {
@@ -40,21 +51,18 @@ template<typename T> struct TextureInterpolator {
 
   static ccl_always_inline float4 read(uchar4 r)
   {
-    float f = 1.0f / 255.0f;
+    const float f = 1.0f / 255.0f;
     return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
   }
 
-  static ccl_always_inline float4 read(uchar r)
+  static ccl_always_inline float read(uchar r)
   {
-    float f = r * (1.0f / 255.0f);
-    return make_float4(f, f, f, 1.0f);
+    return r * (1.0f / 255.0f);
   }
 
-  static ccl_always_inline float4 read(float r)
+  static ccl_always_inline float read(float r)
   {
-    /* TODO(dingto): Optimize this, so interpolation
-     * happens on float instead of float4 */
-    return make_float4(r, r, r, 1.0f);
+    return r;
   }
 
   static ccl_always_inline float4 read(half4 r)
@@ -62,63 +70,61 @@ template<typename T> struct TextureInterpolator {
     return half4_to_float4_image(r);
   }
 
-  static ccl_always_inline float4 read(half r)
+  static ccl_always_inline float read(half r)
   {
-    float f = half_to_float_image(r);
-    return make_float4(f, f, f, 1.0f);
+    return half_to_float_image(r);
   }
 
-  static ccl_always_inline float4 read(uint16_t r)
+  static ccl_always_inline float read(uint16_t r)
   {
-    float f = r * (1.0f / 65535.0f);
-    return make_float4(f, f, f, 1.0f);
+    return r * (1.0f / 65535.0f);
   }
 
   static ccl_always_inline float4 read(ushort4 r)
   {
-    float f = 1.0f / 65535.0f;
+    const float f = 1.0f / 65535.0f;
     return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
   }
 
   /* Read 2D Texture Data
    * Does not check if data request is in bounds. */
-  static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
+  static ccl_always_inline OutT read(const TexT *data, int x, int y, int width, int height)
   {
     return read(data[y * width + x]);
   }
 
   /* Read 2D Texture Data Clip
    * Returns transparent black if data request is out of bounds. */
-  static ccl_always_inline float4 read_clip(const T *data, int x, int y, int width, int height)
+  static ccl_always_inline OutT read_clip(const TexT *data, int x, int y, int width, int height)
   {
     if (x < 0 || x >= width || y < 0 || y >= height) {
-      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+      return zero<OutT>();
     }
     return read(data[y * width + x]);
   }
 
   /* Read 3D Texture Data
    * Does not check if data request is in bounds. */
-  static ccl_always_inline float4
-  read(const T *data, int x, int y, int z, int width, int height, int depth)
+  static ccl_always_inline OutT
+  read(const TexT *data, int x, int y, int z, int width, int height, int depth)
   {
     return read(data[x + y * width + z * width * height]);
   }
 
   /* Read 3D Texture Data Clip
    * Returns transparent black if data request is out of bounds. */
-  static ccl_always_inline float4
-  read_clip(const T *data, int x, int y, int z, int width, int height, int depth)
+  static ccl_always_inline OutT
+  read_clip(const TexT *data, int x, int y, int z, int width, int height, int depth)
   {
     if (x < 0 || x >= width || y < 0 || y >= height || z < 0 || z >= depth) {
-      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+      return zero<OutT>();
     }
     return read(data[x + y * width + z * width * height]);
   }
 
   /* Trilinear Interpolation */
-  static ccl_always_inline float4
-  trilinear_lookup(const T *data,
+  static ccl_always_inline OutT
+  trilinear_lookup(const TexT *data,
                    float tx,
                    float ty,
                    float tz,
@@ -131,10 +137,10 @@ template<typename T> struct TextureInterpolator {
                    int width,
                    int height,
                    int depth,
-                   float4 read(const T *, int, int, int, int, int, int))
+                   OutT read(const TexT *, int, int, int, int, int, int))
   {
-    float4 r;
-    r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, iz, width, height, depth);
+    OutT r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
+             read(data, ix, iy, iz, width, height, depth);
     r += (1.0f - tz) * (1.0f - ty) * tx * read(data, nix, iy, iz, width, height, depth);
     r += (1.0f - tz) * ty * (1.0f - tx) * read(data, ix, niy, iz, width, height, depth);
     r += (1.0f - tz) * ty * tx * read(data, nix, niy, iz, width, height, depth);
@@ -147,8 +153,8 @@ template<typename T> struct TextureInterpolator {
   }
 
   /** Tricubic Interpolation */
-  static ccl_always_inline float4
-  tricubic_lookup(const T *data,
+  static ccl_always_inline OutT
+  tricubic_lookup(const TexT *data,
                   float tx,
                   float ty,
                   float tz,
@@ -158,7 +164,7 @@ template<typename T> struct TextureInterpolator {
                   int width,
                   int height,
                   int depth,
-                  float4 read(const T *, int, int, int, int, int, int))
+                  OutT read(const TexT *, int, int, int, int, int, int))
   {
     float u[4], v[4], w[4];
 
@@ -199,7 +205,7 @@ template<typename T> struct TextureInterpolator {
 
   /* ********  2D interpolation ******** */
 
-  static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
+  static ccl_always_inline OutT interp_closest(const TextureInfo &info, float x, float y)
   {
     const int width = info.width;
     const int height = info.height;
@@ -214,7 +220,7 @@ template<typename T> struct TextureInterpolator {
       case EXTENSION_CLIP:
         /* No samples are inside the clip region. */
         if (ix < 0 || ix >= width || iy < 0 || iy >= height) {
-          return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+          return zero<OutT>();
         }
         break;
       case EXTENSION_EXTEND:
@@ -223,14 +229,14 @@ template<typename T> struct TextureInterpolator {
         break;
       default:
         kernel_assert(0);
-        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        return zero<OutT>();
     }
 
-    const T *data = (const T *)info.data;
-    return read((const T *)data, ix, iy, width, height);
+    const TexT *data = (const TexT *)info.data;
+    return read((const TexT *)data, ix, iy, width, height);
   }
 
-  static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
+  static ccl_always_inline OutT interp_linear(const TextureInfo &info, float x, float y)
   {
     const int width = info.width;
     const int height = info.height;
@@ -252,7 +258,7 @@ template<typename T> struct TextureInterpolator {
       case EXTENSION_CLIP:
         /* No linear samples are inside the clip region. */
         if (ix < -1 || ix >= width || iy < -1 || iy >= height) {
-          return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+          return zero<OutT>();
         }
         nix = ix + 1;
         niy = iy + 1;
@@ -265,17 +271,17 @@ template<typename T> struct TextureInterpolator {
         break;
       default:
         kernel_assert(0);
-        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        return zero<OutT>();
     }
 
-    const T *data = (const T *)info.data;
+    const TexT *data = (const TexT *)info.data;
     return (1.0f - ty) * (1.0f - tx) * read_clip(data, ix, iy, width, height) +
            (1.0f - ty) * tx * read_clip(data, nix, iy, width, height) +
            ty * (1.0f - tx) * read_clip(data, ix, niy, width, height) +
            ty * tx * read_clip(data, nix, niy, width, height);
   }
 
-  static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
+  static ccl_always_inline OutT interp_cubic(const TextureInfo &info, float x, float y)
   {
     const int width = info.width;
     const int height = info.height;
@@ -304,7 +310,7 @@ template<typename T> struct TextureInterpolator {
       case EXTENSION_CLIP:
         /* No cubic samples are inside the clip region. */
         if (ix < -2 || ix > width || iy < -2 || iy > height) {
-          return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+          return zero<OutT>();
         }
 
         pix = ix - 1;
@@ -328,10 +334,10 @@ template<typename T> struct TextureInterpolator {
         break;
       default:
         kernel_assert(0);
-        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+        return zero<OutT>();
     }
 
-    const T *data = (const T *)info.data;
+    const TexT *data = (const TexT *)info.data;
     const int xc[4] = {pix, ix, nix, nnix};
     const int yc[4] = {piy, iy, niy, nniy};
     float u[4], v[4];
@@ -353,11 +359,8 @@ template<typename T> struct TextureInterpolator {
 #undef DATA
   }
 
-  static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y)
+  static ccl_always_inline OutT interp(const TextureInfo &info, float x, float y)
   {
-    if (UNLIKELY(!info.data)) {
-      return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-    }
     switch (info.interpolation) {
       case INTERPOLATION_CLOSEST:
         return interp_closest(info, x, y);
@@ -370,10 +373,10 @@ template<typename T> struct TextureInterpolator {
 
   /* ********  3D interpolation ******** */
 
-  static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info,
-                                                    float x,
-                                                    float y,
-                                                    float z)
+  static ccl_always_inline OutT interp_3d_closest(const TextureInfo &info,
+                                            

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list