[Bf-blender-cvs] [4e56e738a8f] master: Cycles: optimize CPU texture sampler interpolation
Ethan-Hall
noreply at git.blender.org
Wed Mar 23 20:06:27 CET 2022
Commit: 4e56e738a8f35228873d6e84d9e9f8b0e7a74a59
Author: Ethan-Hall
Date: Wed Mar 23 15:45:32 2022 +0100
Branches: master
https://developer.blender.org/rB4e56e738a8f35228873d6e84d9e9f8b0e7a74a59
Cycles: optimize CPU texture sampler interpolation
Use templates to optimize the CPU texture sampler so that it interpolates
using float for single-component data types instead of using float4 for all types.
Differential Revision: https://developer.blender.org/D14424
===================================================================
M intern/cycles/kernel/device/cpu/image.h
M intern/cycles/util/types_float4.h
M intern/cycles/util/types_float4_impl.h
===================================================================
diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h
index c0b6846e5b0..94eeaed7698 100644
--- a/intern/cycles/kernel/device/cpu/image.h
+++ b/intern/cycles/kernel/device/cpu/image.h
@@ -31,7 +31,18 @@ ccl_device_inline float frac(float x, int *ix)
return x - (float)i;
}
-template<typename T> struct TextureInterpolator {
+template<typename TexT, typename OutT = float4> struct TextureInterpolator {
+ template<typename ZeroT> static ccl_always_inline ZeroT zero();
+
+ template<> static ccl_always_inline float zero()
+ {
+ return 0.0f;
+ }
+
+ template<> static ccl_always_inline float4 zero()
+ {
+ return zero_float4();
+ }
static ccl_always_inline float4 read(float4 r)
{
@@ -40,21 +51,18 @@ template<typename T> struct TextureInterpolator {
static ccl_always_inline float4 read(uchar4 r)
{
- float f = 1.0f / 255.0f;
+ const float f = 1.0f / 255.0f;
return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
}
- static ccl_always_inline float4 read(uchar r)
+ static ccl_always_inline float read(uchar r)
{
- float f = r * (1.0f / 255.0f);
- return make_float4(f, f, f, 1.0f);
+ return r * (1.0f / 255.0f);
}
- static ccl_always_inline float4 read(float r)
+ static ccl_always_inline float read(float r)
{
- /* TODO(dingto): Optimize this, so interpolation
- * happens on float instead of float4 */
- return make_float4(r, r, r, 1.0f);
+ return r;
}
static ccl_always_inline float4 read(half4 r)
@@ -62,63 +70,61 @@ template<typename T> struct TextureInterpolator {
return half4_to_float4_image(r);
}
- static ccl_always_inline float4 read(half r)
+ static ccl_always_inline float read(half r)
{
- float f = half_to_float_image(r);
- return make_float4(f, f, f, 1.0f);
+ return half_to_float_image(r);
}
- static ccl_always_inline float4 read(uint16_t r)
+ static ccl_always_inline float read(uint16_t r)
{
- float f = r * (1.0f / 65535.0f);
- return make_float4(f, f, f, 1.0f);
+ return r * (1.0f / 65535.0f);
}
static ccl_always_inline float4 read(ushort4 r)
{
- float f = 1.0f / 65535.0f;
+ const float f = 1.0f / 65535.0f;
return make_float4(r.x * f, r.y * f, r.z * f, r.w * f);
}
/* Read 2D Texture Data
* Does not check if data request is in bounds. */
- static ccl_always_inline float4 read(const T *data, int x, int y, int width, int height)
+ static ccl_always_inline OutT read(const TexT *data, int x, int y, int width, int height)
{
return read(data[y * width + x]);
}
/* Read 2D Texture Data Clip
* Returns transparent black if data request is out of bounds. */
- static ccl_always_inline float4 read_clip(const T *data, int x, int y, int width, int height)
+ static ccl_always_inline OutT read_clip(const TexT *data, int x, int y, int width, int height)
{
if (x < 0 || x >= width || y < 0 || y >= height) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return zero<OutT>();
}
return read(data[y * width + x]);
}
/* Read 3D Texture Data
* Does not check if data request is in bounds. */
- static ccl_always_inline float4
- read(const T *data, int x, int y, int z, int width, int height, int depth)
+ static ccl_always_inline OutT
+ read(const TexT *data, int x, int y, int z, int width, int height, int depth)
{
return read(data[x + y * width + z * width * height]);
}
/* Read 3D Texture Data Clip
* Returns transparent black if data request is out of bounds. */
- static ccl_always_inline float4
- read_clip(const T *data, int x, int y, int z, int width, int height, int depth)
+ static ccl_always_inline OutT
+ read_clip(const TexT *data, int x, int y, int z, int width, int height, int depth)
{
if (x < 0 || x >= width || y < 0 || y >= height || z < 0 || z >= depth) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return zero<OutT>();
}
return read(data[x + y * width + z * width * height]);
}
/* Trilinear Interpolation */
- static ccl_always_inline float4
- trilinear_lookup(const T *data,
+ static ccl_always_inline OutT
+ trilinear_lookup(const TexT *data,
float tx,
float ty,
float tz,
@@ -131,10 +137,10 @@ template<typename T> struct TextureInterpolator {
int width,
int height,
int depth,
- float4 read(const T *, int, int, int, int, int, int))
+ OutT read(const TexT *, int, int, int, int, int, int))
{
- float4 r;
- r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, iz, width, height, depth);
+ OutT r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
+ read(data, ix, iy, iz, width, height, depth);
r += (1.0f - tz) * (1.0f - ty) * tx * read(data, nix, iy, iz, width, height, depth);
r += (1.0f - tz) * ty * (1.0f - tx) * read(data, ix, niy, iz, width, height, depth);
r += (1.0f - tz) * ty * tx * read(data, nix, niy, iz, width, height, depth);
@@ -147,8 +153,8 @@ template<typename T> struct TextureInterpolator {
}
/** Tricubic Interpolation */
- static ccl_always_inline float4
- tricubic_lookup(const T *data,
+ static ccl_always_inline OutT
+ tricubic_lookup(const TexT *data,
float tx,
float ty,
float tz,
@@ -158,7 +164,7 @@ template<typename T> struct TextureInterpolator {
int width,
int height,
int depth,
- float4 read(const T *, int, int, int, int, int, int))
+ OutT read(const TexT *, int, int, int, int, int, int))
{
float u[4], v[4], w[4];
@@ -199,7 +205,7 @@ template<typename T> struct TextureInterpolator {
/* ******** 2D interpolation ******** */
- static ccl_always_inline float4 interp_closest(const TextureInfo &info, float x, float y)
+ static ccl_always_inline OutT interp_closest(const TextureInfo &info, float x, float y)
{
const int width = info.width;
const int height = info.height;
@@ -214,7 +220,7 @@ template<typename T> struct TextureInterpolator {
case EXTENSION_CLIP:
/* No samples are inside the clip region. */
if (ix < 0 || ix >= width || iy < 0 || iy >= height) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return zero<OutT>();
}
break;
case EXTENSION_EXTEND:
@@ -223,14 +229,14 @@ template<typename T> struct TextureInterpolator {
break;
default:
kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return zero<OutT>();
}
- const T *data = (const T *)info.data;
- return read((const T *)data, ix, iy, width, height);
+ const TexT *data = (const TexT *)info.data;
+ return read((const TexT *)data, ix, iy, width, height);
}
- static ccl_always_inline float4 interp_linear(const TextureInfo &info, float x, float y)
+ static ccl_always_inline OutT interp_linear(const TextureInfo &info, float x, float y)
{
const int width = info.width;
const int height = info.height;
@@ -252,7 +258,7 @@ template<typename T> struct TextureInterpolator {
case EXTENSION_CLIP:
/* No linear samples are inside the clip region. */
if (ix < -1 || ix >= width || iy < -1 || iy >= height) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return zero<OutT>();
}
nix = ix + 1;
niy = iy + 1;
@@ -265,17 +271,17 @@ template<typename T> struct TextureInterpolator {
break;
default:
kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return zero<OutT>();
}
- const T *data = (const T *)info.data;
+ const TexT *data = (const TexT *)info.data;
return (1.0f - ty) * (1.0f - tx) * read_clip(data, ix, iy, width, height) +
(1.0f - ty) * tx * read_clip(data, nix, iy, width, height) +
ty * (1.0f - tx) * read_clip(data, ix, niy, width, height) +
ty * tx * read_clip(data, nix, niy, width, height);
}
- static ccl_always_inline float4 interp_cubic(const TextureInfo &info, float x, float y)
+ static ccl_always_inline OutT interp_cubic(const TextureInfo &info, float x, float y)
{
const int width = info.width;
const int height = info.height;
@@ -304,7 +310,7 @@ template<typename T> struct TextureInterpolator {
case EXTENSION_CLIP:
/* No cubic samples are inside the clip region. */
if (ix < -2 || ix > width || iy < -2 || iy > height) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return zero<OutT>();
}
pix = ix - 1;
@@ -328,10 +334,10 @@ template<typename T> struct TextureInterpolator {
break;
default:
kernel_assert(0);
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+ return zero<OutT>();
}
- const T *data = (const T *)info.data;
+ const TexT *data = (const TexT *)info.data;
const int xc[4] = {pix, ix, nix, nnix};
const int yc[4] = {piy, iy, niy, nniy};
float u[4], v[4];
@@ -353,11 +359,8 @@ template<typename T> struct TextureInterpolator {
#undef DATA
}
- static ccl_always_inline float4 interp(const TextureInfo &info, float x, float y)
+ static ccl_always_inline OutT interp(const TextureInfo &info, float x, float y)
{
- if (UNLIKELY(!info.data)) {
- return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
- }
switch (info.interpolation) {
case INTERPOLATION_CLOSEST:
return interp_closest(info, x, y);
@@ -370,10 +373,10 @@ template<typename T> struct TextureInterpolator {
/* ******** 3D interpolation ******** */
- static ccl_always_inline float4 interp_3d_closest(const TextureInfo &info,
- float x,
- float y,
- float z)
+ static ccl_always_inline OutT interp_3d_closest(const TextureInfo &info,
+
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list