[Bf-blender-cvs] [0a07cdbe80b] master: Cycles: Split vectorized math utilities to a dedicated files

Tue Apr 25 10:44:35 CEST 2017

Commit: 0a07cdbe80b2999478fa0d062a846e9bcfafc872
Author: Sergey Sharybin
Date:   Fri Apr 14 14:05:23 2017 +0200
Branches: master
https://developer.blender.org/rB0a07cdbe80b2999478fa0d062a846e9bcfafc872

Cycles: Split vectorized math utilities into dedicated files

This file was an even bigger mess than the vectorized types header;
cleaning it up makes it easier to maintain these files and to
extend them further.

===================================================================

M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/util/CMakeLists.txt
M	intern/cycles/util/util_math.h
A	intern/cycles/util/util_math_float2.h
A	intern/cycles/util/util_math_float3.h
A	intern/cycles/util/util_math_float4.h
A	intern/cycles/util/util_math_int2.h
A	intern/cycles/util/util_math_int3.h
A	intern/cycles/util/util_math_int4.h
M	intern/cycles/util/util_types_float2.h
M	intern/cycles/util/util_types_float2_impl.h
M	intern/cycles/util/util_types_float3.h
M	intern/cycles/util/util_types_float3_impl.h
M	intern/cycles/util/util_types_float4.h
M	intern/cycles/util/util_types_float4_impl.h
M	intern/cycles/util/util_types_int3.h
M	intern/cycles/util/util_types_int3_impl.h
M	intern/cycles/util/util_types_int4.h
M	intern/cycles/util/util_types_int4_impl.h

===================================================================

diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 10eff10d809..3750225571d 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -196,6 +196,12 @@ set(SRC_UTIL_HEADERS
 	../util/util_math.h
 	../util/util_math_fast.h
 	../util/util_math_intersect.h
+	../util/util_math_float2.h
+	../util/util_math_float3.h
+	../util/util_math_float4.h
+	../util/util_math_int2.h
+	../util/util_math_int3.h
+	../util/util_math_int4.h
 	../util/util_static_assert.h
 	../util/util_transform.h
 	../util/util_texture.h
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 8d0a6c9fff9..388aba65460 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -53,6 +53,12 @@ set(SRC_HEADERS
 	util_math_cdf.h
 	util_math_fast.h
 	util_math_intersect.h
+	util_math_float2.h
+	util_math_float3.h
+	util_math_float4.h
+	util_math_int2.h
+	util_math_int3.h
+	util_math_int4.h
 	util_md5.h
 	util_opengl.h
 	util_optimization.h
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 77781ed4574..52b4fa859b7 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -28,12 +28,10 @@
 
 
 #ifndef __KERNEL_OPENCL__
-
-#include <float.h>
-#include <math.h>
-#include <stdio.h>
-
-#endif
+#  include <float.h>
+#  include <math.h>
+#  include <stdio.h>
+#endif  /* __KERNEL_OPENCL__ */
 
 #include "util/util_types.h"
 
@@ -43,49 +41,44 @@ CCL_NAMESPACE_BEGIN
 
 /* Division */
 #ifndef M_PI_F
-#define M_PI_F    (3.1415926535897932f)  /* pi */
+#  define M_PI_F    (3.1415926535897932f)  /* pi */
 #endif
 #ifndef M_PI_2_F
-#define M_PI_2_F  (1.5707963267948966f)  /* pi/2 */
+#  define M_PI_2_F  (1.5707963267948966f)  /* pi/2 */
 #endif
 #ifndef M_PI_4_F
-#define M_PI_4_F  (0.7853981633974830f)  /* pi/4 */
+#  define M_PI_4_F  (0.7853981633974830f)  /* pi/4 */
 #endif
 #ifndef M_1_PI_F
-#define M_1_PI_F  (0.3183098861837067f)  /* 1/pi */
+#  define M_1_PI_F  (0.3183098861837067f)  /* 1/pi */
 #endif
 #ifndef M_2_PI_F
-#define M_2_PI_F  (0.6366197723675813f)  /* 2/pi */
+#  define M_2_PI_F  (0.6366197723675813f)  /* 2/pi */
 #endif
 
 /* Multiplication */
 #ifndef M_2PI_F
-#define M_2PI_F   (6.2831853071795864f)  /* 2*pi */
+#  define M_2PI_F   (6.2831853071795864f)  /* 2*pi */
 #endif
 #ifndef M_4PI_F
-#define M_4PI_F   (12.566370614359172f)  /* 4*pi */
+#  define M_4PI_F   (12.566370614359172f)  /* 4*pi */
 #endif
 
 /* Float sqrt variations */
-
 #ifndef M_SQRT2_F
-#define M_SQRT2_F (1.4142135623730950f)  /* sqrt(2) */
+#  define M_SQRT2_F (1.4142135623730950f)  /* sqrt(2) */
 #endif
-
 #ifndef M_LN2_F
-#define M_LN2_F   (0.6931471805599453f)  /* ln(2) */
+#  define M_LN2_F   (0.6931471805599453f)  /* ln(2) */
 #endif
-
 #ifndef M_LN10_F
-#define M_LN10_F  (2.3025850929940457f)  /* ln(10) */
+#  define M_LN10_F  (2.3025850929940457f)  /* ln(10) */
 #endif
 
 /* Scalar */
 
 #ifdef _WIN32
-
-#ifndef __KERNEL_OPENCL__
-
+#  ifndef __KERNEL_OPENCL__
 ccl_device_inline float fmaxf(float a, float b)
 {
 	return (a > b)? a: b;
@@ -95,13 +88,10 @@ ccl_device_inline float fminf(float a, float b)
 {
 	return (a < b)? a: b;
 }
-
-#endif
-
-#endif
+#  endif  /* !__KERNEL_OPENCL__ */
+#endif  /* _WIN32 */
 
 #ifndef __KERNEL_GPU__
-
 using std::isfinite;
 using std::isnan;
 
@@ -157,8 +147,7 @@ ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d)
 {
 	return max(max(a,b),max(c,d));
 }
-
-#endif
+#endif /* __KERNEL_GPU__ */
 
 ccl_device_inline float min4(float a, float b, float c, float d)
 {
@@ -170,13 +159,7 @@ ccl_device_inline float max4(float a, float b, float c, float d)
 	return max(max(a, b), max(c, d));
 }
 
-ccl_device_inline float max3(float3 a)
-{
-	return max(max(a.x, a.y), a.z);
-}
-
 #ifndef __KERNEL_OPENCL__
-
 ccl_device_inline int clamp(int a, int mn, int mx)
 {
 	return min(max(a, mn), mx);
@@ -191,17 +174,14 @@ ccl_device_inline float mix(float a, float b, float t)
 {
     return a + t*(b - a);
 }
-
-#endif
+#endif  /* __KERNEL_OPENCL__ */
 
 #ifndef __KERNEL_CUDA__
-
 ccl_device_inline float saturate(float a)
 {
 	return clamp(a, 0.0f, 1.0f);
 }
-
-#endif
+#endif  /* __KERNEL_CUDA__ */
 
 ccl_device_inline int float_to_int(float f)
 {
@@ -242,1036 +222,101 @@ ccl_device_inline int mod(int x, int m)
 	return (x % m + m) % m;
 }
 
-/* Float2 Vector */
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline bool is_zero(const float2& a)
-{
-	return (a.x == 0.0f && a.y == 0.0f);
-}
-
-#endif
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float average(const float2& a)
-{
-	return (a.x + a.y)*(1.0f/2.0f);
-}
-
-#endif
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float2 operator-(const float2& a)
-{
-	return make_float2(-a.x, -a.y);
-}
-
-ccl_device_inline float2 operator*(const float2& a, const float2& b)
-{
-	return make_float2(a.x*b.x, a.y*b.y);
-}
-
-ccl_device_inline float2 operator*(const float2& a, float f)
-{
-	return make_float2(a.x*f, a.y*f);
-}
-
-ccl_device_inline float2 operator*(float f, const float2& a)
-{
-	return make_float2(a.x*f, a.y*f);
-}
-
-ccl_device_inline float2 operator/(float f, const float2& a)
-{
-	return make_float2(f/a.x, f/a.y);
-}
-
-ccl_device_inline float2 operator/(const float2& a, float f)
-{
-	float invf = 1.0f/f;
-	return make_float2(a.x*invf, a.y*invf);
-}
-
-ccl_device_inline float2 operator/(const float2& a, const float2& b)
-{
-	return make_float2(a.x/b.x, a.y/b.y);
-}
-
-ccl_device_inline float2 operator+(const float2& a, const float2& b)
-{
-	return make_float2(a.x+b.x, a.y+b.y);
-}
-
-ccl_device_inline float2 operator-(const float2& a, const float2& b)
-{
-	return make_float2(a.x-b.x, a.y-b.y);
-}
-
-ccl_device_inline float2 operator+=(float2& a, const float2& b)
-{
-	return a = a + b;
-}
-
-ccl_device_inline float2 operator*=(float2& a, const float2& b)
-{
-	return a = a * b;
-}
-
-ccl_device_inline float2 operator*=(float2& a, float f)
-{
-	return a = a * f;
-}
-
-ccl_device_inline float2 operator/=(float2& a, const float2& b)
-{
-	return a = a / b;
-}
-
-ccl_device_inline float2 operator/=(float2& a, float f)
-{
-	float invf = 1.0f/f;
-	return a = a * invf;
-}
-
-
-ccl_device_inline float dot(const float2& a, const float2& b)
-{
-	return a.x*b.x + a.y*b.y;
-}
-
-ccl_device_inline float cross(const float2& a, const float2& b)
-{
-	return (a.x*b.y - a.y*b.x);
-}
-
-#endif
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline bool operator==(const int2 a, const int2 b)
-{
-	return (a.x == b.x && a.y == b.y);
-}
-
-ccl_device_inline float len(const float2& a)
-{
-	return sqrtf(dot(a, a));
-}
-
-ccl_device_inline float2 normalize(const float2& a)
-{
-	return a/len(a);
-}
-
-ccl_device_inline float2 normalize_len(const float2& a, float *t)
-{
-	*t = len(a);
-	return a/(*t);
-}
-
-ccl_device_inline float2 safe_normalize(const float2& a)
-{
-	float t = len(a);
-	return (t != 0.0f)? a/t: a;
-}
-
-ccl_device_inline bool operator==(const float2& a, const float2& b)
-{
-	return (a.x == b.x && a.y == b.y);
-}
-
-ccl_device_inline bool operator!=(const float2& a, const float2& b)
-{
-	return !(a == b);
-}
-
-ccl_device_inline float2 min(const float2& a, const float2& b)
-{
-	return make_float2(min(a.x, b.x), min(a.y, b.y));
-}
-
-ccl_device_inline float2 max(const float2& a, const float2& b)
-{
-	return make_float2(max(a.x, b.x), max(a.y, b.y));
-}
-
-ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx)
-{
-	return min(max(a, mn), mx);
-}
-
-ccl_device_inline float2 fabs(const float2& a)
+ccl_device_inline float3 float2_to_float3(const float2 a)
 {
-	return make_float2(fabsf(a.x), fabsf(a.y));
+	return make_float3(a.x, a.y, 0.0f);
 }
 
-ccl_device_inline float2 as_float2(const float4& a)
+ccl_device_inline float3 float4_to_float3(const float4 a)
 {
-	return make_float2(a.x, a.y);
+	return make_float3(a.x, a.y, a.z);
 }
 
-#endif
-
-#ifndef __KERNEL_GPU__
-
-ccl_device_inline void print_float2(const char *label, const float2& a)
+ccl_device_inline float4 float3_to_float4(const float3 a)
 {
-	printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y);
+	return make_float4(a.x, a.y, a.z, 1.0f);
 }
 
-#endif
-
-#ifndef __KERNEL_OPENCL__
+CCL_NAMESPACE_END
 
-ccl_device_inline float2 interp(const float2& a, const float2& b, float t)
-{
-	return a + t*(b - a);
-}
+#include "util/util_math_int2.h"
+#include "util/util_math_int3.h"
+#include "util/util_math_int4.h"
 
-#endif
+#include "util/util_math_float2.h"
+#include "util/util_math_float3.h"
+#include "util/util_math_float4.h"
 
-/* Float3 Vector */
+CCL_NAMESPACE_BEGIN
 
 #ifndef __KERNEL_OPENCL__
+/* Int/Float conversion */
 
-ccl_device_inline float3 operator-(const float3& a)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
-#else
-	return make_float3(-a.x, -a.y, -a.z);
-#endif
-}
-
-ccl_device_inline float3 operator*(const float3& a, const float3& b)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_mul_ps(a.m128,b.m128));
-#else
-	return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
-#endif
-}
-
-ccl_device_inline float3 operator*(const float3& a, const float f)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f)));
-#else
-	return make_float3(a.x*f, a.y*f, a.z*f);
-#endif
-}
-
-ccl_device_inline float3 operator*(const float f, const float3& a)
-{
-	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if defined(__KERNEL_SSE__) && 0
-	return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
-#else
-	return make_float3(a.x*f, a.y*f, a.z*f);
-#endif
-}
-
-ccl_device_inline float3 operator/(const float f, const float3& a)
-{
-	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if defined(__KERNEL_SSE__) && 0
-	__m128 rc = _mm_rcp_ps(a.m128);
-	return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
-#else
-	return make_float3(f / a.x, f / a.y, f / a.z);
-#endif
-}
-
-ccl_device_inline float3 operator/(const float3& a, const float f)
-{
-	float invf = 1.0f/f;
-	return a * invf;
-}
-
-ccl_device_inline float3 operator/(const float3& a, const float3& b)
-{
-	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if

@@ Diff output truncated at 10240 characters. @@