[Bf-blender-cvs] [60f5cdaee92] temp-lanpr-staging: LANPR: Use BLI math functions.

Fri Aug 16 05:11:41 CEST 2019

Commit: 60f5cdaee92708cf31733f1d8617930336985797
Author: YimingWu
Date:   Fri Aug 16 11:11:04 2019 +0800
Branches: temp-lanpr-staging
https://developer.blender.org/rB60f5cdaee92708cf31733f1d8617930336985797

LANPR: Use BLI math functions.

===================================================================

M	source/blender/blenlib/BLI_math_matrix.h
M	source/blender/blenlib/BLI_math_vector.h
M	source/blender/blenlib/intern/math_matrix.c
M	source/blender/blenlib/intern/math_vector_inline.c
M	source/blender/editors/include/ED_lanpr.h
M	source/blender/editors/lanpr/lanpr_cpu.c
M	source/blender/editors/lanpr/lanpr_util.c

===================================================================

diff --git a/source/blender/blenlib/BLI_math_matrix.h b/source/blender/blenlib/BLI_math_matrix.h
index 52d976daa2d..432b1f8d4f8 100644
--- a/source/blender/blenlib/BLI_math_matrix.h
+++ b/source/blender/blenlib/BLI_math_matrix.h
@@ -41,6 +41,7 @@ void zero_m4(float R[4][4]);
 void unit_m2(float R[2][2]);
 void unit_m3(float R[3][3]);
 void unit_m4(float R[4][4]);
+void unit_m4_db(double m[4][4]);
 
 void copy_m2_m2(float R[2][2], const float A[2][2]);
 void copy_m3_m3(float R[3][3], const float A[3][3]);
@@ -48,9 +49,14 @@ void copy_m4_m4(float R[4][4], const float A[4][4]);
 void copy_m3_m4(float R[3][3], const float A[4][4]);
 void copy_m4_m3(float R[4][4], const float A[3][3]);
 
+void copy_m4_m4_db(double m1[4][4], const double m2[4][4]);
+
 /* double->float */
 void copy_m3_m3d(float R[3][3], const double A[3][3]);
 
+/* float->double */
+void copy_m4d_m4(double R[4][4], const float A[4][4]);
+
 void swap_m3m3(float A[3][3], float B[3][3]);
 void swap_m4m4(float A[4][4], float B[4][4]);
 
@@ -82,6 +88,7 @@ void mul_m3_m3m3_uniq(float R[3][3], const float A[3][3], const float B[3][3]);
 void mul_m3_m3_pre(float R[3][3], const float A[3][3]);
 void mul_m3_m3_post(float R[3][3], const float B[3][3]);
 void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4]);
+void mul_m4db_m4db_m4fl_uniq(double R[4][4], const double A[4][4], const float B[4][4]);
 void mul_m4_m4_pre(float R[4][4], const float A[4][4]);
 void mul_m4_m4_post(float R[4][4], const float B[4][4]);
 
@@ -173,11 +180,14 @@ void _va_mul_m4_series_9(float R[4][4],
 
 void mul_m4_v3(const float M[4][4], float r[3]);
 void mul_v3_m4v3(float r[3], const float M[4][4], const float v[3]);
+void mul_v3_m4v3_db(double r[3], const double mat[4][4], const double vec[3]);
+void mul_v4_m4v3_db(double r[4], const double mat[4][4], const double vec[3]);
 void mul_v2_m4v3(float r[2], const float M[4][4], const float v[3]);
 void mul_v2_m2v2(float r[2], const float M[2][2], const float v[2]);
 void mul_m2v2(const float M[2][2], float v[2]);
 void mul_mat3_m4_v3(const float M[4][4], float r[3]);
 void mul_v3_mat3_m4v3(float r[3], const float M[4][4], const float v[3]);
+void mul_v3_mat3_m4v3_db(double r[3], const double M[4][4], const double v[3]);
 void mul_m4_v4(const float M[4][4], float r[4]);
 void mul_v4_m4v4(float r[4], const float M[4][4], const float v[4]);
 void mul_v4_m4v3(float r[4], const float M[4][4], const float v[3]); /* v has implicit w = 1.0f */
diff --git a/source/blender/blenlib/BLI_math_vector.h b/source/blender/blenlib/BLI_math_vector.h
index 7f526585e3b..ccb42683d5a 100644
--- a/source/blender/blenlib/BLI_math_vector.h
+++ b/source/blender/blenlib/BLI_math_vector.h
@@ -197,6 +197,7 @@ MINLINE double dot_v3v3_db(const double a[3], const double b[3]) ATTR_WARN_UNUSE
 MINLINE float cross_v2v2(const float a[2], const float b[2]) ATTR_WARN_UNUSED_RESULT;
 MINLINE void cross_v3_v3v3(float r[3], const float a[3], const float b[3]);
 MINLINE void cross_v3_v3v3_hi_prec(float r[3], const float a[3], const float b[3]);
+MINLINE void cross_v3_v3v3_db(double r[3], const double a[3], const double b[3]);
 
 MINLINE void add_newell_cross_v3_v3v3(float n[3], const float v_prev[3], const float v_curr[3]);
 
diff --git a/source/blender/blenlib/intern/math_matrix.c b/source/blender/blenlib/intern/math_matrix.c
index 7c64206134b..48e9f905f18 100644
--- a/source/blender/blenlib/intern/math_matrix.c
+++ b/source/blender/blenlib/intern/math_matrix.c
@@ -71,6 +71,15 @@ void unit_m4(float m[4][4])
   m[3][0] = m[3][1] = m[3][2] = 0.0f;
 }
 
+/* Set the double precision 4x4 matrix m to identity:
+ * ones on the main diagonal, zeros in every other element. */
+void unit_m4_db(double m[4][4])
+{
+  for (int row = 0; row < 4; row++) {
+    for (int col = 0; col < 4; col++) {
+      m[row][col] = (row == col) ? 1.0 : 0.0;
+    }
+  }
+}
+
 void copy_m2_m2(float m1[2][2], const float m2[2][2])
 {
   memcpy(m1, m2, sizeof(float[2][2]));
@@ -87,6 +96,11 @@ void copy_m4_m4(float m1[4][4], const float m2[4][4])
   memcpy(m1, m2, sizeof(float[4][4]));
 }
 
+/* Copy the whole 4x4 double precision matrix m2 into m1 with a single memcpy. */
+void copy_m4_m4_db(double m1[4][4], const double m2[4][4])
+{
+  /* m2[0] is one row of 4 doubles; the matrix is 4 such rows. */
+  const size_t matrix_bytes = 4 * sizeof(m2[0]);
+
+  memcpy(m1, m2, matrix_bytes);
+}
+
 void copy_m3_m4(float m1[3][3], const float m2[4][4])
 {
   m1[0][0] = m2[0][0];
@@ -127,6 +141,30 @@ void copy_m4_m3(float m1[4][4], const float m2[3][3]) /* no clear */
   m1[3][3] = 1.0f;
 }
 
+
+/* Widen a single precision 4x4 matrix to double precision: each element of m2 is
+ * converted to double and stored at the same [i][j] position of m1. */
+void copy_m4d_m4(double m1[4][4], const float m2[4][4])
+{
+  for (int i = 0; i < 4; i++) {
+    for (int j = 0; j < 4; j++) {
+      m1[i][j] = (double)m2[i][j];
+    }
+  }
+}
+
 void copy_m3_m3d(float R[3][3], const double A[3][3])
 {
   /* Keep it stupid simple for better data flow in CPU. */
@@ -231,6 +269,52 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4])
 #endif
 }
 
+
+/**
+ * Multiply a double precision 4x4 matrix with a single precision one and store the
+ * result in double precision:
+ *
+ *   R[i][k] = B[i][0] * A[0][k] + B[i][1] * A[1][k] + B[i][2] * A[2][k] + B[i][3] * A[3][k]
+ *
+ * Each float element of B is promoted to double by the multiplication with an element
+ * of A, so products and sums are evaluated in double precision.
+ *
+ * R must not be the same matrix as A or B: R is written while both inputs are still
+ * being read.
+ */
+void mul_m4db_m4db_m4fl_uniq(double R[4][4], const double A[4][4], const float B[4][4])
+{
+  /* R and B point to different element types (double vs. float) and C only allows `!=`
+   * between pointers to compatible types, so the addresses are compared as
+   * `const void *`. */
+  BLI_assert((const void *)R != (const void *)A && (const void *)R != (const void *)B);
+
+  /* TODO: add an SSE2 path. Packed single precision intrinsics (`_mm_*_ps`) can not be
+   * used here since A and R hold doubles. */
+  for (int i = 0; i < 4; i++) {
+    for (int k = 0; k < 4; k++) {
+      R[i][k] = B[i][0] * A[0][k] + B[i][1] * A[1][k] + B[i][2] * A[2][k] + B[i][3] * A[3][k];
+    }
+  }
+}
+
 void mul_m4_m4_pre(float R[4][4], const float A[4][4])
 {
   BLI_assert(A != R);
@@ -604,6 +688,26 @@ void mul_v3_m4v3(float r[3], const float mat[4][4], const float vec[3])
   r[2] = x * mat[0][2] + y * mat[1][2] + mat[2][2] * vec[2] + mat[3][2];
 }
 
+/* Double precision 4x4 matrix times 3D vector, with the mat[3] row added unscaled
+ * (implicit fourth vector component of 1). Only the first three result components are
+ * computed. Each component of vec is read before the matching element of r is stored,
+ * so r may be the same array as vec. */
+void mul_v3_m4v3_db(double r[3], const double mat[4][4], const double vec[3])
+{
+  const double vx = vec[0];
+  const double vy = vec[1];
+  const double vz = vec[2];
+
+  r[0] = vx * mat[0][0] + vy * mat[1][0] + vz * mat[2][0] + mat[3][0];
+  r[1] = vx * mat[0][1] + vy * mat[1][1] + vz * mat[2][1] + mat[3][1];
+  r[2] = vx * mat[0][2] + vy * mat[1][2] + vz * mat[2][2] + mat[3][2];
+}
+/* Double precision 4x4 matrix times 3D vector, with the mat[3] row added unscaled
+ * (implicit fourth vector component of 1). All four result components are written.
+ * Every component of vec is read before r is written, so the first three elements of r
+ * may share storage with vec. */
+void mul_v4_m4v3_db(double r[4], const double mat[4][4], const double vec[3])
+{
+  const double x = vec[0];
+  const double y = vec[1];
+  /* Cached like x and y: r[3] is computed after r[2] has been stored, so re-reading
+   * vec[2] at that point would pick up the new r[2] when r and vec are the same array. */
+  const double z = vec[2];
+
+  r[0] = x * mat[0][0] + y * mat[1][0] + z * mat[2][0] + mat[3][0];
+  r[1] = x * mat[0][1] + y * mat[1][1] + z * mat[2][1] + mat[3][1];
+  r[2] = x * mat[0][2] + y * mat[1][2] + z * mat[2][2] + mat[3][2];
+  r[3] = x * mat[0][3] + y * mat[1][3] + z * mat[2][3] + mat[3][3];
+}
+
 void mul_v2_m4v3(float r[2], const float mat[4][4], const float vec[3])
 {
   const float x = vec[0];
@@ -646,6 +750,16 @@ void mul_v3_mat3_m4v3(float r[3], const float mat[4][4], const float vec[3])
   r[2] = x * mat[0][2] + y * mat[1][2] + mat[2][2] * vec[2];
 }
 
+/* Multiply vec by the upper-left 3x3 part of the double precision 4x4 matrix mat.
+ * The mat[3] row is not added and the fourth column is not read. Each component of vec
+ * is read before the matching element of r is stored, so r may be the same array as
+ * vec. */
+void mul_v3_mat3_m4v3_db(double r[3], const double mat[4][4], const double vec[3])
+{
+  const double vx = vec[0];
+  const double vy = vec[1];
+  const double vz = vec[2];
+
+  r[0] = vx * mat[0][0] + vy * mat[1][0] + vz * mat[2][0];
+  r[1] = vx * mat[0][1] + vy * mat[1][1] + vz * mat[2][1];
+  r[2] = vx * mat[0][2] + vy * mat[1][2] + vz * mat[2][2];
+}
+
 void mul_project_m4_v3(const float mat[4][4], float vec[3])
 {
   /* absolute value to not flip the frustum upside down behind the camera */
diff --git a/source/blender/blenlib/intern/math_vector_inline.c b/source/blender/blenlib/intern/math_vector_inline.c
index f646907797c..8cb618ae14e 100644
--- a/source/blender/blenlib/intern/math_vector_inline.c
+++ b/source/blender/blenlib/intern/math_vector_inline.c
@@ -885,6 +885,14 @@ MINLINE void cross_v3_v3v3_hi_prec(float r[3], const float a[3], const float b[3
   r[2] = (float)((double)a[0] * (double)b[1] - (double)a[1] * (double)b[0]);
 }
 
+MINLINE void cross_v3_v3v3_db(double r[3], const double a[3], const double b[3])
+{ /* Double precision cross product: stores a x b in r. */
+  BLI_assert(r != a && r != b); /* r is overwritten while a and b are still read below. */
+  r[0] = a[1] * b[2] - a[2] * b[1];
+  r[1] = a[2] * b[0] - a[0] * b[2];
+  r[2] = a[0] * b[1] - a[1] * b[0];
+}
+
 /* Newell's Method */
 /* excuse this fairly specific function,
  * its used for polygon normals all over the place
diff --git a/source/blender/editors/include/ED_lanpr.h b/source/blender/editors/include/ED_lanpr.h
index 73edfa0ca55..f9d10bd7b9e 100644
--- a/source/blender/editors/include/ED_lanpr.h
+++ b/source/blender/editors/include/ED_lanpr.h
@@ -48,7 +48,6 @@ typedef char nShortBuf[16];
 
 typedef float tnsMatrix44f[16];

@@ Diff output truncated at 10240 characters. @@