[Bf-blender-cvs] [515db4c] soc-2016-cycles_denoising: Cycles: Add util header with various matrix/vector math operations for the denoiser

Sun Jun 19 18:21:59 CEST 2016

Commit: 515db4c7d21fd3e183efaac1204e1f47a333ec43
Author: Lukas Stockner
Date:   Sun Jun 19 17:54:00 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB515db4c7d21fd3e183efaac1204e1f47a333ec43

Cycles: Add util header with various matrix/vector math operations for the denoiser

===================================================================

A	intern/cycles/util/util_math_matrix.h

===================================================================

diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h
new file mode 100644
index 0000000..5adbf21
--- /dev/null
+++ b/intern/cycles/util/util_math_matrix.h
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_MATRIX_H__
+#define __UTIL_MATH_MATRIX_H__
+
+CCL_NAMESPACE_BEGIN
+
+#define MAT(A, size, row, col) A[(row)*(size)+(col)]
+
+ccl_device_inline void math_matrix_zero(float *A, int n)
+{
+	for(int i = 0; i < n*n; i++)
+		A[i] = 0.0f;
+}
+
+ccl_device_inline void math_vector_zero(float *v, int n)
+{
+	for(int i = 0; i < n; i++)
+		v[i] = 0.0f;
+}
+
+ccl_device_inline void math_vec3_zero(float3 *v, int n)
+{
+	for(int i = 0; i < n; i++)
+		v[i] = make_float3(0.0f, 0.0f, 0.0f);
+}
+
+ccl_device_inline void math_matrix_zero_lower(float *A, int n)
+{
+	for(int row = 0; row < n; row++)
+		for(int col = 0; col <= row; col++)
+			MAT(A, n, row, col) = 0.0f;
+}
+
+ccl_device_inline void math_matrix_identity(float *A, int n)
+{
+	for(int row = 0; row < n; row++)
+		for(int col = 0; col < n; col++)
+			MAT(A, n, row, col) = (row == col)? 1.0f: 0.0f;
+}
+
+/* In-place Cholesky-Banachiewicz decomposition of the square, positive-definite matrix A
+ * into a lower triangular matrix L so that A = L*L^T. A is being overwritten by L.
+ * Also, only the lower triangular part of A is ever accessed. */
+ccl_device void math_cholesky(float *A, int n)
+{
+	for(int row = 0; row < n; row++) {
+		for(int col = 0; col <= row; col++) {
+			float sum_col = MAT(A, n, row, col);
+			for(int k = 0; k < col; k++)
+				sum_col -= MAT(A, n, row, k) * MAT(A, n, col, k);
+			if(row == col) sum_col = sqrtf(sum_col);
+			else           sum_col = sum_col / MAT(A, n, col, col);
+			MAT(A, n, row, col) = sum_col;
+		}
+	}
+}
+
+/* Perform a Singular Value decomposition on A.
+ * Returns the transpose of V and the squared singular values. A is destroyed in the process.
+ * Note: Still buggy, therefore the old version from the proof-of-concept implementation is used for now. */
+ccl_device int math_svd(float *A, float *V, float *S2, int n)
+{
+	/* Initialize V to the identity matrix. */
+	for(int row = 0; row < n; row++)
+		for(int col = 0; col < n; col++)
+			MAT(V, n, row, col) = (row == col)? 1.0f: 0.0f;
+
+	const float eps1 = 1e-7f;
+	const float eps2 = 10.0f * n * eps1*eps1;
+	const float eps3 = 0.1f * eps1;
+
+	int estimated_rank = n;
+	for(int rotations = n, i = 0; rotations > 0 && i < 7; i++) {
+		rotations = estimated_rank * (estimated_rank - 1)/2;
+		for(int col1 = 0; col1 < estimated_rank-1; col1++) {
+			for(int col2 = col1+1; col2 < estimated_rank; col2++) {
+				float p = 0.0f, q = 0.0f, r = 0.0f;
+				for(int row = 0; row < n; row++) {
+					float c1 = MAT(A, n, row, col1), c2 = MAT(A, n, row, col2);
+					p += c1*c2;
+					q += c1*c1;
+					r += c2*c2;
+				}
+				S2[col1] = q;
+				S2[col2] = r;
+
+				float x, y;
+				if(q >= r) {
+					if(q < eps2*S2[0] || fabsf(p) < eps3*q) {
+						rotations--;
+						continue;
+					}
+					const float invQ = 1.0f / q;
+					p *= invQ;
+					r = 1.0f - r*invQ;
+					const float pr = p*r;
+					const float invVt = 1.0f / sqrtf(4.0f * pr*pr);
+					x = sqrtf(0.5f * (1.0f + r*invVt));
+					y = p*invVt / x;
+				}
+				else {
+					const float invR = 1.0f / r;
+					p *= invR;
+					q = q*invR - 1.0f;
+					const float pq = p*q;
+					const float invVt = 1.0f / sqrtf(4.0f * pq*pq);
+					y = sqrtf(0.5f * (1.0f - q*invVt));
+					if(p < 0.0f) y = -y;
+					x = p*invVt / y;
+				}
+
+#define ROT(A, n, row1, row2, col1, col2, x, y) { float c1 = MAT(A, n, row1, col1), c2 = MAT(A, n, row2, col2); MAT(A, n, row1, col1) = c1*(x)+c2*(y); MAT(A, n, row2, col2) = -c1*(y)+c2*(x); }
+				for(int row = 0; row < n; row++) {
+					ROT(A, n, row, row, col1, col2, x, y);
+					/* V is stored as its transpose. */
+					ROT(V, n, col1, col2, row, row, x, y);
+				}
+#undef ROT
+			}
+		}
+		while(estimated_rank > 2 && S2[estimated_rank-1] < (S2[0] + eps3)*eps3)
+			estimated_rank--;
+	}
+
+	return estimated_rank;
+}
+
+ccl_device_inline void math_matrix_add_diagonal(float *A, int n, float val)
+{
+	for(int row = 0; row < n; row++)
+		MAT(A, n, row, row) += val;
+}
+
+/* Add Gramian matrix of v to A.
+ * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j].
+ * Obviously, the resulting matrix is symmetric, so only the lower triangluar part is stored. */
+ccl_device_inline void math_add_gramian(float *A, int n, float *v, float weight)
+{
+	for(int row = 0; row < n; row++)
+		for(int col = 0; col <= row; col++)
+			MAT(A, n, row, col) += v[row]*v[col]*weight;
+}
+
+ccl_device_inline void math_add_vec3(float3 *v, int n, float *x, float3 w)
+{
+	for(int i = 0; i < n; i++)
+		v[i] += w*x[i];
+}
+
+ccl_device_inline void math_lower_tri_to_full(float *A, int n)
+{
+	for(int row = 0; row < n; row++)
+		for(int col = row+1; col < n; col++)
+			MAT(A, n, row, col) = MAT(A, n, col, row);
+}
+
+ccl_device_inline float math_dot(float *a, float *b, int n)
+{
+	float d = 0.0f;
+	for(int i = 0; i < n; i++)
+		d += a[i]*b[i];
+	return d;
+}
+
+/* Solve the linear equation system L*x = b through forward substitution, where L is a lower triangular matrix.
+ * x is initially set to the right-hand-side vector and is overwritten with the solution vector x. */
+ccl_device_inline void math_substitute_forward_vec3(float *L, int n, float3 *x)
+{
+	for(int row = 0; row < n; row++) {
+		float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+		for(int col = 0; col < row; col++)
+			sum += MAT(L, n, row, col) * x[col];
+		x[row] = (x[row] - sum) / MAT(L, n, row, row);
+	}
+}
+
+/* Solve the linear equation system L*x = b through backsubstitution, where L is a upper triangular matrix.
+ * In this implementation, instead of L, L^T is passed instead.
+ * x is initially set to the right-hand-side vector and is overwritten with the solution vector x. */
+ccl_device_inline void math_substitute_back_vec3(float *LT, int n, float3 *x)
+{
+	for(int row = n-1; row >= 0; row--) {
+		float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+		for(int col = row+1; col < n; col++)
+			sum += MAT(LT, n, col, row) * x[col];
+		x[row] = (x[row] - sum) / MAT(LT, n, row, row);
+	}
+}
+
+ccl_device_inline void math_inverse_lower_tri(float *L, float *invL, int n)
+{
+	for(int comp = 0; comp < n; comp++) {
+		for(int row = 0; row < comp; row++)
+			MAT(invL, n, row, comp) = 0.0f;
+		MAT(invL, n, comp, comp) = 1.0f / MAT(L, n, comp, comp);
+		for(int row = comp+1; row < n; row++) {
+			float sum = 0.0f;
+			for(int col = comp; col < row; col++)
+				sum += MAT(L, n, row, col) * MAT(invL, n, col, comp);
+			MAT(invL, n, row, comp) = -sum/MAT(L, n, row, row);
+		}
+	}
+}
+
+/* Inverts the lower triangular matrix L and overwrites it with the transpose
+ * of the result. */
+ccl_device_inline void math_inverse_lower_tri_inplace(float *L, int n)
+{
+	for(int row = 0; row < n; row++)
+		MAT(L, n, row, row) = 1.0f / MAT(L, n, row, row);
+
+	for(int comp = 0; comp < n; comp++) {
+		for(int row = comp+1; row < n; row++) {
+			float sum = 0.0f;
+			for(int col = comp; col < row; col++)
+				sum += MAT(L, n, row, col) * MAT(L, n, comp, col);
+			MAT(L, n, comp, row) = -sum*MAT(L, n, row, row);
+		}
+	}
+}
+
+#define LSQ_SIZE 5
+/* Utility functions for least-squares-fitting a one-dimensional linear function: f(x) = a*x+b. */
+ccl_device_inline void math_lsq_init(double *lsq)
+{
+	for(int i = 0; i < 5; i++)
+		lsq[i] = 0.0;
+}
+
+ccl_device_inline void math_lsq_add(double *lsq, double x, double y)
+{
+	lsq[0] += 1.0;
+	lsq[1] += x;
+	lsq[2] += x*x;
+	lsq[3] += y;
+	lsq[4] += x*y;
+}
+
+/* Returns the first-order coefficient a of the fitted function. */
+ccl_device_inline double math_lsq_solve(double *lsq)
+{
+	return (lsq[2]*lsq[3] - lsq[1]*lsq[4]) / (lsq[0]*lsq[2] - lsq[1]*lsq[1] + 1e-4);
+}
+
+
+/* TODO(lukas): Fix new code and remove this. */
+ccl_device int svd(float *A, float *V, float *S2, int n)
+{
+    int  i, j, k, EstColRank = n, RotCount = n, SweepCount = 0;
+    int slimit = 8;
+    float eps = 1e-8f;
+    float e2 = 10.f * n * eps * eps;
+    float tol = 0.1f * eps;
+    float vt, p, x0, y0, q, r, c0, s0, d1, d2;
+
+    for(int r = 0; r < n; r++)
+        for(int c = 0; c < n; c++)
+            V[r*n+c] = (c == r)? 1.0f: 0.0f;
+
+    while (RotCount != 0 && SweepCount++ <= slimit) {
+        RotCount = EstColRank * (EstColRank - 1) / 2;
+
+        for (j = 0; j < EstColRank-1; ++j) {
+            for (k = j+1; k < EstColRank; ++k) {
+                p = q = r = 0.0;
+
+                for (i = 0; i < n; ++i) {
+                    x0 = A[i * n + j];
+                    y0 = A[i * n + k];
+                    p += x0 * y0;
+                    q += x0 * x0;
+                    r += y0 * y0;
+                }
+
+                S2[j] = q;
+                S2[k] = r;
+
+                if (q >= r) {
+                    if (q <= e2 * S2[0] || fabsf(p) <= tol * q)
+                        RotCount--;
+                    else {
+                        p /= q;
+                        r = 1.f - r/q;
+                        vt = sqrtf(4.0f * p * p + r * r);
+                        c0 = sqrtf(0.5f * (1.f + r / vt));
+                        s0 = p / (vt*c0);
+
+                        // Rotation
+                        for (i = 0; i < n; ++i) {
+                            d1 = A[i * n + j];
+                            d2 = A[i * n + k];
+                            A[i * n + j] = d1*c0+d2*s0;
+                            A[i * n + k] = -d1*s0+d2*c0;
+                        }
+                        for (i = 0; i < n; ++i) {
+                            d1 = V[i * n + j];
+        

@@ Diff output truncated at 10240 characters. @@