[Bf-blender-cvs] [515db4c] soc-2016-cycles_denoising: Cycles: Add util header with various matrix/vector math operations for the denoiser
Lukas Stockner
noreply at git.blender.org
Sun Jun 19 18:21:59 CEST 2016
Commit: 515db4c7d21fd3e183efaac1204e1f47a333ec43
Author: Lukas Stockner
Date: Sun Jun 19 17:54:00 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB515db4c7d21fd3e183efaac1204e1f47a333ec43
Cycles: Add util header with various matrix/vector math operations for the denoiser
===================================================================
A intern/cycles/util/util_math_matrix.h
===================================================================
diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h
new file mode 100644
index 0000000..5adbf21
--- /dev/null
+++ b/intern/cycles/util/util_math_matrix.h
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_MATRIX_H__
+#define __UTIL_MATH_MATRIX_H__
+
+CCL_NAMESPACE_BEGIN
+
+#define MAT(A, size, row, col) A[(row)*(size)+(col)]
+
+ccl_device_inline void math_matrix_zero(float *A, int n)
+{
+ for(int i = 0; i < n*n; i++)
+ A[i] = 0.0f;
+}
+
+ccl_device_inline void math_vector_zero(float *v, int n)
+{
+ for(int i = 0; i < n; i++)
+ v[i] = 0.0f;
+}
+
+ccl_device_inline void math_vec3_zero(float3 *v, int n)
+{
+ for(int i = 0; i < n; i++)
+ v[i] = make_float3(0.0f, 0.0f, 0.0f);
+}
+
+ccl_device_inline void math_matrix_zero_lower(float *A, int n)
+{
+ for(int row = 0; row < n; row++)
+ for(int col = 0; col <= row; col++)
+ MAT(A, n, row, col) = 0.0f;
+}
+
+ccl_device_inline void math_matrix_identity(float *A, int n)
+{
+ for(int row = 0; row < n; row++)
+ for(int col = 0; col < n; col++)
+ MAT(A, n, row, col) = (row == col)? 1.0f: 0.0f;
+}
+
+/* In-place Cholesky-Banachiewicz decomposition of the square, positive-definite matrix A
+ * into a lower triangular matrix L so that A = L*L^T. A is being overwritten by L.
+ * Also, only the lower triangular part of A is ever accessed. */
+ccl_device void math_cholesky(float *A, int n)
+{
+ for(int row = 0; row < n; row++) {
+ for(int col = 0; col <= row; col++) {
+ float sum_col = MAT(A, n, row, col);
+ for(int k = 0; k < col; k++)
+ sum_col -= MAT(A, n, row, k) * MAT(A, n, col, k);
+ if(row == col) sum_col = sqrtf(sum_col);
+ else sum_col = sum_col / MAT(A, n, col, col);
+ MAT(A, n, row, col) = sum_col;
+ }
+ }
+}
+
+/* Perform a Singular Value decomposition on A.
+ * Returns the transpose of V and the squared singular values. A is destroyed in the process.
+ * Note: Still buggy, therefore the old version from the proof-of-concept implementation is used for now. */
+ccl_device int math_svd(float *A, float *V, float *S2, int n)
+{
+ /* Initialize V to the identity matrix. */
+ for(int row = 0; row < n; row++)
+ for(int col = 0; col < n; col++)
+ MAT(V, n, row, col) = (row == col)? 1.0f: 0.0f;
+
+ const float eps1 = 1e-7f;
+ const float eps2 = 10.0f * n * eps1*eps1;
+ const float eps3 = 0.1f * eps1;
+
+ int estimated_rank = n;
+ for(int rotations = n, i = 0; rotations > 0 && i < 7; i++) {
+ rotations = estimated_rank * (estimated_rank - 1)/2;
+ for(int col1 = 0; col1 < estimated_rank-1; col1++) {
+ for(int col2 = col1+1; col2 < estimated_rank; col2++) {
+ float p = 0.0f, q = 0.0f, r = 0.0f;
+ for(int row = 0; row < n; row++) {
+ float c1 = MAT(A, n, row, col1), c2 = MAT(A, n, row, col2);
+ p += c1*c2;
+ q += c1*c1;
+ r += c2*c2;
+ }
+ S2[col1] = q;
+ S2[col2] = r;
+
+ float x, y;
+ if(q >= r) {
+ if(q < eps2*S2[0] || fabsf(p) < eps3*q) {
+ rotations--;
+ continue;
+ }
+ const float invQ = 1.0f / q;
+ p *= invQ;
+ r = 1.0f - r*invQ;
+ const float pr = p*r;
+ const float invVt = 1.0f / sqrtf(4.0f * pr*pr);
+ x = sqrtf(0.5f * (1.0f + r*invVt));
+ y = p*invVt / x;
+ }
+ else {
+ const float invR = 1.0f / r;
+ p *= invR;
+ q = q*invR - 1.0f;
+ const float pq = p*q;
+ const float invVt = 1.0f / sqrtf(4.0f * pq*pq);
+ y = sqrtf(0.5f * (1.0f - q*invVt));
+ if(p < 0.0f) y = -y;
+ x = p*invVt / y;
+ }
+
+#define ROT(A, n, row1, row2, col1, col2, x, y) { float c1 = MAT(A, n, row1, col1), c2 = MAT(A, n, row2, col2); MAT(A, n, row1, col1) = c1*(x)+c2*(y); MAT(A, n, row2, col2) = -c1*(y)+c2*(x); }
+ for(int row = 0; row < n; row++) {
+ ROT(A, n, row, row, col1, col2, x, y);
+ /* V is stored as its transpose. */
+ ROT(V, n, col1, col2, row, row, x, y);
+ }
+#undef ROT
+ }
+ }
+ while(estimated_rank > 2 && S2[estimated_rank-1] < (S2[0] + eps3)*eps3)
+ estimated_rank--;
+ }
+
+ return estimated_rank;
+}
+
+ccl_device_inline void math_matrix_add_diagonal(float *A, int n, float val)
+{
+ for(int row = 0; row < n; row++)
+ MAT(A, n, row, row) += val;
+}
+
+/* Add Gramian matrix of v to A.
+ * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j].
+ * Obviously, the resulting matrix is symmetric, so only the lower triangluar part is stored. */
+ccl_device_inline void math_add_gramian(float *A, int n, float *v, float weight)
+{
+ for(int row = 0; row < n; row++)
+ for(int col = 0; col <= row; col++)
+ MAT(A, n, row, col) += v[row]*v[col]*weight;
+}
+
+ccl_device_inline void math_add_vec3(float3 *v, int n, float *x, float3 w)
+{
+ for(int i = 0; i < n; i++)
+ v[i] += w*x[i];
+}
+
+ccl_device_inline void math_lower_tri_to_full(float *A, int n)
+{
+ for(int row = 0; row < n; row++)
+ for(int col = row+1; col < n; col++)
+ MAT(A, n, row, col) = MAT(A, n, col, row);
+}
+
+ccl_device_inline float math_dot(float *a, float *b, int n)
+{
+ float d = 0.0f;
+ for(int i = 0; i < n; i++)
+ d += a[i]*b[i];
+ return d;
+}
+
+/* Solve the linear equation system L*x = b through forward substitution, where L is a lower triangular matrix.
+ * x is initially set to the right-hand-side vector and is overwritten with the solution vector x. */
+ccl_device_inline void math_substitute_forward_vec3(float *L, int n, float3 *x)
+{
+ for(int row = 0; row < n; row++) {
+ float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+ for(int col = 0; col < row; col++)
+ sum += MAT(L, n, row, col) * x[col];
+ x[row] = (x[row] - sum) / MAT(L, n, row, row);
+ }
+}
+
+/* Solve the linear equation system L*x = b through backsubstitution, where L is a upper triangular matrix.
+ * In this implementation, instead of L, L^T is passed instead.
+ * x is initially set to the right-hand-side vector and is overwritten with the solution vector x. */
+ccl_device_inline void math_substitute_back_vec3(float *LT, int n, float3 *x)
+{
+ for(int row = n-1; row >= 0; row--) {
+ float3 sum = make_float3(0.0f, 0.0f, 0.0f);
+ for(int col = row+1; col < n; col++)
+ sum += MAT(LT, n, col, row) * x[col];
+ x[row] = (x[row] - sum) / MAT(LT, n, row, row);
+ }
+}
+
+ccl_device_inline void math_inverse_lower_tri(float *L, float *invL, int n)
+{
+ for(int comp = 0; comp < n; comp++) {
+ for(int row = 0; row < comp; row++)
+ MAT(invL, n, row, comp) = 0.0f;
+ MAT(invL, n, comp, comp) = 1.0f / MAT(L, n, comp, comp);
+ for(int row = comp+1; row < n; row++) {
+ float sum = 0.0f;
+ for(int col = comp; col < row; col++)
+ sum += MAT(L, n, row, col) * MAT(invL, n, col, comp);
+ MAT(invL, n, row, comp) = -sum/MAT(L, n, row, row);
+ }
+ }
+}
+
+/* Inverts the lower triangular matrix L and overwrites it with the transpose
+ * of the result. */
+ccl_device_inline void math_inverse_lower_tri_inplace(float *L, int n)
+{
+ for(int row = 0; row < n; row++)
+ MAT(L, n, row, row) = 1.0f / MAT(L, n, row, row);
+
+ for(int comp = 0; comp < n; comp++) {
+ for(int row = comp+1; row < n; row++) {
+ float sum = 0.0f;
+ for(int col = comp; col < row; col++)
+ sum += MAT(L, n, row, col) * MAT(L, n, comp, col);
+ MAT(L, n, comp, row) = -sum*MAT(L, n, row, row);
+ }
+ }
+}
+
+#define LSQ_SIZE 5
+/* Utility functions for least-squares-fitting a one-dimensional linear function: f(x) = a*x+b. */
+ccl_device_inline void math_lsq_init(double *lsq)
+{
+ for(int i = 0; i < 5; i++)
+ lsq[i] = 0.0;
+}
+
+ccl_device_inline void math_lsq_add(double *lsq, double x, double y)
+{
+ lsq[0] += 1.0;
+ lsq[1] += x;
+ lsq[2] += x*x;
+ lsq[3] += y;
+ lsq[4] += x*y;
+}
+
+/* Returns the first-order coefficient a of the fitted function. */
+ccl_device_inline double math_lsq_solve(double *lsq)
+{
+ return (lsq[2]*lsq[3] - lsq[1]*lsq[4]) / (lsq[0]*lsq[2] - lsq[1]*lsq[1] + 1e-4);
+}
+
+
+/* TODO(lukas): Fix new code and remove this. */
+ccl_device int svd(float *A, float *V, float *S2, int n)
+{
+ int i, j, k, EstColRank = n, RotCount = n, SweepCount = 0;
+ int slimit = 8;
+ float eps = 1e-8f;
+ float e2 = 10.f * n * eps * eps;
+ float tol = 0.1f * eps;
+ float vt, p, x0, y0, q, r, c0, s0, d1, d2;
+
+ for(int r = 0; r < n; r++)
+ for(int c = 0; c < n; c++)
+ V[r*n+c] = (c == r)? 1.0f: 0.0f;
+
+ while (RotCount != 0 && SweepCount++ <= slimit) {
+ RotCount = EstColRank * (EstColRank - 1) / 2;
+
+ for (j = 0; j < EstColRank-1; ++j) {
+ for (k = j+1; k < EstColRank; ++k) {
+ p = q = r = 0.0;
+
+ for (i = 0; i < n; ++i) {
+ x0 = A[i * n + j];
+ y0 = A[i * n + k];
+ p += x0 * y0;
+ q += x0 * x0;
+ r += y0 * y0;
+ }
+
+ S2[j] = q;
+ S2[k] = r;
+
+ if (q >= r) {
+ if (q <= e2 * S2[0] || fabsf(p) <= tol * q)
+ RotCount--;
+ else {
+ p /= q;
+ r = 1.f - r/q;
+ vt = sqrtf(4.0f * p * p + r * r);
+ c0 = sqrtf(0.5f * (1.f + r / vt));
+ s0 = p / (vt*c0);
+
+ // Rotation
+ for (i = 0; i < n; ++i) {
+ d1 = A[i * n + j];
+ d2 = A[i * n + k];
+ A[i * n + j] = d1*c0+d2*s0;
+ A[i * n + k] = -d1*s0+d2*c0;
+ }
+ for (i = 0; i < n; ++i) {
+ d1 = V[i * n + j];
+
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list