[Bf-blender-cvs] [f84defa] master: optimize interp_weights_poly_v2, v3

Thu Dec 25 23:06:16 CET 2014

Commit: f84defa9c0373b7db4953b64aaa2f02673b28f69
Author: Campbell Barton
Date:   Fri Dec 26 08:33:41 2014 +1100
Branches: master
https://developer.blender.org/rBf84defa9c0373b7db4953b64aaa2f02673b28f69

optimize interp_weights_poly_v2, v3

halve sqrtf calls per polygon corner.

===================================================================

M	source/blender/blenlib/intern/math_geom.c

===================================================================

diff --git a/source/blender/blenlib/intern/math_geom.c b/source/blender/blenlib/intern/math_geom.c
index bcf2147..148495c 100644
--- a/source/blender/blenlib/intern/math_geom.c
+++ b/source/blender/blenlib/intern/math_geom.c
@@ -2602,38 +2602,55 @@ int interp_sparse_array(float *array, const int list_size, const float skipval)
 	return 1;
 }
 
+/** \name interp_weights_poly_v2, v3
+ * \{ */
+
+#define IS_POINT_IX     (1 << 0)
+#define IS_SEGMENT_IX   (1 << 1)
+
+#define DIR_V3_SET(d_len, va, vb)  { \
+	sub_v3_v3v3((d_len)->dir, va, vb); \
+	(d_len)->len = len_v3((d_len)->dir); \
+} (void)0
+
+#define DIR_V2_SET(d_len, va, vb)  { \
+	sub_v2_v2v2((d_len)->dir, va, vb); \
+	(d_len)->len = len_v2((d_len)->dir); \
+} (void)0
+
+struct Float3_Len {
+	float dir[3], len;
+};
+
+struct Float2_Len {
+	float dir[2], len;
+};
+
 /* Mean value weights - smooth interpolation weights for polygons with
  * more than 3 vertices */
-static float mean_value_half_tan_v3(const float v1[3], const float v2[3], const float v3[3])
+static float mean_value_half_tan_v3(const struct Float3_Len *d_curr, const struct Float3_Len *d_next)
 {
-	float d2[3], d3[3], cross[3], area;
-
-	sub_v3_v3v3(d2, v2, v1);
-	sub_v3_v3v3(d3, v3, v1);
-	cross_v3_v3v3(cross, d2, d3);
-
+	float cross[3], area;
+	cross_v3_v3v3(cross, d_curr->dir, d_next->dir);
 	area = len_v3(cross);
 	if (LIKELY(area != 0.0f)) {
-		const float dot = dot_v3v3(d2, d3);
-		const float len = len_v3(d2) * len_v3(d3);
+		const float dot = dot_v3v3(d_curr->dir, d_next->dir);
+		const float len = d_curr->len * d_next->len;
 		return (len - dot) / area;
 	}
 	else {
 		return 0.0f;
 	}
 }
-static float mean_value_half_tan_v2(const float v1[2], const float v2[2], const float v3[2])
-{
-	float d2[2], d3[2], area;
-
-	sub_v2_v2v2(d2, v2, v1);
-	sub_v2_v2v2(d3, v3, v1);
 
+static float mean_value_half_tan_v2(const struct Float2_Len *d_curr, const struct Float2_Len *d_next)
+{
+	float area;
 	/* different from the 3d version but still correct */
-	area = cross_v2v2(d2, d3);
+	area = cross_v2v2(d_curr->dir, d_next->dir);
 	if (LIKELY(area != 0.0f)) {
-		const float dot = dot_v2v2(d2, d3);
-		const float len = len_v2(d2) * len_v2(d3);
+		const float dot = dot_v2v2(d_curr->dir, d_next->dir);
+		const float len = d_curr->len * d_next->len;
 		return (len - dot) / area;
 	}
 	else {
@@ -2649,30 +2666,34 @@ void interp_weights_poly_v3(float *w, float v[][3], const int n, const float co[
 	float ht_prev, ht;  /* half tangents */
 	float totweight = 0.0f;
 	int i = 0;
-	bool vert_interp = false;
-	bool edge_interp = false;
+	char ix_flag = 0;
+	struct Float3_Len d_curr, d_next;
 
 	v_curr = v[0];
 	v_next = v[1];
 
-	ht_prev = mean_value_half_tan_v3(co, v[n - 1], v_curr);
+	DIR_V3_SET(&d_curr, v[n - 1], co);
+	DIR_V3_SET(&d_next, v_curr, co);
+	ht_prev = mean_value_half_tan_v3(&d_curr, &d_next);
 
 	while (i < n) {
-		const float len_sq = len_squared_v3v3(co, v_curr);
-
 		/* Mark Mayer et al algorithm that is used here does not operate well if vertex is close
 		 * to borders of face. In that case, do simple linear interpolation between the two edge vertices */
-		if (len_sq < eps_sq) {
-			vert_interp = true;
+
+		/* 'd_next.len' is in fact 'd_curr.len', just avoid copy to begin with */
+		if (UNLIKELY(d_next.len < eps)) {
+			ix_flag = IS_POINT_IX;
 			break;
 		}
-		else if (dist_squared_to_line_segment_v3(co, v_curr, v_next) < eps_sq) {
-			edge_interp = true;
+		else if (UNLIKELY(dist_squared_to_line_segment_v3(co, v_curr, v_next) < eps_sq)) {
+			ix_flag = IS_SEGMENT_IX;
 			break;
 		}
 
-		ht = mean_value_half_tan_v3(co, v_curr, v_next);
-		w[i] = (ht_prev + ht) / sqrtf(len_sq);
+		d_curr = d_next;
+		DIR_V3_SET(&d_next, v_next, co);
+		ht = mean_value_half_tan_v3(&d_curr, &d_next);
+		w[i] = (ht_prev + ht) / d_curr.len;
 		totweight += w[i];
 
 		/* step */
@@ -2683,22 +2704,22 @@ void interp_weights_poly_v3(float *w, float v[][3], const int n, const float co[
 		ht_prev = ht;
 	}
 
-	if (vert_interp) {
-		const int i_curr = i;
-		for (i = 0; i < n; i++)
-			w[i] = 0.0;
-		w[i_curr] = 1.0f;
-	}
-	else if (edge_interp) {
+	if (ix_flag) {
 		const int i_curr = i;
-		float len_curr = len_v3v3(co, v_curr);
-		float len_next = len_v3v3(co, v_next);
-		float edge_len = len_curr + len_next;
-		for (i = 0; i < n; i++)
-			w[i] = 0.0;
+		for (i = 0; i < n; i++) {
+			w[i] = 0.0f;
+		}
 
-		w[i_curr] = len_next / edge_len;
-		w[(i_curr + 1) % n] = len_curr / edge_len;
+		if (ix_flag & IS_POINT_IX) {
+			w[i_curr] = 1.0f;
+		}
+		else {
+			float len_curr = len_v3v3(co, v_curr);
+			float len_next = len_v3v3(co, v_next);
+			float edge_len = len_curr + len_next;
+			w[i_curr] = len_next / edge_len;
+			w[(i_curr + 1) % n] = len_curr / edge_len;
+		}
 	}
 	else {
 		if (totweight != 0.0f) {
@@ -2718,30 +2739,34 @@ void interp_weights_poly_v2(float *w, float v[][2], const int n, const float co[
 	float ht_prev, ht;  /* half tangents */
 	float totweight = 0.0f;
 	int i = 0;
-	bool vert_interp = false;
-	bool edge_interp = false;
+	char ix_flag = 0;
+	struct Float2_Len d_curr, d_next;
 
 	v_curr = v[0];
 	v_next = v[1];
 
-	ht_prev = mean_value_half_tan_v2(co, v[n - 1], v_curr);
+	DIR_V2_SET(&d_curr, v[n - 1], co);
+	DIR_V2_SET(&d_next, v_curr, co);
+	ht_prev = mean_value_half_tan_v2(&d_curr, &d_next);
 
 	while (i < n) {
-		const float len_sq = len_squared_v2v2(co, v_curr);
-
 		/* Mark Mayer et al algorithm that is used here does not operate well if vertex is close
 		 * to borders of face. In that case, do simple linear interpolation between the two edge vertices */
-		if (len_sq < eps_sq) {
-			vert_interp = true;
+
+		/* 'd_next.len' is in fact 'd_curr.len', just avoid copy to begin with */
+		if (UNLIKELY(d_next.len < eps)) {
+			ix_flag = IS_POINT_IX;
 			break;
 		}
-		else if (dist_squared_to_line_segment_v2(co, v_curr, v_next) < eps_sq) {
-			edge_interp = true;
+		else if (UNLIKELY(dist_squared_to_line_segment_v2(co, v_curr, v_next) < eps_sq)) {
+			ix_flag = IS_SEGMENT_IX;
 			break;
 		}
 
-		ht = mean_value_half_tan_v2(co, v_curr, v_next);
-		w[i] = (ht_prev + ht) / sqrtf(len_sq);
+		d_curr = d_next;
+		DIR_V2_SET(&d_next, v_next, co);
+		ht = mean_value_half_tan_v2(&d_curr, &d_next);
+		w[i] = (ht_prev + ht) / d_curr.len;
 		totweight += w[i];
 
 		/* step */
@@ -2752,22 +2777,22 @@ void interp_weights_poly_v2(float *w, float v[][2], const int n, const float co[
 		ht_prev = ht;
 	}
 
-	if (vert_interp) {
-		const int i_curr = i;
-		for (i = 0; i < n; i++)
-			w[i] = 0.0;
-		w[i_curr] = 1.0f;
-	}
-	else if (edge_interp) {
+	if (ix_flag) {
 		const int i_curr = i;
-		float len_curr = len_v2v2(co, v_curr);
-		float len_next = len_v2v2(co, v_next);
-		float edge_len = len_curr + len_next;
-		for (i = 0; i < n; i++)
-			w[i] = 0.0;
+		for (i = 0; i < n; i++) {
+			w[i] = 0.0f;
+		}
 
-		w[i_curr] = len_next / edge_len;
-		w[(i_curr + 1) % n] = len_curr / edge_len;
+		if (ix_flag & IS_POINT_IX) {
+			w[i_curr] = 1.0f;
+		}
+		else {
+			float len_curr = len_v2v2(co, v_curr);
+			float len_next = len_v2v2(co, v_next);
+			float edge_len = len_curr + len_next;
+			w[i_curr] = len_next / edge_len;
+			w[(i_curr + 1) % n] = len_curr / edge_len;
+		}
 	}
 	else {
 		if (totweight != 0.0f) {
@@ -2778,6 +2803,15 @@ void interp_weights_poly_v2(float *w, float v[][2], const int n, const float co[
 	}
 }
 
+#undef IS_POINT_IX
+#undef IS_SEGMENT_IX
+
+#undef DIR_V3_SET
+#undef DIR_V2_SET
+
+/** \} */
+
+
 /* (x1, v1)(t1=0)------(x2, v2)(t2=1), 0<t<1 --> (x, v)(t) */
 void interp_cubic_v3(float x[3], float v[3], const float x1[3], const float v1[3], const float x2[3], const float v2[3], const float t)
 {