[Bf-blender-cvs] [39ae324] master: Cycles: remove extended precision hacks, no longer needed with SSE2 requirement.

Mon Jul 4 18:25:11 CEST 2016

Commit: 39ae324918176a1a94de0c6be4466fdffa2fe711
Author: Brecht Van Lommel
Date:   Mon Jul 4 16:43:32 2016 +0200
Branches: master
https://developer.blender.org/rB39ae324918176a1a94de0c6be4466fdffa2fe711

Cycles: remove extended precision hacks, no longer needed with SSE2 requirement.

Differential Revision: https://developer.blender.org/D2079

===================================================================

M	intern/cycles/bvh/bvh_sort.cpp
M	intern/cycles/kernel/geom/geom.h
M	intern/cycles/kernel/geom/geom_bvh_shadow.h
M	intern/cycles/kernel/geom/geom_bvh_subsurface.h
M	intern/cycles/kernel/geom/geom_bvh_traversal.h
M	intern/cycles/kernel/geom/geom_bvh_volume.h
M	intern/cycles/kernel/geom/geom_bvh_volume_all.h

===================================================================

diff --git a/intern/cycles/bvh/bvh_sort.cpp b/intern/cycles/bvh/bvh_sort.cpp
index e9032c6..d50178b 100644
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@ -26,16 +26,6 @@ CCL_NAMESPACE_BEGIN
 
 static const int BVH_SORT_THRESHOLD = 4096;
 
-/* Silly workaround for float extended precision that happens when compiling
- * on x86, due to one float staying in 80 bit precision register and the other
- * not, which causes the strictly weak ordering to break.
- */
-#if !defined(__i386__)
-#  define NO_EXTENDED_PRECISION
-#else
-#  define NO_EXTENDED_PRECISION volatile
-#endif
-
 struct BVHReferenceCompare {
 public:
 	int dim;
@@ -52,8 +42,8 @@ public:
 	__forceinline int compare(const BVHReference& ra,
 	                          const BVHReference& rb) const
 	{
-		NO_EXTENDED_PRECISION float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
-		NO_EXTENDED_PRECISION float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
+		float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
+		float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
 
 		if(ca < cb) return -1;
 		else if(ca > cb) return 1;
diff --git a/intern/cycles/kernel/geom/geom.h b/intern/cycles/kernel/geom/geom.h
index c94a538..2949f66 100644
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@ -27,15 +27,6 @@
 #define BVH_QNODE_LEAF_SIZE 1
 #define TRI_NODE_SIZE 3
 
-/* silly workaround for float extended precision that happens when compiling
- * without sse support on x86, it results in different results for float ops
- * that you would otherwise expect to compare correctly */
-#if !defined(__i386__) || defined(__SSE__)
-#  define NO_EXTENDED_PRECISION
-#else
-#  define NO_EXTENDED_PRECISION volatile
-#endif
-
 #include "geom_attribute.h"
 #include "geom_object.h"
 #include "geom_triangle.h"
diff --git a/intern/cycles/kernel/geom/geom_bvh_shadow.h b/intern/cycles/kernel/geom/geom_bvh_shadow.h
index 4005489..4991eb5 100644
--- a/intern/cycles/kernel/geom/geom_bvh_shadow.h
+++ b/intern/cycles/kernel/geom/geom_bvh_shadow.h
@@ -108,23 +108,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 				float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
 
 				/* intersect ray against child nodes */
-				NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
-				NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
-				NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
-				NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+				float c0lox = (node0.x - P.x) * idir.x;
+				float c0hix = (node0.z - P.x) * idir.x;
+				float c0loy = (node1.x - P.y) * idir.y;
+				float c0hiy = (node1.z - P.y) * idir.y;
+				float c0loz = (node2.x - P.z) * idir.z;
+				float c0hiz = (node2.z - P.z) * idir.z;
+				float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+				float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+				float c1lox = (node0.y - P.x) * idir.x;
+				float c1hix = (node0.w - P.x) * idir.x;
+				float c1loy = (node1.y - P.y) * idir.y;
+				float c1hiy = (node1.w - P.y) * idir.y;
+				float c1loz = (node2.y - P.z) * idir.z;
+				float c1hiz = (node2.w - P.z) * idir.z;
+				float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+				float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
 
 				/* decide which nodes to traverse next */
 #  ifdef __VISIBILITY_FLAG__
diff --git a/intern/cycles/kernel/geom/geom_bvh_subsurface.h b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
index 915e941..a5243f0 100644
--- a/intern/cycles/kernel/geom/geom_bvh_subsurface.h
+++ b/intern/cycles/kernel/geom/geom_bvh_subsurface.h
@@ -119,23 +119,23 @@ ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 				float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
 
 				/* intersect ray against child nodes */
-				NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
-				NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
-				NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
-				NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+				float c0lox = (node0.x - P.x) * idir.x;
+				float c0hix = (node0.z - P.x) * idir.x;
+				float c0loy = (node1.x - P.y) * idir.y;
+				float c0hiy = (node1.z - P.y) * idir.y;
+				float c0loz = (node2.x - P.z) * idir.z;
+				float c0hiz = (node2.z - P.z) * idir.z;
+				float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+				float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+				float c1lox = (node0.y - P.x) * idir.x;
+				float c1hix = (node0.w - P.x) * idir.x;
+				float c1loy = (node1.y - P.y) * idir.y;
+				float c1hiy = (node1.w - P.y) * idir.y;
+				float c1loz = (node2.y - P.z) * idir.z;
+				float c1hiz = (node2.w - P.z) * idir.z;
+				float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+				float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
 
 				/* decide which nodes to traverse next */
 				traverseChild0 = (c0max >= c0min);
diff --git a/intern/cycles/kernel/geom/geom_bvh_traversal.h b/intern/cycles/kernel/geom/geom_bvh_traversal.h
index ae919ef..89e6ba6 100644
--- a/intern/cycles/kernel/geom/geom_bvh_traversal.h
+++ b/intern/cycles/kernel/geom/geom_bvh_traversal.h
@@ -115,23 +115,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 				float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
 
 				/* intersect ray against child nodes */
-				NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
-				NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
-				NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
-				NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
-				NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
-				NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
-				NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+				float c0lox = (node0.x - P.x) * idir.x;
+				float c0hix = (node0.z - P.x) * idir.x;
+				float c0loy = (node1.x - P.y) * idir.y;
+				float c0hiy = (node1.z - P.y) * idir.y;
+				float c0loz = (node2.x - P.z) * idir.z;
+				float c0hiz = (node2.z - P.z) * idir.z;
+				float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
+				float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
+
+				float c1lox = (node0.y - P.x) * idir.x;
+				float c1hix = (node0.w - P.x) * idir.x;
+				float c1loy = (node1.y - P.y) * idir.y;
+				float c1hiy = (node1.w - P.y) * idir.y;
+				float c1loz = (node2.y - P.z) * idir.z;
+				float c1hiz = (node2.w - P.z) * idir.z;
+				float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
+				float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
 
 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
 				if(difl != 0.0f) {
diff --git a/intern/cycles/kernel/geom/geom_bvh_volume.h b/intern/cycles/kernel/geom/geom_bvh_volume.h
index f3edf85..195c404 100644
--- a/intern/cycles/kernel/geom/geom_bvh_volume.h
+++ b/intern/cycles/kernel/geom/geom_bvh_volume.h
@@ -105,23 +105,23 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 				float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
 
 				/* intersect ray agains

@@ Diff output truncated at 10240 characters. @@