[Bf-blender-cvs] [7d16710] cycles_bvh: Merge branch 'master' into cycles_bvh

Sergey Sharybin noreply at git.blender.org
Wed Jul 6 12:50:01 CEST 2016


Commit: 7d16710e775003efc7467ab8f5950f1eef7cc194
Author: Sergey Sharybin
Date:   Wed Jul 6 11:01:23 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rB7d16710e775003efc7467ab8f5950f1eef7cc194

Merge branch 'master' into cycles_bvh

This merge also removes NO_EXTENDED_PRECISION from the refactored code, which
resolves the merge conflicts and drops the define from the new code.

===================================================================



===================================================================

diff --cc intern/cycles/bvh/bvh_sort.cpp
index b9cd8d7,d50178b..e5bcf99
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@@ -66,10 -42,8 +56,10 @@@ public
  	__forceinline int compare(const BVHReference& ra,
  	                          const BVHReference& rb) const
  	{
 -		float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
 -		float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
 +		BoundBox ra_bounds = get_prim_bounds(ra),
 +		         rb_bounds = get_prim_bounds(rb);
- 		NO_EXTENDED_PRECISION float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
- 		NO_EXTENDED_PRECISION float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
++		float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
++		float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
  
  		if(ca < cb) return -1;
  		else if(ca > cb) return 1;
diff --cc intern/cycles/kernel/geom/geom.h
index 3ba62bb,2949f66..33e91d1
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@@ -21,17 -21,12 +21,8 @@@
  /* 64 object BVH + 64 mesh BVH + 64 object node splitting */
  #define BVH_STACK_SIZE 192
  #define BVH_QSTACK_SIZE 384
 -#define BVH_NODE_SIZE 4
 -#define BVH_NODE_LEAF_SIZE 1
 -#define BVH_QNODE_SIZE 7
 -#define BVH_QNODE_LEAF_SIZE 1
  #define TRI_NODE_SIZE 3
  
- /* silly workaround for float extended precision that happens when compiling
-  * without sse support on x86, it results in different results for float ops
-  * that you would otherwise expect to compare correctly */
- #if !defined(__i386__) || defined(__SSE__)
- #  define NO_EXTENDED_PRECISION
- #else
- #  define NO_EXTENDED_PRECISION volatile
- #endif
- 
  #include "geom_attribute.h"
  #include "geom_object.h"
  #include "geom_triangle.h"
diff --cc intern/cycles/kernel/geom/geom_bvh_nodes.h
index 1c9114d,0000000..75d90f2
mode 100644,000000..100644
--- a/intern/cycles/kernel/geom/geom_bvh_nodes.h
+++ b/intern/cycles/kernel/geom/geom_bvh_nodes.h
@@@ -1,670 -1,0 +1,670 @@@
 +/*
 + * Copyright 2011-2016, Blender Foundation.
 + *
 + * Licensed under the Apache License, Version 2.0 (the "License");
 + * you may not use this file except in compliance with the License.
 + * You may obtain a copy of the License at
 + *
 + * http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
 +                                                           int nodeAddr,
 +                                                           int child)
 +{
 +	Transform space;
 +	if(child == 0) {
 +		space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
 +		space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
 +		space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
 +		space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+4);
 +	}
 +	else {
 +		space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+5);
 +		space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+6);
 +		space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
 +		space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
 +	}
 +	return space;
 +}
 +
 +#if !defined(__KERNEL_SSE2__)
 +ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
 +                                                 const float3 P,
 +                                                 const float3 idir,
 +                                                 const float t,
 +                                                 const int nodeAddr,
 +                                                 const uint visibility,
 +                                                 float *dist)
 +{
 +
 +	/* fetch node data */
 +	float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
 +	float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
 +	float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
 +	float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
 +
 +	/* intersect ray against child nodes */
- 	NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- 	NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- 	NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- 	NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- 	NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- 	NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- 	NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- 	NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
- 
- 	NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- 	NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- 	NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- 	NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- 	NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- 	NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- 	NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- 	NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
++	float c0lox = (node0.x - P.x) * idir.x;
++	float c0hix = (node0.z - P.x) * idir.x;
++	float c0loy = (node1.x - P.y) * idir.y;
++	float c0hiy = (node1.z - P.y) * idir.y;
++	float c0loz = (node2.x - P.z) * idir.z;
++	float c0hiz = (node2.z - P.z) * idir.z;
++	float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
++	float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
++
++	float c1lox = (node0.y - P.x) * idir.x;
++	float c1hix = (node0.w - P.x) * idir.x;
++	float c1loy = (node1.y - P.y) * idir.y;
++	float c1hiy = (node1.w - P.y) * idir.y;
++	float c1loz = (node2.y - P.z) * idir.z;
++	float c1hiz = (node2.w - P.z) * idir.z;
++	float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
++	float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
 +
 +	dist[0] = c0min;
 +	dist[1] = c1min;
 +
 +#ifdef __VISIBILITY_FLAG__
 +	/* this visibility test gives a 5% performance hit, how to solve? */
 +	return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
 +	       (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
 +#else
 +	return ((c0max >= c0min)? 1: 0) |
 +	       ((c1max >= c1min)? 2: 0);
 +#endif
 +}
 +
 +ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
 +                                                        const float3 P,
 +                                                        const float3 idir,
 +                                                        const float t,
 +                                                        const float difl,
 +                                                        const float /*extmax*/,
 +                                                        const int nodeAddr,
 +                                                        const uint visibility,
 +                                                        float *dist)
 +{
 +
 +	/* fetch node data */
 +	float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
 +	float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
 +	float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
 +	float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
 +
 +	/* intersect ray against child nodes */
- 	NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- 	NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- 	NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- 	NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- 	NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- 	NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- 	NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- 	NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
- 
- 	NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- 	NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- 	NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- 	NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- 	NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- 	NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- 	NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- 	NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
++	float c0lox = (node0.x - P.x) * idir.x;
++	float c0hix = (node0.z - P.x) * idir.x;
++	float c0loy = (node1.x - P.y) * idir.y;
++	float c0hiy = (node1.z - P.y) * idir.y;
++	float c0loz = (node2.x - P.z) * idir.z;
++	float c0hiz = (node2.z - P.z) * idir.z;
++	float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
++	float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
++
++	float c1lox = (node0.y - P.x) * idir.x;
++	float c1hix = (node0.w - P.x) * idir.x;
++	float c1loy = (node1.y - P.y) * idir.y;
++	float c1hiy = (node1.w - P.y) * idir.y;
++	float c1loz = (node2.y - P.z) * idir.z;
++	float c1hiz = (node2.w - P.z) * idir.z;
++	float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
++	float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
 +
 +	if(difl != 0.0f) {
 +		float hdiff = 1.0f + difl;
 +		float ldiff = 1.0f - difl;
 +		if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
 +			c0min *= ldiff;
 +			c0max *= hdiff;
 +		}
 +		if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
 +			c1min *= ldiff;
 +			c1max *= hdiff;
 +		}
 +	}
 +
 +	dist[0] = c0min;
 +	dist[1] = c1min;
 +
 +#ifdef __VISIBILITY_FLAG__
 +	/* this visibility test gives a 5% performance hit, how to solve? */
 +	return (((c0max >= c0min) && (

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list