[Bf-blender-cvs] [7d16710] cycles_bvh: Merge branch 'master' into cycles_bvh
Sergey Sharybin
noreply at git.blender.org
Wed Jul 6 12:50:01 CEST 2016
Commit: 7d16710e775003efc7467ab8f5950f1eef7cc194
Author: Sergey Sharybin
Date: Wed Jul 6 11:01:23 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rB7d16710e775003efc7467ab8f5950f1eef7cc194
Merge branch 'master' into cycles_bvh
This commit also removes NO_EXTENDED_PRECISION from the refactored code, which
resolves the merge conflicts and eliminates this define from the new code.
===================================================================
===================================================================
diff --cc intern/cycles/bvh/bvh_sort.cpp
index b9cd8d7,d50178b..e5bcf99
--- a/intern/cycles/bvh/bvh_sort.cpp
+++ b/intern/cycles/bvh/bvh_sort.cpp
@@@ -66,10 -42,8 +56,10 @@@ public
__forceinline int compare(const BVHReference& ra,
const BVHReference& rb) const
{
- float ca = ra.bounds().min[dim] + ra.bounds().max[dim];
- float cb = rb.bounds().min[dim] + rb.bounds().max[dim];
+ BoundBox ra_bounds = get_prim_bounds(ra),
+ rb_bounds = get_prim_bounds(rb);
- NO_EXTENDED_PRECISION float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
- NO_EXTENDED_PRECISION float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
++ float ca = ra_bounds.min[dim] + ra_bounds.max[dim];
++ float cb = rb_bounds.min[dim] + rb_bounds.max[dim];
if(ca < cb) return -1;
else if(ca > cb) return 1;
diff --cc intern/cycles/kernel/geom/geom.h
index 3ba62bb,2949f66..33e91d1
--- a/intern/cycles/kernel/geom/geom.h
+++ b/intern/cycles/kernel/geom/geom.h
@@@ -21,17 -21,12 +21,8 @@@
/* 64 object BVH + 64 mesh BVH + 64 object node splitting */
#define BVH_STACK_SIZE 192
#define BVH_QSTACK_SIZE 384
-#define BVH_NODE_SIZE 4
-#define BVH_NODE_LEAF_SIZE 1
-#define BVH_QNODE_SIZE 7
-#define BVH_QNODE_LEAF_SIZE 1
#define TRI_NODE_SIZE 3
- /* silly workaround for float extended precision that happens when compiling
- * without sse support on x86, it results in different results for float ops
- * that you would otherwise expect to compare correctly */
- #if !defined(__i386__) || defined(__SSE__)
- # define NO_EXTENDED_PRECISION
- #else
- # define NO_EXTENDED_PRECISION volatile
- #endif
-
#include "geom_attribute.h"
#include "geom_object.h"
#include "geom_triangle.h"
diff --cc intern/cycles/kernel/geom/geom_bvh_nodes.h
index 1c9114d,0000000..75d90f2
mode 100644,000000..100644
--- a/intern/cycles/kernel/geom/geom_bvh_nodes.h
+++ b/intern/cycles/kernel/geom/geom_bvh_nodes.h
@@@ -1,670 -1,0 +1,670 @@@
+/*
+ * Copyright 2011-2016, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
+ int nodeAddr,
+ int child)
+{
+ Transform space;
+ if(child == 0) {
+ space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+ space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+ space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+ space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+4);
+ }
+ else {
+ space.x = kernel_tex_fetch(__bvh_nodes, nodeAddr+5);
+ space.y = kernel_tex_fetch(__bvh_nodes, nodeAddr+6);
+ space.z = kernel_tex_fetch(__bvh_nodes, nodeAddr+7);
+ space.w = kernel_tex_fetch(__bvh_nodes, nodeAddr+8);
+ }
+ return space;
+}
+
+#if !defined(__KERNEL_SSE2__)
+ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
+ const float3 P,
+ const float3 idir,
+ const float t,
+ const int nodeAddr,
+ const uint visibility,
+ float *dist)
+{
+
+ /* fetch node data */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+
+ /* intersect ray against child nodes */
- NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
++ float c0lox = (node0.x - P.x) * idir.x;
++ float c0hix = (node0.z - P.x) * idir.x;
++ float c0loy = (node1.x - P.y) * idir.y;
++ float c0hiy = (node1.z - P.y) * idir.y;
++ float c0loz = (node2.x - P.z) * idir.z;
++ float c0hiz = (node2.z - P.z) * idir.z;
++ float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
++ float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
++
++ float c1lox = (node0.y - P.x) * idir.x;
++ float c1hix = (node0.w - P.x) * idir.x;
++ float c1loy = (node1.y - P.y) * idir.y;
++ float c1hiy = (node1.w - P.y) * idir.y;
++ float c1loz = (node2.y - P.z) * idir.z;
++ float c1hiz = (node2.w - P.z) * idir.z;
++ float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
++ float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+#ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+ (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+#else
+ return ((c0max >= c0min)? 1: 0) |
+ ((c1max >= c1min)? 2: 0);
+#endif
+}
+
+ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
+ const float3 P,
+ const float3 idir,
+ const float t,
+ const float difl,
+ const float /*extmax*/,
+ const int nodeAddr,
+ const uint visibility,
+ float *dist)
+{
+
+ /* fetch node data */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+ float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+ float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+ float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+
+ /* intersect ray against child nodes */
- NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
- NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
-
- NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
- NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
- NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
- NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
- NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
++ float c0lox = (node0.x - P.x) * idir.x;
++ float c0hix = (node0.z - P.x) * idir.x;
++ float c0loy = (node1.x - P.y) * idir.y;
++ float c0hiy = (node1.z - P.y) * idir.y;
++ float c0loz = (node2.x - P.z) * idir.z;
++ float c0hiz = (node2.z - P.z) * idir.z;
++ float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
++ float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
++
++ float c1lox = (node0.y - P.x) * idir.x;
++ float c1hix = (node0.w - P.x) * idir.x;
++ float c1loy = (node1.y - P.y) * idir.y;
++ float c1hiy = (node1.w - P.y) * idir.y;
++ float c1loz = (node2.y - P.z) * idir.z;
++ float c1hiz = (node2.w - P.z) * idir.z;
++ float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
++ float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
+
+ if(difl != 0.0f) {
+ float hdiff = 1.0f + difl;
+ float ldiff = 1.0f - difl;
+ if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
+ c0min *= ldiff;
+ c0max *= hdiff;
+ }
+ if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
+ c1min *= ldiff;
+ c1max *= hdiff;
+ }
+ }
+
+ dist[0] = c0min;
+ dist[1] = c1min;
+
+#ifdef __VISIBILITY_FLAG__
+ /* this visibility test gives a 5% performance hit, how to solve? */
+ return (((c0max >= c0min) && (
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list