[Bf-blender-cvs] [a08e217] master: Cycles: Implement unaligned nodes BVH traversal

Sergey Sharybin noreply at git.blender.org
Thu Jul 7 17:29:11 CEST 2016


Commit: a08e2179f17569abed814f734dadfebf591e7024
Author: Sergey Sharybin
Date:   Thu Jul 7 12:23:13 2016 +0200
Branches: master
https://developer.blender.org/rBa08e2179f17569abed814f734dadfebf591e7024

Cycles: Implement unaligned nodes BVH traversal

This commit implements traversal of unaligned BVH nodes.

QBVH traversal is fully SIMD optimized and calculates orientation
for all 4 children at a time; the regular BVH traversal can probably
be optimized a bit more.

===================================================================

M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/geom/geom_bvh.h
A	intern/cycles/kernel/geom/geom_bvh_nodes.h
M	intern/cycles/kernel/geom/geom_bvh_shadow.h
M	intern/cycles/kernel/geom/geom_bvh_subsurface.h
M	intern/cycles/kernel/geom/geom_bvh_traversal.h
M	intern/cycles/kernel/geom/geom_bvh_volume.h
M	intern/cycles/kernel/geom/geom_bvh_volume_all.h
M	intern/cycles/kernel/geom/geom_qbvh.h
M	intern/cycles/kernel/geom/geom_qbvh_shadow.h
M	intern/cycles/kernel/geom/geom_qbvh_subsurface.h
M	intern/cycles/kernel/geom/geom_qbvh_traversal.h
M	intern/cycles/kernel/geom/geom_qbvh_volume.h
M	intern/cycles/kernel/geom/geom_qbvh_volume_all.h

===================================================================

diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index f0adbc0..3c2f774 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -141,6 +141,7 @@ set(SRC_GEOM_HEADERS
 	geom/geom.h
 	geom/geom_attribute.h
 	geom/geom_bvh.h
+	geom/geom_bvh_nodes.h
 	geom/geom_bvh_shadow.h
 	geom/geom_bvh_subsurface.h
 	geom/geom_bvh_traversal.h
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index d0eedd3..f8d563f 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -77,6 +77,8 @@ CCL_NAMESPACE_BEGIN
 
 /* Regular BVH traversal */
 
+#include "geom_bvh_nodes.h"
+
 #define BVH_FUNCTION_NAME bvh_intersect
 #define BVH_FUNCTION_FEATURES 0
 #include "geom_bvh_traversal.h"
@@ -109,13 +111,13 @@ CCL_NAMESPACE_BEGIN
 
 #if defined(__SUBSURFACE__)
 #  define BVH_FUNCTION_NAME bvh_intersect_subsurface
-#  define BVH_FUNCTION_FEATURES 0
+#  define BVH_FUNCTION_FEATURES BVH_HAIR
 #  include "geom_bvh_subsurface.h"
 #endif
 
 #if defined(__SUBSURFACE__) && defined(__OBJECT_MOTION__)
 #  define BVH_FUNCTION_NAME bvh_intersect_subsurface_motion
-#  define BVH_FUNCTION_FEATURES BVH_MOTION
+#  define BVH_FUNCTION_FEATURES BVH_MOTION|BVH_HAIR
 #  include "geom_bvh_subsurface.h"
 #endif
 
@@ -123,19 +125,19 @@ CCL_NAMESPACE_BEGIN
 
 #if defined(__VOLUME__)
 #  define BVH_FUNCTION_NAME bvh_intersect_volume
-#  define BVH_FUNCTION_FEATURES 0
+#  define BVH_FUNCTION_FEATURES BVH_HAIR
 #  include "geom_bvh_volume.h"
 #endif
 
 #if defined(__VOLUME__) && defined(__INSTANCING__)
 #  define BVH_FUNCTION_NAME bvh_intersect_volume_instancing
-#  define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
 #  include "geom_bvh_volume.h"
 #endif
 
 #if defined(__VOLUME__) && defined(__OBJECT_MOTION__)
 #  define BVH_FUNCTION_NAME bvh_intersect_volume_motion
-#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
 #  include "geom_bvh_volume.h"
 #endif
 
@@ -175,19 +177,19 @@ CCL_NAMESPACE_BEGIN
 
 #if defined(__VOLUME_RECORD_ALL__)
 #  define BVH_FUNCTION_NAME bvh_intersect_volume_all
-#  define BVH_FUNCTION_FEATURES 0
+#  define BVH_FUNCTION_FEATURES BVH_HAIR
 #  include "geom_bvh_volume_all.h"
 #endif
 
 #if defined(__VOLUME_RECORD_ALL__) && defined(__INSTANCING__)
 #  define BVH_FUNCTION_NAME bvh_intersect_volume_all_instancing
-#  define BVH_FUNCTION_FEATURES BVH_INSTANCING
+#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_HAIR
 #  include "geom_bvh_volume_all.h"
 #endif
 
 #if defined(__VOLUME_RECORD_ALL__) && defined(__OBJECT_MOTION__)
 #  define BVH_FUNCTION_NAME bvh_intersect_volume_all_motion
-#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION
+#  define BVH_FUNCTION_FEATURES BVH_INSTANCING|BVH_MOTION|BVH_HAIR
 #  include "geom_bvh_volume_all.h"
 #endif
 
diff --git a/intern/cycles/kernel/geom/geom_bvh_nodes.h b/intern/cycles/kernel/geom/geom_bvh_nodes.h
new file mode 100644
index 0000000..deb91ec
--- /dev/null
+++ b/intern/cycles/kernel/geom/geom_bvh_nodes.h
@@ -0,0 +1,659 @@
+/*
+ * Copyright 2011-2016, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Build the Transform for one child of an unaligned node from three fetched float4 values.
+// TODO(sergey): Look into avoiding the full Transform; a 3x3 matrix plus a 3-vector might be faster.
+ccl_device_inline Transform bvh_unaligned_node_fetch_space(KernelGlobals *kg,
+                                                           int nodeAddr,
+                                                           int child)
+{
+	Transform space;
+	const int child_addr = nodeAddr + child * 3; /* each child uses 3 consecutive __bvh_nodes entries */
+	space.x = kernel_tex_fetch(__bvh_nodes, child_addr+1); /* +1: entry nodeAddr+0 is not part of the space */
+	space.y = kernel_tex_fetch(__bvh_nodes, child_addr+2);
+	space.z = kernel_tex_fetch(__bvh_nodes, child_addr+3);
+	space.w = make_float4(0.0f, 0.0f, 0.0f, 1.0f); /* constant, not fetched */
+	return space;
+}
+
+#if !defined(__KERNEL_SSE2__)
+ccl_device_inline int bvh_aligned_node_intersect(KernelGlobals *kg,
+                                                 const float3 P,
+                                                 const float3 idir,
+                                                 const float t,
+                                                 const int nodeAddr,
+                                                 const uint visibility,
+                                                 float *dist)
+{
+	/* Slab test of the ray (origin P, idir multiplied per axis, limit t) against both children of the node at nodeAddr. Returns a bitmask: 1 = child 0 hit, 2 = child 1 hit; dist[0..1] receive the entry distances. */
+	/* Fetch node data: cnodes.x/.y are the per-child visibility bits, node0/1/2 hold the x/y/z bounds. */
+	float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+	float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+	float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+	float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+
+	/* Intersect ray against child nodes: child 0 uses the .x/.z components, child 1 uses .y/.w. */
+	float c0lox = (node0.x - P.x) * idir.x;
+	float c0hix = (node0.z - P.x) * idir.x;
+	float c0loy = (node1.x - P.y) * idir.y;
+	float c0hiy = (node1.z - P.y) * idir.y;
+	float c0loz = (node2.x - P.z) * idir.z;
+	float c0hiz = (node2.z - P.z) * idir.z;
+	float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); /* entry distance, never below 0 */
+	float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); /* exit distance, never above t */
+
+	float c1lox = (node0.y - P.x) * idir.x;
+	float c1hix = (node0.w - P.x) * idir.x;
+	float c1loy = (node1.y - P.y) * idir.y;
+	float c1hiy = (node1.w - P.y) * idir.y;
+	float c1loz = (node2.y - P.z) * idir.z;
+	float c1hiz = (node2.w - P.z) * idir.z;
+	float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); /* entry distance, never below 0 */
+	float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); /* exit distance, never above t */
+
+	dist[0] = c0min; /* written whether or not the child is hit */
+	dist[1] = c1min;
+
+#ifdef __VISIBILITY_FLAG__
+	/* A child counts as hit only if its interval is non-empty and its visibility bits overlap the mask. This visibility test gives a 5% performance hit, how to solve? */
+	return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+	       (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+#else
+	return ((c0max >= c0min)? 1: 0) |
+	       ((c1max >= c1min)? 2: 0);
+#endif
+}
+
+ccl_device_inline int bvh_aligned_node_intersect_robust(KernelGlobals *kg,
+                                                        const float3 P,
+                                                        const float3 idir,
+                                                        const float t,
+                                                        const float difl,
+                                                        const float extmax,
+                                                        const int nodeAddr,
+                                                        const uint visibility,
+                                                        float *dist)
+{
+	/* Same slab test as bvh_aligned_node_intersect, but when difl != 0 the hit interval of children flagged PATH_RAY_CURVE is widened using difl and extmax. Returns a bitmask: 1 = child 0 hit, 2 = child 1 hit; dist[0..1] receive the entry distances. */
+	/* Fetch node data: cnodes.x/.y are the per-child visibility bits, cnodes.z/.w are tested for PATH_RAY_CURVE, node0/1/2 hold the x/y/z bounds. */
+	float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
+	float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr+1);
+	float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr+2);
+	float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr+3);
+
+	/* Intersect ray against child nodes: child 0 uses the .x/.z components, child 1 uses .y/.w. */
+	float c0lox = (node0.x - P.x) * idir.x;
+	float c0hix = (node0.z - P.x) * idir.x;
+	float c0loy = (node1.x - P.y) * idir.y;
+	float c0hiy = (node1.z - P.y) * idir.y;
+	float c0loz = (node2.x - P.z) * idir.z;
+	float c0hiz = (node2.z - P.z) * idir.z;
+	float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f); /* entry distance, never below 0 */
+	float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t); /* exit distance, never above t */
+
+	float c1lox = (node0.y - P.x) * idir.x;
+	float c1hix = (node0.w - P.x) * idir.x;
+	float c1loy = (node1.y - P.y) * idir.y;
+	float c1hiy = (node1.w - P.y) * idir.y;
+	float c1loz = (node2.y - P.z) * idir.z;
+	float c1hiz = (node2.w - P.z) * idir.z;
+	float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f); /* entry distance, never below 0 */
+	float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t); /* exit distance, never above t */
+
+	if(difl != 0.0f) { /* widening is skipped entirely when difl is zero */
+		float hdiff = 1.0f + difl;
+		float ldiff = 1.0f - difl;
+		if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) { /* child 0 flag */
+			c0min = max(ldiff * c0min, c0min - extmax); /* entry moves by the relative (difl) or absolute (extmax) amount, whichever leaves it larger */
+			c0max = min(hdiff * c0max, c0max + extmax); /* exit likewise, whichever leaves it smaller; note this may exceed t again */
+		}
+		if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) { /* child 1 flag */
+			c1min = max(ldiff * c1min, c1min - extmax);
+			c1max = min(hdiff * c1max, c1max + extmax);
+		}
+	}
+
+	dist[0] = c0min; /* written whether or not the child is hit */
+	dist[1] = c1min;
+
+#ifdef __VISIBILITY_FLAG__
+	/* A child counts as hit only if its interval is non-empty and its visibility bits overlap the mask. This visibility test gives a 5% performance hit, how to solve? */
+	return (((c0max >= c0min) && (__float_as_uint(cnodes.x) & visibility))? 1: 0) |
+	       (((c1max >= c1min) && (__float_as_uint(cnodes.y) & visibility))? 2: 0);
+#else
+	return ((c0max >= c0min)? 1: 0) |
+	       ((c1max >= c1min)? 2: 0);
+#endif
+}
+
+ccl_device_inline bool bvh_unaligned_node_intersect_child( /* tests the ray against one child of an unaligned node */
+        KernelGlobals *kg,
+        const float3 P,
+        const float3 dir,
+        const float t,
+        int nodeAddr,
+        int child,
+        float *dist)
+{ /* Returns true when the clamped interval is non-empty; *dist always receives the entry distance. No visibility test is done here. */
+	Transform space  = bvh_unaligned_node_fetch_space(kg, nodeAddr, child);
+	float3 aligned_dir = transform_direction(&space, dir); /* ray direction expressed in the child's space */
+	float3 aligned_P = transform_point(&space, P); /* ray origin expressed in the child's space */
+	float3 nrdir = -bvh_inverse_direction(aligned_dir); /* presumably -1/aligned_dir per component; bvh_inverse_direction is defined elsewhere -- TODO confirm */
+	float3 tLowerXYZ = aligned_P * nrdir; /* under that assumption: distance to the plane at coordinate 0 on each axis */
+	float3 tUpperXYZ = tLowerXYZ - nrdir; /* under that assumption: distance to the plane at coordinate 1, i.e. a unit-box test */
+	const float tNearX = min(tLowerXYZ.x, tUpperXYZ.x);
+	const float tNearY = min(tLowerXYZ.y, tUpperXYZ.y);
+	const float tNearZ = min(tLowerXYZ.z, tUpperXYZ.z);
+	const float tFarX  = max(tLowerXYZ.x, tUpperXYZ.x);
+	const float tFarY  = max(tLowerXYZ.y, tUpperXYZ.y);
+	const float tFarZ  = max(tLowerXYZ.z, tUpperXYZ.z);
+	const float tNear  = max4(0.0f, tNearX, tNearY, tNearZ); /* entry distance, never below 0 */
+	const float tFar   = min4(t, tFarX, tFarY, tFarZ); /* exit distance, never above t */
+	*dist = tNear;
+	return tNear <= tFar;
+}
+
+ccl_device_inline bool bvh_unaligned_node_intersect_child_robust(
+        KernelGlobals *kg,
+  

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list