[Bf-blender-cvs] [c354214] cycles_bvh: Cycles: Fix naming and merge node intersections into a single file
Sergey Sharybin
noreply at git.blender.org
Wed Jun 15 17:56:15 CEST 2016
Commit: c35421452169c902bff1b4ef166aaa26b2c23332
Author: Sergey Sharybin
Date: Wed Jun 15 16:58:59 2016 +0200
Branches: cycles_bvh
https://developer.blender.org/rBc35421452169c902bff1b4ef166aaa26b2c23332
Cycles: Fix naming and merge node intersections into a single file
===================================================================
M intern/cycles/kernel/CMakeLists.txt
M intern/cycles/kernel/geom/geom_bvh.h
M intern/cycles/kernel/geom/geom_qbvh.h
D intern/cycles/kernel/geom/geom_qbvh_curve.h
M intern/cycles/kernel/geom/geom_qbvh_shadow.h
M intern/cycles/kernel/geom/geom_qbvh_subsurface.h
M intern/cycles/kernel/geom/geom_qbvh_traversal.h
M intern/cycles/kernel/geom/geom_qbvh_volume.h
M intern/cycles/kernel/geom/geom_qbvh_volume_all.h
===================================================================
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 76b940d..fc0614e 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -150,7 +150,6 @@ set(SRC_GEOM_HEADERS
geom/geom_object.h
geom/geom_primitive.h
geom/geom_qbvh.h
- geom/geom_qbvh_curve.h
geom/geom_qbvh_shadow.h
geom/geom_qbvh_subsurface.h
geom/geom_qbvh_traversal.h
diff --git a/intern/cycles/kernel/geom/geom_bvh.h b/intern/cycles/kernel/geom/geom_bvh.h
index 3960de8..5b2c4df 100644
--- a/intern/cycles/kernel/geom/geom_bvh.h
+++ b/intern/cycles/kernel/geom/geom_bvh.h
@@ -73,7 +73,6 @@ CCL_NAMESPACE_BEGIN
/* Common QBVH functions. */
#ifdef __QBVH__
# include "geom_qbvh.h"
-# include "geom_qbvh_curve.h"
#endif
/* Regular BVH traversal */
diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h
index 30ed851..5eda321 100644
--- a/intern/cycles/kernel/geom/geom_qbvh.h
+++ b/intern/cycles/kernel/geom/geom_qbvh.h
@@ -51,23 +51,25 @@ ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
}
-ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
+/* Axis-aligned nodes intersection */
+
+ccl_device_inline int qbvh_aligned_node_intersect(KernelGlobals *__restrict kg,
+ const ssef& tnear,
+ const ssef& tfar,
#ifdef __KERNEL_AVX2__
- const sse3f& org_idir,
+ const sse3f& org_idir,
#else
- const sse3f& org,
+ const sse3f& org,
#endif
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- ssef *__restrict dist)
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int nodeAddr,
+ ssef *__restrict dist)
{
const int offset = nodeAddr + 1;
#ifdef __KERNEL_AVX2__
@@ -101,24 +103,25 @@ ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
return mask;
}
-ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
- const ssef& tnear,
- const ssef& tfar,
+ccl_device_inline int qbvh_aligned_node_intersect_robust(
+ KernelGlobals *__restrict kg,
+ const ssef& tnear,
+ const ssef& tfar,
#ifdef __KERNEL_AVX2__
- const sse3f& P_idir,
+ const sse3f& P_idir,
#else
- const sse3f& P,
+ const sse3f& P,
#endif
- const sse3f& idir,
- const int near_x,
- const int near_y,
- const int near_z,
- const int far_x,
- const int far_y,
- const int far_z,
- const int nodeAddr,
- const float difl,
- ssef *__restrict dist)
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int nodeAddr,
+ const float difl,
+ ssef *__restrict dist)
{
const int offset = nodeAddr + 1;
#ifdef __KERNEL_AVX2__
@@ -145,3 +148,286 @@ ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
*dist = tNear;
return (int)movemask(vmask);
}
+
+/* Unaligned nodes intersection */
+
+ccl_device_inline int qbvh_unaligned_node_intersect( /* Ray vs. unaligned node boxes: moves the ray into each lane's box space with the node's stored transform, clips it to [0, 1] per axis, returns the lane hit mask and writes entry distances to *dist. */
+ KernelGlobals *__restrict kg, /* not referenced by name in this body; presumably consumed by kernel_tex_fetch_ssef -- confirm */
+ const ssef& tnear, /* incoming lower bound, combined with the per-axis entry values through max4() */
+ const ssef& tfar, /* incoming upper bound, combined with the per-axis exit values through min4() */
+#ifdef __KERNEL_AVX2__
+ const sse3f& org_idir, /* not referenced in this body */
+#endif
+ const sse3f& org, /* ray origin; transformed into aligned_P_* below */
+ const sse3f& dir, /* ray direction; transformed into aligned_dir_* below */
+ const sse3f& idir, /* not referenced in this body */
+ const int near_x, /* near_* / far_* are not referenced in this body */
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int nodeAddr, /* index into __bvh_nodes; entries nodeAddr+1 .. nodeAddr+12 are read */
+ ssef *__restrict dist) /* output: receives tNear for all lanes, including lanes that miss */
+{
+ const int offset = nodeAddr;
+ const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1); /* coefficients producing the box-space X coordinate */
+ const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
+ const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
+
+ const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4); /* coefficients producing the box-space Y coordinate */
+ const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
+ const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
+
+ const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7); /* coefficients producing the box-space Z coordinate */
+ const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
+ const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
+
+ const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10); /* translation terms, added to the origin only */
+ const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
+ const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
+
+ const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z, /* direction in box space: linear part only, no translation */
+ aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
+ aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
+
+ const ssef aligned_P_x = org.x*tfm_x_x + org.y*tfm_x_y + org.z*tfm_x_z + tfm_t_x, /* origin in box space: linear part plus translation */
+ aligned_P_y = org.x*tfm_y_x + org.y*tfm_y_y + org.z*tfm_y_z + tfm_t_y,
+ aligned_P_z = org.x*tfm_z_x + org.y*tfm_z_y + org.z*tfm_z_z + tfm_t_z;
+
+ const ssef neg_one(-1.0f, -1.0f, -1.0f, -1.0f); /* four lanes; presumably one lane per child of the node -- confirm */
+ const ssef nrdir_x = neg_one / aligned_dir_x, /* -1/d per axis. NOTE(review): no guard for a zero direction component; looks like it relies on IEEE inf -- confirm */
+ nrdir_y = neg_one / aligned_dir_y,
+ nrdir_z = neg_one / aligned_dir_z;
+
+ const ssef tlower_x = aligned_P_x * nrdir_x, /* -P/d: ray parameter where the box-space coordinate equals 0 */
+ tlower_y = aligned_P_y * nrdir_y,
+ tlower_z = aligned_P_z * nrdir_z;
+
+ const ssef tupper_x = tlower_x - nrdir_x, /* (1 - P)/d: ray parameter where the box-space coordinate equals 1 */
+ tupper_y = tlower_y - nrdir_y,
+ tupper_z = tlower_z - nrdir_z;
+
+#ifdef __KERNEL_SSE41__
+ const ssef tnear_x = mini(tlower_x, tupper_x); /* this branch differs from the #else branch only in mini/maxi vs. min/max; their definitions are not visible here -- NOTE(review): confirm they order negative values correctly */
+ const ssef tnear_y = mini(tlower_y, tupper_y);
+ const ssef tnear_z = mini(tlower_z, tupper_z);
+ const ssef tfar_x = maxi(tlower_x, tupper_x);
+ const ssef tfar_y = maxi(tlower_y, tupper_y);
+ const ssef tfar_z = maxi(tlower_z, tupper_z);
+ const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z); /* entry parameter, clamped from below by the caller's tnear */
+ const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z); /* exit parameter, clamped from above by the caller's tfar */
+ const sseb vmask = tNear <= tFar; /* a lane counts as hit when its interval is non-empty */
+ *dist = tNear;
+ return movemask(vmask);
+#else
+ const ssef tnear_x = min(tlower_x, tupper_x); /* per axis, the smaller of the two plane parameters is the entry */
+ const ssef tnear_y = min(tlower_y, tupper_y);
+ const ssef tnear_z = min(tlower_z, tupper_z);
+ const ssef tfar_x = max(tlower_x, tupper_x); /* per axis, the larger of the two plane parameters is the exit */
+ const ssef tfar_y = max(tlower_y, tupper_y);
+ const ssef tfar_z = max(tlower_z, tupper_z);
+ const ssef tNear = max4(tnear, tnear_x, tnear_y, tnear_z); /* entry parameter, clamped from below by the caller's tnear */
+ const ssef tFar = min4(tfar, tfar_x, tfar_y, tfar_z); /* exit parameter, clamped from above by the caller's tfar */
+ const sseb vmask = tNear <= tFar; /* a lane counts as hit when its interval is non-empty */
+ *dist = tNear;
+ return movemask(vmask);
+#endif
+}
+
+ccl_device_inline int qbvh_unaligned_node_intersect_robust(
+ KernelGlobals *__restrict kg,
+ const ssef& tnear,
+ const ssef& tfar,
+#ifdef __KERNEL_AVX2__
+ const sse3f& P_idir,
+#endif
+ const sse3f& P,
+ const sse3f& dir,
+ const sse3f& idir,
+ const int near_x,
+ const int near_y,
+ const int near_z,
+ const int far_x,
+ const int far_y,
+ const int far_z,
+ const int nodeAddr,
+ const float difl,
+ ssef *__restrict dist)
+{
+ const int offset = nodeAddr;
+ const ssef tfm_x_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+1);
+ const ssef tfm_x_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+2);
+ const ssef tfm_x_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+3);
+
+ const ssef tfm_y_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+4);
+ const ssef tfm_y_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+5);
+ const ssef tfm_y_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+6);
+
+ const ssef tfm_z_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+7);
+ const ssef tfm_z_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+8);
+ const ssef tfm_z_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+9);
+
+ const ssef tfm_t_x = kernel_tex_fetch_ssef(__bvh_nodes, offset+10);
+ const ssef tfm_t_y = kernel_tex_fetch_ssef(__bvh_nodes, offset+11);
+ const ssef tfm_t_z = kernel_tex_fetch_ssef(__bvh_nodes, offset+12);
+
+ const ssef aligned_dir_x = dir.x*tfm_x_x + dir.y*tfm_x_y + dir.z*tfm_x_z,
+ aligned_dir_y = dir.x*tfm_y_x + dir.y*tfm_y_y + dir.z*tfm_y_z,
+ aligned_dir_z = dir.x*tfm_z_x + dir.y*tfm_z_y + dir.z*tfm_z_z;
+
+ const ssef aligned_P_x = P.x*tfm_x_x + P.y*tfm_x_y + P.z*tfm_x_z + tfm_t_x,
+ aligned_P_y = P.x*tfm_y_x + P.y*tfm_y_y + P.z*tf
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list