[Bf-blender-cvs] [0e62c02] soc-2014-cycles: Merge remote-tracking branch 'origin/master' into soc-2014-cycles
Thomas Dinges
noreply at git.blender.org
Mon Jun 16 00:10:54 CEST 2014
Commit: 0e62c026ab3d66538834beb27c728f4107afa652
Author: Thomas Dinges
Date: Mon Jun 16 00:08:57 2014 +0200
https://developer.blender.org/rB0e62c026ab3d66538834beb27c728f4107afa652
Merge remote-tracking branch 'origin/master' into soc-2014-cycles
This commit removes some sincos() code + fma(), as the code in master changed too much in those places. I will re-check on that and add it back eventually.
Conflicts:
intern/cycles/CMakeLists.txt
intern/cycles/SConscript
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/closure/bsdf_microfacet.h
intern/cycles/kernel/geom/geom_bvh_shadow.h
intern/cycles/kernel/geom/geom_bvh_subsurface.h
intern/cycles/kernel/geom/geom_bvh_traversal.h
intern/cycles/kernel/kernel_compat_cpu.h
intern/cycles/kernel/svm/svm.h
intern/cycles/kernel/svm/svm_types.h
intern/cycles/render/nodes.cpp
intern/cycles/util/util_color.h
intern/cycles/util/util_optimization.h
intern/cycles/util/util_simd.h
source/blender/nodes/shader/nodes/node_shader_sepcombXYZ.c
===================================================================
===================================================================
diff --cc intern/cycles/kernel/geom/geom_triangle.h
index ca247da,f2f35c2..0b586a8
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@@ -400,89 -400,5 +400,89 @@@ ccl_device_inline void triangle_interse
}
#endif
+#ifdef __QBVH__
+ccl_device_inline void qbvh_node_intersect(KernelGlobals *kg, int *traverseChild,
+ int nodeAddrChild[4], float3 P, float3 idir, float t, int nodeAddr)
+{
+#ifdef __KERNEL_AVX2__
+ /* X axis */
+ const __m128 idirx = _mm_set_ps1(idir.x);
+ const __m128 mulx = _mm_mul_ps(_mm_set_ps1(P.x), idirx);
- const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
- const __m128 t0x = fms(bminx, idirx, mulx);
- const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
- const __m128 t1x = fms(bmaxx, idirx, mulx);
++ const __m128 bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
++ const __m128 t0x = msub(bminx, idirx, mulx);
++ const __m128 bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
++ const __m128 t1x = msub(bmaxx, idirx, mulx);
+
+ __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
+ __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
+
+ /* Y axis */
+ const __m128 idiry = _mm_set_ps1(idir.y);
+ const __m128 muly = _mm_mul_ps(_mm_set_ps1(P.y), idiry);
- const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
- const __m128 t0y = fms(bminy, idiry, muly);
- const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
- const __m128 t1y = fms(bmaxy, idiry, muly);
++ const __m128 bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
++ const __m128 t0y = msub(bminy, idiry, muly);
++ const __m128 bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
++ const __m128 t1y = msub(bmaxy, idiry, muly);
+
+ tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
+ tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
+
+ /* Z axis */
+ const __m128 idirz = _mm_set_ps1(idir.z);
+ const __m128 mulz = _mm_mul_ps(_mm_set_ps1(P.z), idirz);
- const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
- const __m128 t0z = fms(bminz, idirz, mulz);
- const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
- const __m128 t1z = fms(bmaxz, idirz, mulz);
++ const __m128 bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
++ const __m128 t0z = msub(bminz, idirz, mulz);
++ const __m128 bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
++ const __m128 t1z = msub(bmaxz, idirz, mulz);
+
+ tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
+ tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
+#else
+ const __m128 Px = _mm_set_ps1(P.x);
+ const __m128 idirx = _mm_set_ps1(idir.x);
- const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
++ const __m128 bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
+ const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, Px), idirx);
- const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
++ const __m128 bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
+ const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, Px), idirx);
+
+ __m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
+ __m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
+
+ /* Y axis */
+ const __m128 Py = _mm_set_ps1(P.y);
+ const __m128 idiry = _mm_set_ps1(idir.y);
- const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
++ const __m128 bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
+ const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, Py), idiry);
- const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
++ const __m128 bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
+ const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, Py), idiry);
+
+ tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
+ tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
+
+ /* Z axis */
+ const __m128 Pz = _mm_set_ps1(P.z);
+ const __m128 idirz = _mm_set_ps1(idir.z);
- const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
++ const __m128 bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
+ const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, Pz), idirz);
- const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
++ const __m128 bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
+ const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, Pz), idirz);
+
+ tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
+ tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
+#endif
+
+ /* compare and get mask */
+ *traverseChild = _mm_movemask_ps(_mm_cmple_ps(tmin, tmax));
+
+ /* get node addresses */
+ float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+6);
+
+ nodeAddrChild[0] = __float_as_int(cnodes.x);
+ nodeAddrChild[1] = __float_as_int(cnodes.y);
+ nodeAddrChild[2] = __float_as_int(cnodes.z);
+ nodeAddrChild[3] = __float_as_int(cnodes.w);
+}
+#endif
+
CCL_NAMESPACE_END
diff --cc intern/cycles/kernel/kernel_compat_cpu.h
index 3d31d40,c2aab93..50cb5bc
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@@ -43,17 -43,17 +43,17 @@@ template<typename T> struct texture
return data[index];
}
-#if 0
+#ifdef __KERNEL_SSE2__
- ccl_always_inline __m128 fetch_m128(int index)
+ ccl_always_inline ssef fetch_ssef(int index)
{
kernel_assert(index >= 0 && index < width);
- return ((__m128*)data)[index];
+ return ((ssef*)data)[index];
}
- ccl_always_inline __m128i fetch_m128i(int index)
+ ccl_always_inline ssei fetch_ssei(int index)
{
kernel_assert(index >= 0 && index < width);
- return ((__m128i*)data)[index];
+ return ((ssei*)data)[index];
}
#endif
More information about the Bf-blender-cvs
mailing list