[Bf-blender-cvs] [0e62c02] soc-2014-cycles: Merge remote-tracking branch 'origin/master' into soc-2014-cycles

Mon Jun 16 00:10:54 CEST 2014

Commit: 0e62c026ab3d66538834beb27c728f4107afa652
Author: Thomas Dinges
Date:   Mon Jun 16 00:08:57 2014 +0200
https://developer.blender.org/rB0e62c026ab3d66538834beb27c728f4107afa652

Merge remote-tracking branch 'origin/master' into soc-2014-cycles

This commit removes some sincos() code + fma(), as the code in master changed too much in those places. I will re-check that and add it back eventually.

Conflicts:
	intern/cycles/CMakeLists.txt
	intern/cycles/SConscript
	intern/cycles/kernel/CMakeLists.txt
	intern/cycles/kernel/closure/bsdf_microfacet.h
	intern/cycles/kernel/geom/geom_bvh_shadow.h
	intern/cycles/kernel/geom/geom_bvh_subsurface.h
	intern/cycles/kernel/geom/geom_bvh_traversal.h
	intern/cycles/kernel/kernel_compat_cpu.h
	intern/cycles/kernel/svm/svm.h
	intern/cycles/kernel/svm/svm_types.h
	intern/cycles/render/nodes.cpp
	intern/cycles/util/util_color.h
	intern/cycles/util/util_optimization.h
	intern/cycles/util/util_simd.h
	source/blender/nodes/shader/nodes/node_shader_sepcombXYZ.c

===================================================================



===================================================================

diff --cc intern/cycles/kernel/geom/geom_triangle.h
index ca247da,f2f35c2..0b586a8

--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@@ -400,89 -400,5 +400,89 @@@ ccl_device_inline void triangle_interse
  }
  #endif
  
 +#ifdef __QBVH__
 +ccl_device_inline void qbvh_node_intersect(KernelGlobals *kg, int *traverseChild,
 +										   int nodeAddrChild[4], float3 P, float3 idir, float t, int nodeAddr)
 +{
 +#ifdef __KERNEL_AVX2__
 +	/* X axis */
 +	const __m128 idirx = _mm_set_ps1(idir.x);
 +	const __m128 mulx = _mm_mul_ps(_mm_set_ps1(P.x), idirx);
- 	const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
- 	const __m128 t0x = fms(bminx, idirx, mulx);
- 	const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
- 	const __m128 t1x = fms(bmaxx, idirx, mulx);
++	const __m128 bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
++	const __m128 t0x = msub(bminx, idirx, mulx);
++	const __m128 bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
++	const __m128 t1x = msub(bmaxx, idirx, mulx);
 +
 +	__m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
 +	__m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
 +
 +	/* Y axis */
 +	const __m128 idiry = _mm_set_ps1(idir.y);
 +	const __m128 muly = _mm_mul_ps(_mm_set_ps1(P.y), idiry);
- 	const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
- 	const __m128 t0y = fms(bminy, idiry, muly);
- 	const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
- 	const __m128 t1y = fms(bmaxy, idiry, muly);
++	const __m128 bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
++	const __m128 t0y = msub(bminy, idiry, muly);
++	const __m128 bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
++	const __m128 t1y = msub(bmaxy, idiry, muly);
 +
 +	tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
 +	tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
 +
 +	/* Z axis */
 +	const __m128 idirz = _mm_set_ps1(idir.z);
 +	const __m128 mulz = _mm_mul_ps(_mm_set_ps1(P.z), idirz);
- 	const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
- 	const __m128 t0z = fms(bminz, idirz, mulz);
- 	const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
- 	const __m128 t1z = fms(bmaxz, idirz, mulz);
++	const __m128 bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
++	const __m128 t0z = msub(bminz, idirz, mulz);
++	const __m128 bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
++	const __m128 t1z = msub(bmaxz, idirz, mulz);
 +
 +	tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
 +	tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
 +#else
 +	const __m128 Px = _mm_set_ps1(P.x);
 +	const __m128 idirx = _mm_set_ps1(idir.x);
- 	const __m128 bminx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
++	const __m128 bminx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+0);
 +	const __m128 t0x = _mm_mul_ps(_mm_sub_ps(bminx, Px), idirx);
- 	const __m128 bmaxx = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
++	const __m128 bmaxx = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+1);
 +	const __m128 t1x = _mm_mul_ps(_mm_sub_ps(bmaxx, Px), idirx);
 +
 +	__m128 tmin = _mm_max_ps(_mm_min_ps(t0x, t1x), _mm_setzero_ps());
 +	__m128 tmax = _mm_min_ps(_mm_max_ps(t0x, t1x), _mm_set_ps1(t));
 +
 +	/* Y axis */
 +	const __m128 Py = _mm_set_ps1(P.y);
 +	const __m128 idiry = _mm_set_ps1(idir.y);
- 	const __m128 bminy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
++	const __m128 bminy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+2);
 +	const __m128 t0y = _mm_mul_ps(_mm_sub_ps(bminy, Py), idiry);
- 	const __m128 bmaxy = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
++	const __m128 bmaxy = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+3);
 +	const __m128 t1y = _mm_mul_ps(_mm_sub_ps(bmaxy, Py), idiry);
 +
 +	tmin = _mm_max_ps(_mm_min_ps(t0y, t1y), tmin);
 +	tmax = _mm_min_ps(_mm_max_ps(t0y, t1y), tmax);
 +
 +	/* Z axis */
 +	const __m128 Pz = _mm_set_ps1(P.z);
 +	const __m128 idirz = _mm_set_ps1(idir.z);
- 	const __m128 bminz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
++	const __m128 bminz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+4);
 +	const __m128 t0z = _mm_mul_ps(_mm_sub_ps(bminz, Pz), idirz);
- 	const __m128 bmaxz = kernel_tex_fetch_m128(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
++	const __m128 bmaxz = kernel_tex_fetch_ssef(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+5);
 +	const __m128 t1z = _mm_mul_ps(_mm_sub_ps(bmaxz, Pz), idirz);
 +
 +	tmin = _mm_max_ps(_mm_min_ps(t0z, t1z), tmin);
 +	tmax = _mm_min_ps(_mm_max_ps(t0z, t1z), tmax);
 +#endif
 +
 +	/* compare and get mask */
 +	*traverseChild = _mm_movemask_ps(_mm_cmple_ps(tmin, tmax));
 +
 +	/* get node addresses */
 +	float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*QBVH_NODE_SIZE+6);
 +
 +	nodeAddrChild[0] = __float_as_int(cnodes.x);
 +	nodeAddrChild[1] = __float_as_int(cnodes.y);
 +	nodeAddrChild[2] = __float_as_int(cnodes.z);
 +	nodeAddrChild[3] = __float_as_int(cnodes.w);
 +}
 +#endif
 +
  CCL_NAMESPACE_END
  
diff --cc intern/cycles/kernel/kernel_compat_cpu.h
index 3d31d40,c2aab93..50cb5bc
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@@ -43,17 -43,17 +43,17 @@@ template<typename T> struct texture  
  		return data[index];
  	}
  
 -#if 0
 +#ifdef __KERNEL_SSE2__
- 	ccl_always_inline __m128 fetch_m128(int index)
+ 	ccl_always_inline ssef fetch_ssef(int index)
  	{
  		kernel_assert(index >= 0 && index < width);
- 		return ((__m128*)data)[index];
+ 		return ((ssef*)data)[index];
  	}
  
- 	ccl_always_inline __m128i fetch_m128i(int index)
+ 	ccl_always_inline ssei fetch_ssei(int index)
  	{
  		kernel_assert(index >= 0 && index < width);
- 		return ((__m128i*)data)[index];
+ 		return ((ssei*)data)[index];
  	}
  #endif