[Bf-blender-cvs] [ab8d9c4] master: Cycles: Add some utility functions and structures

Sergey Sharybin noreply at git.blender.org
Wed Dec 24 22:56:22 CET 2014


Commit: ab8d9c4b8853755faa62307750d961dc2ec43708
Author: Sergey Sharybin
Date:   Tue Dec 16 20:27:44 2014 +0500
Branches: master
https://developer.blender.org/rBab8d9c4b8853755faa62307750d961dc2ec43708

Cycles: Add some utility functions and structures

Most of them are not currently used but are essential for the further work.

- CPU kernels with SSE2 support will now have the sse3b, sse3f and sse3i types.

- Added templated versions of min4, max4 which are handy to use with register
  variables.

- Added util_swap function which takes its arguments by pointer.
  So hopefully it'll be a portable version of std::swap.

===================================================================

M	intern/cycles/kernel/geom/geom_triangle_intersect.h
M	intern/cycles/kernel/kernel_compat_cpu.h
M	intern/cycles/util/util_math.h
M	intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 4bb60ca..8ed0e15 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -61,9 +61,7 @@ ccl_device_inline void triangle_intersect_precalc(float3 dir,
 
 	/* Swap kx and ky dimensions to preserve winding direction of triangles. */
 	if(IDX(dir, kz) < 0.0f) {
-		int tmp = kx;
-		kx = ky;
-		ky = tmp;
+		util_swap(&kx, &ky);
 	}
 
 	/* Calculate the shear constants. */
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 08c8bdd..2f0b78e 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -344,6 +344,12 @@ typedef texture_image<uchar4> texture_image_uchar4;
 
 #define kernel_data (kg->__data)
 
+#ifdef __KERNEL_SSE2__  /* vector3 aliases, only defined for SSE2 kernels. */
+typedef vector3<sseb> sse3b;
+typedef vector3<ssef> sse3f;
+typedef vector3<ssei> sse3i;
+#endif  /* __KERNEL_SSE2__ */
+
 CCL_NAMESPACE_END
 
 #endif /* __KERNEL_COMPAT_CPU_H__ */
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 6898dc9..3d605e0 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -124,6 +124,24 @@ ccl_device_inline double min(double a, double b)
 	return (a < b)? a: b;
 }
 
+/* min4()/max4() are templated so they can be used with register data types;
+ * each reduces its four arguments with pairwise min()/max() calls.
+ * NOTE: These are CPU-only functions, so passing by reference is OK here,
+ * but other devices will need care with this.
+ */
+
+template<typename T>
+ccl_device_inline T min4(const T& a, const T& b, const T& c, const T& d)
+{
+	return min(min(a,b),min(c,d));
+}
+
+template<typename T>
+ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d)
+{
+	return max(max(a,b),max(c,d));
+}
+
 #endif
 
 ccl_device_inline float min4(float a, float b, float c, float d)
@@ -1468,6 +1486,25 @@ ccl_device_inline int util_max_axis(float3 vec)
 	}
 }
 
+/* Swap the values pointed to by a and b via a temporary copy. Both pointers
+ * are declared __restrict, so callers should pass distinct objects.
+ *
+ * NOTE: We don't use std::swap here for a number of reasons:
+ * - We don't want the current context to be polluted with all the templated
+ *   functions from the STL, which might interfere with which function is used.
+ * - Different devices in theory might want to use intrinsics to optimize
+ *   this function for a specific type.
+ * - We don't want to use references because of the OpenCL state at this
+ *   moment.
+ */
+template <typename T>
+ccl_device_inline void util_swap(T *__restrict a, T *__restrict b)
+{
+	T c = *a;
+	*a = *b;
+	*b = c;
+}
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_MATH_H__ */
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index ce84200..8c0f6d1 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -264,6 +264,19 @@ struct ccl_try_align(16) float4 {
 	__forceinline float& operator[](int i) { return *(&x + i); }
 };
 
+template<typename T>
+class vector3  /* Three public components x, y, z, all of the same type T. */
+{
+public:
+	T x, y, z;
+
+	ccl_always_inline vector3() {}  /* x, y, z are default-initialized. */
+	ccl_always_inline vector3(const T& a)  /* Not explicit: allows implicit conversion from T. */
+	  : x(a), y(a), z(a) {}  /* The same value is copied into all three components. */
+	ccl_always_inline vector3(const T& x, const T& y, const T& z)
+	  : x(x), y(y), z(z) {}  /* Each member is initialized from the same-named parameter. */
+};
+
 #endif
 
 #ifndef __KERNEL_GPU__




More information about the Bf-blender-cvs mailing list