[Bf-blender-cvs] [435ec91] soc-2016-cycles_images: Cleanup and generalize half to float conversion functions for all architectures.

Fri Jun 17 12:47:37 CEST 2016

Commit: 435ec91907465ffa6f6baae4a242de694c02ebf7
Author: Thomas Dinges
Date:   Fri Jun 17 12:47:11 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rB435ec91907465ffa6f6baae4a242de694c02ebf7

Cleanup and generalize half to float conversion functions for all architectures.

===================================================================

M	intern/cycles/util/util_half.h

===================================================================

diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 24a0b18..724c406 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -38,16 +38,20 @@ struct half4 { half x, y, z, w; };
 #endif
 
 /* Float <-> Half conversion.
- * we define several functions:
+ * We define three main functions for each architecture.
+ * Half data is always loaded / written via pointers.
  * float4_store_half()
- * half_to_float4() TODO
- * half_to_float() TODO
-*/
+ * half_to_float()
+ * half4_to_float4()
+ */
 
+/* OpenCL */
 #if defined(__KERNEL_OPENCL__)
 #  define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h);
+#  define half_to_float(h) vload_half(0, h) /* no trailing ';' so the macro can be used inside expressions */
 #  define half4_to_float4(h) vload_half4(0, h);
 
+/* CUDA */
 #elif defined(__KERNEL_CUDA__)
 
 ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
@@ -58,12 +62,25 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 	h[3] = __float2half(f.w * scale);
 }
 
-ccl_device_inline float4 half4_to_float4(half *h)
+ccl_device_inline float half_to_float(half h)
 {
-    return make_float4(__half2float(h[0]), __half2float(h[1]), __half2float(h[2]), __half2float(h[3]));
+	return __half2float(h);
 }
 
-#else /* __KERNEL_CPU__ */
+ccl_device_inline float4 half4_to_float4(half4 h)
+{
+	float4 f;
+
+	f.x = half_to_float(h.x);
+	f.y = half_to_float(h.y);
+	f.z = half_to_float(h.z);
+	f.w = half_to_float(h.w);
+
+	return f;
+}
+
+/* CPU */
+#else
 
 ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 {
@@ -102,24 +119,23 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 #endif
 }
 
-/* TODO(dingto) Verify this */
-ccl_device_inline float4 half4_to_float4(half4 h)
+ccl_device_inline float half_to_float(half h)
 {
-	float4 f;
+	float f; /* Exponent-0 halves (zero, denormals) give signed zero; Inf/NaN are not special-cased. */
 
-	*((int*) &f.x) = ((h.x & 0x8000) << 16) | (((h.x & 0x7c00) + 0x1C000) << 13) | ((h.x & 0x03FF) << 13);
-	*((int*) &f.y) = ((h.y & 0x8000) << 16) | (((h.y & 0x7c00) + 0x1C000) << 13) | ((h.y & 0x03FF) << 13);
-	*((int*) &f.z) = ((h.z & 0x8000) << 16) | (((h.z & 0x7c00) + 0x1C000) << 13) | ((h.z & 0x03FF) << 13);
-	*((int*) &f.w) = ((h.w & 0x8000) << 16) | (((h.w & 0x7c00) + 0x1C000) << 13) | ((h.w & 0x03FF) << 13);
+	*((int*) &f) = ((h & 0x8000) << 16) | ((h & 0x7c00) ? ((((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13)) : 0);
 
 	return f;
 }
 
-ccl_device_inline float half_to_float(half h)
+ccl_device_inline float4 half4_to_float4(half4 h)
 {
-	float f;
+	float4 f; /* Each component is converted independently, in x, y, z, w order. */
 
-	*((int*) &f) = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13);
+	f.x = half_to_float(h.x);
+	f.y = half_to_float(h.y);
+	f.z = half_to_float(h.z);
+	f.w = half_to_float(h.w);
 
 	return f;
 }
@@ -129,4 +145,3 @@ ccl_device_inline float half_to_float(half h)
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_HALF_H__ */
-