[Bf-blender-cvs] [2ef0cd4] soc-2016-cycles_images: Some WIP code for CUDA half support.

Thomas Dinges noreply at git.blender.org
Sat Jun 11 23:52:05 CEST 2016


Commit: 2ef0cd4f1eeda8efc0efed1efa4ad1db0bf1a9c4
Author: Thomas Dinges
Date:   Sat Jun 11 23:50:18 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rB2ef0cd4f1eeda8efc0efed1efa4ad1db0bf1a9c4

Some WIP code for CUDA half support.

* CUDA natively supports only half and half2, but we should still use these built-in types instead of our own declarations.
* For CPU we define half and half4 ourselves; for CUDA, half4 still has to be added.

CUDA doesn't compile anymore with this commit; this needs to be fixed later.

===================================================================

M	intern/cycles/kernel/kernel_compat_cuda.h
M	intern/cycles/kernel/svm/svm_image.h
M	intern/cycles/util/util_half.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 4231475..d06176f 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -31,6 +31,7 @@
 #endif
 
 #include <cuda.h>
+#include <cuda_fp16.h>
 #include <float.h>
 
 /* Qualifier wrappers for different names on different devices */
@@ -87,6 +88,7 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
 
 #  define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y)
 #  define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y)
+#  define kernel_tex_image_interp_half(t, x, y) tex2D<ushort4>(t, x, y)
 #  define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z)
 #  define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z)
 #endif
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 3d9ab40..af27031 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -267,10 +267,14 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
 	if(id < 2048) /* TODO(dingto): Make this a variable */
 		r = kernel_tex_image_interp_float4(tex, x, y);
-	else {
+	else if(id < 4096) {
 		float f = kernel_tex_image_interp_float(tex, x, y);
 		r = make_float4(f, f, f, 1.0);
 	}
+	else {
+		half g = kernel_tex_image_interp_half(tex, x, y);
+		r = half4_to_float4(&g);
+	}
 #  endif
 #endif
 
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index dc02135..d652e88 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -27,24 +27,32 @@ CCL_NAMESPACE_BEGIN
 
 /* Half Floats */
 
-#ifdef __KERNEL_OPENCL__
-
-#define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h);
-#define half4_to_float4(h) vload_half4(0, h);
-
-#else
-
+/* CUDA and OpenCL have inbuilt half data types,
+ * so we only need to declare them for CPU */
+#ifndef __KERNEL_GPU__
 typedef unsigned short half;
 struct half4 { half x, y, z, w; };
+#endif
 
-#ifdef __KERNEL_CUDA__
+/* Float <-> Half conversion.
+ * we define several functions:
+ * float4_store_half()
+ * half_to_float4() TODO
+ * half_to_float() TODO
+*/
+
+#if defined(__KERNEL_OPENCL__)
+#  define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h);
+#  define half4_to_float4(h) vload_half4(0, h);
+
+#elif defined(__KERNEL_CUDA__)
 
 ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 {
-	h[0] = __float2half_rn(f.x * scale);
-	h[1] = __float2half_rn(f.y * scale);
-	h[2] = __float2half_rn(f.z * scale);
-	h[3] = __float2half_rn(f.w * scale);
+	h[0] = __float2half(f.x * scale);
+	h[1] = __float2half(f.y * scale);
+	h[2] = __float2half(f.z * scale);
+	h[3] = __float2half(f.w * scale);
 }
 
 ccl_device_inline float4 half4_to_float4(half *h)
@@ -52,7 +60,7 @@ ccl_device_inline float4 half4_to_float4(half *h)
     return make_float4(__half2float(h[0]), __half2float(h[1]), __half2float(h[2]), __half2float(h[3]));
 }
 
-#else
+#else /* __KERNEL_CPU__ */
 
 ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 {
@@ -115,8 +123,6 @@ ccl_device_inline float half_to_float(half h)
 
 #endif
 
-#endif
-
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_HALF_H__ */




More information about the Bf-blender-cvs mailing list