[Bf-blender-cvs] [2ef0cd4] soc-2016-cycles_images: Some WIP code for CUDA half support.
Thomas Dinges
noreply at git.blender.org
Sat Jun 11 23:52:05 CEST 2016
Commit: 2ef0cd4f1eeda8efc0efed1efa4ad1db0bf1a9c4
Author: Thomas Dinges
Date: Sat Jun 11 23:50:18 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rB2ef0cd4f1eeda8efc0efed1efa4ad1db0bf1a9c4
Some WIP code for CUDA half support.
* CUDA actually only supports half and half2, but we should still use these instead of our own declarations.
* For CPU we define half and half4 ourselves; for CUDA I still have to add half4.
CUDA doesn't compile anymore with this commit; this needs to be fixed later.
===================================================================
M intern/cycles/kernel/kernel_compat_cuda.h
M intern/cycles/kernel/svm/svm_image.h
M intern/cycles/util/util_half.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 4231475..d06176f 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -31,6 +31,7 @@
#endif
#include <cuda.h>
+#include <cuda_fp16.h>
#include <float.h>
/* Qualifier wrappers for different names on different devices */
@@ -87,6 +88,7 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
# define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y)
# define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y)
+# define kernel_tex_image_interp_half(t, x, y) tex2D<ushort4>(t, x, y)
# define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z)
# define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z)
#endif
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 3d9ab40..af27031 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -267,10 +267,14 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
if(id < 2048) /* TODO(dingto): Make this a variable */
r = kernel_tex_image_interp_float4(tex, x, y);
- else {
+ else if(id < 4096) {
float f = kernel_tex_image_interp_float(tex, x, y);
r = make_float4(f, f, f, 1.0);
}
+ else {
+ half g = kernel_tex_image_interp_half(tex, x, y);
+ r = half4_to_float4(&g);
+ }
# endif
#endif
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index dc02135..d652e88 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -27,24 +27,32 @@ CCL_NAMESPACE_BEGIN
/* Half Floats */
-#ifdef __KERNEL_OPENCL__
-
-#define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h);
-#define half4_to_float4(h) vload_half4(0, h);
-
-#else
-
+/* CUDA and OpenCL have inbuilt half data types,
+ * so we only need to declare them for CPU */
+#ifndef __KERNEL_GPU__
typedef unsigned short half;
struct half4 { half x, y, z, w; };
+#endif
-#ifdef __KERNEL_CUDA__
+/* Float <-> Half conversion.
+ * we define several functions:
+ * float4_store_half()
+ * half_to_float4() TODO
+ * half_to_float() TODO
+*/
+
+#if defined(__KERNEL_OPENCL__)
+# define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h);
+# define half4_to_float4(h) vload_half4(0, h);
+
+#elif defined(__KERNEL_CUDA__)
ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
- h[0] = __float2half_rn(f.x * scale);
- h[1] = __float2half_rn(f.y * scale);
- h[2] = __float2half_rn(f.z * scale);
- h[3] = __float2half_rn(f.w * scale);
+ h[0] = __float2half(f.x * scale);
+ h[1] = __float2half(f.y * scale);
+ h[2] = __float2half(f.z * scale);
+ h[3] = __float2half(f.w * scale);
}
ccl_device_inline float4 half4_to_float4(half *h)
@@ -52,7 +60,7 @@ ccl_device_inline float4 half4_to_float4(half *h)
return make_float4(__half2float(h[0]), __half2float(h[1]), __half2float(h[2]), __half2float(h[3]));
}
-#else
+#else /* __KERNEL_CPU__ */
ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
{
@@ -115,8 +123,6 @@ ccl_device_inline float half_to_float(half h)
#endif
-#endif
-
CCL_NAMESPACE_END
#endif /* __UTIL_HALF_H__ */
More information about the Bf-blender-cvs
mailing list