[Bf-blender-cvs] [ef7d3af] soc-2016-cycles_images: Half Floats: Initial support for CUDA.

Thomas Dinges noreply at git.blender.org
Tue Aug 9 11:36:39 CEST 2016


Commit: ef7d3af938514d4dfb7d7c2376c8fa6720c45279
Author: Thomas Dinges
Date:   Tue Aug 9 11:33:42 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rBef7d3af938514d4dfb7d7c2376c8fa6720c45279

Half Floats: Initial support for CUDA.

This is not working yet due to an error at render time; I assume it is an error when reading the texture. If I render a scene in which the half float texture is outside of the camera view, it works.

* Enable half floats for Kepler cards (1024 for half and 1024 for half4)
* Use the proper half data type for CUDA instead of declaring our own.
* Change __float2half_rn to __float2half, which should be the same according to http://stackoverflow.com/questions/35198856/half-precision-difference-between-float2half-vs-float2half-rn

===================================================================

M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/kernel/kernel_compat_cuda.h
M	intern/cycles/kernel/svm/svm_image.h
M	intern/cycles/render/image.cpp
M	intern/cycles/util/util_half.h
M	intern/cycles/util/util_texture.h

===================================================================

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 6a511ea..331857a 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -576,6 +576,7 @@ public:
 			case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
 			case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
 			case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+			case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
 			default: assert(0); return;
 		}
 
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 063220b..d656fac 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -31,6 +31,7 @@
 #endif
 
 #include <cuda.h>
+#include <cuda_fp16.h>
 #include <float.h>
 
 /* Qualifier wrappers for different names on different devices */
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index b6b90df..d1bfcaa 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Float4 textures on various devices. */
 #if defined(__KERNEL_CPU__)
-#  define TEX_NUM_FLOAT4_IMAGES	TEX_NUM_FLOAT4_CPU
+#  define TEX_NUM_FLOAT4_IMAGES		TEX_NUM_FLOAT4_CPU
 #elif defined(__KERNEL_CUDA__)
 #  if __CUDA_ARCH__ < 300
 #    define TEX_NUM_FLOAT4_IMAGES	TEX_NUM_FLOAT4_CUDA
@@ -277,8 +277,21 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 	}
 #  else
 	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
-	if(id < 2048) /* TODO(dingto): Make this a variable */
+
+	/* Float and Byte, 4 components */
+	if(id < TEX_START_FLOAT_CUDA_KEPLER)
 		r = kernel_tex_image_interp_float4(tex, x, y);
+	/* Float and Byte, 1 component */
+	else if(id < TEX_START_HALF4_CUDA_KEPLER) {
+		float f = kernel_tex_image_interp_float(tex, x, y);
+		r = make_float4(f, f, f, 1.0);
+	}
+	/* Half Float, 4 components */
+	else if (id < TEX_START_HALF_CUDA_KEPLER){
+		/* TODO(dingto): proper tex call here. */
+		r = kernel_tex_image_interp_float4(tex, x, y);
+	}
+	/* Half Float, 1 component */
 	else {
 		float f = kernel_tex_image_interp_float(tex, x, y);
 		r = make_float4(f, f, f, 1.0);
@@ -465,7 +478,7 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, floa
 	float2 uv;
 
 	co = normalize(co);
-	
+
 	if(projection == 0)
 		uv = direction_to_equirectangular(co);
 	else
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 614620c..1adf510 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -216,7 +216,7 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen
 }
 
 /* We use a consecutive slot counting scheme on the devices, in order
- * float4, byte4, float, byte.
+ * float4, byte4, float, byte, half4 and half.
  * These functions convert the slot ids from ImageManager "images" ones
  * to device ones and vice versa. */
 int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index ae85ab3..7aa53e0 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -33,17 +33,20 @@ CCL_NAMESPACE_BEGIN
 
 #else
 
+#ifndef __KERNEL_CUDA__
 typedef unsigned short half;
+#endif
+
 struct half4 { half x, y, z, w; };
 
 #ifdef __KERNEL_CUDA__
 
 ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
 {
-	h[0] = __float2half_rn(f.x * scale);
-	h[1] = __float2half_rn(f.y * scale);
-	h[2] = __float2half_rn(f.z * scale);
-	h[3] = __float2half_rn(f.w * scale);
+	h[0] = __float2half(f.x * scale);
+	h[1] = __float2half(f.y * scale);
+	h[2] = __float2half(f.z * scale);
+	h[3] = __float2half(f.w * scale);
 }
 
 #else
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index 2ef4728..dee9c30 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -54,8 +54,8 @@ CCL_NAMESPACE_BEGIN
 #define TEX_NUM_BYTE4_CUDA_KEPLER		1024
 #define TEX_NUM_FLOAT_CUDA_KEPLER		1024
 #define TEX_NUM_BYTE_CUDA_KEPLER		1024
-#define TEX_NUM_HALF4_CUDA_KEPLER		0
-#define TEX_NUM_HALF_CUDA_KEPLER		0
+#define TEX_NUM_HALF4_CUDA_KEPLER		1024
+#define TEX_NUM_HALF_CUDA_KEPLER		1024
 #define TEX_START_FLOAT4_CUDA_KEPLER	0
 #define TEX_START_BYTE4_CUDA_KEPLER		TEX_NUM_FLOAT4_CUDA_KEPLER
 #define TEX_START_FLOAT_CUDA_KEPLER		(TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)




More information about the Bf-blender-cvs mailing list