[Bf-blender-cvs] [f2486a1] soc-2016-cycles_images: Cleanup for Bindless Textures, keep tex2/3D defines in kernel_compat_cuda.h.

Thomas Dinges noreply at git.blender.org
Tue May 17 13:38:47 CEST 2016


Commit: f2486a11cdb7059943ffbf8c3c6caaa309464063
Author: Thomas Dinges
Date:   Tue May 17 13:38:19 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rBf2486a11cdb7059943ffbf8c3c6caaa309464063

Cleanup for Bindless Textures, keep tex2/3D defines in kernel_compat_cuda.h.

===================================================================

M	intern/cycles/kernel/geom/geom_volume.h
M	intern/cycles/kernel/kernel_compat_cuda.h
M	intern/cycles/kernel/svm/svm_image.h
M	intern/cycles/kernel/svm/svm_voxel.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 83a163b..c77f498 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -67,8 +67,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
 #ifdef __KERNEL_GPU__
 #  if __CUDA_ARCH__ >= 300
 	CUtexObject tex = kernel_data.bindless_mapping[id];
-	float g = tex3D<float>(tex, P.x, P.y, P.z);
-	float4 r = make_float4(g, g, g, 1.0);
+	float f = kernel_tex_image_interp_3d_float(tex, x, y, z);
+	float4 r = make_float4(f, f, f, 1.0);
 #  else
 	float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
 #  endif
@@ -92,7 +92,7 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s
 #ifdef __KERNEL_GPU__
 #  if __CUDA_ARCH__ >= 300
 	CUtexObject tex = kernel_data.bindless_mapping[id];
-	float4 r = tex3D<float4>(tex, P.x, P.y, P.z);
+	float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
 #  else
 	float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
 #  endif
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index d10d325..5d9c307 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -67,20 +67,30 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
 
 /* Macros to handle different memory storage on different devices */
 
-/* In order to use full 6GB of memory on Titan cards, use arrays instead
- * of textures. On earlier cards this seems slower, but on Titan it is
- * actually slightly faster in tests. */
+/* On Fermi cards (4xx and 5xx), we use regular textures for both data and images.
+ * On Kepler (6xx) and above, we use bindless textures for images and arrays for data.
+ *
+ * Arrays are necessary in order to use the full VRAM on newer cards, and they are slightly
+ * faster there. Using arrays on Fermi turned out to be slower. */
+
+
+/* Fermi */
 #if __CUDA_ARCH__ < 300
 #  define __KERNEL_CUDA_TEX_STORAGE__
-#endif
-
-#ifdef __KERNEL_CUDA_TEX_STORAGE__
 #  define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
+
+#  define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
+#  define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
+
+/* Kepler and above */
 #else
 #  define kernel_tex_fetch(t, index) t[(index)]
+
+#  define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y)
+#  define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y)
+#  define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z)
+#  define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z)
 #endif
-#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
-#define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
 
 #define kernel_data __data
 
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 9ab627d..0fd04a0 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -262,10 +262,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 #else
 	CUtexObject tex = kernel_data.bindless_mapping[id];
 	if(id < 2048) /* TODO(dingto): Make this a variable */
-		r = tex2D<float4>(tex, x, y);
+		r = kernel_tex_image_interp_float4(tex, x, y);
 	else {
-		float g = tex2D<float>(tex, x, y);
-		r = make_float4(g, g, g, 1.0);
+		float f = kernel_tex_image_interp_float(tex, x, y);
+		r = make_float4(f, f, f, 1.0);
 	}
 #endif
 #endif
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 9d85d97..825d76d 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -47,10 +47,10 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
 #    if __CUDA_ARCH__ >= 300
 	CUtexObject tex = kernel_data.bindless_mapping[id];
 	if(id < 2048) /* TODO(dingto): Make this a variable */
-		r = tex3D<float4>(tex, co.x, co.y, co.z);
+		r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z);
 	else {
-		float g = tex3D<float>(tex, co.x, co.y, co.z);
-		r = make_float4(g, g, g, 1.0);
+		float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z);
+		r = make_float4(f, f, f, 1.0);
 	}
 #    else /* __CUDA_ARCH__ >= 300 */
 	r = volume_image_texture_3d(id, co.x, co.y, co.z);




More information about the Bf-blender-cvs mailing list