[Bf-blender-cvs] [f2486a1] soc-2016-cycles_images: Cleanup for Bindless Textures, keep tex2/3D defines in kernel_compat_cuda.h.
Thomas Dinges
noreply at git.blender.org
Tue May 17 13:38:47 CEST 2016
Commit: f2486a11cdb7059943ffbf8c3c6caaa309464063
Author: Thomas Dinges
Date: Tue May 17 13:38:19 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rBf2486a11cdb7059943ffbf8c3c6caaa309464063
Cleanup for Bindless Textures, keep tex2/3D defines in kernel_compat_cuda.h.
===================================================================
M intern/cycles/kernel/geom/geom_volume.h
M intern/cycles/kernel/kernel_compat_cuda.h
M intern/cycles/kernel/svm/svm_image.h
M intern/cycles/kernel/svm/svm_voxel.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 83a163b..c77f498 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -67,8 +67,8 @@ ccl_device float volume_attribute_float(KernelGlobals *kg, const ShaderData *sd,
#ifdef __KERNEL_GPU__
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_data.bindless_mapping[id];
- float g = tex3D<float>(tex, P.x, P.y, P.z);
- float4 r = make_float4(g, g, g, 1.0);
+ float f = kernel_tex_image_interp_3d_float(tex, x, y, z);
+ float4 r = make_float4(f, f, f, 1.0);
# else
float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
# endif
@@ -92,7 +92,7 @@ ccl_device float3 volume_attribute_float3(KernelGlobals *kg, const ShaderData *s
#ifdef __KERNEL_GPU__
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_data.bindless_mapping[id];
- float4 r = tex3D<float4>(tex, P.x, P.y, P.z);
+ float4 r = kernel_tex_image_interp_3d_float4(tex, P.x, P.y, P.z);
# else
float4 r = volume_image_texture_3d(id, P.x, P.y, P.z);
# endif
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index d10d325..5d9c307 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -67,20 +67,30 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
/* Macros to handle different memory storage on different devices */
-/* In order to use full 6GB of memory on Titan cards, use arrays instead
- * of textures. On earlier cards this seems slower, but on Titan it is
- * actually slightly faster in tests. */
+/* On Fermi cards (4xx and 5xx), we use regular textures for both data and images.
+ * On Kepler (6xx) and above, we use Bindless Textures for images and arrays for data.
+ *
+ * Arrays are necessary in order to use the full VRAM on newer cards, and they are slightly faster.
+ * Using arrays on Fermi turned out to be slower. */
+
+
+/* Fermi */
#if __CUDA_ARCH__ < 300
# define __KERNEL_CUDA_TEX_STORAGE__
-#endif
-
-#ifdef __KERNEL_CUDA_TEX_STORAGE__
# define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
+
+# define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
+# define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
+
+/* Kepler */
#else
# define kernel_tex_fetch(t, index) t[(index)]
+
+# define kernel_tex_image_interp_float4(t, x, y) tex2D<float4>(t, x, y)
+# define kernel_tex_image_interp_float(t, x, y) tex2D<float>(t, x, y)
+# define kernel_tex_image_interp_3d_float4(t, x, y, z) tex3D<float4>(t, x, y, z)
+# define kernel_tex_image_interp_3d_float(t, x, y, z) tex3D<float>(t, x, y, z)
#endif
-#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
-#define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
#define kernel_data __data
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 9ab627d..0fd04a0 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -262,10 +262,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
#else
CUtexObject tex = kernel_data.bindless_mapping[id];
if(id < 2048) /* TODO(dingto): Make this a variable */
- r = tex2D<float4>(tex, x, y);
+ r = kernel_tex_image_interp_float4(tex, x, y);
else {
- float g = tex2D<float>(tex, x, y);
- r = make_float4(g, g, g, 1.0);
+ float f = kernel_tex_image_interp_float(tex, x, y);
+ r = make_float4(f, f, f, 1.0);
}
#endif
#endif
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 9d85d97..825d76d 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -47,10 +47,10 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
# if __CUDA_ARCH__ >= 300
CUtexObject tex = kernel_data.bindless_mapping[id];
if(id < 2048) /* TODO(dingto): Make this a variable */
- r = tex3D<float4>(tex, co.x, co.y, co.z);
+ r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z);
else {
- float g = tex3D<float>(tex, co.x, co.y, co.z);
- r = make_float4(g, g, g, 1.0);
+ float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z);
+ r = make_float4(f, f, f, 1.0);
}
# else /* __CUDA_ARCH__ >= 300 */
r = volume_image_texture_3d(id, co.x, co.y, co.z);
More information about the Bf-blender-cvs
mailing list