[Bf-blender-cvs] [c588af7] experimental-build: Revert "Cycles: Use array data storage for Fermi cards as well."
Thomas Dinges
noreply at git.blender.org
Mon May 16 14:58:27 CEST 2016
Commit: c588af70f428a246e957bb79531cb57f2e487d0b
Author: Thomas Dinges
Date: Mon May 16 14:58:16 2016 +0200
Branches: experimental-build
https://developer.blender.org/rBc588af70f428a246e957bb79531cb57f2e487d0b
Revert "Cycles: Use array data storage for Fermi cards as well."
This reverts commit 11f3c9470d3b11180aa7ce14ac89eb0829353469.
===================================================================
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/kernel_compat_cuda.h
M intern/cycles/kernel/kernel_globals.h
===================================================================
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 2ce2c2e..69015e7 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -86,7 +86,9 @@ public:
CUmodule cuModule;
map<device_ptr, bool> tex_interp_map;
int cuDevId;
+ int cuDevArchitecture;
bool first_error;
+ bool use_texture_storage;
struct PixelMem {
GLuint cuPBO;
@@ -174,6 +176,7 @@ public:
{
first_error = true;
background = background_;
+ use_texture_storage = true;
cuDevId = info.num;
cuDevice = 0;
@@ -204,6 +207,15 @@ public:
if(cuda_error_(result, "cuCtxCreate"))
return;
+ int major, minor;
+ cuDeviceComputeCapability(&major, &minor, cuDevId);
+ cuDevArchitecture = major*100 + minor*10;
+
+ /* In order to use full 6GB of memory on Titan cards, use arrays instead
+ * of textures. On earlier cards this seems slower, but on Titan it is
+ * actually slightly faster in tests. */
+ use_texture_storage = (cuDevArchitecture < 300);
+
cuda_pop_context();
}
@@ -483,7 +495,7 @@ public:
CUarray_format_enum format;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();
- bool use_texture = (interpolation != INTERPOLATION_NONE);
+ bool use_texture = (interpolation != INTERPOLATION_NONE) || use_texture_storage;
if(use_texture) {
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 167c994..d10d325 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -67,7 +67,18 @@ typedef texture<uchar4, 2, cudaReadModeNormalizedFloat> texture_image_uchar4;
/* Macros to handle different memory storage on different devices */
-#define kernel_tex_fetch(t, index) t[(index)]
+/* In order to use full 6GB of memory on Titan cards, use arrays instead
+ * of textures. On earlier cards this seems slower, but on Titan it is
+ * actually slightly faster in tests. */
+#if __CUDA_ARCH__ < 300
+# define __KERNEL_CUDA_TEX_STORAGE__
+#endif
+
+#ifdef __KERNEL_CUDA_TEX_STORAGE__
+# define kernel_tex_fetch(t, index) tex1Dfetch(t, index)
+#else
+# define kernel_tex_fetch(t, index) t[(index)]
+#endif
#define kernel_tex_image_interp(t, x, y) tex2D(t, x, y)
#define kernel_tex_image_interp_3d(t, x, y, z) tex3D(t, x, y, z)
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index cb8ce4c..c44ea1b 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -65,9 +65,12 @@ typedef struct KernelGlobals {
__constant__ KernelData __data;
typedef struct KernelGlobals {} KernelGlobals;
-# define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name;
+# ifdef __KERNEL_CUDA_TEX_STORAGE__
+# define KERNEL_TEX(type, ttype, name) ttype name;
+# else
+# define KERNEL_TEX(type, ttype, name) const __constant__ __device__ type *name;
+# endif
# define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
-
# include "kernel_textures.h"
#endif /* __KERNEL_CUDA__ */
More information about the Bf-blender-cvs mailing list