[Bf-blender-cvs] [2c1c39b] soc-2016-cycles_images: Cleanup: Have a clear distinction between pre Kepler and Kepler cards in tex_alloc().

Sun May 15 18:54:32 CEST 2016

Commit: 2c1c39bf4d560c610e880be02e329223f54ba79e
Author: Thomas Dinges
Date:   Sun May 15 18:53:32 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rB2c1c39bf4d560c610e880be02e329223f54ba79e

Cleanup: Have a clear distinction between pre Kepler and Kepler cards in tex_alloc().

This temporarily breaks Image Textures on Kepler until Bindless textures are implemented.

===================================================================

M	intern/cycles/device/device_cuda.cpp

===================================================================

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 7913290..3f64afa 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -88,7 +88,6 @@ public:
 	int cuDevId;
 	int cuDevArchitecture;
 	bool first_error;
-	bool use_texture_storage;
 
 	struct PixelMem {
 		GLuint cuPBO;
@@ -176,7 +175,6 @@ public:
 	{
 		first_error = true;
 		background = background_;
-		use_texture_storage = true;
 
 		cuDevId = info.num;
 		cuDevice = 0;
@@ -211,11 +209,6 @@ public:
 		cuDeviceComputeCapability(&major, &minor, cuDevId);
 		cuDevArchitecture = major*100 + minor*10;
 
-		/* In order to use full 6GB of memory on Titan cards, use arrays instead
-		 * of textures. On earlier cards this seems slower, but on Titan it is
-		 * actually slightly faster in tests. */
-		use_texture_storage = (cuDevArchitecture < 300);
-
 		cuda_pop_context();
 	}
 
@@ -479,26 +472,50 @@ public:
 	{
 		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 
+		/* General variables for both architectures */
 		string bind_name = name;
-		if(mem.data_depth > 1) {
-			/* Kernel uses different bind names for 2d and 3d float textures,
-			 * so we have to adjust couple of things here.
-			 */
-			vector<string> tokens;
-			string_split(tokens, name, "_");
-			bind_name = string_printf("__tex_image_%s_3d_%s",
-			                          tokens[2].c_str(),
-			                          tokens[3].c_str());
-		}
-
-		/* determine format */
-		CUarray_format_enum format;
 		size_t dsize = datatype_size(mem.data_type);
 		size_t size = mem.memory_size();
-		bool use_texture = (interpolation != INTERPOLATION_NONE) || use_texture_storage;
 
-		if(use_texture) {
+		/* We differentiate between Fermi cards and Kepler & above */
+		bool is_kepler_card = info.bindless_textures;
+
+		/* Geforce 6xx and above */
+		if(is_kepler_card) {
+			/* Data Storage */
+			if(interpolation == INTERPOLATION_NONE) {
+				mem_alloc(mem, MEM_READ_ONLY);
+				mem_copy_to(mem);
+
+				cuda_push_context();
+
+				CUdeviceptr cumem;
+				size_t cubytes;
 
+				cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
+
+				if(cubytes == 8) {
+					/* 64 bit device pointer */
+					uint64_t ptr = mem.device_pointer;
+					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
+				}
+				else {
+					/* 32 bit device pointer */
+					uint32_t ptr = (uint32_t)mem.device_pointer;
+					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
+				}
+
+				cuda_pop_context();
+			}
+
+			/* Texture Storage */
+			else {
+				; /* TODO(dingto): Implement Bindless textures */
+			}
+		}
+		/* Geforce 4xx and 5xx */
+		else {
+			CUarray_format_enum format;
 			switch(mem.data_type) {
 				case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
 				case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
@@ -507,6 +524,17 @@ public:
 				default: assert(0); return;
 			}
 
+			if(mem.data_depth > 1) {
+				/* Kernel uses different bind names for 2d and 3d float textures,
+				 * so we have to adjust couple of things here.
+				 */
+				vector<string> tokens;
+				string_split(tokens, name, "_");
+				bind_name = string_printf("__tex_image_%s_3d_%s",
+				                          tokens[2].c_str(),
+				                          tokens[3].c_str());
+			}
+
 			CUtexref texref = NULL;
 
 			cuda_push_context();
@@ -517,6 +545,7 @@ public:
 				return;
 			}
 
+			/* Texture Storage */
 			if(interpolation != INTERPOLATION_NONE) {
 				CUarray handle = NULL;
 
@@ -596,6 +625,7 @@ public:
 
 				stats.mem_alloc(size);
 			}
+			/* Data Storage */
 			else {
 				cuda_pop_context();
 
@@ -634,30 +664,6 @@ public:
 
 			cuda_pop_context();
 		}
-		else {
-			mem_alloc(mem, MEM_READ_ONLY);
-			mem_copy_to(mem);
-
-			cuda_push_context();
-
-			CUdeviceptr cumem;
-			size_t cubytes;
-
-			cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
-
-			if(cubytes == 8) {
-				/* 64 bit device pointer */
-				uint64_t ptr = mem.device_pointer;
-				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
-			}
-			else {
-				/* 32 bit device pointer */
-				uint32_t ptr = (uint32_t)mem.device_pointer;
-				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
-			}
-
-			cuda_pop_context();
-		}
 
 		tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE);
 	}