[Bf-blender-cvs] [2c1c39b] soc-2016-cycles_images: Cleanup: Have a clear distinction between pre Kepler and Kepler cards in tex_alloc().
Thomas Dinges
noreply at git.blender.org
Sun May 15 18:54:32 CEST 2016
Commit: 2c1c39bf4d560c610e880be02e329223f54ba79e
Author: Thomas Dinges
Date: Sun May 15 18:53:32 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rB2c1c39bf4d560c610e880be02e329223f54ba79e
Cleanup: Have a clear distinction between pre Kepler and Kepler cards in tex_alloc().
This temporarily breaks Image Textures on Kepler, until Bindless textures are implemented.
===================================================================
M intern/cycles/device/device_cuda.cpp
===================================================================
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 7913290..3f64afa 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -88,7 +88,6 @@ public:
int cuDevId;
int cuDevArchitecture;
bool first_error;
- bool use_texture_storage;
struct PixelMem {
GLuint cuPBO;
@@ -176,7 +175,6 @@ public:
{
first_error = true;
background = background_;
- use_texture_storage = true;
cuDevId = info.num;
cuDevice = 0;
@@ -211,11 +209,6 @@ public:
cuDeviceComputeCapability(&major, &minor, cuDevId);
cuDevArchitecture = major*100 + minor*10;
- /* In order to use full 6GB of memory on Titan cards, use arrays instead
- * of textures. On earlier cards this seems slower, but on Titan it is
- * actually slightly faster in tests. */
- use_texture_storage = (cuDevArchitecture < 300);
-
cuda_pop_context();
}
@@ -479,26 +472,50 @@ public:
{
VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
+ /* General variables for both architectures */
string bind_name = name;
- if(mem.data_depth > 1) {
- /* Kernel uses different bind names for 2d and 3d float textures,
- * so we have to adjust couple of things here.
- */
- vector<string> tokens;
- string_split(tokens, name, "_");
- bind_name = string_printf("__tex_image_%s_3d_%s",
- tokens[2].c_str(),
- tokens[3].c_str());
- }
-
- /* determine format */
- CUarray_format_enum format;
size_t dsize = datatype_size(mem.data_type);
size_t size = mem.memory_size();
- bool use_texture = (interpolation != INTERPOLATION_NONE) || use_texture_storage;
- if(use_texture) {
+ /* We differentiate between Fermi cards and Kepler & above */
+ bool is_kepler_card = info.bindless_textures;
+
+ /* Geforce 6xx and above */
+ if(is_kepler_card) {
+ /* Data Storage */
+ if(interpolation == INTERPOLATION_NONE) {
+ mem_alloc(mem, MEM_READ_ONLY);
+ mem_copy_to(mem);
+
+ cuda_push_context();
+
+ CUdeviceptr cumem;
+ size_t cubytes;
+ cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
+
+ if(cubytes == 8) {
+ /* 64 bit device pointer */
+ uint64_t ptr = mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
+ }
+ else {
+ /* 32 bit device pointer */
+ uint32_t ptr = (uint32_t)mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
+ }
+
+ cuda_pop_context();
+ }
+
+ /* Texture Storage */
+ else {
+ ; /* TODO(dingto): Implement Bindless textures */
+ }
+ }
+ /* Geforce 4xx and 5xx */
+ else {
+ CUarray_format_enum format;
switch(mem.data_type) {
case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
@@ -507,6 +524,17 @@ public:
default: assert(0); return;
}
+ if(mem.data_depth > 1) {
+ /* Kernel uses different bind names for 2d and 3d float textures,
+ * so we have to adjust couple of things here.
+ */
+ vector<string> tokens;
+ string_split(tokens, name, "_");
+ bind_name = string_printf("__tex_image_%s_3d_%s",
+ tokens[2].c_str(),
+ tokens[3].c_str());
+ }
+
CUtexref texref = NULL;
cuda_push_context();
@@ -517,6 +545,7 @@ public:
return;
}
+ /* Texture Storage */
if(interpolation != INTERPOLATION_NONE) {
CUarray handle = NULL;
@@ -596,6 +625,7 @@ public:
stats.mem_alloc(size);
}
+ /* Data Storage */
else {
cuda_pop_context();
@@ -634,30 +664,6 @@ public:
cuda_pop_context();
}
- else {
- mem_alloc(mem, MEM_READ_ONLY);
- mem_copy_to(mem);
-
- cuda_push_context();
-
- CUdeviceptr cumem;
- size_t cubytes;
-
- cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
-
- if(cubytes == 8) {
- /* 64 bit device pointer */
- uint64_t ptr = mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
- }
- else {
- /* 32 bit device pointer */
- uint32_t ptr = (uint32_t)mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
- }
-
- cuda_pop_context();
- }
tex_interp_map[mem.device_pointer] = (interpolation != INTERPOLATION_NONE);
}
More information about the Bf-blender-cvs
mailing list