[Bf-blender-cvs] [ab0f977] soc-2016-cycles_images: Further work on bindless textures.
Thomas Dinges
noreply at git.blender.org
Mon May 16 16:54:16 CEST 2016
Commit: ab0f977d04fda913173b36d353f5dacc07901ee8
Author: Thomas Dinges
Date: Sun May 15 22:36:23 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rBab0f977d04fda913173b36d353f5dacc07901ee8
Further work on bindless textures.
Doesn't work properly yet; textures are black.
===================================================================
M intern/cycles/device/device.h
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/device/device_multi.cpp
M intern/cycles/device/device_network.cpp
M intern/cycles/device/device_opencl.cpp
M intern/cycles/kernel/kernel_types.h
M intern/cycles/kernel/svm/svm_image.h
M intern/cycles/render/image.cpp
===================================================================
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 4004dd8..144ed0d 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -225,11 +225,14 @@ public:
virtual void tex_alloc(const char * /*name*/,
device_memory& /*mem*/,
InterpolationType interpolation = INTERPOLATION_NONE,
- ExtensionType extension = EXTENSION_REPEAT)
+ ExtensionType extension = EXTENSION_REPEAT,
+ int *flat_slot = 0)
{
(void)interpolation; /* Ignored. */
(void)extension; /* Ignored. */
+ (void)flat_slot; /* Ignored. */
};
+
virtual void tex_free(device_memory& /*mem*/) {};
/* pixel memory */
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 676b1279..1fa4bd0 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -153,7 +153,8 @@ public:
void tex_alloc(const char *name,
device_memory& mem,
InterpolationType interpolation,
- ExtensionType extension)
+ ExtensionType extension,
+ int /*flat_slot*/)
{
VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
kernel_tex_copy(&kernel_globals,
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 868404d..0ed944c 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -468,7 +468,8 @@ public:
void tex_alloc(const char *name,
device_memory& mem,
InterpolationType interpolation,
- ExtensionType extension)
+ ExtensionType extension,
+ int *flat_slot)
{
VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
@@ -511,12 +512,113 @@ public:
/* Texture Storage */
else {
/* TODO(dingto): Complete Bindless textures */
+
+ CUarray_format_enum format;
+ switch(mem.data_type) {
+ case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
+ case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
+ case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
+ case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+ default: assert(0); return;
+ }
+
+ CUarray handle = NULL;
+
+ cuda_push_context();
+
+ if(mem.data_depth > 1) {
+ CUDA_ARRAY3D_DESCRIPTOR desc;
+
+ desc.Width = mem.data_width;
+ desc.Height = mem.data_height;
+ desc.Depth = mem.data_depth;
+ desc.Format = format;
+ desc.NumChannels = mem.data_elements;
+ desc.Flags = 0;
+
+ cuda_assert(cuArray3DCreate(&handle, &desc));
+ }
+ else {
+ CUDA_ARRAY_DESCRIPTOR desc;
+
+ desc.Width = mem.data_width;
+ desc.Height = mem.data_height;
+ desc.Format = format;
+ desc.NumChannels = mem.data_elements;
+
+ cuda_assert(cuArrayCreate(&handle, &desc));
+ }
+
+ if(!handle) {
+ cuda_pop_context();
+ return;
+ }
+
+ if(mem.data_depth > 1) {
+ CUDA_MEMCPY3D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ param.dstArray = handle;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = (void*)mem.data_pointer;
+ param.srcPitch = mem.data_width*dsize*mem.data_elements;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+ param.Depth = mem.data_depth;
+
+ cuda_assert(cuMemcpy3D(&param));
+ }
+ if(mem.data_height > 1) {
+ CUDA_MEMCPY2D param;
+ memset(&param, 0, sizeof(param));
+ param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+ param.dstArray = handle;
+ param.srcMemoryType = CU_MEMORYTYPE_HOST;
+ param.srcHost = (void*)mem.data_pointer;
+ param.srcPitch = mem.data_width*dsize*mem.data_elements;
+ param.WidthInBytes = param.srcPitch;
+ param.Height = mem.data_height;
+
+ cuda_assert(cuMemcpy2D(&param));
+ }
+ else
+ cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size));
+
CUDA_RESOURCE_DESC resDesc;
+ memset(&resDesc, 0, sizeof(resDesc));
+ resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+ resDesc.res.array.hArray = handle;
+ resDesc.flags = 0;
+
+ CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ switch(extension) {
+ case EXTENSION_REPEAT:
+ address_mode = CU_TR_ADDRESS_MODE_WRAP;
+ break;
+ case EXTENSION_EXTEND:
+ address_mode = CU_TR_ADDRESS_MODE_CLAMP;
+ break;
+ case EXTENSION_CLIP:
+ address_mode = CU_TR_ADDRESS_MODE_BORDER;
+ break;
+ default:
+ assert(0);
+ break;
+ }
CUDA_TEXTURE_DESC texDesc;
-
- CUtexObject* tex;
- cuda_assert(cuTexObjectCreate(tex, &resDesc, &texDesc, NULL));
+ memset(&texDesc, 0, sizeof(texDesc));
+ texDesc.addressMode[0] = address_mode;
+ texDesc.addressMode[1] = address_mode;
+ texDesc.addressMode[2] = address_mode;
+ texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;
+ texDesc.flags = 0;
+
+ CUtexObject tex = 0;
+ cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
+
+ printf("Tex: %i - Slot: %i\n\n", tex, *flat_slot);
+ *flat_slot = (int)tex;
}
}
/* Geforce 4xx and 5xx */
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index e802410..a8e2628 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -173,13 +173,14 @@ public:
device_memory& mem,
InterpolationType
interpolation,
- ExtensionType extension)
+ ExtensionType extension,
+ int *flat_slot)
{
VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
foreach(SubDevice& sub, devices) {
mem.device_pointer = 0;
- sub.device->tex_alloc(name, mem, interpolation, extension);
+ sub.device->tex_alloc(name, mem, interpolation, extension, flat_slot);
sub.ptr_map[unique_ptr] = mem.device_pointer;
}
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index cf4a05d..c756805 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -166,7 +166,8 @@ public:
void tex_alloc(const char *name,
device_memory& mem,
InterpolationType interpolation,
- ExtensionType extension)
+ ExtensionType extension,
+ int *flat_slot)
{
VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
@@ -182,6 +183,7 @@ public:
snd.add(mem);
snd.add(interpolation);
snd.add(extension);
+ snd.add(flat_slot);
snd.write();
snd.write_buffer((void*)mem.data_pointer, mem.memory_size());
}
@@ -581,6 +583,7 @@ protected:
rcv.read(mem);
rcv.read(interpolation);
rcv.read(extension_type);
+ rcv.read(flat_slot);
lock.unlock();
client_pointer = mem.device_pointer;
@@ -596,7 +599,7 @@ protected:
rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
- device->tex_alloc(name.c_str(), mem, interpolation, extension_type);
+ device->tex_alloc(name.c_str(), mem, interpolation, extension_type, flat_slot);
pointer_mapping_insert(client_pointer, mem.device_pointer);
}
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 1b4e542..61f83f2 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1185,7 +1185,8 @@ public:
void tex_alloc(const char *name,
device_memory& mem,
InterpolationType /*interpolation*/,
- ExtensionType /*extension*/)
+ ExtensionType /*extension*/,
+ int /*flat_slot*/)
{
VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
mem_alloc(mem, MEM_READ_ONLY);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 02e69c7..1c54a75 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1157,6 +1157,7 @@ typedef struct KernelData {
KernelBVH bvh;
KernelCurves curve;
KernelTables tables;
+ int bindless_mapping[4096];
} KernelData;
#ifdef __KERNEL_DEBUG__
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index d2ffa42..2da4563 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -151,6 +151,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
#else
float4 r;
+#if __CUDA_ARCH__ < 300
/* not particularly proud of this massive switch, what are the
* alternatives?
* - use a single big 1D texture, and do our own lookup/filtering
@@ -258,6 +259,15 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
+#else
+ CUtexObject tex = (uint)kernel_data.bindless_mapping[id];
+ if(id < 5)
+ r = tex2D<float4>(tex, x, y);
+ else {
+ uchar4 f = tex2D<uchar4>(tex, x, y);
+ r = make_float4(f.x/255, f.y/255, f.z/255, f.w/255);
+ }
+#endif
#endif
#ifdef __KERNEL_SSE2__
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index b0f5c8b..fc40447 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -791,7 +791,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
device->tex_alloc(name.c_str(),
tex_img,
img->interpolation,
- img->extension);
+ img->extension,
+ &flat_slot);
}
}
else if(type == IMAGE_DATA_TYPE_FLOAT) {
@@ -814,7 +815,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
device->tex_alloc(name.c_str(),
tex_img,
img->interpolation,
- img->extension);
+
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list