[Bf-blender-cvs] [ab0f977] soc-2016-cycles_images: Further work on bindless textures.

Thomas Dinges noreply at git.blender.org
Mon May 16 16:54:16 CEST 2016


Commit: ab0f977d04fda913173b36d353f5dacc07901ee8
Author: Thomas Dinges
Date:   Sun May 15 22:36:23 2016 +0200
Branches: soc-2016-cycles_images
https://developer.blender.org/rBab0f977d04fda913173b36d353f5dacc07901ee8

Further work on bindless textures.

Doesn't work properly yet; textures are black.

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_multi.cpp
M	intern/cycles/device/device_network.cpp
M	intern/cycles/device/device_opencl.cpp
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/kernel/svm/svm_image.h
M	intern/cycles/render/image.cpp

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 4004dd8..144ed0d 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -225,11 +225,14 @@ public:
 	virtual void tex_alloc(const char * /*name*/,
 	                       device_memory& /*mem*/,
 	                       InterpolationType interpolation = INTERPOLATION_NONE,
-	                       ExtensionType extension = EXTENSION_REPEAT)
+	                       ExtensionType extension = EXTENSION_REPEAT,
+	                       int *flat_slot = 0)
 	{
 		(void)interpolation;  /* Ignored. */
 		(void)extension;  /* Ignored. */
+		(void)flat_slot; /* Ignored. */
 	};
+
 	virtual void tex_free(device_memory& /*mem*/) {};
 
 	/* pixel memory */
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 676b1279..1fa4bd0 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -153,7 +153,8 @@ public:
 	void tex_alloc(const char *name,
 	               device_memory& mem,
 	               InterpolationType interpolation,
-	               ExtensionType extension)
+	               ExtensionType extension,
+	               int /*flat_slot*/)
 	{
 		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 		kernel_tex_copy(&kernel_globals,
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 868404d..0ed944c 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -468,7 +468,8 @@ public:
 	void tex_alloc(const char *name,
 	               device_memory& mem,
 	               InterpolationType interpolation,
-	               ExtensionType extension)
+	               ExtensionType extension,
+	               int *flat_slot)
 	{
 		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 
@@ -511,12 +512,113 @@ public:
 			/* Texture Storage */
 			else {
 				/* TODO(dingto): Complete Bindless textures */
+
+				CUarray_format_enum format;
+				switch(mem.data_type) {
+					case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
+					case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
+					case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
+					case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+					default: assert(0); return;
+				}
+
+				CUarray handle = NULL;
+
+				cuda_push_context();
+
+				if(mem.data_depth > 1) {
+					CUDA_ARRAY3D_DESCRIPTOR desc;
+
+					desc.Width = mem.data_width;
+					desc.Height = mem.data_height;
+					desc.Depth = mem.data_depth;
+					desc.Format = format;
+					desc.NumChannels = mem.data_elements;
+					desc.Flags = 0;
+
+					cuda_assert(cuArray3DCreate(&handle, &desc));
+				}
+				else {
+					CUDA_ARRAY_DESCRIPTOR desc;
+
+					desc.Width = mem.data_width;
+					desc.Height = mem.data_height;
+					desc.Format = format;
+					desc.NumChannels = mem.data_elements;
+
+					cuda_assert(cuArrayCreate(&handle, &desc));
+				}
+
+				if(!handle) {
+					cuda_pop_context();
+					return;
+				}
+
+				if(mem.data_depth > 1) {
+					CUDA_MEMCPY3D param;
+					memset(&param, 0, sizeof(param));
+					param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+					param.dstArray = handle;
+					param.srcMemoryType = CU_MEMORYTYPE_HOST;
+					param.srcHost = (void*)mem.data_pointer;
+					param.srcPitch = mem.data_width*dsize*mem.data_elements;
+					param.WidthInBytes = param.srcPitch;
+					param.Height = mem.data_height;
+					param.Depth = mem.data_depth;
+
+					cuda_assert(cuMemcpy3D(&param));
+				}
+				if(mem.data_height > 1) {
+					CUDA_MEMCPY2D param;
+					memset(&param, 0, sizeof(param));
+					param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
+					param.dstArray = handle;
+					param.srcMemoryType = CU_MEMORYTYPE_HOST;
+					param.srcHost = (void*)mem.data_pointer;
+					param.srcPitch = mem.data_width*dsize*mem.data_elements;
+					param.WidthInBytes = param.srcPitch;
+					param.Height = mem.data_height;
+
+					cuda_assert(cuMemcpy2D(&param));
+				}
+				else
+					cuda_assert(cuMemcpyHtoA(handle, 0, (void*)mem.data_pointer, size));
+
 				CUDA_RESOURCE_DESC resDesc;
+				memset(&resDesc, 0, sizeof(resDesc));
+				resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
+				resDesc.res.array.hArray = handle;
+				resDesc.flags = 0;
+
+				CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
+				switch(extension) {
+					case EXTENSION_REPEAT:
+						address_mode = CU_TR_ADDRESS_MODE_WRAP;
+						break;
+					case EXTENSION_EXTEND:
+						address_mode = CU_TR_ADDRESS_MODE_CLAMP;
+						break;
+					case EXTENSION_CLIP:
+						address_mode = CU_TR_ADDRESS_MODE_BORDER;
+						break;
+					default:
+						assert(0);
+						break;
+				}
 
 				CUDA_TEXTURE_DESC texDesc;
-
-				CUtexObject* tex;
-				cuda_assert(cuTexObjectCreate(tex, &resDesc, &texDesc, NULL));
+				memset(&texDesc, 0, sizeof(texDesc));
+				texDesc.addressMode[0] = address_mode;
+				texDesc.addressMode[1] = address_mode;
+				texDesc.addressMode[2] = address_mode;
+				texDesc.filterMode = CU_TR_FILTER_MODE_LINEAR;
+				texDesc.flags = 0;
+
+				CUtexObject tex = 0;
+				cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
+
+				printf("Tex: %i - Slot: %i\n\n", tex, *flat_slot);
+				*flat_slot = (int)tex;
 			}
 		}
 		/* Geforce 4xx and 5xx */
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index e802410..a8e2628 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -173,13 +173,14 @@ public:
 	               device_memory& mem,
 	               InterpolationType
 	               interpolation,
-	               ExtensionType extension)
+	               ExtensionType extension,
+	               int *flat_slot)
 	{
 		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 
 		foreach(SubDevice& sub, devices) {
 			mem.device_pointer = 0;
-			sub.device->tex_alloc(name, mem, interpolation, extension);
+			sub.device->tex_alloc(name, mem, interpolation, extension, flat_slot);
 			sub.ptr_map[unique_ptr] = mem.device_pointer;
 		}
 
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index cf4a05d..c756805 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -166,7 +166,8 @@ public:
 	void tex_alloc(const char *name,
 	               device_memory& mem,
 	               InterpolationType interpolation,
-	               ExtensionType extension)
+	               ExtensionType extension,
+	               int *flat_slot)
 	{
 		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 
@@ -182,6 +183,7 @@ public:
 		snd.add(mem);
 		snd.add(interpolation);
 		snd.add(extension);
+		snd.add(flat_slot);
 		snd.write();
 		snd.write_buffer((void*)mem.data_pointer, mem.memory_size());
 	}
@@ -581,6 +583,7 @@ protected:
 			rcv.read(mem);
 			rcv.read(interpolation);
 			rcv.read(extension_type);
+			rcv.read(flat_slot);
 			lock.unlock();
 
 			client_pointer = mem.device_pointer;
@@ -596,7 +599,7 @@ protected:
 
 			rcv.read_buffer((uint8_t*)mem.data_pointer, data_size);
 
-			device->tex_alloc(name.c_str(), mem, interpolation, extension_type);
+			device->tex_alloc(name.c_str(), mem, interpolation, extension_type, flat_slot);
 
 			pointer_mapping_insert(client_pointer, mem.device_pointer);
 		}
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 1b4e542..61f83f2 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1185,7 +1185,8 @@ public:
 	void tex_alloc(const char *name,
 	               device_memory& mem,
 	               InterpolationType /*interpolation*/,
-	               ExtensionType /*extension*/)
+	               ExtensionType /*extension*/,
+	               int /*flat_slot*/)
 	{
 		VLOG(1) << "Texture allocate: " << name << ", " << mem.memory_size() << " bytes.";
 		mem_alloc(mem, MEM_READ_ONLY);
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index 02e69c7..1c54a75 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -1157,6 +1157,7 @@ typedef struct KernelData {
 	KernelBVH bvh;
 	KernelCurves curve;
 	KernelTables tables;
+	int bindless_mapping[4096];
 } KernelData;
 
 #ifdef __KERNEL_DEBUG__
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index d2ffa42..2da4563 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -151,6 +151,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 #else
 	float4 r;
 
+#if __CUDA_ARCH__ < 300
 	/* not particularly proud of this massive switch, what are the
 	 * alternatives?
 	 * - use a single big 1D texture, and do our own lookup/filtering
@@ -258,6 +259,15 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 			kernel_assert(0);
 			return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
 	}
+#else
+	CUtexObject tex = (uint)kernel_data.bindless_mapping[id];
+	if(id < 5)
+		r = tex2D<float4>(tex, x, y);
+	else {
+		uchar4 f = tex2D<uchar4>(tex, x, y);
+		r = make_float4(f.x/255, f.y/255, f.z/255, f.w/255);
+	}
+#endif
 #endif
 
 #ifdef __KERNEL_SSE2__
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index b0f5c8b..fc40447 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -791,7 +791,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
 			device->tex_alloc(name.c_str(),
 			                  tex_img,
 			                  img->interpolation,
-			                  img->extension);
+			                  img->extension,
+			                  &flat_slot);
 		}
 	}
 	else if(type == IMAGE_DATA_TYPE_FLOAT) {
@@ -814,7 +815,8 @@ void ImageManager::device_load_image(Device *device, DeviceScene *dscene, ImageD
 			device->tex_alloc(name.c_str(),
 			                  tex_img,
 			                  img->interpolation,
-			                  img->extension);
+			

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list