[Bf-blender-cvs] [23098cda993] master: Code refactor: make texture code more consistent between devices.

Sat Oct 7 15:57:08 CEST 2017

Commit: 23098cda9936d785988b689ee69e58e900f17cb2
Author: Brecht Van Lommel
Date:   Fri Oct 6 21:47:41 2017 +0200
Branches: master
https://developer.blender.org/rB23098cda9936d785988b689ee69e58e900f17cb2

Code refactor: make texture code more consistent between devices.

* Use common TextureInfo struct for all devices, except CUDA Fermi.
* Move image sampling code to kernels/*/kernel_*_image.h files.
* Use arrays for data textures on Fermi too, so device_vector<Struct> works.

===================================================================

M	intern/cycles/device/device.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/opencl/opencl.h
M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/device/opencl/opencl_split.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/geom/geom_volume.h
M	intern/cycles/kernel/kernel_compat_cpu.h
M	intern/cycles/kernel/kernel_compat_cuda.h
M	intern/cycles/kernel/kernel_compat_opencl.h
M	intern/cycles/kernel/kernel_globals.h
M	intern/cycles/kernel/kernel_textures.h
M	intern/cycles/kernel/kernels/cpu/kernel.cpp
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M	intern/cycles/kernel/kernels/cuda/kernel.cu
A	intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
M	intern/cycles/kernel/kernels/opencl/kernel.cl
R086	intern/cycles/kernel/kernel_image_opencl.h	intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
M	intern/cycles/kernel/osl/osl_services.cpp
M	intern/cycles/kernel/split/kernel_split_common.h
M	intern/cycles/kernel/svm/svm_image.h
M	intern/cycles/kernel/svm/svm_voxel.h
M	intern/cycles/util/util_texture.h
M	intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 26d6d380a10..0e0a0079209 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -26,6 +26,7 @@
 #include "util/util_stats.h"
 #include "util/util_string.h"
 #include "util/util_thread.h"
+#include "util/util_texture.h"
 #include "util/util_types.h"
 #include "util/util_vector.h"
 
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 19e3c0a9075..ac6d3246d38 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -163,6 +163,9 @@ public:
 	TaskPool task_pool;
 	KernelGlobals kernel_globals;
 
+	device_vector<TextureInfo> texture_info;
+	bool need_texture_info;
+
 #ifdef WITH_OSL
 	OSLGlobals osl_globals;
 #endif
@@ -235,6 +238,8 @@ public:
 			VLOG(1) << "Will be using split kernel.";
 		}
 
+		need_texture_info = false;
+
 #define REGISTER_SPLIT_KERNEL(name) split_kernels[#name] = KernelFunctions<void(*)(KernelGlobals*, KernelData*)>(KERNEL_FUNCTIONS(name))
 		REGISTER_SPLIT_KERNEL(path_init);
 		REGISTER_SPLIT_KERNEL(scene_intersect);
@@ -261,6 +266,7 @@ public:
 	~CPUDevice()
 	{
 		task_pool.stop();
+		tex_free(texture_info);
 	}
 
 	virtual bool show_samples() const
@@ -268,6 +274,15 @@ public:
 		return (TaskScheduler::num_threads() == 1);
 	}
 
+	void load_texture_info()
+	{
+		if(need_texture_info) {
+			tex_free(texture_info);
+			tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+			need_texture_info = false;
+		}
+	}
+
 	void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
 	{
 		if(name) {
@@ -333,14 +348,47 @@ public:
 		VLOG(1) << "Texture allocate: " << name << ", "
 		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
 		        << string_human_readable_size(mem.memory_size()) << ")";
-		kernel_tex_copy(&kernel_globals,
-		                name,
-		                mem.data_pointer,
-		                mem.data_width,
-		                mem.data_height,
-		                mem.data_depth,
-		                interpolation,
-		                extension);
+
+		if(interpolation == INTERPOLATION_NONE) {
+			/* Data texture. */
+			kernel_tex_copy(&kernel_globals,
+							name,
+							mem.data_pointer,
+							mem.data_width,
+							mem.data_height,
+							mem.data_depth,
+							interpolation,
+							extension);
+		}
+		else {
+			/* Image Texture. */
+			int flat_slot = 0;
+			if(string_startswith(name, "__tex_image")) {
+				int pos =  string(name).rfind("_");
+				flat_slot = atoi(name + pos + 1);
+			}
+			else {
+				assert(0);
+			}
+
+			if(flat_slot >= texture_info.size()) {
+				/* Allocate some slots in advance, to reduce amount
+				 * of re-allocations. */
+				texture_info.resize(flat_slot + 128);
+			}
+
+			TextureInfo& info = texture_info.get_data()[flat_slot];
+			info.data = (uint64_t)mem.data_pointer;
+			info.cl_buffer = 0;
+			info.interpolation = interpolation;
+			info.extension = extension;
+			info.width = mem.data_width;
+			info.height = mem.data_height;
+			info.depth = mem.data_depth;
+
+			need_texture_info = true;
+		}
+
 		mem.device_pointer = mem.data_pointer;
 		mem.device_size = mem.memory_size();
 		stats.mem_alloc(mem.device_size);
@@ -352,6 +400,7 @@ public:
 			mem.device_pointer = 0;
 			stats.mem_free(mem.device_size);
 			mem.device_size = 0;
+			need_texture_info = true;
 		}
 	}
 
@@ -784,6 +833,9 @@ public:
 
 	void task_add(DeviceTask& task)
 	{
+		/* Load texture info. */
+		load_texture_info();
+
 		/* split task into smaller ones */
 		list<DeviceTask> tasks;
 
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 734edcff503..dcbe6033bcc 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -129,7 +129,7 @@ public:
 	CUcontext cuContext;
 	CUmodule cuModule, cuFilterModule;
 	map<device_ptr, bool> tex_interp_map;
-	map<device_ptr, uint> tex_bindless_map;
+	map<device_ptr, CUtexObject> tex_bindless_map;
 	int cuDevId;
 	int cuDevArchitecture;
 	bool first_error;
@@ -145,8 +145,8 @@ public:
 	map<device_ptr, PixelMem> pixel_mem_map;
 
 	/* Bindless Textures */
-	device_vector<uint> bindless_mapping;
-	bool need_bindless_mapping;
+	device_vector<TextureInfo> texture_info;
+	bool need_texture_info;
 
 	CUdeviceptr cuda_device_ptr(device_ptr mem)
 	{
@@ -231,7 +231,7 @@ public:
 
 		split_kernel = NULL;
 
-		need_bindless_mapping = false;
+		need_texture_info = false;
 
 		/* intialize */
 		if(cuda_error(cuInit(0)))
@@ -274,7 +274,7 @@ public:
 		delete split_kernel;
 
 		if(info.has_bindless_textures) {
-			tex_free(bindless_mapping);
+			tex_free(texture_info);
 		}
 
 		cuda_assert(cuCtxDestroy(cuContext));
@@ -544,12 +544,12 @@ public:
 		return (result == CUDA_SUCCESS);
 	}
 
-	void load_bindless_mapping()
+	void load_texture_info()
 	{
-		if(info.has_bindless_textures && need_bindless_mapping) {
-			tex_free(bindless_mapping);
-			tex_alloc("__bindless_mapping", bindless_mapping, INTERPOLATION_NONE, EXTENSION_REPEAT);
-			need_bindless_mapping = false;
+		if(info.has_bindless_textures && need_texture_info) {
+			tex_free(texture_info);
+			tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+			need_texture_info = false;
 		}
 	}
 
@@ -646,8 +646,7 @@ public:
 		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
 		        << string_human_readable_size(mem.memory_size()) << ")";
 
-		/* Check if we are on sm_30 or above.
-		 * We use arrays and bindles textures for storage there */
+		/* Check if we are on sm_30 or above, for bindless textures. */
 		bool has_bindless_textures = info.has_bindless_textures;
 
 		/* General variables for both architectures */
@@ -679,20 +678,10 @@ public:
 			filter_mode = CU_TR_FILTER_MODE_LINEAR;
 		}
 
-		CUarray_format_enum format;
-		switch(mem.data_type) {
-			case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
-			case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
-			case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
-			case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
-			case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
-			default: assert(0); return;
-		}
-
 		/* General variables for Fermi */
 		CUtexref texref = NULL;
 
-		if(!has_bindless_textures) {
+		if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) {
 			if(mem.data_depth > 1) {
 				/* Kernel uses different bind names for 2d and 3d float textures,
 				 * so we have to adjust couple of things here.
@@ -711,41 +700,41 @@ public:
 			}
 		}
 
-		/* Data Storage */
 		if(interpolation == INTERPOLATION_NONE) {
-			if(has_bindless_textures) {
-				mem_alloc(NULL, mem, MEM_READ_ONLY);
-				mem_copy_to(mem);
+			/* Data Storage */
+			mem_alloc(NULL, mem, MEM_READ_ONLY);
+			mem_copy_to(mem);
 
-				CUdeviceptr cumem;
-				size_t cubytes;
+			CUdeviceptr cumem;
+			size_t cubytes;
 
-				cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
+			cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
 
-				if(cubytes == 8) {
-					/* 64 bit device pointer */
-					uint64_t ptr = mem.device_pointer;
-					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
-				}
-				else {
-					/* 32 bit device pointer */
-					uint32_t ptr = (uint32_t)mem.device_pointer;
-					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
-				}
+			if(cubytes == 8) {
+				/* 64 bit device pointer */
+				uint64_t ptr = mem.device_pointer;
+				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
 			}
 			else {
-				mem_alloc(NULL, mem, MEM_READ_ONLY);
-				mem_copy_to(mem);
-
-				cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
-				cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
-				cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
+				/* 32 bit device pointer */
+				uint32_t ptr = (uint32_t)mem.device_pointer;
+				cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
 			}
 		}
-		/* Texture Storage */
 		else {
+			/* Texture Storage */
 			CUarray handle = NULL;
 
+			CUarray_format_enum format;
+			switch(mem.data_type) {
+				case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
+				case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
+				case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
+				case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+				case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
+				default: assert(0); return;
+			}
+
 			if(mem.data_depth > 1) {
 				CUDA_ARRAY3D_DESCRIPTOR desc;
 
@@ -810,8 +799,8 @@ public:
 
 			stats.mem_alloc(size);
 
-			/* Bindless Textures - Kepler */
 			if(has_bindless_textures) {
+				/* Bindless Textures - Kepler */
 				int flat_slot = 0;
 				if(string_startswith(name, "__tex_image")) {
 					int pos =  string(name).rfind("_");
@@ -844,35 +833,39 @@ public:
 				}
 
 				/* Resize once */
-				if(flat_slot >= bindless_mapping.size()) {
+				if(flat_slot >= texture_info.size()) {
 					/* Allocate some slots in advance, to reduce amount
-					 * of re-allocations.
-					 */
-					bindless_mapping.resize(flat_slot + 128);
+					 * of re-allocations. */
+					texture_info.resize(flat_slot + 128);
 				}
 
 				/* Set Mapping and tag that we need to (re-)upload to device */
-				bindless_mapping.get_data()[flat_slot] = (uint)tex;
-				tex_bindless_map[mem.device_pointer] = (uint)tex;
-				need_bindless_mapping = true;
+				TextureInfo& info = texture_info.get_data()[flat_slot];
+				info.data = (uint64_t)tex;
+				info.cl_buffer = 0;
+				info.interpolation = interpolation;
+				info.extension = extension;
+				info.width = mem.data_width;
+				info.height = mem.data_height;
+				info.depth = mem.data_depth;
+
+				tex_bindless_map[mem.device_pointer] = tex;
+				need_texture_info = true;
 			}
-			/* Regular Textures - Fermi */
 			else {
+				/* Regular Textures - Fermi */
 				cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
 				cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
 				cuda_assert(cuTexRefSetFlags(texref, CU_TRS

@@ Diff output truncated at 10240 characters. @@