[Bf-blender-cvs] [23098cda993] master: Code refactor: make texture code more consistent between devices.
Brecht Van Lommel
noreply at git.blender.org
Sat Oct 7 15:57:08 CEST 2017
Commit: 23098cda9936d785988b689ee69e58e900f17cb2
Author: Brecht Van Lommel
Date: Fri Oct 6 21:47:41 2017 +0200
Branches: master
https://developer.blender.org/rB23098cda9936d785988b689ee69e58e900f17cb2
Code refactor: make texture code more consistent between devices.
* Use common TextureInfo struct for all devices, except CUDA Fermi.
* Move image sampling code to kernels/*/kernel_*_image.h files.
* Use arrays for data textures on Fermi too, so device_vector<Struct> works.
===================================================================
M intern/cycles/device/device.h
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/device/opencl/opencl.h
M intern/cycles/device/opencl/opencl_base.cpp
M intern/cycles/device/opencl/opencl_split.cpp
M intern/cycles/kernel/CMakeLists.txt
M intern/cycles/kernel/geom/geom_volume.h
M intern/cycles/kernel/kernel_compat_cpu.h
M intern/cycles/kernel/kernel_compat_cuda.h
M intern/cycles/kernel/kernel_compat_opencl.h
M intern/cycles/kernel/kernel_globals.h
M intern/cycles/kernel/kernel_textures.h
M intern/cycles/kernel/kernels/cpu/kernel.cpp
M intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
A intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
M intern/cycles/kernel/kernels/opencl/kernel.cl
R086 intern/cycles/kernel/kernel_image_opencl.h intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
M intern/cycles/kernel/osl/osl_services.cpp
M intern/cycles/kernel/split/kernel_split_common.h
M intern/cycles/kernel/svm/svm_image.h
M intern/cycles/kernel/svm/svm_voxel.h
M intern/cycles/util/util_texture.h
M intern/cycles/util/util_types.h
===================================================================
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 26d6d380a10..0e0a0079209 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -26,6 +26,7 @@
#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_thread.h"
+#include "util/util_texture.h"
#include "util/util_types.h"
#include "util/util_vector.h"
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 19e3c0a9075..ac6d3246d38 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -163,6 +163,9 @@ public:
TaskPool task_pool;
KernelGlobals kernel_globals;
+ device_vector<TextureInfo> texture_info;
+ bool need_texture_info;
+
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
@@ -235,6 +238,8 @@ public:
VLOG(1) << "Will be using split kernel.";
}
+ need_texture_info = false;
+
#define REGISTER_SPLIT_KERNEL(name) split_kernels[#name] = KernelFunctions<void(*)(KernelGlobals*, KernelData*)>(KERNEL_FUNCTIONS(name))
REGISTER_SPLIT_KERNEL(path_init);
REGISTER_SPLIT_KERNEL(scene_intersect);
@@ -261,6 +266,7 @@ public:
~CPUDevice()
{
task_pool.stop();
+ tex_free(texture_info);
}
virtual bool show_samples() const
@@ -268,6 +274,15 @@ public:
return (TaskScheduler::num_threads() == 1);
}
+ void load_texture_info()
+ {
+ if(need_texture_info) {
+ tex_free(texture_info);
+ tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+ need_texture_info = false;
+ }
+ }
+
void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
{
if(name) {
@@ -333,14 +348,47 @@ public:
VLOG(1) << "Texture allocate: " << name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
- kernel_tex_copy(&kernel_globals,
- name,
- mem.data_pointer,
- mem.data_width,
- mem.data_height,
- mem.data_depth,
- interpolation,
- extension);
+
+ if(interpolation == INTERPOLATION_NONE) {
+ /* Data texture. */
+ kernel_tex_copy(&kernel_globals,
+ name,
+ mem.data_pointer,
+ mem.data_width,
+ mem.data_height,
+ mem.data_depth,
+ interpolation,
+ extension);
+ }
+ else {
+ /* Image Texture. */
+ int flat_slot = 0;
+ if(string_startswith(name, "__tex_image")) {
+ int pos = string(name).rfind("_");
+ flat_slot = atoi(name + pos + 1);
+ }
+ else {
+ assert(0);
+ }
+
+ if(flat_slot >= texture_info.size()) {
+ /* Allocate some slots in advance, to reduce amount
+ * of re-allocations. */
+ texture_info.resize(flat_slot + 128);
+ }
+
+ TextureInfo& info = texture_info.get_data()[flat_slot];
+ info.data = (uint64_t)mem.data_pointer;
+ info.cl_buffer = 0;
+ info.interpolation = interpolation;
+ info.extension = extension;
+ info.width = mem.data_width;
+ info.height = mem.data_height;
+ info.depth = mem.data_depth;
+
+ need_texture_info = true;
+ }
+
mem.device_pointer = mem.data_pointer;
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
@@ -352,6 +400,7 @@ public:
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
mem.device_size = 0;
+ need_texture_info = true;
}
}
@@ -784,6 +833,9 @@ public:
void task_add(DeviceTask& task)
{
+ /* Load texture info. */
+ load_texture_info();
+
/* split task into smaller ones */
list<DeviceTask> tasks;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 734edcff503..dcbe6033bcc 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -129,7 +129,7 @@ public:
CUcontext cuContext;
CUmodule cuModule, cuFilterModule;
map<device_ptr, bool> tex_interp_map;
- map<device_ptr, uint> tex_bindless_map;
+ map<device_ptr, CUtexObject> tex_bindless_map;
int cuDevId;
int cuDevArchitecture;
bool first_error;
@@ -145,8 +145,8 @@ public:
map<device_ptr, PixelMem> pixel_mem_map;
/* Bindless Textures */
- device_vector<uint> bindless_mapping;
- bool need_bindless_mapping;
+ device_vector<TextureInfo> texture_info;
+ bool need_texture_info;
CUdeviceptr cuda_device_ptr(device_ptr mem)
{
@@ -231,7 +231,7 @@ public:
split_kernel = NULL;
- need_bindless_mapping = false;
+ need_texture_info = false;
/* intialize */
if(cuda_error(cuInit(0)))
@@ -274,7 +274,7 @@ public:
delete split_kernel;
if(info.has_bindless_textures) {
- tex_free(bindless_mapping);
+ tex_free(texture_info);
}
cuda_assert(cuCtxDestroy(cuContext));
@@ -544,12 +544,12 @@ public:
return (result == CUDA_SUCCESS);
}
- void load_bindless_mapping()
+ void load_texture_info()
{
- if(info.has_bindless_textures && need_bindless_mapping) {
- tex_free(bindless_mapping);
- tex_alloc("__bindless_mapping", bindless_mapping, INTERPOLATION_NONE, EXTENSION_REPEAT);
- need_bindless_mapping = false;
+ if(info.has_bindless_textures && need_texture_info) {
+ tex_free(texture_info);
+ tex_alloc("__texture_info", texture_info, INTERPOLATION_NONE, EXTENSION_REPEAT);
+ need_texture_info = false;
}
}
@@ -646,8 +646,7 @@ public:
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
- /* Check if we are on sm_30 or above.
- * We use arrays and bindles textures for storage there */
+ /* Check if we are on sm_30 or above, for bindless textures. */
bool has_bindless_textures = info.has_bindless_textures;
/* General variables for both architectures */
@@ -679,20 +678,10 @@ public:
filter_mode = CU_TR_FILTER_MODE_LINEAR;
}
- CUarray_format_enum format;
- switch(mem.data_type) {
- case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
- case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
- case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
- case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
- case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
- default: assert(0); return;
- }
-
/* General variables for Fermi */
CUtexref texref = NULL;
- if(!has_bindless_textures) {
+ if(!has_bindless_textures && interpolation != INTERPOLATION_NONE) {
if(mem.data_depth > 1) {
/* Kernel uses different bind names for 2d and 3d float textures,
* so we have to adjust couple of things here.
@@ -711,41 +700,41 @@ public:
}
}
- /* Data Storage */
if(interpolation == INTERPOLATION_NONE) {
- if(has_bindless_textures) {
- mem_alloc(NULL, mem, MEM_READ_ONLY);
- mem_copy_to(mem);
+ /* Data Storage */
+ mem_alloc(NULL, mem, MEM_READ_ONLY);
+ mem_copy_to(mem);
- CUdeviceptr cumem;
- size_t cubytes;
+ CUdeviceptr cumem;
+ size_t cubytes;
- cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
+ cuda_assert(cuModuleGetGlobal(&cumem, &cubytes, cuModule, bind_name.c_str()));
- if(cubytes == 8) {
- /* 64 bit device pointer */
- uint64_t ptr = mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
- }
- else {
- /* 32 bit device pointer */
- uint32_t ptr = (uint32_t)mem.device_pointer;
- cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
- }
+ if(cubytes == 8) {
+ /* 64 bit device pointer */
+ uint64_t ptr = mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
else {
- mem_alloc(NULL, mem, MEM_READ_ONLY);
- mem_copy_to(mem);
-
- cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
- cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
- cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
+ /* 32 bit device pointer */
+ uint32_t ptr = (uint32_t)mem.device_pointer;
+ cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
}
- /* Texture Storage */
else {
+ /* Texture Storage */
CUarray handle = NULL;
+ CUarray_format_enum format;
+ switch(mem.data_type) {
+ case TYPE_UCHAR: format = CU_AD_FORMAT_UNSIGNED_INT8; break;
+ case TYPE_UINT: format = CU_AD_FORMAT_UNSIGNED_INT32; break;
+ case TYPE_INT: format = CU_AD_FORMAT_SIGNED_INT32; break;
+ case TYPE_FLOAT: format = CU_AD_FORMAT_FLOAT; break;
+ case TYPE_HALF: format = CU_AD_FORMAT_HALF; break;
+ default: assert(0); return;
+ }
+
if(mem.data_depth > 1) {
CUDA_ARRAY3D_DESCRIPTOR desc;
@@ -810,8 +799,8 @@ public:
stats.mem_alloc(size);
- /* Bindless Textures - Kepler */
if(has_bindless_textures) {
+ /* Bindless Textures - Kepler */
int flat_slot = 0;
if(string_startswith(name, "__tex_image")) {
int pos = string(name).rfind("_");
@@ -844,35 +833,39 @@ public:
}
/* Resize once */
- if(flat_slot >= bindless_mapping.size()) {
+ if(flat_slot >= texture_info.size()) {
/* Allocate some slots in advance, to reduce amount
- * of re-allocations.
- */
- bindless_mapping.resize(flat_slot + 128);
+ * of re-allocations. */
+ texture_info.resize(flat_slot + 128);
}
/* Set Mapping and tag that we need to (re-)upload to device */
- bindless_mapping.get_data()[flat_slot] = (uint)tex;
- tex_bindless_map[mem.device_pointer] = (uint)tex;
- need_bindless_mapping = true;
+ TextureInfo& info = texture_info.get_data()[flat_slot];
+ info.data = (uint64_t)tex;
+ info.cl_buffer = 0;
+ info.interpolation = interpolation;
+ info.extension = extension;
+ info.width = mem.data_width;
+ info.height = mem.data_height;
+ info.depth = mem.data_depth;
+
+ tex_bindless_map[mem.device_pointer] = tex;
+ need_texture_info = true;
}
- /* Regular Textures - Fermi */
else {
+ /* Regular Textures - Fermi */
cuda_assert(cuTexRefSetArray(texref, handle, CU_TRSA_OVERRIDE_FORMAT));
cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
cuda_assert(cuTexRefSetFlags(texref, CU_TRS
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list