[Bf-blender-cvs] [26bea849cfa] master: Cleanup: add device_texture for images, distinct from other global memory
Brecht Van Lommel
noreply at git.blender.org
Thu Mar 12 17:29:36 CET 2020
Commit: 26bea849cfa1d020150e0862002d7d5463f07817
Author: Brecht Van Lommel
Date: Thu Mar 12 15:22:18 2020 +0100
Branches: master
https://developer.blender.org/rB26bea849cfa1d020150e0862002d7d5463f07817
Cleanup: add device_texture for images, distinct from other global memory
There was too much image-texture-specific code in device_memory, and too
much code duplication between devices.
===================================================================
M intern/cycles/device/cuda/device_cuda.h
M intern/cycles/device/cuda/device_cuda_impl.cpp
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_memory.cpp
M intern/cycles/device/device_memory.h
M intern/cycles/device/opencl/device_opencl.h
M intern/cycles/device/opencl/device_opencl_impl.cpp
M intern/cycles/kernel/kernel.h
M intern/cycles/kernel/kernels/cpu/kernel.cpp
M intern/cycles/render/image.cpp
M intern/cycles/render/image.h
M intern/cycles/render/scene.cpp
M release/datafiles/locale
M release/scripts/addons
M release/scripts/addons_contrib
M source/tools
===================================================================
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 6a0b39434aa..3e397da895b 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -155,9 +155,13 @@ class CUDADevice : public Device {
virtual void const_copy_to(const char *name, void *host, size_t size);
- void tex_alloc(device_memory &mem);
+ void global_alloc(device_memory &mem);
- void tex_free(device_memory &mem);
+ void global_free(device_memory &mem);
+
+ void tex_alloc(device_texture &mem);
+
+ void tex_free(device_texture &mem);
bool denoising_non_local_means(device_ptr image_ptr,
device_ptr guide_ptr,
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 4df1ca2097a..a4e7bc2f64a 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -185,7 +185,7 @@ void CUDADevice::cuda_error_message(const string &message)
}
CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
- : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_TEXTURE)
+ : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_GLOBAL)
{
first_error = true;
background = background_;
@@ -684,7 +684,8 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
device_memory &mem = *pair.first;
CUDAMem *cmem = &pair.second;
- bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+ bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
+ (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
/* Can't move this type of memory. */
@@ -724,8 +725,7 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
device_ptr prev_pointer = max_mem->device_pointer;
size_t prev_size = max_mem->device_size;
- tex_free(*max_mem);
- tex_alloc(*max_mem);
+ mem_copy_to(*max_mem);
size = (max_size >= size) ? 0 : size - max_size;
max_mem->device_pointer = prev_pointer;
@@ -759,7 +759,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
* If there is not enough room for working memory, we will try to move
* textures to host memory, assuming the performance impact would have
* been worse for working memory. */
- bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+ bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
bool is_image = is_texture && (mem.data_height > 1);
size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
@@ -922,6 +922,9 @@ void CUDADevice::mem_alloc(device_memory &mem)
else if (mem.type == MEM_TEXTURE) {
assert(!"mem_alloc not supported for textures.");
}
+ else if (mem.type == MEM_GLOBAL) {
+ assert(!"mem_alloc not supported for global memory.");
+ }
else {
generic_alloc(mem);
}
@@ -932,9 +935,13 @@ void CUDADevice::mem_copy_to(device_memory &mem)
if (mem.type == MEM_PIXELS) {
assert(!"mem_copy_to not supported for pixels.");
}
+ else if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ global_alloc(mem);
+ }
else if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
+ tex_free((device_texture &)mem);
+ tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
@@ -950,7 +957,7 @@ void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem
if (mem.type == MEM_PIXELS && !background) {
pixels_copy_from(mem, y, w, h);
}
- else if (mem.type == MEM_TEXTURE) {
+ else if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
assert(!"mem_copy_from not supported for textures.");
}
else if (mem.host_pointer) {
@@ -993,8 +1000,11 @@ void CUDADevice::mem_free(device_memory &mem)
if (mem.type == MEM_PIXELS && !background) {
pixels_free(mem);
}
+ else if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ }
else if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
+ tex_free((device_texture &)mem);
}
else {
generic_free(mem);
@@ -1017,7 +1027,25 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
cuda_assert(cuMemcpyHtoD(mem, host, size));
}
-void CUDADevice::tex_alloc(device_memory &mem)
+void CUDADevice::global_alloc(device_memory &mem)
+{
+ CUDAContextScope scope(this);
+
+ generic_alloc(mem);
+ generic_copy_to(mem);
+
+ const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
+}
+
+void CUDADevice::global_free(device_memory &mem)
+{
+ if (mem.device_pointer) {
+ CUDAContextScope scope(this);
+ generic_free(mem);
+ }
+}
+
+void CUDADevice::tex_alloc(device_texture &mem)
{
CUDAContextScope scope(this);
@@ -1027,7 +1055,7 @@ void CUDADevice::tex_alloc(device_memory &mem)
size_t size = mem.memory_size();
CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
- switch (mem.extension) {
+ switch (mem.info.extension) {
case EXTENSION_REPEAT:
address_mode = CU_TR_ADDRESS_MODE_WRAP;
break;
@@ -1043,22 +1071,13 @@ void CUDADevice::tex_alloc(device_memory &mem)
}
CUfilter_mode filter_mode;
- if (mem.interpolation == INTERPOLATION_CLOSEST) {
+ if (mem.info.interpolation == INTERPOLATION_CLOSEST) {
filter_mode = CU_TR_FILTER_MODE_POINT;
}
else {
filter_mode = CU_TR_FILTER_MODE_LINEAR;
}
- /* Data Storage */
- if (mem.interpolation == INTERPOLATION_NONE) {
- generic_alloc(mem);
- generic_copy_to(mem);
-
- const_copy_to(bind_name.c_str(), &mem.device_pointer, sizeof(mem.device_pointer));
- return;
- }
-
/* Image Texture Storage */
CUarray_format_enum format;
switch (mem.data_type) {
@@ -1169,15 +1188,6 @@ void CUDADevice::tex_alloc(device_memory &mem)
}
/* Kepler+, bindless textures. */
- int slot = 0;
- if (string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- slot = atoi(mem.name + pos + 1);
- }
- else {
- assert(0);
- }
-
CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));
@@ -1214,6 +1224,7 @@ void CUDADevice::tex_alloc(device_memory &mem)
cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
/* Resize once */
+ const uint slot = mem.slot;
if (slot >= texture_info.size()) {
/* Allocate some slots in advance, to reduce amount
* of re-allocations. */
@@ -1221,19 +1232,12 @@ void CUDADevice::tex_alloc(device_memory &mem)
}
/* Set Mapping and tag that we need to (re-)upload to device */
- TextureInfo &info = texture_info[slot];
- info.data = (uint64_t)cmem->texobject;
- info.data_type = mem.image_data_type;
- info.cl_buffer = 0;
- info.interpolation = mem.interpolation;
- info.extension = mem.extension;
- info.width = mem.data_width;
- info.height = mem.data_height;
- info.depth = mem.data_depth;
+ texture_info[slot] = mem.info;
+ texture_info[slot].data = (uint64_t)cmem->texobject;
need_texture_info = true;
}
-void CUDADevice::tex_free(device_memory &mem)
+void CUDADevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
CUDAContextScope scope(this);
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 56569a5ee3d..57e8523e02a 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -264,7 +264,7 @@ class CPUDevice : public Device {
CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
: Device(info_, stats_, profiler_, background_),
- texture_info(this, "__texture_info", MEM_TEXTURE),
+ texture_info(this, "__texture_info", MEM_GLOBAL),
#define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name))
REGISTER_KERNEL(path_trace),
REGISTER_KERNEL(convert_to_half_float),
@@ -372,6 +372,9 @@ class CPUDevice : public Device {
if (mem.type == MEM_TEXTURE) {
assert(!"mem_alloc not supported for textures.");
}
+ else if (mem.type == MEM_GLOBAL) {
+ assert(!"mem_alloc not supported for global memory.");
+ }
else {
if (mem.name) {
VLOG(1) << "Buffer allocate: " << mem.name << ", "
@@ -396,9 +399,13 @@ class CPUDevice : public Device {
void mem_copy_to(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
- tex_alloc(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ global_alloc(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
+ tex_alloc((device_texture &)mem);
}
else if (mem.type == MEM_PIXELS) {
assert(!"mem_copy_to not supported for pixels.");
@@ -430,8 +437,11 @@ class CPUDevice : public Device {
void mem_free(device_memory &mem)
{
- if (mem.type == MEM_TEXTURE) {
- tex_free(mem);
+ if (mem.type == MEM_GLOBAL) {
+ global_free(mem);
+ }
+ else if (mem.type == MEM_TEXTURE) {
+ tex_free((device_texture &)mem);
}
else if (mem.device_pointer) {
if (mem.type == MEM_DEVICE_ONLY) {
@@ -453,52 +463,50 @@ class CPUDevice : public Device {
kernel_const_copy(&kernel_globals, name, host, size);
}
- void tex_alloc(device_memory &mem)
+ void global_alloc(device_memory &mem)
{
- VLOG(1) << "Texture allocate: " << mem.name << ", "
+ VLOG(1) << "Global memory allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
- if (mem.interpolation == INTERPOLATION_NONE) {
- /* Data texture. */
- kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
- }
- else {
- /* Image Texture. */
- int slot = 0;
- if (string_startswith(mem.name, "__tex_image")) {
- int pos = string(mem.name).rfind("_");
- slot = a
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list