[Bf-blender-cvs] [26bea849cfa] master: Cleanup: add device_texture for images, distinct from other global memory

Thu Mar 12 17:29:36 CET 2020

Commit: 26bea849cfa1d020150e0862002d7d5463f07817
Author: Brecht Van Lommel
Date:   Thu Mar 12 15:22:18 2020 +0100
Branches: master
https://developer.blender.org/rB26bea849cfa1d020150e0862002d7d5463f07817

Cleanup: add device_texture for images, distinct from other global memory

There was too much image-texture-specific code in device_memory, and too
much code duplication between devices.

===================================================================

M	intern/cycles/device/cuda/device_cuda.h
M	intern/cycles/device/cuda/device_cuda_impl.cpp
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_memory.cpp
M	intern/cycles/device/device_memory.h
M	intern/cycles/device/opencl/device_opencl.h
M	intern/cycles/device/opencl/device_opencl_impl.cpp
M	intern/cycles/kernel/kernel.h
M	intern/cycles/kernel/kernels/cpu/kernel.cpp
M	intern/cycles/render/image.cpp
M	intern/cycles/render/image.h
M	intern/cycles/render/scene.cpp
M	release/datafiles/locale
M	release/scripts/addons
M	release/scripts/addons_contrib
M	source/tools

===================================================================

diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 6a0b39434aa..3e397da895b 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -155,9 +155,13 @@ class CUDADevice : public Device {
 
   virtual void const_copy_to(const char *name, void *host, size_t size);
 
-  void tex_alloc(device_memory &mem);
+  void global_alloc(device_memory &mem);
 
-  void tex_free(device_memory &mem);
+  void global_free(device_memory &mem);
+
+  void tex_alloc(device_texture &mem);
+
+  void tex_free(device_texture &mem);
 
   bool denoising_non_local_means(device_ptr image_ptr,
                                  device_ptr guide_ptr,
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 4df1ca2097a..a4e7bc2f64a 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -185,7 +185,7 @@ void CUDADevice::cuda_error_message(const string &message)
 }
 
 CUDADevice::CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
-    : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_TEXTURE)
+    : Device(info, stats, profiler, background_), texture_info(this, "__texture_info", MEM_GLOBAL)
 {
   first_error = true;
   background = background_;
@@ -684,7 +684,8 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
       device_memory &mem = *pair.first;
       CUDAMem *cmem = &pair.second;
 
-      bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+      bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
+                        (&mem != &texture_info);
       bool is_image = is_texture && (mem.data_height > 1);
 
       /* Can't move this type of memory. */
@@ -724,8 +725,7 @@ void CUDADevice::move_textures_to_host(size_t size, bool for_texture)
       device_ptr prev_pointer = max_mem->device_pointer;
       size_t prev_size = max_mem->device_size;
 
-      tex_free(*max_mem);
-      tex_alloc(*max_mem);
+      mem_copy_to(*max_mem);
       size = (max_size >= size) ? 0 : size - max_size;
 
       max_mem->device_pointer = prev_pointer;
@@ -759,7 +759,7 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
    * If there is not enough room for working memory, we will try to move
    * textures to host memory, assuming the performance impact would have
    * been worse for working memory. */
-  bool is_texture = (mem.type == MEM_TEXTURE) && (&mem != &texture_info);
+  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
   bool is_image = is_texture && (mem.data_height > 1);
 
   size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
@@ -922,6 +922,9 @@ void CUDADevice::mem_alloc(device_memory &mem)
   else if (mem.type == MEM_TEXTURE) {
     assert(!"mem_alloc not supported for textures.");
   }
+  else if (mem.type == MEM_GLOBAL) {
+    assert(!"mem_alloc not supported for global memory.");
+  }
   else {
     generic_alloc(mem);
   }
@@ -932,9 +935,13 @@ void CUDADevice::mem_copy_to(device_memory &mem)
   if (mem.type == MEM_PIXELS) {
     assert(!"mem_copy_to not supported for pixels.");
   }
+  else if (mem.type == MEM_GLOBAL) {
+    global_free(mem);
+    global_alloc(mem);
+  }
   else if (mem.type == MEM_TEXTURE) {
-    tex_free(mem);
-    tex_alloc(mem);
+    tex_free((device_texture &)mem);
+    tex_alloc((device_texture &)mem);
   }
   else {
     if (!mem.device_pointer) {
@@ -950,7 +957,7 @@ void CUDADevice::mem_copy_from(device_memory &mem, int y, int w, int h, int elem
   if (mem.type == MEM_PIXELS && !background) {
     pixels_copy_from(mem, y, w, h);
   }
-  else if (mem.type == MEM_TEXTURE) {
+  else if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
     assert(!"mem_copy_from not supported for textures.");
   }
   else if (mem.host_pointer) {
@@ -993,8 +1000,11 @@ void CUDADevice::mem_free(device_memory &mem)
   if (mem.type == MEM_PIXELS && !background) {
     pixels_free(mem);
   }
+  else if (mem.type == MEM_GLOBAL) {
+    global_free(mem);
+  }
   else if (mem.type == MEM_TEXTURE) {
-    tex_free(mem);
+    tex_free((device_texture &)mem);
   }
   else {
     generic_free(mem);
@@ -1017,7 +1027,25 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
   cuda_assert(cuMemcpyHtoD(mem, host, size));
 }
 
-void CUDADevice::tex_alloc(device_memory &mem)
+void CUDADevice::global_alloc(device_memory &mem)
+{
+  CUDAContextScope scope(this);
+
+  generic_alloc(mem);
+  generic_copy_to(mem);
+
+  const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
+}
+
+void CUDADevice::global_free(device_memory &mem)
+{
+  if (mem.device_pointer) {
+    CUDAContextScope scope(this);
+    generic_free(mem);
+  }
+}
+
+void CUDADevice::tex_alloc(device_texture &mem)
 {
   CUDAContextScope scope(this);
 
@@ -1027,7 +1055,7 @@ void CUDADevice::tex_alloc(device_memory &mem)
   size_t size = mem.memory_size();
 
   CUaddress_mode address_mode = CU_TR_ADDRESS_MODE_WRAP;
-  switch (mem.extension) {
+  switch (mem.info.extension) {
     case EXTENSION_REPEAT:
       address_mode = CU_TR_ADDRESS_MODE_WRAP;
       break;
@@ -1043,22 +1071,13 @@ void CUDADevice::tex_alloc(device_memory &mem)
   }
 
   CUfilter_mode filter_mode;
-  if (mem.interpolation == INTERPOLATION_CLOSEST) {
+  if (mem.info.interpolation == INTERPOLATION_CLOSEST) {
     filter_mode = CU_TR_FILTER_MODE_POINT;
   }
   else {
     filter_mode = CU_TR_FILTER_MODE_LINEAR;
   }
 
-  /* Data Storage */
-  if (mem.interpolation == INTERPOLATION_NONE) {
-    generic_alloc(mem);
-    generic_copy_to(mem);
-
-    const_copy_to(bind_name.c_str(), &mem.device_pointer, sizeof(mem.device_pointer));
-    return;
-  }
-
   /* Image Texture Storage */
   CUarray_format_enum format;
   switch (mem.data_type) {
@@ -1169,15 +1188,6 @@ void CUDADevice::tex_alloc(device_memory &mem)
   }
 
   /* Kepler+, bindless textures. */
-  int slot = 0;
-  if (string_startswith(mem.name, "__tex_image")) {
-    int pos = string(mem.name).rfind("_");
-    slot = atoi(mem.name + pos + 1);
-  }
-  else {
-    assert(0);
-  }
-
   CUDA_RESOURCE_DESC resDesc;
   memset(&resDesc, 0, sizeof(resDesc));
 
@@ -1214,6 +1224,7 @@ void CUDADevice::tex_alloc(device_memory &mem)
   cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
 
   /* Resize once */
+  const uint slot = mem.slot;
   if (slot >= texture_info.size()) {
     /* Allocate some slots in advance, to reduce amount
      * of re-allocations. */
@@ -1221,19 +1232,12 @@ void CUDADevice::tex_alloc(device_memory &mem)
   }
 
   /* Set Mapping and tag that we need to (re-)upload to device */
-  TextureInfo &info = texture_info[slot];
-  info.data = (uint64_t)cmem->texobject;
-  info.data_type = mem.image_data_type;
-  info.cl_buffer = 0;
-  info.interpolation = mem.interpolation;
-  info.extension = mem.extension;
-  info.width = mem.data_width;
-  info.height = mem.data_height;
-  info.depth = mem.data_depth;
+  texture_info[slot] = mem.info;
+  texture_info[slot].data = (uint64_t)cmem->texobject;
   need_texture_info = true;
 }
 
-void CUDADevice::tex_free(device_memory &mem)
+void CUDADevice::tex_free(device_texture &mem)
 {
   if (mem.device_pointer) {
     CUDAContextScope scope(this);
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 56569a5ee3d..57e8523e02a 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -264,7 +264,7 @@ class CPUDevice : public Device {
 
   CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
       : Device(info_, stats_, profiler_, background_),
-        texture_info(this, "__texture_info", MEM_TEXTURE),
+        texture_info(this, "__texture_info", MEM_GLOBAL),
 #define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name))
         REGISTER_KERNEL(path_trace),
         REGISTER_KERNEL(convert_to_half_float),
@@ -372,6 +372,9 @@ class CPUDevice : public Device {
     if (mem.type == MEM_TEXTURE) {
       assert(!"mem_alloc not supported for textures.");
     }
+    else if (mem.type == MEM_GLOBAL) {
+      assert(!"mem_alloc not supported for global memory.");
+    }
     else {
       if (mem.name) {
         VLOG(1) << "Buffer allocate: " << mem.name << ", "
@@ -396,9 +399,13 @@ class CPUDevice : public Device {
 
   void mem_copy_to(device_memory &mem)
   {
-    if (mem.type == MEM_TEXTURE) {
-      tex_free(mem);
-      tex_alloc(mem);
+    if (mem.type == MEM_GLOBAL) {
+      global_free(mem);
+      global_alloc(mem);
+    }
+    else if (mem.type == MEM_TEXTURE) {
+      tex_free((device_texture &)mem);
+      tex_alloc((device_texture &)mem);
     }
     else if (mem.type == MEM_PIXELS) {
       assert(!"mem_copy_to not supported for pixels.");
@@ -430,8 +437,11 @@ class CPUDevice : public Device {
 
   void mem_free(device_memory &mem)
   {
-    if (mem.type == MEM_TEXTURE) {
-      tex_free(mem);
+    if (mem.type == MEM_GLOBAL) {
+      global_free(mem);
+    }
+    else if (mem.type == MEM_TEXTURE) {
+      tex_free((device_texture &)mem);
     }
     else if (mem.device_pointer) {
       if (mem.type == MEM_DEVICE_ONLY) {
@@ -453,52 +463,50 @@ class CPUDevice : public Device {
     kernel_const_copy(&kernel_globals, name, host, size);
   }
 
-  void tex_alloc(device_memory &mem)
+  void global_alloc(device_memory &mem)
   {
-    VLOG(1) << "Texture allocate: " << mem.name << ", "
+    VLOG(1) << "Global memory allocate: " << mem.name << ", "
             << string_human_readable_number(mem.memory_size()) << " bytes. ("
             << string_human_readable_size(mem.memory_size()) << ")";
 
-    if (mem.interpolation == INTERPOLATION_NONE) {
-      /* Data texture. */
-      kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
-    }
-    else {
-      /* Image Texture. */
-      int slot = 0;
-      if (string_startswith(mem.name, "__tex_image")) {
-        int pos = string(mem.name).rfind("_");
-        slot = a

@@ Diff output truncated at 10240 characters. @@