[Bf-blender-cvs] [5801ef71e40] master: Code refactor: device memory cleanups, preparing for mapped host memory.
Brecht Van Lommel
noreply at git.blender.org
Sun Nov 5 15:52:00 CET 2017
Commit: 5801ef71e40bc932c69e67f06076cd8b41132e52
Author: Brecht Van Lommel
Date: Sun Nov 5 00:34:30 2017 +0100
Branches: master
https://developer.blender.org/rB5801ef71e40bc932c69e67f06076cd8b41132e52
Code refactor: device memory cleanups, preparing for mapped host memory.
===================================================================
M intern/cycles/device/device.cpp
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/device/device_memory.cpp
M intern/cycles/device/device_memory.h
M intern/cycles/device/device_network.cpp
M intern/cycles/device/device_network.h
M intern/cycles/device/device_split_kernel.cpp
M intern/cycles/device/opencl/memory_manager.cpp
M intern/cycles/device/opencl/opencl_base.cpp
M intern/cycles/kernel/kernel.h
M intern/cycles/kernel/kernels/cpu/kernel.cpp
M intern/cycles/render/bake.cpp
M intern/cycles/render/buffers.cpp
M intern/cycles/render/light.cpp
M intern/cycles/render/mesh_displace.cpp
M intern/cycles/render/object.cpp
M intern/cycles/render/tables.cpp
===================================================================
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index b2f20bab58b..641e3fde140 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -102,17 +102,17 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int d
if(rgba.data_type == TYPE_HALF) {
/* for multi devices, this assumes the inefficient method that we allocate
* all pixels on the device even though we only render to a subset */
- GLhalf *data_pointer = (GLhalf*)rgba.data_pointer;
+ GLhalf *host_pointer = (GLhalf*)rgba.host_pointer;
float vbuffer[16], *basep;
float *vp = NULL;
- data_pointer += 4*y*w;
+ host_pointer += 4*y*w;
/* draw half float texture, GLSL shader for display transform assumed to be bound */
GLuint texid;
glGenTextures(1, &texid);
glBindTexture(GL_TEXTURE_2D, texid);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, w, h, 0, GL_RGBA, GL_HALF_FLOAT, host_pointer);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -194,7 +194,7 @@ void Device::draw_pixels(device_memory& rgba, int y, int w, int h, int dx, int d
glPixelZoom((float)width/(float)w, (float)height/(float)h);
glRasterPos2f(dx, dy);
- uint8_t *pixels = (uint8_t*)rgba.data_pointer;
+ uint8_t *pixels = (uint8_t*)rgba.host_pointer;
pixels += 4*y*w;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 0c0e6af7eb4..1a54c3380ee 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -297,10 +297,14 @@ public:
<< string_human_readable_size(mem.memory_size()) << ")";
}
- mem.device_pointer = mem.data_pointer;
-
- if(!mem.device_pointer) {
- mem.device_pointer = (device_ptr)malloc(mem.memory_size());
+ if(mem.type == MEM_DEVICE_ONLY) {
+ assert(!mem.host_pointer);
+ size_t alignment = mem_address_alignment();
+ void *data = util_aligned_malloc(mem.memory_size(), alignment);
+ mem.device_pointer = (device_ptr)data;
+ }
+ else {
+ mem.device_pointer = (device_ptr)mem.host_pointer;
}
mem.device_size = mem.memory_size();
@@ -350,8 +354,8 @@ public:
tex_free(mem);
}
else if(mem.device_pointer) {
- if(!mem.data_pointer) {
- free((void*)mem.device_pointer);
+ if(mem.type == MEM_DEVICE_ONLY) {
+ util_aligned_free((void*)mem.device_pointer);
}
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
@@ -379,7 +383,7 @@ public:
/* Data texture. */
kernel_tex_copy(&kernel_globals,
mem.name,
- mem.data_pointer,
+ mem.host_pointer,
mem.data_size);
}
else {
@@ -400,7 +404,7 @@ public:
}
TextureInfo& info = texture_info[flat_slot];
- info.data = (uint64_t)mem.data_pointer;
+ info.data = (uint64_t)mem.host_pointer;
info.cl_buffer = 0;
info.interpolation = mem.interpolation;
info.extension = mem.extension;
@@ -411,7 +415,7 @@ public:
need_texture_info = true;
}
- mem.device_pointer = mem.data_pointer;
+ mem.device_pointer = (device_ptr)mem.host_pointer;
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
}
@@ -457,7 +461,7 @@ public:
bool denoising_set_tiles(device_ptr *buffers, DenoisingTask *task)
{
- TilesInfo *tiles = (TilesInfo*) task->tiles_mem.data_pointer;
+ TilesInfo *tiles = (TilesInfo*) task->tiles_mem.host_pointer;
for(int i = 0; i < 9; i++) {
tiles->buffers[i] = buffers[i];
}
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 59d4fb055d0..4ab3cb9da75 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -128,20 +128,26 @@ public:
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule, cuFilterModule;
- map<device_ptr, bool> tex_interp_map;
- map<device_ptr, CUtexObject> tex_bindless_map;
int cuDevId;
int cuDevArchitecture;
bool first_error;
CUDASplitKernel *split_kernel;
+ struct CUDAMem {
+ CUDAMem()
+ : texobject(0), array(0) {}
+
+ CUtexObject texobject;
+ CUarray array;
+ };
+ map<device_memory*, CUDAMem> cuda_mem_map;
+
struct PixelMem {
GLuint cuPBO;
CUgraphicsResource cuPBOresource;
GLuint cuTexId;
int w, h;
};
-
map<device_ptr, PixelMem> pixel_mem_map;
/* Bindless Textures */
@@ -615,7 +621,7 @@ public:
}
}
- void generic_alloc(device_memory& mem, size_t padding = 0)
+ CUDAMem *generic_alloc(device_memory& mem, size_t padding = 0)
{
CUDAContextScope scope(this);
@@ -625,19 +631,28 @@ public:
<< string_human_readable_size(mem.memory_size()) << ")";
}
- CUdeviceptr device_pointer;
+ /* Allocate memory on device. */
+ CUdeviceptr device_pointer = 0;
size_t size = mem.memory_size();
cuda_assert(cuMemAlloc(&device_pointer, size + padding));
mem.device_pointer = (device_ptr)device_pointer;
mem.device_size = size;
stats.mem_alloc(size);
+
+ if(!mem.device_pointer) {
+ return NULL;
+ }
+
+ /* Insert into map of allocations. */
+ CUDAMem *cmem = &cuda_mem_map[&mem];
+ return cmem;
}
void generic_copy_to(device_memory& mem)
{
if(mem.device_pointer) {
CUDAContextScope scope(this);
- cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
+ cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
}
}
@@ -648,10 +663,11 @@ public:
cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
- mem.device_pointer = 0;
-
stats.mem_free(mem.device_size);
+ mem.device_pointer = 0;
mem.device_size = 0;
+
+ cuda_mem_map.erase(cuda_mem_map.find(&mem));
}
}
@@ -700,11 +716,11 @@ public:
size_t size = elem*w*h;
if(mem.device_pointer) {
- cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
+ cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
(CUdeviceptr)(mem.device_pointer + offset), size));
}
else {
- memset((char*)mem.data_pointer + offset, 0, size);
+ memset((char*)mem.host_pointer + offset, 0, size);
}
}
}
@@ -715,8 +731,8 @@ public:
mem_alloc(mem);
}
- if(mem.data_pointer) {
- memset((void*)mem.data_pointer, 0, mem.memory_size());
+ if(mem.host_pointer) {
+ memset(mem.host_pointer, 0, mem.memory_size());
}
if(mem.device_pointer) {
@@ -814,8 +830,6 @@ public:
uint32_t ptr = (uint32_t)mem.device_pointer;
cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
}
-
- tex_interp_map[mem.device_pointer] = false;
return;
}
@@ -851,7 +865,7 @@ public:
default: assert(0); return;
}
-
+ CUDAMem *cmem = NULL;
CUarray array_3d = NULL;
size_t src_pitch = mem.data_width * dsize * mem.data_elements;
size_t dst_pitch = src_pitch;
@@ -878,7 +892,7 @@ public:
param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
param.dstArray = array_3d;
param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = (void*)mem.data_pointer;
+ param.srcHost = mem.host_pointer;
param.srcPitch = src_pitch;
param.WidthInBytes = param.srcPitch;
param.Height = mem.data_height;
@@ -889,6 +903,10 @@ public:
mem.device_pointer = (device_ptr)array_3d;
mem.device_size = size;
stats.mem_alloc(size);
+
+ cmem = &cuda_mem_map[&mem];
+ cmem->texobject = 0;
+ cmem->array = array_3d;
}
else if(mem.data_height > 1) {
/* 2D texture, using pitch aligned linear memory. */
@@ -897,7 +915,10 @@ public:
dst_pitch = align_up(src_pitch, alignment);
size_t dst_size = dst_pitch * mem.data_height;
- generic_alloc(mem, dst_size - mem.memory_size());
+ cmem = generic_alloc(mem, dst_size - mem.memory_size());
+ if(!cmem) {
+ return;
+ }
CUDA_MEMCPY2D param;
memset(&param, 0, sizeof(param));
@@ -905,7 +926,7 @@ public:
param.dstDevice = mem.device_pointer;
param.dstPitch = dst_pitch;
param.srcMemoryType = CU_MEMORYTYPE_HOST;
- param.srcHost = (void*)mem.data_pointer;
+ param.srcHost = mem.host_pointer;
param.srcPitch = src_pitch;
param.WidthInBytes = param.srcPitch;
param.Height = mem.data_height;
@@ -914,8 +935,12 @@ public:
}
else {
/* 1D texture, using linear memory. */
- generic_alloc(mem);
- cuda_assert(cuMemcpyHtoD(mem.device_pointer, (void*)mem.data_pointer, size));
+ cmem = generic_alloc(mem);
+ if(!cmem) {
+ return;
+ }
+
+ cuda_assert(cuMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
}
if(!has_fermi_limits) {
@@ -932,7 +957,7 @@ public:
CUDA_RESOURCE_DESC resDesc;
memset(&resDesc, 0, sizeof(resDesc));
- if(mem.data_depth > 1) {
+ if(array_3d) {
resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
resDesc.res.array.hArray = array_3d;
resDesc.flags = 0;
@@ -962,13 +987,7 @@ public:
texDesc.filterMode = filter_mode;
texDesc.flags = CU_TRSF_NORMALIZED_COORDINATES;
- CUtexObject tex = 0;
- cuda_assert(cuTexObjectCreate(&tex, &resDesc, &texDesc, NULL));
-
- /* Safety check */
- if((uint)tex > UINT_MAX) {
- assert(0);
- }
+ cuda_assert(cuTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL));
/* Resize once */
if(flat_slot >= texture_info.size()) {
@@ -979,20 +998,18 @@ public:
/* Set Mapping and tag that we need to (re-)upload to device */
TextureInfo& info = texture_info[flat_slot];
- info.data = (uint64_t)tex;
+ info.data = (uint64_t)cmem->texobject;
info.cl_buffer = 0;
info.interpolation = mem.interpolation;
info.extension = mem.extension;
info.width = mem.data_width;
info.height = mem.data_height;
info.depth = mem.data_depth;
-
- tex_bindless_map[mem.device_pointer] = tex;
need_texture_info = true;
}
else {
/* Fermi, fixed texture slots. */
- if(mem.data_depth > 1) {
+ if(array_3d) {
cuda_assert(cuTexRefSetArray(texref, array_3d, CU_TRSA_OVERRIDE_FORMAT));
}
else if(mem.data_height > 1) {
@@ -1017,38 +1034,27 @@ public:
cuda_assert(cuTexRefSetAddressMode(texre
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list