[Bf-blender-cvs] [6699454fb64] master: Cycles: make CUDA code a bit more robust to host/device alloc failures.

Brecht Van Lommel noreply at git.blender.org
Wed Jan 3 00:33:18 CET 2018


Commit: 6699454fb642bfd07e85f8d7bd8f8879878e3fc5
Author: Brecht Van Lommel
Date:   Tue Jan 2 22:56:07 2018 +0100
Branches: master
https://developer.blender.org/rB6699454fb642bfd07e85f8d7bd8f8879878e3fc5

Cycles: make CUDA code a bit more robust to host/device alloc failures.

Fixes a few corner cases found while stress testing host-mapped memory.

===================================================================

M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_memory.cpp
M	intern/cycles/render/buffers.cpp
M	intern/cycles/render/image.cpp
M	intern/cycles/render/image.h
M	intern/cycles/render/object.cpp

===================================================================

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 391809e5278..29aabd3169c 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -650,7 +650,7 @@ public:
 
 	void generic_copy_to(device_memory& mem)
 	{
-		if(mem.device_pointer) {
+		if(mem.host_pointer && mem.device_pointer) {
 			CUDAContextScope scope(this);
 			cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
 		}
@@ -715,11 +715,11 @@ public:
 			size_t offset = elem*y*w;
 			size_t size = elem*w*h;
 
-			if(mem.device_pointer) {
+			if(mem.host_pointer && mem.device_pointer) {
 				cuda_assert(cuMemcpyDtoH((uchar*)mem.host_pointer + offset,
 										 (CUdeviceptr)(mem.device_pointer + offset), size));
 			}
-			else {
+			else if(mem.host_pointer) {
 				memset((char*)mem.host_pointer + offset, 0, size);
 			}
 		}
@@ -1118,13 +1118,17 @@ public:
 
 		int shift_stride = stride*h;
 		int num_shifts = (2*r+1)*(2*r+1);
-		int mem_size = sizeof(float)*shift_stride*2*num_shifts;
+		int mem_size = sizeof(float)*shift_stride*num_shifts;
 		int channel_offset = 0;
 
-		CUdeviceptr temporary_mem;
-		cuda_assert(cuMemAlloc(&temporary_mem, mem_size));
-		CUdeviceptr difference     = temporary_mem;
-		CUdeviceptr blurDifference = temporary_mem + sizeof(float)*shift_stride * num_shifts;
+		device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
+		temporary_mem.alloc_to_device(2*mem_size);
+
+		if(have_error())
+			return false;
+
+		CUdeviceptr difference     = cuda_device_ptr(temporary_mem.device_pointer);
+		CUdeviceptr blurDifference = difference + mem_size;
 
 		CUdeviceptr weightAccum = task->nlm_state.temporary_3_ptr;
 		cuda_assert(cuMemsetD8(weightAccum, 0, sizeof(float)*shift_stride));
@@ -1156,7 +1160,7 @@ public:
 			CUDA_LAUNCH_KERNEL_1D(cuNLMUpdateOutput, update_output_args);
 		}
 
-		cuMemFree(temporary_mem);
+		temporary_mem.free();
 
 		{
 			CUfunction cuNLMNormalize;
@@ -1225,10 +1229,14 @@ public:
 		int num_shifts = (2*r+1)*(2*r+1);
 		int mem_size = sizeof(float)*shift_stride*num_shifts;
 
-		CUdeviceptr temporary_mem;
-		cuda_assert(cuMemAlloc(&temporary_mem, 2*mem_size));
-		CUdeviceptr difference     = temporary_mem;
-		CUdeviceptr blurDifference = temporary_mem + mem_size;
+		device_only_memory<uchar> temporary_mem(this, "Denoising temporary_mem");
+		temporary_mem.alloc_to_device(2*mem_size);
+
+		if(have_error())
+			return false;
+
+		CUdeviceptr difference     = cuda_device_ptr(temporary_mem.device_pointer);
+		CUdeviceptr blurDifference = difference + mem_size;
 
 		{
 			CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
@@ -1268,7 +1276,7 @@ public:
 			CUDA_LAUNCH_KERNEL_1D(cuNLMConstructGramian, construct_gramian_args);
 		}
 
-		cuMemFree(temporary_mem);
+		temporary_mem.free();
 
 		{
 			CUfunction cuFinalize;
diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp
index 3ad0946330b..b5db76bb3df 100644
--- a/intern/cycles/device/device_memory.cpp
+++ b/intern/cycles/device/device_memory.cpp
@@ -86,7 +86,7 @@ void device_memory::device_free()
 
 void device_memory::device_copy_to()
 {
-	if(data_size) {
+	if(host_pointer) {
 		device->mem_copy_to(*this);
 	}
 }
diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp
index 5c7729ec89f..9899fa1c39c 100644
--- a/intern/cycles/render/buffers.cpp
+++ b/intern/cycles/render/buffers.cpp
@@ -151,6 +151,10 @@ bool RenderBuffers::copy_from_device()
 
 bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int sample, int components, float *pixels)
 {
+	if(buffer.data() == NULL) {
+		return false;
+	}
+
 	float invsample = 1.0f/sample;
 	float scale = invsample;
 	bool variance = (offset == DENOISING_PASS_NORMAL_VAR) ||
@@ -218,6 +222,10 @@ bool RenderBuffers::get_denoising_pass_rect(int offset, float exposure, int samp
 
 bool RenderBuffers::get_pass_rect(PassType type, float exposure, int sample, int components, float *pixels)
 {
+	if(buffer.data() == NULL) {
+		return false;
+	}
+
 	int pass_offset = 0;
 
 	for(size_t j = 0; j < params.passes.size(); j++) {
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 482442cce29..feaa17148ee 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -703,7 +703,7 @@ void ImageManager::device_load_image(Device *device,
 
 	/* Slot assignment */
 	int flat_slot = type_index_to_flattened_slot(slot, type);
-	string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
+	img->mem_name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
 
 	/* Free previous texture in slot. */
 	if(img->mem) {
@@ -715,7 +715,7 @@ void ImageManager::device_load_image(Device *device,
 	/* Create new texture. */
 	if(type == IMAGE_DATA_TYPE_FLOAT4) {
 		device_vector<float4> *tex_img
-			= new device_vector<float4>(device, name.c_str(), MEM_TEXTURE);
+			= new device_vector<float4>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
 		if(!file_load_image<TypeDesc::FLOAT, float>(img,
 		                                            type,
@@ -741,7 +741,7 @@ void ImageManager::device_load_image(Device *device,
 	}
 	else if(type == IMAGE_DATA_TYPE_FLOAT) {
 		device_vector<float> *tex_img
-			= new device_vector<float>(device, name.c_str(), MEM_TEXTURE);
+			= new device_vector<float>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
 		if(!file_load_image<TypeDesc::FLOAT, float>(img,
 		                                            type,
@@ -764,7 +764,7 @@ void ImageManager::device_load_image(Device *device,
 	}
 	else if(type == IMAGE_DATA_TYPE_BYTE4) {
 		device_vector<uchar4> *tex_img
-			= new device_vector<uchar4>(device, name.c_str(), MEM_TEXTURE);
+			= new device_vector<uchar4>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
 		if(!file_load_image<TypeDesc::UINT8, uchar>(img,
 		                                            type,
@@ -790,7 +790,7 @@ void ImageManager::device_load_image(Device *device,
 	}
 	else if(type == IMAGE_DATA_TYPE_BYTE) {
 		device_vector<uchar> *tex_img
-			= new device_vector<uchar>(device, name.c_str(), MEM_TEXTURE);
+			= new device_vector<uchar>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
 		if(!file_load_image<TypeDesc::UINT8, uchar>(img,
 		                                            type,
@@ -812,7 +812,7 @@ void ImageManager::device_load_image(Device *device,
 	}
 	else if(type == IMAGE_DATA_TYPE_HALF4) {
 		device_vector<half4> *tex_img
-			= new device_vector<half4>(device, name.c_str(), MEM_TEXTURE);
+			= new device_vector<half4>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
 		if(!file_load_image<TypeDesc::HALF, half>(img,
 		                                          type,
@@ -837,7 +837,7 @@ void ImageManager::device_load_image(Device *device,
 	}
 	else if(type == IMAGE_DATA_TYPE_HALF) {
 		device_vector<half> *tex_img
-			= new device_vector<half>(device, name.c_str(), MEM_TEXTURE);
+			= new device_vector<half>(device, img->mem_name.c_str(), MEM_TEXTURE);
 
 		if(!file_load_image<TypeDesc::HALF, half>(img,
 		                                          type,
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index cc7c8544bed..3519a67bc05 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -111,6 +111,7 @@ public:
 		InterpolationType interpolation;
 		ExtensionType extension;
 
+		string mem_name;
 		device_memory *mem;
 
 		int users;
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index aef7fc29573..d7143f24850 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -644,7 +644,7 @@ void ObjectManager::device_update_flags(Device *,
 
 void ObjectManager::device_update_mesh_offsets(Device *, DeviceScene *dscene, Scene *scene)
 {
-	if(scene->objects.size() == 0) {
+	if(dscene->objects.size() == 0) {
 		return;
 	}



More information about the Bf-blender-cvs mailing list