[Bf-blender-cvs] [88520dd5b6f] master: Code refactor: simplify CUDA context push/pop.

Wed Sep 27 13:43:42 CEST 2017

Commit: 88520dd5b6ff5bf310421ab6b9a30d7d49425685
Author: Brecht Van Lommel
Date:   Sun Sep 24 00:18:28 2017 +0200
Branches: master
https://developer.blender.org/rB88520dd5b6ff5bf310421ab6b9a30d7d49425685

Code refactor: simplify CUDA context push/pop.

Makes it possible to call a function like mem_alloc() when the CUDA context is
already active. Also fixes some missing context pops on error paths.

===================================================================

M	intern/cycles/device/device_cuda.cpp

===================================================================

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 3b75142ee67..29b5bd70789 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -111,6 +111,16 @@ public:
 	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
 };
 
+/* Utility to push/pop CUDA context. */
+class CUDAContextScope {
+public:
+	CUDAContextScope(CUDADevice *device);
+	~CUDAContextScope();
+
+private:
+	CUDADevice *device;
+};
+
 class CUDADevice : public Device
 {
 public:
@@ -206,16 +216,6 @@ public:
 		cuda_error_documentation();
 	}
 
-	void cuda_push_context()
-	{
-		cuda_assert(cuCtxSetCurrent(cuContext));
-	}
-
-	void cuda_pop_context()
-	{
-		cuda_assert(cuCtxSetCurrent(NULL));
-	}
-
 	CUDADevice(DeviceInfo& info, Stats &stats, bool background_)
 	: Device(info, stats, background_)
 	{
@@ -263,7 +263,8 @@ public:
 		cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
 		cuDevArchitecture = major*100 + minor*10;
 
-		cuda_pop_context();
+		/* Pop context set by cuCtxCreate. */
+		cuCtxPopCurrent(NULL);
 	}
 
 	~CUDADevice()
@@ -519,7 +520,7 @@ public:
 			return false;
 
 		/* open module */
-		cuda_push_context();
+		CUDAContextScope scope(this);
 
 		string cubin_data;
 		CUresult result;
@@ -540,8 +541,6 @@ public:
 		if(cuda_error_(result, "cuModuleLoad"))
 			cuda_error_message(string_printf("Failed loading CUDA kernel %s.", filter_cubin.c_str()));
 
-		cuda_pop_context();
-
 		return (result == CUDA_SUCCESS);
 	}
 
@@ -556,36 +555,36 @@ public:
 
 	void mem_alloc(const char *name, device_memory& mem, MemoryType /*type*/)
 	{
+		CUDAContextScope scope(this);
+
 		if(name) {
 			VLOG(1) << "Buffer allocate: " << name << ", "
 			        << string_human_readable_number(mem.memory_size()) << " bytes. ("
 			        << string_human_readable_size(mem.memory_size()) << ")";
 		}
 
-		cuda_push_context();
 		CUdeviceptr device_pointer;
 		size_t size = mem.memory_size();
 		cuda_assert(cuMemAlloc(&device_pointer, size));
 		mem.device_pointer = (device_ptr)device_pointer;
 		mem.device_size = size;
 		stats.mem_alloc(size);
-		cuda_pop_context();
 	}
 
 	void mem_copy_to(device_memory& mem)
 	{
-		cuda_push_context();
+		CUDAContextScope scope(this);
+
 		if(mem.device_pointer)
 			cuda_assert(cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), (void*)mem.data_pointer, mem.memory_size()));
-		cuda_pop_context();
 	}
 
 	void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
 	{
+		CUDAContextScope scope(this);
 		size_t offset = elem*y*w;
 		size_t size = elem*w*h;
 
-		cuda_push_context();
 		if(mem.device_pointer) {
 			cuda_assert(cuMemcpyDtoH((uchar*)mem.data_pointer + offset,
 			                         (CUdeviceptr)(mem.device_pointer + offset), size));
@@ -593,7 +592,6 @@ public:
 		else {
 			memset((char*)mem.data_pointer + offset, 0, size);
 		}
-		cuda_pop_context();
 	}
 
 	void mem_zero(device_memory& mem)
@@ -602,18 +600,17 @@ public:
 			memset((void*)mem.data_pointer, 0, mem.memory_size());
 		}
 
-		cuda_push_context();
-		if(mem.device_pointer)
+		if(mem.device_pointer) {
+			CUDAContextScope scope(this);
 			cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
-		cuda_pop_context();
+		}
 	}
 
 	void mem_free(device_memory& mem)
 	{
 		if(mem.device_pointer) {
-			cuda_push_context();
+			CUDAContextScope scope(this);
 			cuda_assert(cuMemFree(cuda_device_ptr(mem.device_pointer)));
-			cuda_pop_context();
 
 			mem.device_pointer = 0;
 
@@ -629,14 +626,13 @@ public:
 
 	void const_copy_to(const char *name, void *host, size_t size)
 	{
+		CUDAContextScope scope(this);
 		CUdeviceptr mem;
 		size_t bytes;
 
-		cuda_push_context();
 		cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
 		//assert(bytes == size);
 		cuda_assert(cuMemcpyHtoD(mem, host, size));
-		cuda_pop_context();
 	}
 
 	void tex_alloc(const char *name,
@@ -644,6 +640,8 @@ public:
 	               InterpolationType interpolation,
 	               ExtensionType extension)
 	{
+		CUDAContextScope scope(this);
+
 		VLOG(1) << "Texture allocate: " << name << ", "
 		        << string_human_readable_number(mem.memory_size()) << " bytes. ("
 		        << string_human_readable_size(mem.memory_size()) << ")";
@@ -706,9 +704,7 @@ public:
 				                          tokens[3].c_str());
 			}
 
-			cuda_push_context();
 			cuda_assert(cuModuleGetTexRef(&texref, cuModule, bind_name.c_str()));
-			cuda_pop_context();
 
 			if(!texref) {
 				return;
@@ -721,8 +717,6 @@ public:
 				mem_alloc(NULL, mem, MEM_READ_ONLY);
 				mem_copy_to(mem);
 
-				cuda_push_context();
-
 				CUdeviceptr cumem;
 				size_t cubytes;
 
@@ -738,28 +732,20 @@ public:
 					uint32_t ptr = (uint32_t)mem.device_pointer;
 					cuda_assert(cuMemcpyHtoD(cumem, (void*)&ptr, cubytes));
 				}
-
-				cuda_pop_context();
 			}
 			else {
 				mem_alloc(NULL, mem, MEM_READ_ONLY);
 				mem_copy_to(mem);
 
-				cuda_push_context();
-
 				cuda_assert(cuTexRefSetAddress(NULL, texref, cuda_device_ptr(mem.device_pointer), size));
 				cuda_assert(cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT));
 				cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_READ_AS_INTEGER));
-
-				cuda_pop_context();
 			}
 		}
 		/* Texture Storage */
 		else {
 			CUarray handle = NULL;
 
-			cuda_push_context();
-
 			if(mem.data_depth > 1) {
 				CUDA_ARRAY3D_DESCRIPTOR desc;
 
@@ -784,7 +770,6 @@ public:
 			}
 
 			if(!handle) {
-				cuda_pop_context();
 				return;
 			}
 
@@ -877,14 +862,10 @@ public:
 				cuda_assert(cuTexRefSetFilterMode(texref, filter_mode));
 				cuda_assert(cuTexRefSetFlags(texref, CU_TRSF_NORMALIZED_COORDINATES));
 			}
-
-			cuda_pop_context();
 		}
 
 		/* Fermi, Data and Image Textures */
 		if(!has_bindless_textures) {
-			cuda_push_context();
-
 			cuda_assert(cuTexRefSetAddressMode(texref, 0, address_mode));
 			cuda_assert(cuTexRefSetAddressMode(texref, 1, address_mode));
 			if(mem.data_depth > 1) {
@@ -892,8 +873,6 @@ public:
 			}
 
 			cuda_assert(cuTexRefSetFormat(texref, format, mem.data_elements));
-
-			cuda_pop_context();
 		}
 
 		/* Fermi and Kepler */
@@ -904,9 +883,8 @@ public:
 	{
 		if(mem.device_pointer) {
 			if(tex_interp_map[mem.device_pointer]) {
-				cuda_push_context();
+				CUDAContextScope scope(this);
 				cuArrayDestroy((CUarray)mem.device_pointer);
-				cuda_pop_context();
 
 				/* Free CUtexObject (Bindless Textures) */
 				if(info.has_bindless_textures && tex_bindless_map[mem.device_pointer]) {
@@ -960,7 +938,7 @@ public:
 		if(have_error())
 			return false;
 
-		cuda_push_context();
+		CUDAContextScope scope(this);
 
 		int4 rect = task->rect;
 		int w = align_up(rect.z-rect.x, 4);
@@ -1017,7 +995,6 @@ public:
 		CUDA_LAUNCH_KERNEL(cuNLMNormalize, normalize_args);
 		cuda_assert(cuCtxSynchronize());
 
-		cuda_pop_context();
 		return !have_error();
 	}
 
@@ -1026,7 +1003,7 @@ public:
 		if(have_error())
 			return false;
 
-		cuda_push_context();
+		CUDAContextScope scope(this);
 
 		CUfunction cuFilterConstructTransform;
 		cuda_assert(cuModuleGetFunction(&cuFilterConstructTransform, cuFilterModule, "kernel_cuda_filter_construct_transform"));
@@ -1046,7 +1023,6 @@ public:
 		CUDA_LAUNCH_KERNEL(cuFilterConstructTransform, args);
 		cuda_assert(cuCtxSynchronize());
 
-		cuda_pop_context();
 		return !have_error();
 	}
 
@@ -1058,11 +1034,11 @@ public:
 		if(have_error())
 			return false;
 
+		CUDAContextScope scope(this);
+
 		mem_zero(task->storage.XtWX);
 		mem_zero(task->storage.XtWY);
 
-		cuda_push_context();
-
 		CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian, cuFinalize;
 		cuda_assert(cuModuleGetFunction(&cuNLMCalcDifference,   cuFilterModule, "kernel_cuda_filter_nlm_calc_difference"));
 		cuda_assert(cuModuleGetFunction(&cuNLMBlur,             cuFilterModule, "kernel_cuda_filter_nlm_blur"));
@@ -1150,7 +1126,6 @@ public:
 		CUDA_LAUNCH_KERNEL(cuFinalize, finalize_args);
 		cuda_assert(cuCtxSynchronize());
 
-		cuda_pop_context();
 		return !have_error();
 	}
 
@@ -1161,7 +1136,7 @@ public:
 		if(have_error())
 			return false;
 
-		cuda_push_context();
+		CUDAContextScope scope(this);
 
 		CUfunction cuFilterCombineHalves;
 		cuda_assert(cuModuleGetFunction(&cuFilterCombineHalves, cuFilterModule, "kernel_cuda_filter_combine_halves"));
@@ -1179,7 +1154,6 @@ public:
 		CUDA_LAUNCH_KERNEL(cuFilterCombineHalves, args);
 		cuda_assert(cuCtxSynchronize());
 
-		cuda_pop_context();
 		return !have_error();
 	}
 
@@ -1190,7 +1164,7 @@ public:
 		if(have_error())
 			return false;
 
-		cuda_push_context();
+		CUDAContextScope scope(this);
 
 		CUfunction cuFilterDivideShadow;
 		cuda_assert(cuModuleGetFunction(&cuFilterDivideShadow, cuFilterModule, "kernel_cuda_filter_divide_shadow"));
@@ -1214,7 +1188,6 @@ public:
 		CUDA_LAUNCH_KERNEL(cuFilterDivideShadow, args);
 		cuda_assert(cuCtxSynchronize());
 
-		cuda_pop_context();
 		return !have_error();
 	}
 
@@ -1227,7 +1200,7 @@ public:
 		if(have_error())
 			return false;
 
-		cuda_push_context();
+		CUDAContextScope scope(this);
 
 		CUfunction cuFilterGetFeature;
 		cuda_assert(cuModuleGetFunction(&cuFilterGetFeature, cuFilterModule, "kernel_cuda_filter_get_feature"));
@@ -1250,7 +1223,6 @@ public:
 		CUDA_LAUNCH_KERNEL(cuFilterGetFeature, args);
 		cuda_assert(cuCtxSynchronize());
 
-		cuda_pop_context();
 		return !have_error();
 	}
 
@@ -1263,7 +1235,7 @@ public:
 		if(have_error())
 			return false;
 
-		cuda_push_context();
+		CUDAContextScope scope(this);
 
 		CUfunction cuFilterDetectOutliers;
 		cuda_assert(cuModuleGetFunction(&cuFilterDetectOutliers, cuFilterModule, "kernel_cuda_filter_detect_outliers"));
@@ -1282,7 +1254,6 @@ public:
 		CUDA_LAUNCH_KERNEL(cuFilterDetectOutliers, args);
 		cuda_assert(cuCtxSynchronize());
 
-		cuda_pop_context();
 		return !have_error();
 	}
 
@@ -1319,7 +1290,7 @@ public:
 		if(have_error())
 			return;
 
-		cuda_push_context();
+		CUDAContextScope scope(this);
 
 		CUfunction cuPathTrace;
 		CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer);
@@ -1333,8 +1304,9 @@ public:
 			cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda

@@ Diff output truncated at 10240 characters. @@