[Bf-blender-cvs] [1d01675] master: Cuda use streams and async to avoid busywaiting

Martijn Berger noreply at git.blender.org
Thu Mar 6 20:52:23 CET 2014


Commit: 1d016758330b7e328758b3df28ea93a19d47fcdc
Author: Martijn Berger
Date:   Thu Mar 6 20:51:13 2014 +0100
https://developer.blender.org/rB1d016758330b7e328758b3df28ea93a19d47fcdc

Cuda use streams and async to avoid busywaiting

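This switches the CUDA API usage towards more of the asynchronous calls: kernels are launched on a stream with cuLaunchGridAsync, and the host periodically waits on an event instead of calling cuCtxSynchronize after every launch.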

Updating only once every second (the target sync interval is 1000 ms) is sufficiently cheap that I don't think it is worth doing it less often.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D262

===================================================================

M	intern/cycles/device/device_cuda.cpp

===================================================================

diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 0fbb48c..932fdc3 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -41,11 +41,14 @@ public:
 	CUdevice cuDevice;
 	CUcontext cuContext;
 	CUmodule cuModule;
+	CUstream cuStream;
+	CUevent tileDone;
 	map<device_ptr, bool> tex_interp_map;
 	int cuDevId;
 	int cuDevArchitecture;
 	bool first_error;
 	bool use_texture_storage;
+	unsigned int target_update_frequency;
 
 	struct PixelMem {
 		GLuint cuPBO;
@@ -177,6 +180,8 @@ public:
 		first_error = true;
 		background = background_;
 		use_texture_storage = true;
+		/* we try an update / sync every 1000 ms */
+		target_update_frequency = 1000;
 
 		cuDevId = info.num;
 		cuDevice = 0;
@@ -207,6 +212,9 @@ public:
 		if(cuda_error_(result, "cuCtxCreate"))
 			return;
 
+		cuda_assert(cuStreamCreate(&cuStream, 0))
+		cuda_assert(cuEventCreate(&tileDone, 0x1))
+
 		int major, minor;
 		cuDeviceComputeCapability(&major, &minor, cuDevId);
 		cuDevArchitecture = major*100 + minor*10;
@@ -223,6 +231,8 @@ public:
 	{
 		task_pool.stop();
 
+		cuda_assert(cuEventDestroy(tileDone))
+		cuda_assert(cuStreamDestroy(cuStream))
 		cuda_assert(cuCtxDestroy(cuContext))
 	}
 
@@ -645,9 +655,7 @@ public:
 
 		cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
 		cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
-		cuda_assert(cuLaunchGrid(cuPathTrace, xblocks, yblocks))
-
-		cuda_assert(cuCtxSynchronize())
+		cuda_assert(cuLaunchGridAsync(cuPathTrace, xblocks, yblocks, cuStream))
 
 		cuda_pop_context();
 	}
@@ -964,11 +972,16 @@ public:
 			
 			bool branched = task->integrator_branched;
 			
+
 			/* keep rendering tiles until done */
 			while(task->acquire_tile(this, tile)) {
 				int start_sample = tile.start_sample;
 				int end_sample = tile.start_sample + tile.num_samples;
 
+				boost::posix_time::ptime start_time(boost::posix_time::microsec_clock::local_time());
+				boost::posix_time::ptime last_time = start_time;
+				int sync_sample = 10;
+
 				for(int sample = start_sample; sample < end_sample; sample++) {
 					if (task->get_cancel()) {
 						if(task->need_finish_queue == false)
@@ -978,8 +991,28 @@ public:
 					path_trace(tile, sample, branched);
 
 					tile.sample = sample + 1;
-
 					task->update_progress(tile);
+
+					if(sample == sync_sample){
+						cuda_push_context();
+						cuda_assert(cuEventRecord(tileDone, cuStream ))
+						cuda_assert(cuEventSynchronize(tileDone))
+
+						/* Do some time keeping to find out if we need to sync less */
+						boost::posix_time::ptime current_time(boost::posix_time::microsec_clock::local_time());
+						boost::posix_time::time_duration sample_duration = current_time - last_time;
+
+						long msec = sample_duration.total_milliseconds();
+						float scaling_factor = (float)target_update_frequency / (float)msec;
+
+						/* sync at earliest next sample and probably later */
+						sync_sample = (sample + 1) + sync_sample * ceil(scaling_factor);
+
+						sync_sample = min(end_sample - 1, sync_sample); // make sure we sync the last sample always
+
+						last_time = current_time;
+						cuda_pop_context();
+					}
 				}
 
 				task->release_tile(tile);




More information about the Bf-blender-cvs mailing list