[Bf-blender-cvs] [07db4d7] cycles_kernel_split: [BCYCLES-213] Improve tile splitting logic

varunsundar08 noreply at git.blender.org
Thu Apr 30 23:24:56 CEST 2015


Commit: 07db4d7e3ae725e590a22165fe0c1c3a113b1a85
Author: varunsundar08
Date:   Thu Apr 16 16:50:31 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rB07db4d7e3ae725e590a22165fe0c1c3a113b1a85

[BCYCLES-213] Improve tile splitting logic

===================================================================

M	intern/cycles/device/device_opencl.cpp

===================================================================

diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 94a2d90..652731c 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -3275,39 +3275,67 @@ public:
 	}
 
 	/* Considers the scene properties, global memory available in the device
-	 * and returns a rectanglular tile dimension that should render on split kernel
+	 * and returns a rectangular tile dimension (approx the maximum)
+	 * that should render on split kernel
 	 */
-	int2 get_render_feasible_tile_size(size_t feasible_global_work_size) {
-		int2 render_feasible_tile_size;
+	int2 get_max_render_feasible_tile_size(size_t feasible_global_work_size) {
+		int2 max_render_feasible_tile_size;
 		int square_root_val = sqrt(feasible_global_work_size);
-		render_feasible_tile_size.x = square_root_val;
-		render_feasible_tile_size.y = square_root_val;
+		max_render_feasible_tile_size.x = square_root_val;
+		max_render_feasible_tile_size.y = square_root_val;
 
-		/* ciel round-off render_feasible_tile_size */
+		/* ceil round-off max_render_feasible_tile_size */
 		int2 ceil_render_feasible_tile_size;
-		ceil_render_feasible_tile_size.x = (((render_feasible_tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
-		ceil_render_feasible_tile_size.y = (((render_feasible_tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
+		ceil_render_feasible_tile_size.x = (((max_render_feasible_tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
+		ceil_render_feasible_tile_size.y = (((max_render_feasible_tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
 
 		if (ceil_render_feasible_tile_size.x * ceil_render_feasible_tile_size.y <= feasible_global_work_size) {
 			return ceil_render_feasible_tile_size;
 		}
 
-		/* floor round-off render_feasible_tile_size */
+		/* floor round-off max_render_feasible_tile_size */
 		int2 floor_render_feasible_tile_size;
-		floor_render_feasible_tile_size.x = (render_feasible_tile_size.x / SPLIT_KERNEL_LOCAL_SIZE_X) * SPLIT_KERNEL_LOCAL_SIZE_X;
-		floor_render_feasible_tile_size.y = (render_feasible_tile_size.y / SPLIT_KERNEL_LOCAL_SIZE_Y) * SPLIT_KERNEL_LOCAL_SIZE_Y;
+		floor_render_feasible_tile_size.x = (max_render_feasible_tile_size.x / SPLIT_KERNEL_LOCAL_SIZE_X) * SPLIT_KERNEL_LOCAL_SIZE_X;
+		floor_render_feasible_tile_size.y = (max_render_feasible_tile_size.y / SPLIT_KERNEL_LOCAL_SIZE_Y) * SPLIT_KERNEL_LOCAL_SIZE_Y;
 
 		return floor_render_feasible_tile_size;
 	}
 
-	/* Splits existing tile into multiple tiles of tile size render_feasible_tile_size */
-	vector<RenderTile> split_tiles(RenderTile rtile, int2 render_feasible_tile_size) {
+	/* Try splitting the current tile into multiple smaller almost-square-tiles */
+	int2 get_split_tile_size(RenderTile rtile, int2 max_render_feasible_tile_size) {
+		int2 split_tile_size;
+		int num_global_threads = max_render_feasible_tile_size.x * max_render_feasible_tile_size.y;
+		int d_w = rtile.w;
+		int d_h = rtile.h;
+
+		/* Ceil round off d_w and d_h */
+		d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
+		d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
+
+		while (d_w * d_h > num_global_threads) {
+			/* Halve the longer dimension */
+			if (d_w >= d_h) {
+				d_w = d_w / 2;
+				d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
+			}
+			else {
+				d_h = d_h / 2;
+				d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
+			}
+		}
+		split_tile_size.x = d_w;
+		split_tile_size.y = d_h;
+		return split_tile_size;
+	}
+
+	/* Splits existing tile into multiple tiles of tile size split_tile_size */
+	vector<RenderTile> split_tiles(RenderTile rtile, int2 split_tile_size) {
 		vector<RenderTile> to_path_trace_rtile;
 
 		int d_w = rtile.w;
 		int d_h = rtile.h;
-		int num_tiles_x = (((d_w - 1) / render_feasible_tile_size.x) + 1);
-		int num_tiles_y = (((d_h - 1) / render_feasible_tile_size.y) + 1);
+		int num_tiles_x = (((d_w - 1) / split_tile_size.x) + 1);
+		int num_tiles_y = (((d_h - 1) / split_tile_size.y) + 1);
 
 		/* buffer and rng_state offset calc */
 		size_t offset_index = rtile.offset + (rtile.x + rtile.y * rtile.stride);
@@ -3321,10 +3349,10 @@ public:
 			for (int tile_iter_x = 0; tile_iter_x < num_tiles_x; tile_iter_x++) {
 				int rtile_index = tile_iter_y * num_tiles_x + tile_iter_x;
 
-				to_path_trace_rtile[rtile_index].rng_state_offset_x = offset_x + tile_iter_x * render_feasible_tile_size.x;
-				to_path_trace_rtile[rtile_index].rng_state_offset_y = offset_y + tile_iter_y * render_feasible_tile_size.y;
-				to_path_trace_rtile[rtile_index].buffer_offset_x = offset_x + tile_iter_x * render_feasible_tile_size.x;
-				to_path_trace_rtile[rtile_index].buffer_offset_y = offset_y + tile_iter_y * render_feasible_tile_size.y;
+				to_path_trace_rtile[rtile_index].rng_state_offset_x = offset_x + tile_iter_x * split_tile_size.x;
+				to_path_trace_rtile[rtile_index].rng_state_offset_y = offset_y + tile_iter_y * split_tile_size.y;
+				to_path_trace_rtile[rtile_index].buffer_offset_x = offset_x + tile_iter_x * split_tile_size.x;
+				to_path_trace_rtile[rtile_index].buffer_offset_y = offset_y + tile_iter_y * split_tile_size.y;
 				to_path_trace_rtile[rtile_index].start_sample = rtile.start_sample;
 				to_path_trace_rtile[rtile_index].num_samples = rtile.num_samples;
 				to_path_trace_rtile[rtile_index].sample = rtile.sample;
@@ -3334,20 +3362,17 @@ public:
 				to_path_trace_rtile[rtile_index].buffers = rtile.buffers;
 				to_path_trace_rtile[rtile_index].buffer = rtile.buffer;
 				to_path_trace_rtile[rtile_index].rng_state = rtile.rng_state;
-				to_path_trace_rtile[rtile_index].x = rtile.x + (tile_iter_x * render_feasible_tile_size.x);
-				to_path_trace_rtile[rtile_index].y = rtile.y + (tile_iter_y * render_feasible_tile_size.y);
+				to_path_trace_rtile[rtile_index].x = rtile.x + (tile_iter_x * split_tile_size.x);
+				to_path_trace_rtile[rtile_index].y = rtile.y + (tile_iter_y * split_tile_size.y);
 				to_path_trace_rtile[rtile_index].buffer_rng_state_stride = rtile.stride;
 
-				/* Set max render feasible tile size */
-				to_path_trace_rtile[rtile_index].max_render_feasible_tile_size = render_feasible_tile_size;
-
 				/* Fill width and height of the new render tile */
 				to_path_trace_rtile[rtile_index].w = (tile_iter_x == (num_tiles_x - 1)) ?
-					(d_w - (tile_iter_x * render_feasible_tile_size.x)) /* Border tile */
-					: render_feasible_tile_size.x;
+					(d_w - (tile_iter_x * split_tile_size.x)) /* Border tile */
+					: split_tile_size.x;
 				to_path_trace_rtile[rtile_index].h = (tile_iter_y == (num_tiles_y - 1)) ?
-					(d_h - (tile_iter_y * render_feasible_tile_size.y)) /* Border tile */
-					: render_feasible_tile_size.y;
+					(d_h - (tile_iter_y * split_tile_size.y)) /* Border tile */
+					: split_tile_size.y;
 
 				to_path_trace_rtile[rtile_index].stride = to_path_trace_rtile[rtile_index].w;
 			}
@@ -3370,6 +3395,7 @@ public:
 #ifdef __SPLIT_KERNEL__
 			bool initialize_data_and_check_render_feasibility = false;
 			bool need_to_split_tiles_further = false;
+			int2 max_render_feasible_tile_size;
 			size_t feasible_global_work_size;
 #endif
 
@@ -3404,24 +3430,27 @@ public:
 
 					/* Check render feasibility */
 					feasible_global_work_size = get_feasible_global_work_size(tile, CL_MEM_PTR(const_mem_map["__data"]->device_pointer));
+					max_render_feasible_tile_size = get_max_render_feasible_tile_size(feasible_global_work_size);
 					need_to_split_tiles_further = need_to_split_tile(tile.tile_size.x, tile.tile_size.y, feasible_global_work_size);
 
-					/* Print message to console */
-					if (need_to_split_tiles_further && background) {
-						int2 render_feasible_tile_size = get_render_feasible_tile_size(feasible_global_work_size);
-						fprintf(stderr, "Message : Tiles need to be split further inside path trace (due to in-sufficient device-global-memory for split kernel to function) \n\
-The user set tile size %dx%d will be split into tiles of dimension %dx%d to render \n", tile.tile_size.x, tile.tile_size.y, render_feasible_tile_size.x, render_feasible_tile_size.y);
-					}
-
 					initialize_data_and_check_render_feasibility = true;
 				}
 
 				if (need_to_split_tiles_further) {
-					int2 render_feasible_tile_size = get_render_feasible_tile_size(feasible_global_work_size);
-					vector<RenderTile> to_path_trace_render_tiles = split_tiles(tile, render_feasible_tile_size);
+
+					int2 split_tile_size = get_split_tile_size(tile, max_render_feasible_tile_size);
+					vector<RenderTile> to_path_trace_render_tiles = split_tiles(tile, split_tile_size);
+
+					/* Print message to console */
+					if (background && (to_path_trace_render_tiles.size() > 1)) {
+						fprintf(stderr, "Message : Tiles need to be split further inside path trace (due to insufficient device-global-memory for split kernel to function) \n\
+The current tile of dimensions %dx%d is split into tiles of dimension %dx%d for render \n", tile.w, tile.h, split_tile_size.x, split_tile_size.y);
+					}
 
 					/* Process all split tiles */
 					for (int tile_iter = 0; tile_iter < to_path_trace_render_tiles.size(); tile_iter++) {
+						/* Set max_render_feasible_tile_size for all tiles */
+						to_path_trace_render_tiles[tile_iter].max_render_feasible_tile_size = max_render_feasible_tile_size;
 						/* The second argument is dummy */
 						path_trace(to_path_trace_render_tiles[tile_iter], 0);
 					}




More information about the Bf-blender-cvs mailing list