[Bf-blender-cvs] [f30d722461] cycles_split_kernel: Cycles: Clean up variables in host side split kernel logic
Mai Lavelle
noreply at git.blender.org
Fri Jan 6 04:03:18 CET 2017
Commit: f30d722461f6b7ac446c982b47ce9ca1f9222bb8
Author: Mai Lavelle
Date: Thu Dec 22 00:53:35 2016 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rBf30d722461f6b7ac446c982b47ce9ca1f9222bb8
Cycles: Clean up variables in host side split kernel logic
Was a bit messy, lots of duplication, hard to tell what was what.
===================================================================
M intern/cycles/device/device_split_kernel.cpp
===================================================================
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index dcbb6821d6..0d968d450e 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -102,9 +102,7 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
tile.buffer_rng_state_stride = tile.stride;
tile.stride = tile.w;
- size_t global_size[2];
size_t local_size[2];
-
{
int2 lsize = device->split_kernel_local_size();
local_size[0] = lsize[0];
@@ -143,40 +141,38 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
tile.buffers->params.height);
}
- int d_w = tile.w;
- int d_h = tile.h;
-
+ /* set global_size and num_parallel_samples */
+ size_t global_size[2];
+ unsigned int num_parallel_samples;
+ {
#ifdef __WORK_STEALING__
- global_size[0] = (((d_w - 1) / local_size[0]) + 1) * local_size[0];
- global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
- unsigned int num_parallel_samples = 1;
+ global_size[0] = round_up(tile.w, local_size[0]);
+ global_size[1] = round_up(tile.h, local_size[1]);
+ num_parallel_samples = 1;
#else
- global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
- unsigned int num_threads = max_render_feasible_tile_size.x *
- max_render_feasible_tile_size.y;
- unsigned int num_tile_columns_possible = num_threads / global_size[1];
- /* Estimate number of parallel samples that can be
- * processed in parallel.
- */
- unsigned int num_parallel_samples = min(num_tile_columns_possible / d_w,
- tile.num_samples);
- /* Wavefront size in AMD is 64.
- * TODO(sergey): What about other platforms?
- */
- if(num_parallel_samples >= 64) {
- /* TODO(sergey): Could use generic round-up here. */
- num_parallel_samples = (num_parallel_samples / 64) * 64;
- }
- assert(num_parallel_samples != 0);
+ global_size[1] = round_up(tile.h, local_size[1]);
+ unsigned int num_threads = max_render_feasible_tile_size.x * max_render_feasible_tile_size.y;
+ unsigned int num_tile_columns_possible = num_threads / global_size[1];
+ /* Estimate number of parallel samples that can be
+ * processed in parallel.
+ */
+ num_parallel_samples = min(num_tile_columns_possible / tile.w, tile.num_samples);
+ /* Wavefront size in AMD is 64.
+ * TODO(sergey): What about other platforms?
+ */
+ if(num_parallel_samples >= 64) {
+ /* TODO(sergey): Could use generic round-up here. */
+ num_parallel_samples = (num_parallel_samples / 64) * 64;
+ }
+ assert(num_parallel_samples != 0);
- global_size[0] = d_w * num_parallel_samples;
+ global_size[0] = tile.w * num_parallel_samples;
#endif /* __WORK_STEALING__ */
- assert(global_size[0] * global_size[1] <=
- max_render_feasible_tile_size.x * max_render_feasible_tile_size.y);
+ assert(global_size[0] * global_size[1] <= max_render_feasible_tile_size.x * max_render_feasible_tile_size.y);
+ }
- int num_global_elements = max_render_feasible_tile_size.x *
- max_render_feasible_tile_size.y;
+ int num_global_elements = max_render_feasible_tile_size.x * max_render_feasible_tile_size.y;
/* Allocate all required global memory once. */
if(first_tile) {
@@ -185,10 +181,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
#ifdef __WORK_STEALING__
/* Calculate max groups */
size_t max_global_size[2];
- size_t tile_x = max_render_feasible_tile_size.x;
- size_t tile_y = max_render_feasible_tile_size.y;
- max_global_size[0] = (((tile_x - 1) / local_size[0]) + 1) * local_size[0];
- max_global_size[1] = (((tile_y - 1) / local_size[1]) + 1) * local_size[1];
+ max_global_size[0] = round_up(max_render_feasible_tile_size.x, local_size[0]);
+ max_global_size[1] = round_up(max_render_feasible_tile_size.y, local_size[1]);
/* Denotes the maximum work groups possible w.r.t. current tile size. */
unsigned int max_work_groups = (max_global_size[0] * max_global_size[1]) /
@@ -316,12 +310,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
size_t sum_all_radiance_local_size[2] = {16, 16};
size_t sum_all_radiance_global_size[2];
- sum_all_radiance_global_size[0] =
- (((d_w - 1) / sum_all_radiance_local_size[0]) + 1) *
- sum_all_radiance_local_size[0];
- sum_all_radiance_global_size[1] =
- (((d_h - 1) / sum_all_radiance_local_size[1]) + 1) *
- sum_all_radiance_local_size[1];
+ sum_all_radiance_global_size[0] = round_up(tile.w, sum_all_radiance_local_size[0]);
+ sum_all_radiance_global_size[1] = round_up(tile.h, sum_all_radiance_local_size[1]);
ENQUEUE_SPLIT_KERNEL(sum_all_radiance,
sum_all_radiance_global_size,
More information about the Bf-blender-cvs
mailing list