[Bf-blender-cvs] [f215f97] cycles_kernel_split: Move max_render_feasible_tile_size to device_opencl.cpp
varunsundar08
noreply at git.blender.org
Tue May 5 20:05:55 CEST 2015
Commit: f215f972ef0471745502bbb115a8f7981d5a9691
Author: varunsundar08
Date: Tue May 5 21:23:31 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rBf215f972ef0471745502bbb115a8f7981d5a9691
Move max_render_feasible_tile_size to device_opencl.cpp
===================================================================
M intern/cycles/device/device_opencl.cpp
M intern/cycles/render/buffers.h
===================================================================
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index 6a5e216..86124dc 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -2493,7 +2493,7 @@ public:
clReleaseProgram(sumAllRadiance_program);
}
- void path_trace(RenderTile& rtile)
+ void path_trace(RenderTile& rtile, int2 max_render_feasible_tile_size)
{
/* cast arguments to cl types */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
@@ -2507,8 +2507,8 @@ public:
cl_int d_stride = rtile.stride;
/* Make sure that set render feasible tile size is a multiple of local work size dimensions */
- assert(rtile.max_render_feasible_tile_size.x % SPLIT_KERNEL_LOCAL_SIZE_X == 0);
- assert(rtile.max_render_feasible_tile_size.y % SPLIT_KERNEL_LOCAL_SIZE_Y == 0);
+ assert(max_render_feasible_tile_size.x % SPLIT_KERNEL_LOCAL_SIZE_X == 0);
+ assert(max_render_feasible_tile_size.y % SPLIT_KERNEL_LOCAL_SIZE_Y == 0);
/* ray_state and hostRayStateArray should be of same size */
assert(hostRayState_size == rayState_size);
@@ -2528,7 +2528,7 @@ public:
unsigned int num_parallel_samples = 1;
#else
global_size[1] = (((d_h - 1) / local_size[1]) + 1) * local_size[1];
- unsigned int num_threads = rtile.max_render_feasible_tile_size.x * rtile.max_render_feasible_tile_size.y;
+ unsigned int num_threads = max_render_feasible_tile_size.x * max_render_feasible_tile_size.y;
unsigned int num_tile_columns_possible = num_threads / global_size[1];
/* Estimate number of parallel samples that can be processed in parallel */
unsigned int num_parallel_samples = (num_tile_columns_possible / d_w) <= rtile.num_samples ? (num_tile_columns_possible / d_w) : rtile.num_samples;
@@ -2545,7 +2545,7 @@ public:
/* Allocate all required global memory once */
if(first_tile) {
- size_t num_global_elements = rtile.max_render_feasible_tile_size.x * rtile.max_render_feasible_tile_size.y;
+ size_t num_global_elements = max_render_feasible_tile_size.x * max_render_feasible_tile_size.y;
#ifdef __MULTI_CLOSURE__
size_t ShaderClosure_size = get_shader_closure_size(clos_max);
@@ -2556,8 +2556,8 @@ public:
#ifdef __WORK_STEALING__
/* Calculate max groups */
size_t max_global_size[2];
- size_t tile_x = rtile.max_render_feasible_tile_size.x;
- size_t tile_y = rtile.max_render_feasible_tile_size.y;
+ size_t tile_x = max_render_feasible_tile_size.x;
+ size_t tile_y = max_render_feasible_tile_size.y;
max_global_size[0] = (((tile_x - 1) / local_size[0]) + 1) * local_size[0];
max_global_size[1] = (((tile_y - 1) / local_size[1]) + 1) * local_size[1];
max_work_groups = (max_global_size[0] * max_global_size[1]) / (local_size[0] * local_size[1]);
@@ -3520,21 +3520,17 @@ The current tile of dimensions %dx%d is split into tiles of dimension %dx%d for
/* Process all split tiles */
for(int tile_iter = 0; tile_iter < to_path_trace_render_tiles.size(); tile_iter++) {
- /* Set max_render_feasible_render_tile_size for all tiles */
- to_path_trace_render_tiles[tile_iter].max_render_feasible_tile_size = max_render_feasible_tile_size;
- /* The second argument is dummy */
- path_trace(to_path_trace_render_tiles[tile_iter]);
+ path_trace(to_path_trace_render_tiles[tile_iter], max_render_feasible_tile_size);
}
}
else {
/* No splitting required; process the entire tile at once */
/* Render feasible tile size is user-set-tile-size itself */
- tile.max_render_feasible_tile_size.x = (((tile.tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
- tile.max_render_feasible_tile_size.y = (((tile.tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
+ max_render_feasible_tile_size.x = (((tile.tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
+ max_render_feasible_tile_size.y = (((tile.tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
/* buffer_rng_state_stride is stride itself */
tile.buffer_rng_state_stride = tile.stride;
- /* The second argument is dummy */
- path_trace(tile);
+ path_trace(tile, max_render_feasible_tile_size);
}
tile.sample = tile.start_sample + tile.num_samples;
diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h
index f7bd7a7..20bb64f 100644
--- a/intern/cycles/render/buffers.h
+++ b/intern/cycles/render/buffers.h
@@ -155,10 +155,6 @@ public:
int rng_state_offset_x;
int rng_state_offset_y;
int buffer_rng_state_stride;
- /* Record the maximum of render-feasible tile size
- * to allocate enough memory for split kernel
- */
- int2 max_render_feasible_tile_size;
device_ptr buffer;
device_ptr rng_state;
More information about the Bf-blender-cvs mailing list