[Bf-blender-cvs] [d2bb0e6] master: Fix T46207: Slow OpenCL GPU bake and blown out baking Cycles render
Sergey Sharybin
noreply at git.blender.org
Tue May 31 17:48:29 CEST 2016
Commit: d2bb0e660bdec00164a9fc72d145e308fb723d16
Author: Sergey Sharybin
Date: Tue May 31 17:47:54 2016 +0200
Branches: master
https://developer.blender.org/rBd2bb0e660bdec00164a9fc72d145e308fb723d16
Fix T46207: Slow OpenCL GPU bake and blown out baking Cycles render
===================================================================
M intern/cycles/device/device_opencl.cpp
M intern/cycles/kernel/kernel_bake.h
M intern/cycles/render/bake.cpp
===================================================================
diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp
index c7dcf76..afe21c4 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -1224,18 +1224,28 @@ public:
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
clGetDeviceInfo(cdDevice,
CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
-
- /* try to divide evenly over 2 dimensions */
+
+ /* Try to divide evenly over 2 dimensions. */
size_t sqrt_workgroup_size = max((size_t)sqrt((double)workgroup_size), 1);
size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};
- /* some implementations have max size 1 on 2nd dimension */
+ /* Some implementations have max size 1 on 2nd dimension. */
if(local_size[1] > max_work_items[1]) {
local_size[0] = workgroup_size/max_work_items[1];
local_size[1] = max_work_items[1];
}
- size_t global_size[2] = {global_size_round_up(local_size[0], w), global_size_round_up(local_size[1], h)};
+ size_t global_size[2] = {global_size_round_up(local_size[0], w),
+ global_size_round_up(local_size[1], h)};
+
+ /* Vertical size of 1 is coming from bake/shade kernels where we should
+ * not round anything up because otherwise we'll either be doing too
+ * much work per pixel (if we don't check global ID on Y axis) or will
+ * be checking for global ID to always have Y of 0.
+ */
+ if (h == 1) {
+ global_size[h] = 1;
+ }
/* run kernel */
opencl_assert(clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, NULL, 0, NULL, NULL));
@@ -1320,48 +1330,49 @@ public:
else
kernel = ckShaderKernel;
- for(int sample = 0; sample < task.num_samples; sample++) {
-
- if(task.get_cancel())
- break;
-
- cl_int d_sample = sample;
-
- cl_uint start_arg_index =
- kernel_set_args(kernel,
- 0,
- d_data,
- d_input,
- d_output);
+ cl_uint start_arg_index =
+ kernel_set_args(kernel,
+ 0,
+ d_data,
+ d_input,
+ d_output);
- if(task.shader_eval_type < SHADER_EVAL_BAKE) {
- start_arg_index += kernel_set_args(kernel,
- start_arg_index,
- d_output_luma);
- }
+ if(task.shader_eval_type < SHADER_EVAL_BAKE) {
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_output_luma);
+ }
#define KERNEL_TEX(type, ttype, name) \
- set_kernel_arg_mem(kernel, &start_arg_index, #name);
+ set_kernel_arg_mem(kernel, &start_arg_index, #name);
#include "kernel_textures.h"
#undef KERNEL_TEX
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_shader_eval_type);
+ if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
start_arg_index += kernel_set_args(kernel,
start_arg_index,
- d_shader_eval_type);
- if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
- start_arg_index += kernel_set_args(kernel,
- start_arg_index,
- d_shader_filter);
- }
- start_arg_index += kernel_set_args(kernel,
- start_arg_index,
- d_shader_x,
- d_shader_w,
- d_offset,
- d_sample);
+ d_shader_filter);
+ }
+ start_arg_index += kernel_set_args(kernel,
+ start_arg_index,
+ d_shader_x,
+ d_shader_w,
+ d_offset);
+
+ for(int sample = 0; sample < task.num_samples; sample++) {
+
+ if(task.get_cancel())
+ break;
+
+ kernel_set_args(kernel, start_arg_index, sample);
enqueue_kernel(kernel, task.shader_w, 1);
+ clFinish(cqCommandQueue);
+
task.update_progress(NULL);
}
}
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 3966a06..8d05bef 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -482,12 +482,10 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
}
/* write output */
- float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f;
+ const float output_fac = is_aa_pass(type)? 1.0f/num_samples: 1.0f;
+ const float4 scaled_result = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
- if(sample == 0)
- output[i] = make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
- else
- output[i] += make_float4(out.x, out.y, out.z, 1.0f) * output_fac;
+ output[i] = (sample == 0)? scaled_result: output[i] + scaled_result;
}
#endif /* __BAKING__ */
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index 5bf5e51..13310a6 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -177,7 +177,7 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
device->mem_alloc(d_input, MEM_READ_ONLY);
device->mem_copy_to(d_input);
- device->mem_alloc(d_output, MEM_WRITE_ONLY);
+ device->mem_alloc(d_output, MEM_READ_WRITE);
DeviceTask task(DeviceTask::SHADER);
task.shader_input = d_input.device_pointer;
More information about the Bf-blender-cvs
mailing list