[Bf-blender-cvs] [fb99ea79f84] master: Code refactor: split displace/background into separate kernels, remove luma.
Brecht Van Lommel
noreply at git.blender.org
Thu Oct 5 18:01:05 CEST 2017
Commit: fb99ea79f84b49bf3de2d80c14a08c9040dc4ac1
Author: Brecht Van Lommel
Date: Thu Oct 5 15:17:09 2017 +0200
Branches: master
https://developer.blender.org/rBfb99ea79f84b49bf3de2d80c14a08c9040dc4ac1
Code refactor: split displace/background into separate kernels, remove luma.
===================================================================
M intern/cycles/device/device_cpu.cpp
M intern/cycles/device/device_cuda.cpp
M intern/cycles/device/device_multi.cpp
M intern/cycles/device/device_network.cpp
M intern/cycles/device/device_network.h
M intern/cycles/device/device_task.cpp
M intern/cycles/device/device_task.h
M intern/cycles/device/opencl/opencl_base.cpp
M intern/cycles/kernel/kernel_bake.h
M intern/cycles/kernel/kernel_shader.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M intern/cycles/kernel/kernels/cuda/kernel.cu
M intern/cycles/kernel/kernels/opencl/kernel.cl
M intern/cycles/kernel/osl/osl_shader.cpp
M intern/cycles/kernel/osl/osl_shader.h
M intern/cycles/render/bake.cpp
M intern/cycles/render/light.cpp
M intern/cycles/render/mesh_displace.cpp
===================================================================
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index ff34f4f9ce4..19e3c0a9075 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -171,10 +171,10 @@ public:
DeviceRequestedFeatures requested_features;
- KernelFunctions<void(*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel;
- KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
- KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
- KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, float*, int, int, int, int, int)> shader_kernel;
+ KernelFunctions<void(*)(KernelGlobals *, float *, int, int, int, int, int)> path_trace_kernel;
+ KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_half_float_kernel;
+ KernelFunctions<void(*)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int)> convert_to_byte_kernel;
+ KernelFunctions<void(*)(KernelGlobals *, uint4 *, float4 *, int, int, int, int, int)> shader_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, float*, float*, float*, float*, float*, int*, int, int)> filter_divide_shadow_kernel;
KernelFunctions<void(*)(int, TilesInfo*, int, int, int, int, float*, float*, int*, int, int)> filter_get_feature_kernel;
@@ -756,7 +756,6 @@ public:
shader_kernel()(&kg,
(uint4*)task.shader_input,
(float4*)task.shader_output,
- (float*)task.shader_output_luma,
task.shader_eval_type,
task.shader_filter,
x,
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 8cfc5332e94..734edcff503 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1424,14 +1424,16 @@ public:
CUfunction cuShader;
CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
- CUdeviceptr d_output_luma = cuda_device_ptr(task.shader_output_luma);
/* get kernel function */
if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_bake"));
}
+ else if(task.shader_eval_type == SHADER_EVAL_DISPLACE) {
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_displace"));
+ }
else {
- cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_shader"));
+ cuda_assert(cuModuleGetFunction(&cuShader, cuModule, "kernel_cuda_background"));
}
/* do tasks in smaller chunks, so we can cancel it */
@@ -1450,9 +1452,6 @@ public:
int arg = 0;
args[arg++] = &d_input;
args[arg++] = &d_output;
- if(task.shader_eval_type < SHADER_EVAL_BAKE) {
- args[arg++] = &d_output_luma;
- }
args[arg++] = &task.shader_eval_type;
if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
args[arg++] = &task.shader_filter;
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 164ed50bdf6..b17b972b06f 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -383,7 +383,6 @@ public:
if(task.rgba_half) subtask.rgba_half = sub.ptr_map[task.rgba_half];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];
- if(task.shader_output_luma) subtask.shader_output_luma = sub.ptr_map[task.shader_output_luma];
sub.device->task_add(subtask);
}
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 4ff8647f66b..deea59f1d23 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -660,10 +660,6 @@ protected:
if(task.shader_output)
task.shader_output = device_ptr_from_client_pointer(task.shader_output);
- if(task.shader_output_luma)
- task.shader_output_luma = device_ptr_from_client_pointer(task.shader_output_luma);
-
-
task.acquire_tile = function_bind(&DeviceServer::task_acquire_tile, this, _1, _2);
task.release_tile = function_bind(&DeviceServer::task_release_tile, this, _1);
task.update_progress_sample = function_bind(&DeviceServer::task_update_progress_sample, this);
diff --git a/intern/cycles/device/device_network.h b/intern/cycles/device/device_network.h
index 7bfebaf5aec..3d3bd99dfe7 100644
--- a/intern/cycles/device/device_network.h
+++ b/intern/cycles/device/device_network.h
@@ -132,7 +132,7 @@ public:
archive & type & task.x & task.y & task.w & task.h;
archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
archive & task.offset & task.stride;
- archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type;
+ archive & task.shader_input & task.shader_output & task.shader_eval_type;
archive & task.shader_x & task.shader_w;
archive & task.need_finish_queue;
}
@@ -291,7 +291,7 @@ public:
*archive & type & task.x & task.y & task.w & task.h;
*archive & task.rgba_byte & task.rgba_half & task.buffer & task.sample & task.num_samples;
*archive & task.offset & task.stride;
- *archive & task.shader_input & task.shader_output & task.shader_output_luma & task.shader_eval_type;
+ *archive & task.shader_input & task.shader_output & task.shader_eval_type;
*archive & task.shader_x & task.shader_w;
*archive & task.need_finish_queue;
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 3bc4c310283..3c7d24fb5b7 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -31,7 +31,7 @@ CCL_NAMESPACE_BEGIN
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0),
sample(0), num_samples(1),
- shader_input(0), shader_output(0), shader_output_luma(0),
+ shader_input(0), shader_output(0),
shader_eval_type(0), shader_filter(0), shader_x(0), shader_w(0)
{
last_update_time = time_dt();
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 44a1efff1f5..b9658eb978f 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -46,7 +46,7 @@ public:
int offset, stride;
device_ptr shader_input;
- device_ptr shader_output, shader_output_luma;
+ device_ptr shader_output;
int shader_eval_type;
int shader_filter;
int shader_x, shader_w;
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index 8095611f099..3db3efd1103 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -228,7 +228,8 @@ bool OpenCLDeviceBase::load_kernels(const DeviceRequestedFeatures& requested_fea
base_program = OpenCLProgram(this, "base", "kernel.cl", build_options_for_base_program(requested_features));
base_program.add_kernel(ustring("convert_to_byte"));
base_program.add_kernel(ustring("convert_to_half_float"));
- base_program.add_kernel(ustring("shader"));
+ base_program.add_kernel(ustring("displace"));
+ base_program.add_kernel(ustring("background"));
base_program.add_kernel(ustring("bake"));
base_program.add_kernel(ustring("zero_buffer"));
@@ -1112,7 +1113,6 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_input = CL_MEM_PTR(task.shader_input);
cl_mem d_output = CL_MEM_PTR(task.shader_output);
- cl_mem d_output_luma = CL_MEM_PTR(task.shader_output_luma);
cl_int d_shader_eval_type = task.shader_eval_type;
cl_int d_shader_filter = task.shader_filter;
cl_int d_shader_x = task.shader_x;
@@ -1121,10 +1121,15 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
cl_kernel kernel;
- if(task.shader_eval_type >= SHADER_EVAL_BAKE)
+ if(task.shader_eval_type >= SHADER_EVAL_BAKE) {
kernel = base_program(ustring("bake"));
- else
- kernel = base_program(ustring("shader"));
+ }
+ else if(task.shader_eval_type >= SHADER_EVAL_DISPLACE) {
+ kernel = base_program(ustring("displace"));
+ }
+ else {
+ kernel = base_program(ustring("background"));
+ }
cl_uint start_arg_index =
kernel_set_args(kernel,
@@ -1133,12 +1138,6 @@ void OpenCLDeviceBase::shader(DeviceTask& task)
d_input,
d_output);
- if(task.shader_eval_type < SHADER_EVAL_BAKE) {
- start_arg_index += kernel_set_args(kernel,
- start_arg_index,
- d_output_luma);
- }
-
set_kernel_arg_buffers(kernel, &start_arg_index);
start_arg_index += kernel_set_args(kernel,
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 0d10e17a593..84d8d84d486 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -493,78 +493,69 @@ ccl_device void kernel_bake_evaluate(KernelGlobals *kg, ccl_global uint4 *input,
#endif /* __BAKING__ */
-ccl_device void kernel_shader_evaluate(KernelGlobals *kg,
- ccl_global uint4 *input,
- ccl_global float4 *output,
- ccl_global float *output_luma,
- ShaderEvalType type,
- int i,
- int sample)
+ccl_device void kernel_displace_evaluate(KernelGlobals *kg,
+ ccl_global uint4 *input,
+ ccl_global float4 *output,
+ int i)
{
ShaderData sd;
PathState state = {0};
uint4 in = input[i];
- float3 out;
- if(type == SHADER_EVAL_DISPLACE) {
- /* setup shader data */
- int object = in.x;
- int prim = in.y;
- float u = __uint_as_float(in.z);
- float v = __uint_as_float(in.w);
+ /* setup shader
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list