[Bf-blender-cvs] [365ad27] cycles_split_kernel: Cycles: Actually implement work item functions for CPU
Mai Lavelle
noreply at git.blender.org
Thu Oct 27 17:48:59 CEST 2016
Commit: 365ad278f6ceff5ba83d596cbd4e348dff5de76b
Author: Mai Lavelle
Date: Thu Oct 27 17:36:52 2016 +0200
Branches: cycles_split_kernel
https://developer.blender.org/rB365ad278f6ceff5ba83d596cbd4e348dff5de76b
Cycles: Actually implement work item functions for CPU
The work item functions had not actually been implemented for CPU yet. Unfortunately,
we need to pass `kg` around to many more functions to make this work.
===================================================================
M intern/cycles/kernel/closure/alloc.h
M intern/cycles/kernel/closure/bssrdf.h
M intern/cycles/kernel/kernel_compat_cpu.h
M intern/cycles/kernel/kernel_compat_opencl.h
M intern/cycles/kernel/kernel_globals.h
M intern/cycles/kernel/kernel_queues.h
M intern/cycles/kernel/kernel_subsurface.h
M intern/cycles/kernel/kernel_work_stealing.h
M intern/cycles/kernel/split/kernel_background_buffer_update.h
M intern/cycles/kernel/split/kernel_data_init.h
M intern/cycles/kernel/split/kernel_direct_lighting.h
M intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
M intern/cycles/kernel/split/kernel_lamp_emission.h
M intern/cycles/kernel/split/kernel_next_iteration_setup.h
M intern/cycles/kernel/split/kernel_queue_enqueue.h
M intern/cycles/kernel/split/kernel_scene_intersect.h
M intern/cycles/kernel/split/kernel_shader_eval.h
M intern/cycles/kernel/split/kernel_shadow_blocked.h
M intern/cycles/kernel/svm/svm.h
M intern/cycles/kernel/svm/svm_closure.h
M intern/cycles/kernel/svm/svm_fresnel.h
M intern/cycles/kernel/svm/svm_light_path.h
===================================================================
diff --git a/intern/cycles/kernel/closure/alloc.h b/intern/cycles/kernel/closure/alloc.h
index b7abc1e..76563ce 100644
--- a/intern/cycles/kernel/closure/alloc.h
+++ b/intern/cycles/kernel/closure/alloc.h
@@ -16,7 +16,7 @@
CCL_NAMESPACE_BEGIN
-ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType type, float3 weight)
+ccl_device ShaderClosure *closure_alloc(KernelGlobals *kg, ShaderData *sd, int size, ClosureType type, float3 weight)
{
kernel_assert(size <= sizeof(ShaderClosure));
@@ -35,7 +35,7 @@ ccl_device ShaderClosure *closure_alloc(ShaderData *sd, int size, ClosureType ty
return sc;
}
-ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
+ccl_device ccl_addr_space void *closure_alloc_extra(KernelGlobals *kg, ShaderData *sd, int size)
{
/* Allocate extra space for closure that need more parameters. We allocate
* in chunks of sizeof(ShaderClosure) starting from the end of the closure
@@ -58,9 +58,9 @@ ccl_device ccl_addr_space void *closure_alloc_extra(ShaderData *sd, int size)
return (ccl_addr_space void*)(ccl_fetch(sd, closure) + MAX_CLOSURE - num_closure_extra);
}
-ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 weight)
+ccl_device_inline ShaderClosure *bsdf_alloc(KernelGlobals *kg, ShaderData *sd, int size, float3 weight)
{
- ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ ShaderClosure *sc = closure_alloc(kg, sd, size, CLOSURE_NONE_ID, weight);
if(!sc)
return NULL;
@@ -71,9 +71,9 @@ ccl_device_inline ShaderClosure *bsdf_alloc(ShaderData *sd, int size, float3 wei
}
#ifdef __OSL__
-ccl_device_inline ShaderClosure *bsdf_alloc_osl(ShaderData *sd, int size, float3 weight, void *data)
+ccl_device_inline ShaderClosure *bsdf_alloc_osl(KernelGlobals *kg, ShaderData *sd, int size, float3 weight, void *data)
{
- ShaderClosure *sc = closure_alloc(sd, size, CLOSURE_NONE_ID, weight);
+ ShaderClosure *sc = closure_alloc(kg, sd, size, CLOSURE_NONE_ID, weight);
if(!sc)
return NULL;
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index af0bbd8..a342025 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -344,9 +344,9 @@ ccl_device void bssrdf_none_sample(const ShaderClosure *sc, float xi, float *r,
/* Generic */
-ccl_device_inline Bssrdf *bssrdf_alloc(ShaderData *sd, float3 weight)
+ccl_device_inline Bssrdf *bssrdf_alloc(KernelGlobals *kg, ShaderData *sd, float3 weight)
{
- Bssrdf *bssrdf = (Bssrdf*)closure_alloc(sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
+ Bssrdf *bssrdf = (Bssrdf*)closure_alloc(kg, sd, sizeof(Bssrdf), CLOSURE_NONE_ID, weight);
if(!bssrdf)
return NULL;
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index e6aa8f8..e347a1e 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -45,12 +45,13 @@
#define ccl_addr_space
#define ccl_local_id(d) 0
-#define ccl_global_id(d) 0
+#define ccl_global_id(d) (kg->global_id[d])
#define ccl_local_size(d) 1
-#define ccl_global_size(d) 1
+#define ccl_global_size(d) (kg->global_size[d])
-#define ccl_num_groups(d) 1
+#define ccl_group_id(d) ccl_global_id(d)
+#define ccl_num_groups(d) ccl_global_size(d)
/* On x86_64, versions of glibc < 2.16 have an issue where expf is
* much slower than the double version. This was fixed in glibc 2.16.
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index b60eb14..ea99fdb 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -55,6 +55,7 @@
#define ccl_local_size(d) get_local_size(d)
#define ccl_global_size(d) get_global_size(d)
+#define ccl_group_id(d) get_group_id(d)
#define ccl_num_groups(d) get_num_groups(d)
/* Selective nodes compilation. */
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index a2d0057..121b840 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -70,6 +70,9 @@ typedef struct KernelGlobals {
Intersection *isect_shadow;
SplitData split_data;
SplitParams split_param_data;
+
+ int2 global_size;
+ int2 global_id;
} KernelGlobals;
#endif /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/kernel_queues.h b/intern/cycles/kernel/kernel_queues.h
index 8d3176f..212ef98 100644
--- a/intern/cycles/kernel/kernel_queues.h
+++ b/intern/cycles/kernel/kernel_queues.h
@@ -49,6 +49,7 @@ ccl_device void enqueue_ray_index(
* is no more ray to allocate to other threads.
*/
ccl_device int get_ray_index(
+ KernelGlobals *kg,
int thread_index, /* Global thread index. */
int queue_number, /* Queue to operate on. */
ccl_global int *queues, /* Buffer of all queues. */
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 52c05b8..5bdb3a6 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -140,7 +140,7 @@ ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
}
/* replace closures with a single diffuse bsdf closure after scatter step */
-ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight, bool hit, float3 N)
+ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, float3 weight, bool hit, float3 N)
{
sd->flag &= ~SD_CLOSURE_FLAGS;
sd->randb_closure = 0.0f;
@@ -148,7 +148,7 @@ ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 wei
sd->num_closure_extra = 0;
if(hit) {
- DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
+ DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(kg, sd, sizeof(DiffuseBsdf), weight);
if(bsdf) {
bsdf->N = N;
@@ -373,7 +373,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
subsurface_color_bump_blur(kg, sd, state, state_flag, &weight, &N);
/* Setup diffuse BSDF. */
- subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
+ subsurface_scatter_setup_diffuse_bsdf(kg, sd, weight, true, N);
}
/* subsurface scattering step, from a point on the surface to another nearby point on the same object */
@@ -463,7 +463,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS
subsurface_color_bump_blur(kg, sd, state, state_flag, &eval, &N);
/* setup diffuse bsdf */
- subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
+ subsurface_scatter_setup_diffuse_bsdf(kg, sd, eval, (ss_isect.num_hits > 0), N);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/kernel_work_stealing.h b/intern/cycles/kernel/kernel_work_stealing.h
index afb9ac7..859994e 100644
--- a/intern/cycles/kernel/kernel_work_stealing.h
+++ b/intern/cycles/kernel/kernel_work_stealing.h
@@ -46,7 +46,8 @@ ccl_device uint get_group_id_with_ray_index(uint ray_index,
}
}
-ccl_device uint get_total_work(uint tile_dim_x,
+ccl_device uint get_total_work(KernelGlobals *kg,
+ uint tile_dim_x,
uint tile_dim_y,
uint grp_idx,
uint grp_idy,
@@ -73,7 +74,8 @@ ccl_device uint get_total_work(uint tile_dim_x,
/* Returns 0 in case there is no next work available */
/* Returns 1 in case work assigned is valid */
-ccl_device int get_next_work(ccl_global uint *work_pool,
+ccl_device int get_next_work(KernelGlobals *kg,
+ ccl_global uint *work_pool,
ccl_private uint *my_work,
uint tile_dim_x,
uint tile_dim_y,
@@ -91,7 +93,8 @@ ccl_device int get_next_work(ccl_global uint *work_pool,
tile_dim_y,
parallel_samples,
1);
- uint total_work = get_total_work(tile_dim_x,
+ uint total_work = get_total_work(kg,
+ tile_dim_x,
tile_dim_y,
grp_idx,
grp_idy,
@@ -103,7 +106,8 @@ ccl_device int get_next_work(ccl_global uint *work_pool,
/* This function assumes that the passed my_work is valid. */
/* Decode sample number w.r.t. assigned my_work. */
-ccl_device uint get_my_sample(uint my_work,
+ccl_device uint get_my_sample(KernelGlobals *kg,
+ uint my_work,
uint tile_dim_x,
uint tile_dim_y,
uint parallel_samples,
@@ -138,7 +142,8 @@ ccl_device uint get_my_sample(uint my_work,
}
/* Decode pixel and tile position w.r.t. assigned my_work. */
-ccl_device void get_pixel_tile_position(ccl_private uint *pixel_x,
+ccl_device void get_pixel_tile_position(KernelGlobals *kg,
+ ccl_private uint *pixel_x,
ccl_private uint *pixel_y,
ccl_private uint *tile_x,
ccl_private uint *tile_y,
diff --git a/intern/cycles/kernel/split/kernel_background_buffer_update.h b/intern/cycles/kernel/split/kernel_background_buffer_update.h
index 24c821d..eb40473 100644
--- a/intern/cycles/kernel/split/kernel_background_buffer_update.h
+++ b/intern/cycles/kernel/split/kernel_background_buffer_update.h
@@ -83,7 +83,7 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
split_params->queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
}
char enqueue_flag = 0;
- ray_index = get_ray_index(ray_index,
+ ray_index = get_ray_index(kg, ray_index,
QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
split_state->queue_data,
split_params->queue_size,
@@ -142,8 +142,8 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
#ifdef __WORK_STEALING__
my_work = split_state->work_array[ray_index];
- sample = get_my_sample(my_work, sw, sh, parallel_samples, ray_index) + split_params->start_sample;
- g
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list