[Bf-blender-cvs] [a908e63590] cycles_split_kernel: Merge branch 'master' into cycles_split_kernel
Sergey Sharybin
noreply at git.blender.org
Fri Feb 17 14:23:15 CET 2017
Commit: a908e63590cf8579894642df095c64dbe3abed01
Author: Sergey Sharybin
Date: Fri Feb 17 13:54:04 2017 +0100
Branches: cycles_split_kernel
https://developer.blender.org/rBa908e63590cf8579894642df095c64dbe3abed01
Merge branch 'master' into cycles_split_kernel
===================================================================
===================================================================
diff --cc intern/cycles/kernel/kernel_shadow.h
index 84b5fd254c,06a77a208c..6abfa9c387
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@@ -168,129 -204,262 +204,262 @@@ ccl_device bool shadow_blocked_transpar
return blocked;
}
- #undef STACK_MAX_HITS
-
- #else
+ /* Here we do all device specific trickery before invoking actual traversal
+ * loop to help readability of the actual logic.
+ */
+ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ uint max_hits,
+ float3 *shadow)
+ {
+ # ifdef __KERNEL_CUDA__
+ Intersection *hits = kg->hits_stack;
+ # else
+ Intersection hits_stack[SHADOW_STACK_MAX_HITS];
+ Intersection *hits = hits_stack;
+ # endif
+ # ifndef __KERNEL_GPU__
+ /* Prefer to use the stack, but fall back to dynamic allocation if max_hits
+ * is too deep. We need max_hits + 1 storage space due to the logic in
+ * scene_intersect_shadow_all, which will first store and then check if
+ * the limit is exceeded.
+ *
+ * Ignore this on GPU because of slow/unavailable malloc().
+ */
+ if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
+ if(kg->transparent_shadow_intersections == NULL) {
+ const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+ kg->transparent_shadow_intersections =
+ (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
+ }
+ hits = kg->transparent_shadow_intersections;
+ }
+ # endif /* __KERNEL_GPU__ */
+ /* Invoke actual traversal. */
+ return shadow_blocked_transparent_all_loop(kg,
+ shadow_sd,
+ state,
+ ray,
+ hits,
+ max_hits,
+ shadow);
+ }
+ # endif /* __SHADOW_RECORD_ALL__ */
- /* Shadow function to compute how much light is blocked, GPU variation.
-# ifdef __KERNEL_GPU__
++# if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
+ /* Shadow function to compute how much light is blocked,
*
* Here we raytrace from one transparent surface to the next step by step.
* To minimize overhead in cases where we don't need transparent shadows, we
* first trace a regular shadow ray. We check if the hit primitive was
* potentially transparent, and only in that case start marching. this gives
- * one extra ray cast for the cases were we do want transparency. */
+ * one extra ray cast for the cases where we do want transparency.
+ */
- ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
- ShaderData *shadow_sd,
- ccl_addr_space PathState *state,
- ccl_addr_space Ray *ray_input,
- float3 *shadow)
+ /* This function only implements the device-independent traversal logic,
+ * which requires some precalculation to be done by the caller.
+ */
+ ccl_device bool shadow_blocked_transparent_stepped_loop(
+ KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ Intersection *isect,
+ const bool blocked,
+ const bool is_transparent_isect,
+ float3 *shadow)
{
- *shadow = make_float3(1.0f, 1.0f, 1.0f);
+ if(blocked && is_transparent_isect) {
+ float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ float3 Pend = ray->P + ray->D*ray->t;
+ int bounce = state->transparent_bounce;
+ # ifdef __VOLUME__
+ PathState ps = *state;
+ # endif
+ for(;;) {
+ if(bounce >= kernel_data.integrator.transparent_max_bounce) {
+ return true;
+ }
+ if(!scene_intersect(kg,
+ *ray,
+ PATH_RAY_SHADOW_TRANSPARENT,
+ isect,
+ NULL,
+ 0.0f, 0.0f))
+ {
+ break;
+ }
+ if(!shader_transparent_shadow(kg, isect)) {
+ return true;
+ }
+ /* Attenuate the throughput. */
+ if(shadow_handle_transparent_isect(kg,
+ shadow_sd,
+ state,
+ #ifdef __VOLUME__
+ &ps,
+ #endif
+ isect,
+ ray,
+ &throughput))
+ {
+ return true;
+ }
+ /* Move ray forward. */
+ ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
+ if(ray->t != FLT_MAX) {
+ ray->D = normalize_len(Pend - ray->P, &ray->t);
+ }
+ bounce++;
+ }
+ # ifdef __VOLUME__
+ /* Attenuation for last line segment towards light. */
+ if(ps.volume_stack[0].shader != SHADER_NONE) {
+ kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
+ }
+ # endif
+ *shadow *= throughput;
+ return is_zero(throughput);
+ }
+ # ifdef __VOLUME__
+ if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+ /* Apply attenuation from current volume shader. */
+ kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
+ }
+ # endif
+ return blocked;
+ }
- if(ray_input->t == 0.0f)
- return false;
+ ccl_device bool shadow_blocked_transparent_stepped(
+ KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ Ray *ray,
+ Intersection *isect,
+ float3 *shadow)
+ {
+ const bool blocked = scene_intersect(kg,
+ *ray,
+ PATH_RAY_SHADOW_OPAQUE,
+ isect,
+ NULL,
+ 0.0f, 0.0f);
+ const bool is_transparent_isect = blocked
+ ? shader_transparent_shadow(kg, isect)
+ : false;
+ return shadow_blocked_transparent_stepped_loop(kg,
+ shadow_sd,
+ state,
+ ray,
+ isect,
+ blocked,
+ is_transparent_isect,
+ shadow);
+ }
-# endif /* __KERNEL_GPU__ */
++# endif /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
+ #endif /* __TRANSPARENT_SHADOWS__ */
+
+ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
+ ShaderData *shadow_sd,
+ ccl_addr_space PathState *state,
+ ccl_addr_space Ray *ray_input,
+ float3 *shadow)
+ {
+ /* Special trickery for split kernel: some data is coming from the
+ * global memory.
+ */
#ifdef __SPLIT_KERNEL__
Ray private_ray = *ray_input;
Ray *ray = &private_ray;
- #else
- Ray *ray = ray_input;
- #endif
-
- #ifdef __SPLIT_KERNEL__
- Intersection *isect = &kg->isect_shadow[SD_THREAD];
+ Intersection *isect = &kernel_split_state.isect_shadow[SD_THREAD];
- #else
+ #else /* __SPLIT_KERNEL__ */
+ Ray *ray = ray_input;
Intersection isect_object;
Intersection *isect = &isect_object;
- #endif
-
- bool blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
-
+ #endif /* __SPLIT_KERNEL__ */
+ /* Some common early checks. */
+ *shadow = make_float3(1.0f, 1.0f, 1.0f);
+ if(ray->t == 0.0f) {
+ return false;
+ }
+ /* Do actual shadow shading. */
+ /* First of all, we check if the integrator requires transparent shadows.
+ * If not, we use the simplest and fastest way to calculate occlusion.
+ */
#ifdef __TRANSPARENT_SHADOWS__
- if(blocked && kernel_data.integrator.transparent_shadows) {
- if(shader_transparent_shadow(kg, isect)) {
- float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- float3 Pend = ray->P + ray->D*ray->t;
- int bounce = state->transparent_bounce;
- #ifdef __VOLUME__
- PathState ps = *state;
+ if(!kernel_data.integrator.transparent_shadows)
#endif
-
- for(;;) {
- if(bounce >= kernel_data.integrator.transparent_max_bounce)
- return true;
-
- if(!scene_intersect(kg, *ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f))
- {
- #ifdef __VOLUME__
- /* attenuation for last line segment towards light */
- if(ps.volume_stack[0].shader != SHADER_NONE)
- kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
- #endif
-
- *shadow *= throughput;
-
- return false;
- }
-
- if(!shader_transparent_shadow(kg, isect)) {
- return true;
- }
-
- #ifdef __VOLUME__
- /* attenuation between last surface and next surface */
- if(ps.volume_stack[0].shader != SHADER_NONE) {
- Ray segment_ray = *ray;
- segment_ray.t = isect->t;
- kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
- }
- #endif
-
- /* setup shader data at surface */
- shader_setup_from_ray(kg, shadow_sd, isect, ray);
-
- /* attenuation from transparent surface */
- if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) {
- path_state_modify_bounce(state, true);
- shader_eval_surface(kg, shadow_sd, NULL, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
- path_state_modify_bounce(state, false);
-
- throughput *= shader_bsdf_transparency(kg, shadow_sd);
- }
-
- /* stop if all light is blocked */
- if(is_zero(throughput)) {
- return true;
- }
-
- /* move ray forward */
- ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
- if(ray->t != FLT_MAX) {
- ray->D = normalize_len(Pend - ra
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list