[Bf-blender-cvs] [a908e63590] cycles_split_kernel: Merge branch 'master' into cycles_split_kernel

Sergey Sharybin noreply at git.blender.org
Fri Feb 17 14:23:15 CET 2017


Commit: a908e63590cf8579894642df095c64dbe3abed01
Author: Sergey Sharybin
Date:   Fri Feb 17 13:54:04 2017 +0100
Branches: cycles_split_kernel
https://developer.blender.org/rBa908e63590cf8579894642df095c64dbe3abed01

Merge branch 'master' into cycles_split_kernel

===================================================================



===================================================================

diff --cc intern/cycles/kernel/kernel_shadow.h
index 84b5fd254c,06a77a208c..6abfa9c387
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@@ -168,129 -204,262 +204,262 @@@ ccl_device bool shadow_blocked_transpar
  	return blocked;
  }
  
- #undef STACK_MAX_HITS
- 
- #else
+ /* Here we do all device specific trickery before invoking actual traversal
+  * loop to help readability of the actual logic.
+  */
+ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
+                                                ShaderData *shadow_sd,
+                                                ccl_addr_space PathState *state,
+                                                Ray *ray,
+                                                uint max_hits,
+                                                float3 *shadow)
+ {
+ #    ifdef __KERNEL_CUDA__
+ 	Intersection *hits = kg->hits_stack;
+ #    else
+ 	Intersection hits_stack[SHADOW_STACK_MAX_HITS];
+ 	Intersection *hits = hits_stack;
+ #    endif
+ #    ifndef __KERNEL_GPU__
+ 	/* Prefer the stack, but fall back to dynamic allocation when max_hits is
+ 	 * too large for it. We need max_hits + 1 storage slots because
+ 	 * scene_intersect_shadow_all first stores a hit and only then checks
+ 	 * whether the limit is exceeded.
+ 	 *
+ 	 * Ignore this on GPU because of slow/unavailable malloc().
+ 	 */
+ 	if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
+ 		if(kg->transparent_shadow_intersections == NULL) {
+ 			const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
+ 			kg->transparent_shadow_intersections =
+ 				(Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
+ 		}
+ 		hits = kg->transparent_shadow_intersections;
+ 	}
+ #    endif  /* __KERNEL_GPU__ */
+ 	/* Invoke actual traversal. */
+ 	return shadow_blocked_transparent_all_loop(kg,
+ 	                                           shadow_sd,
+ 	                                           state,
+ 	                                           ray,
+ 	                                           hits,
+ 	                                           max_hits,
+ 	                                           shadow);
+ }
+ #  endif  /* __SHADOW_RECORD_ALL__ */
  
- /* Shadow function to compute how much light is blocked, GPU variation.
 -#  ifdef __KERNEL_GPU__
++#  if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
+ /* Shadow function to compute how much light is blocked,
   *
   * Here we raytrace from one transparent surface to the next step by step.
   * To minimize overhead in cases where we don't need transparent shadows, we
   * first trace a regular shadow ray. We check if the hit primitive was
   * potentially transparent, and only in that case start marching. This gives
-  * one extra ray cast for the cases where we do want transparency. */
+  * one extra ray cast for the cases where we do want transparency.
+  */
  
- ccl_device_noinline bool shadow_blocked(KernelGlobals *kg,
-                                         ShaderData *shadow_sd,
-                                         ccl_addr_space PathState *state,
-                                         ccl_addr_space Ray *ray_input,
-                                         float3 *shadow)
+ /* This function only implements the device-independent traversal logic,
+  * which requires some precalculation to have been done beforehand.
+  */
+ ccl_device bool shadow_blocked_transparent_stepped_loop(
+         KernelGlobals *kg,
+         ShaderData *shadow_sd,
+         ccl_addr_space PathState *state,
+         Ray *ray,
+         Intersection *isect,
+         const bool blocked,
+         const bool is_transparent_isect,
+         float3 *shadow)
  {
- 	*shadow = make_float3(1.0f, 1.0f, 1.0f);
+ 	if(blocked && is_transparent_isect) {
+ 		float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
+ 		float3 Pend = ray->P + ray->D*ray->t;
+ 		int bounce = state->transparent_bounce;
+ #    ifdef __VOLUME__
+ 		PathState ps = *state;
+ #    endif
+ 		for(;;) {
+ 			if(bounce >= kernel_data.integrator.transparent_max_bounce) {
+ 				return true;
+ 			}
+ 			if(!scene_intersect(kg,
+ 			                    *ray,
+ 			                    PATH_RAY_SHADOW_TRANSPARENT,
+ 			                    isect,
+ 			                    NULL,
+ 			                    0.0f, 0.0f))
+ 			{
+ 				break;
+ 			}
+ 			if(!shader_transparent_shadow(kg, isect)) {
+ 				return true;
+ 			}
+ 			/* Attenuate the throughput. */
+ 			if(shadow_handle_transparent_isect(kg,
+ 			                                   shadow_sd,
+ 			                                   state,
+ #ifdef __VOLUME__
+ 			                                   &ps,
+ #endif
+ 			                                   isect,
+ 			                                   ray,
+ 			                                   &throughput))
+ 			{
+ 				return true;
+ 			}
+ 			/* Move ray forward. */
+ 			ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
+ 			if(ray->t != FLT_MAX) {
+ 				ray->D = normalize_len(Pend - ray->P, &ray->t);
+ 			}
+ 			bounce++;
+ 		}
+ #    ifdef __VOLUME__
+ 		/* Attenuation for last line segment towards light. */
+ 		if(ps.volume_stack[0].shader != SHADER_NONE) {
+ 			kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
+ 		}
+ #    endif
+ 		*shadow *= throughput;
+ 		return is_zero(throughput);
+ 	}
+ #    ifdef __VOLUME__
+ 	if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
+ 		/* Apply attenuation from current volume shader. */
+ 		kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
+ 	}
+ #    endif
+ 	return blocked;
+ }
  
- 	if(ray_input->t == 0.0f)
- 		return false;
+ ccl_device bool shadow_blocked_transparent_stepped(
+         KernelGlobals *kg,
+         ShaderData *shadow_sd,
+         ccl_addr_space PathState *state,
+         Ray *ray,
+         Intersection *isect,
+         float3 *shadow)
+ {
+ 	const bool blocked = scene_intersect(kg,
+ 	                                     *ray,
+ 	                                     PATH_RAY_SHADOW_OPAQUE,
+ 	                                     isect,
+ 	                                     NULL,
+ 	                                     0.0f, 0.0f);
+ 	const bool is_transparent_isect = blocked
+ 	        ? shader_transparent_shadow(kg, isect)
+ 	        : false;
+ 	return shadow_blocked_transparent_stepped_loop(kg,
+ 	                                               shadow_sd,
+ 	                                               state,
+ 	                                               ray,
+ 	                                               isect,
+ 	                                               blocked,
+ 	                                               is_transparent_isect,
+ 	                                               shadow);
+ }
  
 -#  endif  /* __KERNEL_GPU__ */
++#  endif  /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
+ #endif /* __TRANSPARENT_SHADOWS__ */
+ 
+ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
+                                       ShaderData *shadow_sd,
+                                       ccl_addr_space PathState *state,
+                                       ccl_addr_space Ray *ray_input,
+                                       float3 *shadow)
+ {
+ 	/* Special trickery for split kernel: some data is coming from the
+ 	 * global memory.
+ 	 */
  #ifdef __SPLIT_KERNEL__
  	Ray private_ray = *ray_input;
  	Ray *ray = &private_ray;
- #else
- 	Ray *ray = ray_input;
- #endif
- 
- #ifdef __SPLIT_KERNEL__
 -	Intersection *isect = &kg->isect_shadow[SD_THREAD];
 +	Intersection *isect = &kernel_split_state.isect_shadow[SD_THREAD];
- #else
+ #else  /* __SPLIT_KERNEL__ */
+ 	Ray *ray = ray_input;
  	Intersection isect_object;
  	Intersection *isect = &isect_object;
- #endif
- 
- 	bool blocked = scene_intersect(kg, *ray, PATH_RAY_SHADOW_OPAQUE, isect, NULL, 0.0f, 0.0f);
- 
+ #endif  /* __SPLIT_KERNEL__ */
+ 	/* Some common early checks. */
+ 	*shadow = make_float3(1.0f, 1.0f, 1.0f);
+ 	if(ray->t == 0.0f) {
+ 		return false;
+ 	}
+ 	/* Do actual shadow shading. */
+ 	/* First of all, we check if the integrator requires transparent shadows.
+ 	 * If not, we use the simplest and fastest way to calculate occlusion.
+ 	 */
  #ifdef __TRANSPARENT_SHADOWS__
- 	if(blocked && kernel_data.integrator.transparent_shadows) {
- 		if(shader_transparent_shadow(kg, isect)) {
- 			float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
- 			float3 Pend = ray->P + ray->D*ray->t;
- 			int bounce = state->transparent_bounce;
- #ifdef __VOLUME__
- 			PathState ps = *state;
+ 	if(!kernel_data.integrator.transparent_shadows)
  #endif
- 
- 			for(;;) {
- 				if(bounce >= kernel_data.integrator.transparent_max_bounce)
- 					return true;
- 
- 				if(!scene_intersect(kg, *ray, PATH_RAY_SHADOW_TRANSPARENT, isect, NULL, 0.0f, 0.0f))
- 				{
- #ifdef __VOLUME__
- 					/* attenuation for last line segment towards light */
- 					if(ps.volume_stack[0].shader != SHADER_NONE)
- 						kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
- #endif
- 
- 					*shadow *= throughput;
- 
- 					return false;
- 				}
- 
- 				if(!shader_transparent_shadow(kg, isect)) {
- 					return true;
- 				}
- 
- #ifdef __VOLUME__
- 				/* attenuation between last surface and next surface */
- 				if(ps.volume_stack[0].shader != SHADER_NONE) {
- 					Ray segment_ray = *ray;
- 					segment_ray.t = isect->t;
- 					kernel_volume_shadow(kg, shadow_sd, &ps, &segment_ray, &throughput);
- 				}
- #endif
- 
- 				/* setup shader data at surface */
- 				shader_setup_from_ray(kg, shadow_sd, isect, ray);
- 
- 				/* attenuation from transparent surface */
- 				if(!(ccl_fetch(shadow_sd, flag) & SD_HAS_ONLY_VOLUME)) {
- 					path_state_modify_bounce(state, true);
- 					shader_eval_surface(kg, shadow_sd, NULL, state, 0.0f, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
- 					path_state_modify_bounce(state, false);
- 
- 					throughput *= shader_bsdf_transparency(kg, shadow_sd);
- 				}
- 
- 				/* stop if all light is blocked */
- 				if(is_zero(throughput)) {
- 					return true;
- 				}
- 
- 				/* move ray forward */
- 				ray->P = ray_offset(ccl_fetch(shadow_sd, P), -ccl_fetch(shadow_sd, Ng));
- 				if(ray->t != FLT_MAX) {
- 					ray->D = normalize_len(Pend - ra

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list