[Bf-blender-cvs] [6ea54fe9ffe] master: Cycles: Switch to reformulated Pluecker ray/triangle intersection

Sergey Sharybin noreply at git.blender.org
Tue Mar 28 17:30:20 CEST 2017


Commit: 6ea54fe9ffe2b2514990fdf3489ca53d05ce449a
Author: Sergey Sharybin
Date:   Mon Mar 27 17:06:37 2017 +0200
Branches: master
https://developer.blender.org/rB6ea54fe9ffe2b2514990fdf3489ca53d05ce449a

Cycles: Switch to reformulated Pluecker ray/triangle intersection

The intention of this commit it to address issues mentioned in the
reports T43865,T50164 and T50452.

The code is based on Embree code with some extra vectorization
to speed up single ray to single triangle intersection.

Unfortunately, such a fix is not coming for free. There is some
slowdown for AVX2 processors, mainly due to different vectorization
code, which caused different number of instructions to be executed
and different instructions-per-cycle counters. But on another hand
this commit makes pre-AVX2 platforms such as AVX and SSE4.1 a bit
faster. The prerformance goes as following:

              2.78c AVX2   2.78c AVX   Patch AVX2         Patch AVX
BMW            05:21.09     06:05.34    05:32.97 (+3.5%)   05:34.97 (-8.5%)
Classroom      16:55.36     18:24.51    17:10.41 (+1.4%)   17:15.87 (-6.3%)
Fishy Cat      08:08.49     08:36.26    08:09.19 (+0.2%)   08:12.25 (-4.7%
Koro           11:22.54     11:45.24    11:13.25 (-1.5%)   11:43.81 (-0.3%)
Barcelone      14:18.32     16:09.46    14:15.20 (-0.4%)   14:25.15 (-10.8%)

On GPU the performance is about 1.5-2% slower in my tests on GTX1080
but afraid we can't do much as a part of this chaneg here and
consider it a price to pay for more proper intersection check.

Made in collaboration with Maxym Dmytrychenko, big thanks to him!

Reviewers: brecht, juicyfruit, lukasstockner97, dingto

Differential Revision: https://developer.blender.org/D1574

===================================================================

M	intern/cycles/kernel/bvh/bvh_shadow_all.h
M	intern/cycles/kernel/bvh/bvh_subsurface.h
M	intern/cycles/kernel/bvh/bvh_traversal.h
M	intern/cycles/kernel/bvh/bvh_volume.h
M	intern/cycles/kernel/bvh/bvh_volume_all.h
M	intern/cycles/kernel/bvh/qbvh_shadow_all.h
M	intern/cycles/kernel/bvh/qbvh_subsurface.h
M	intern/cycles/kernel/bvh/qbvh_traversal.h
M	intern/cycles/kernel/bvh/qbvh_volume.h
M	intern/cycles/kernel/bvh/qbvh_volume_all.h
M	intern/cycles/kernel/geom/geom_motion_triangle_intersect.h
M	intern/cycles/kernel/geom/geom_triangle_intersect.h
M	intern/cycles/util/util_math_intersect.h

===================================================================

diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index b2555b3a6bb..1e9f3b60a79 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -101,9 +101,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif  /* __KERNEL_SSE2__ */
 
-	TriangleIsectPrecalc isect_precalc;
-	ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 	/* traversal loop */
 	do {
 		do {
@@ -209,9 +206,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						switch(p_type) {
 							case PRIMITIVE_TRIANGLE: {
 								hit = triangle_intersect(kg,
-								                         &isect_precalc,
 								                         isect_array,
 								                         P,
+								                         dir,
 								                         PATH_RAY_SHADOW,
 								                         object,
 								                         prim_addr);
@@ -220,9 +217,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #if BVH_FEATURE(BVH_MOTION)
 							case PRIMITIVE_MOTION_TRIANGLE: {
 								hit = motion_triangle_intersect(kg,
-								                                &isect_precalc,
 								                                isect_array,
 								                                P,
+								                                dir,
 								                                ray->time,
 								                                PATH_RAY_SHADOW,
 								                                object,
@@ -325,7 +322,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 					isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
-					ray_triangle_intersect_precalc(dir, &isect_precalc);
 					num_hits_in_instance = 0;
 					isect_array->t = isect_t;
 
@@ -365,8 +361,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 				bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
 #  endif
 
-				ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 				/* scale isect->t to adjust for instancing */
 				for(int i = 0; i < num_hits_in_instance; i++) {
 					(isect_array-i-1)->t *= t_fac;
@@ -378,7 +372,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #  else
 				bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
-				ray_triangle_intersect_precalc(dir, &isect_precalc);
 			}
 
 			isect_t = tmax;
diff --git a/intern/cycles/kernel/bvh/bvh_subsurface.h b/intern/cycles/kernel/bvh/bvh_subsurface.h
index cb7a4e3bc31..77778f5e36f 100644
--- a/intern/cycles/kernel/bvh/bvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/bvh_subsurface.h
@@ -109,9 +109,6 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif
 
-	TriangleIsectPrecalc isect_precalc;
-	ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 	/* traversal loop */
 	do {
 		do {
@@ -197,9 +194,9 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						for(; prim_addr < prim_addr2; prim_addr++) {
 							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 							triangle_intersect_subsurface(kg,
-							                              &isect_precalc,
 							                              ss_isect,
 							                              P,
+							                              dir,
 							                              object,
 							                              prim_addr,
 							                              isect_t,
@@ -214,9 +211,9 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						for(; prim_addr < prim_addr2; prim_addr++) {
 							kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 							motion_triangle_intersect_subsurface(kg,
-							                                     &isect_precalc,
 							                                     ss_isect,
 							                                     P,
+							                                     dir,
 							                                     ray->time,
 							                                     object,
 							                                     prim_addr,
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index eac98a3165a..ac1fc14e2bf 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -104,9 +104,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif
 
-	TriangleIsectPrecalc isect_precalc;
-	ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 	/* traversal loop */
 	do {
 		do {
@@ -238,9 +235,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 								BVH_DEBUG_NEXT_INTERSECTION();
 								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								if(triangle_intersect(kg,
-								                      &isect_precalc,
 								                      isect,
 								                      P,
+								                      dir,
 								                      visibility,
 								                      object,
 								                      prim_addr))
@@ -267,9 +264,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 								BVH_DEBUG_NEXT_INTERSECTION();
 								kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
 								if(motion_triangle_intersect(kg,
-								                             &isect_precalc,
 								                             isect,
 								                             P,
+								                             dir,
 								                             ray->time,
 								                             visibility,
 								                             object,
@@ -358,7 +355,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #  else
 					isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
-					ray_triangle_intersect_precalc(dir, &isect_precalc);
 
 #  if defined(__KERNEL_SSE2__)
 					Psplat[0] = ssef(P.x);
@@ -395,7 +391,6 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 #  else
 			isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
-			ray_triangle_intersect_precalc(dir, &isect_precalc);
 
 #  if defined(__KERNEL_SSE2__)
 			Psplat[0] = ssef(P.x);
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index da97dae0b99..7bf8fef35f3 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -97,9 +97,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif
 
-	TriangleIsectPrecalc isect_precalc;
-	ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 	/* traversal loop */
 	do {
 		do {
@@ -194,9 +191,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									continue;
 								}
 								triangle_intersect(kg,
-								                   &isect_precalc,
 								                   isect,
 								                   P,
+								                   dir,
 								                   visibility,
 								                   object,
 								                   prim_addr);
@@ -215,9 +212,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									continue;
 								}
 								motion_triangle_intersect(kg,
-								                          &isect_precalc,
 								                          isect,
 								                          P,
+								                          dir,
 								                          ray->time,
 								                          visibility,
 								                          object,
@@ -243,8 +240,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
-						ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 #  if defined(__KERNEL_SSE2__)
 						Psplat[0] = ssef(P.x);
 						Psplat[1] = ssef(P.y);
@@ -286,8 +281,6 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 			isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
-			ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 #  if defined(__KERNEL_SSE2__)
 			Psplat[0] = ssef(P.x);
 			Psplat[1] = ssef(P.y);
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 6efb7e265d0..b92c90a8ab7 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -101,9 +101,6 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
 #endif  /* __KERNEL_SSE2__ */
 
-	TriangleIsectPrecalc isect_precalc;
-	ray_triangle_intersect_precalc(dir, &isect_precalc);
-
 	/* traversal loop */
 	do {
 		do {
@@ -199,9 +196,9 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									continue;
 								}
 								hit = triangle_intersect(kg,
-								                         &isect_precalc,
 								                         isect_array,
 								                         P,
+								                         dir,
 								                         visibility,
 								                         object,
 								                         prim_addr);
@@ -243,9 +240,9 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 									continue;
 								}
 								hit = motion_triangle_intersect(kg,
-								                                &isect_precalc,
 								                                isect_array,
 								                                P,
+								                                dir,
 								                                ray->time,
 								                                visibility,
 								                                object,
@@ -294,7 +291,6 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 						isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
-						ray_triangle_intersect_precalc(dir, &isect_precalc);
 						num_hits_in_instance = 0;
 						isect_array->t = isect_t;
 
@@ -340,7 +336,6 

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list