[Bf-blender-cvs] [a1348dde2e] master: Cycles: Fix speed regression on GPU

Sergey Sharybin noreply at git.blender.org
Thu Mar 23 18:05:20 CET 2017


Commit: a1348dde2ed27d0a8a1d62f9e17602857b1f19f1
Author: Sergey Sharybin
Date:   Thu Mar 23 17:15:54 2017 +0100
Branches: master
https://developer.blender.org/rBa1348dde2ed27d0a8a1d62f9e17602857b1f19f1

Cycles: Fix speed regression on GPU

Avoid construction of a temporary array and make the utility function force-inlined.
Additionally, avoid calling float4_to_float3() twice.

This brings render times back to the same values as before the current patch series.

===================================================================

M	intern/cycles/kernel/geom/geom_triangle_intersect.h
M	intern/cycles/util/util_math_intersect.h

===================================================================

diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index 973b356637..313121104f 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -51,19 +51,22 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
 
 #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
-	const ssef *verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
+	const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
 #else
 	const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
 	             tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
 	             tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
-	const float3 verts[3] = {float4_to_float3(tri_a),
-	                         float4_to_float3(tri_b),
-	                         float4_to_float3(tri_c)};
 #endif
 	float t, u, v;
 	if(ray_triangle_intersect(isect_precalc,
 	                          P, isect->t,
-	                          verts,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+	                          ssef_verts,
+#else
+	                          float4_to_float3(tri_a),
+	                          float4_to_float3(tri_b),
+	                          float4_to_float3(tri_c),
+#endif
 	                          &u, &v, &t))
 	{
 #ifdef __VISIBILITY_FLAG__
@@ -105,19 +108,22 @@ ccl_device_inline void triangle_intersect_subsurface(
 	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
 
 #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
-	const ssef *verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
+	const ssef *ssef_verts = (ssef*)&kg->__prim_tri_verts.data[tri_vindex];
 #else
-	const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
-	             tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
-	             tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
-	const float3 verts[3] = {float4_to_float3(tri_a),
-	                         float4_to_float3(tri_b),
-	                         float4_to_float3(tri_c)};
+	const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
+	             tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
+	             tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
 #endif
 	float t, u, v;
 	if(!ray_triangle_intersect(isect_precalc,
 	                           P, tmax,
-	                           verts,
+#if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
+	                           ssef_verts,
+#else
+	                           tri_a,
+	                           tri_b,
+	                           tri_c,
+#endif
 	                           &u, &v, &t))
 	{
 		return;
@@ -156,15 +162,11 @@ ccl_device_inline void triangle_intersect_subsurface(
 	/* Record geometric normal. */
 	/* TODO(sergey): Check whether it's faster to re-use ssef verts. */
 #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
-	const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
-	             tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
-	             tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
+	const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+0)),
+	             tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+1)),
+	             tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex+2));
 #endif
-	/* TODO(sergey): Use float4_to_float3() on just an edges. */
-	const float3 v0 = float4_to_float3(tri_a);
-	const float3 v1 = float4_to_float3(tri_b);
-	const float3 v2 = float4_to_float3(tri_c);
-	ss_isect->Ng[hit] = normalize(cross(v1 - v0, v2 - v0));
+	ss_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
 }
 #endif
 
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index 4a052b8b9c..5bd3a52dce 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -153,13 +153,13 @@ void ray_triangle_intersect_precalc(float3 dir,
 	isect_precalc->kz = kz;
 }
 
-ccl_device_inline bool ray_triangle_intersect(
+ccl_device_forceinline bool ray_triangle_intersect(
         const TriangleIsectPrecalc *isect_precalc,
         float3 ray_P, float ray_t,
 #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
         const ssef *ssef_verts,
 #else
-        const float3 *verts,
+        const float3 tri_a, const float3 tri_b, const float3 tri_c,
 #endif
         float *isect_u, float *isect_v, float *isect_t)
 {
@@ -230,9 +230,9 @@ ccl_device_inline bool ray_triangle_intersect(
 	}
 #else
 	/* Calculate vertices relative to ray origin. */
-	const float3 A = verts[0] - ray_P;
-	const float3 B = verts[1] - ray_P;
-	const float3 C = verts[2] - ray_P;
+	const float3 A = make_float3(tri_a.x - ray_P.x, tri_a.y - ray_P.y, tri_a.z - ray_P.z);
+	const float3 B = make_float3(tri_b.x - ray_P.x, tri_b.y - ray_P.y, tri_b.z - ray_P.z);
+	const float3 C = make_float3(tri_c.x - ray_P.x, tri_c.y - ray_P.y, tri_c.z - ray_P.z);
 
 	const float A_kx = IDX(A, kx), A_ky = IDX(A, ky), A_kz = IDX(A, kz);
 	const float B_kx = IDX(B, kx), B_ky = IDX(B, ky), B_kz = IDX(B, kz);




More information about the Bf-blender-cvs mailing list