[Bf-blender-cvs] [023eb2ea7c1] master: Cycles: more closely match some math and intersection operations in Embree
Brecht Van Lommel
noreply at git.blender.org
Mon Jul 25 13:58:02 CEST 2022
Commit: 023eb2ea7c16a00272f83d564145e28aeb9ed2b7
Author: Brecht Van Lommel
Date: Thu Jul 21 15:49:00 2022 +0200
Branches: master
https://developer.blender.org/rB023eb2ea7c16a00272f83d564145e28aeb9ed2b7
Cycles: more closely match some math and intersection operations in Embree
This helps with debugging, and gives a slightly closer match between CPU
and CUDA/HIP/Metal renders when it comes to ray tracing precision.
===================================================================
M intern/cycles/blender/curves.cpp
M intern/cycles/kernel/geom/object.h
M intern/cycles/kernel/geom/shader_data.h
M intern/cycles/util/math.h
M intern/cycles/util/math_float3.h
M intern/cycles/util/math_intersect.h
M intern/cycles/util/transform.cpp
M intern/cycles/util/transform.h
===================================================================
diff --git a/intern/cycles/blender/curves.cpp b/intern/cycles/blender/curves.cpp
index c4154bce022..04ba7c53878 100644
--- a/intern/cycles/blender/curves.cpp
+++ b/intern/cycles/blender/curves.cpp
@@ -55,7 +55,7 @@ static bool ObtainCacheParticleData(
return false;
Transform tfm = get_transform(b_ob->matrix_world());
- Transform itfm = transform_quick_inverse(tfm);
+ Transform itfm = transform_inverse(tfm);
for (BL::Modifier &b_mod : b_ob->modifiers) {
if ((b_mod.type() == b_mod.type_PARTICLE_SYSTEM) &&
diff --git a/intern/cycles/kernel/geom/object.h b/intern/cycles/kernel/geom/object.h
index b15f6b5dda5..bef7d710159 100644
--- a/intern/cycles/kernel/geom/object.h
+++ b/intern/cycles/kernel/geom/object.h
@@ -86,7 +86,7 @@ ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg,
Transform tfm = object_fetch_transform_motion(kg, object, time);
if (itfm)
- *itfm = transform_quick_inverse(tfm);
+ *itfm = transform_inverse(tfm);
return tfm;
}
diff --git a/intern/cycles/kernel/geom/shader_data.h b/intern/cycles/kernel/geom/shader_data.h
index 99b9289cb4a..5af89b45f20 100644
--- a/intern/cycles/kernel/geom/shader_data.h
+++ b/intern/cycles/kernel/geom/shader_data.h
@@ -18,7 +18,7 @@ ccl_device void shader_setup_object_transforms(KernelGlobals kg,
{
if (sd->object_flag & SD_OBJECT_MOTION) {
sd->ob_tfm_motion = object_fetch_transform_motion(kg, sd->object, time);
- sd->ob_itfm_motion = transform_quick_inverse(sd->ob_tfm_motion);
+ sd->ob_itfm_motion = transform_inverse(sd->ob_tfm_motion);
}
}
#endif
diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h
index af2f1ea092d..8360ce05a56 100644
--- a/intern/cycles/util/math.h
+++ b/intern/cycles/util/math.h
@@ -511,6 +511,11 @@ ccl_device_inline float4 float3_to_float4(const float3 a)
return make_float4(a.x, a.y, a.z, 1.0f);
}
+ccl_device_inline float4 float3_to_float4(const float3 a, const float w)
+{
+ return make_float4(a.x, a.y, a.z, w);
+}
+
ccl_device_inline float inverse_lerp(float a, float b, float x)
{
return (x - a) / (b - a);
diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h
index c02b4cdbf0d..c408eadf195 100644
--- a/intern/cycles/util/math_float3.h
+++ b/intern/cycles/util/math_float3.h
@@ -147,8 +147,11 @@ ccl_device_inline float3 operator/(const float f, const float3 &a)
ccl_device_inline float3 operator/(const float3 &a, const float f)
{
- float invf = 1.0f / f;
- return a * invf;
+# if defined(__KERNEL_SSE__)
+ return float3(_mm_div_ps(a.m128, _mm_set1_ps(f)));
+# else
+ return make_float3(a.x / f, a.y / f, a.z / f);
+# endif
}
ccl_device_inline float3 operator/(const float3 &a, const float3 &b)
@@ -284,8 +287,12 @@ ccl_device_inline float dot_xy(const float3 &a, const float3 &b)
ccl_device_inline float3 cross(const float3 &a, const float3 &b)
{
- float3 r = make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
- return r;
+# ifdef __KERNEL_SSE__
+ return float3(shuffle<1, 2, 0, 3>(
+ msub(ssef(a), shuffle<1, 2, 0, 3>(ssef(b)), shuffle<1, 2, 0, 3>(ssef(a)) * ssef(b))));
+# else
+ return make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+# endif
}
ccl_device_inline float3 normalize(const float3 &a)
diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h
index c5b1cd51030..3e5891b2507 100644
--- a/intern/cycles/util/math_intersect.h
+++ b/intern/cycles/util/math_intersect.h
@@ -105,10 +105,10 @@ ccl_device bool ray_disk_intersect(float3 ray_P,
return false;
}
-ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
- float3 ray_dir,
- float ray_tmin,
- float ray_tmax,
+ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
+ const float3 ray_D,
+ const float ray_tmin,
+ const float ray_tmax,
const float3 tri_a,
const float3 tri_b,
const float3 tri_c,
@@ -116,14 +116,13 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
ccl_private float *isect_v,
ccl_private float *isect_t)
{
-#define dot3(a, b) dot(a, b)
- const float3 P = ray_P;
- const float3 dir = ray_dir;
+ /* This implementation matches the Plücker coordinates triangle intersection
+ * in Embree. */
/* Calculate vertices relative to ray origin. */
- const float3 v0 = tri_c - P;
- const float3 v1 = tri_a - P;
- const float3 v2 = tri_b - P;
+ const float3 v0 = tri_c - ray_P;
+ const float3 v1 = tri_a - ray_P;
+ const float3 v2 = tri_b - ray_P;
/* Calculate triangle edges. */
const float3 e0 = v2 - v0;
@@ -131,29 +130,29 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
const float3 e2 = v1 - v2;
/* Perform edge tests. */
- const float U = dot(cross(v2 + v0, e0), ray_dir);
- const float V = dot(cross(v0 + v1, e1), ray_dir);
- const float W = dot(cross(v1 + v2, e2), ray_dir);
+ const float U = dot(cross(v2 + v0, e0), ray_D);
+ const float V = dot(cross(v0 + v1, e1), ray_D);
+ const float W = dot(cross(v1 + v2, e2), ray_D);
+ const float eps = FLT_EPSILON * fabsf(U + V + W);
const float minUVW = min(U, min(V, W));
const float maxUVW = max(U, max(V, W));
- if (minUVW < 0.0f && maxUVW > 0.0f) {
+ if (!(minUVW >= -eps || maxUVW <= eps)) {
return false;
}
/* Calculate geometry normal and denominator. */
const float3 Ng1 = cross(e1, e0);
- // const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0);
const float3 Ng = Ng1 + Ng1;
- const float den = dot3(Ng, dir);
+ const float den = dot(Ng, ray_D);
/* Avoid division by 0. */
if (UNLIKELY(den == 0.0f)) {
return false;
}
/* Perform depth test. */
- const float T = dot3(v0, Ng);
+ const float T = dot(v0, Ng);
const float t = T / den;
if (!(t >= ray_tmin && t <= ray_tmax)) {
return false;
@@ -163,8 +162,6 @@ ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P,
*isect_v = V / den;
*isect_t = t;
return true;
-
-#undef dot3
}
/* Tests for an intersection between a ray and a quad defined by
diff --git a/intern/cycles/util/transform.cpp b/intern/cycles/util/transform.cpp
index 0bf5de57a20..0b87e88871d 100644
--- a/intern/cycles/util/transform.cpp
+++ b/intern/cycles/util/transform.cpp
@@ -99,15 +99,7 @@ ProjectionTransform projection_inverse(const ProjectionTransform &tfm)
memcpy(M, &tfm, sizeof(M));
if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
- /* matrix is degenerate (e.g. 0 scale on some axis), ideally we should
- * never be in this situation, but try to invert it anyway with tweak */
- M[0][0] += 1e-8f;
- M[1][1] += 1e-8f;
- M[2][2] += 1e-8f;
-
- if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) {
- return projection_identity();
- }
+ return projection_identity();
}
memcpy(&tfmR, R, sizeof(R));
@@ -115,16 +107,9 @@ ProjectionTransform projection_inverse(const ProjectionTransform &tfm)
return tfmR;
}
-Transform transform_inverse(const Transform &tfm)
-{
- ProjectionTransform projection(tfm);
- return projection_to_transform(projection_inverse(projection));
-}
-
Transform transform_transposed_inverse(const Transform &tfm)
{
- ProjectionTransform projection(tfm);
- ProjectionTransform iprojection = projection_inverse(projection);
+ ProjectionTransform iprojection(transform_inverse(tfm));
return projection_to_transform(projection_transpose(iprojection));
}
diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h
index a460581d1f3..71164efbac1 100644
--- a/intern/cycles/util/transform.h
+++ b/intern/cycles/util/transform.h
@@ -63,10 +63,10 @@ ccl_device_inline float3 transform_point(ccl_private const Transform *t, const f
_MM_TRANSPOSE4_PS(x, y, z, w);
- ssef tmp = shuffle<0>(aa) * x;
- tmp = madd(shuffle<1>(aa), y, tmp);
+ ssef tmp = w;
tmp = madd(shuffle<2>(aa), z, tmp);
- tmp += w;
+ tmp = madd(shuffle<1>(aa), y, tmp);
+ tmp = madd(shuffle<0>(aa), x, tmp);
return float3(tmp.m128);
#elif defined(__KERNEL_METAL__)
@@ -93,9 +93,9 @@ ccl_device_inline float3 transform_direction(ccl_private const Transform *t, con
_MM_TRANSPOSE4_PS(x, y, z, w);
- ssef tmp = shuffle<0>(aa) * x;
+ ssef tmp = shuffle<2>(aa) * z;
tmp = madd(shuffle<1>(aa), y, tmp);
- tmp = madd(shuffle<2>(aa), z, tmp);
+ tmp = madd(shuffle<0>(aa), x, tmp);
return float3(tmp.m128);
#elif defined(__KERNEL_METAL__)
@@ -312,7 +312,6 @@ ccl_device_inline void transform_set_column(Transform *t, int column, float3 val
t->z[column] = value.z;
}
-Transform transform_inverse(const Transform &a);
Transform transform_transposed_inverse(const Transform &a);
ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale)
@@ -392,39 +391,47 @@ ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t)
#endif /* defined(__KERNEL_GPU_RAYTRACING__) */
}
-ccl_device_inline Transform transform_quick_inverse(Transform M)
+ccl_device_inline Transform transform_inverse(const Transform tfm)
{
- /* possible optimization: can we avoid doing this altogether and construct
- * the inverse matrix directly from negated translation, transposed rotation,
- * scale can be inverted but what about shearing? */
- Transform R;
- float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) +
- M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z);
+ /* This implementation matches the one in Embree exactly, to ensure consistent
+ * results with the ray intersection of instances. */
+ float3 x = make_float3(tfm.x.x, tfm.y.x, tfm.z.x);
+ float3 y = m
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list