[Bf-blender-cvs] [4f96edd] soc-2014-cycles: Cycles: Use a function to calculate sin and cos at the same time.
Thomas Dinges
noreply at git.blender.org
Sat May 24 14:29:29 CEST 2014
Commit: 4f96edd254c6533ff7ddc56695c287b8c12c6540
Author: Thomas Dinges
Date: Sat May 24 14:24:56 2014 +0200
https://developer.blender.org/rB4f96edd254c6533ff7ddc56695c287b8c12c6540
Cycles: Use a function to calculate sin and cos at the same time.
Only enabled for gcc at the moment, tested on an Ivy Bridge CPU with gcc 4.8.
This gives me about 4% speedup in simple scenes like the cornell_box.blend and color_ramp.blend.
Can probably also be enabled for clang and msvc later, they have a sincos() function as well.
This commit also contains a small cleanup and a compile fix for the AVX2 kernel after the merge.
===================================================================
M intern/cycles/kernel/closure/bsdf_microfacet.h
M intern/cycles/kernel/closure/volume.h
M intern/cycles/kernel/kernel_avx2.cpp
M intern/cycles/kernel/kernel_montecarlo.h
M intern/cycles/kernel/kernel_subsurface.h
M intern/cycles/util/util_math.h
===================================================================
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 1ec35e4..89fb227 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -382,8 +382,11 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N
float sinThetaM = cosThetaM * tanThetaM;
float phiM = M_2PI_F * randv;
- float3 m = (cosf(phiM) * sinThetaM) * X +
- (sinf(phiM) * sinThetaM) * Y +
+
+ float sin_phiM, cos_phiM;
+ sincos(phiM, &sin_phiM, &cos_phiM);
+ float3 m = (cos_phiM * sinThetaM) * X +
+ (sin_phiM * sinThetaM) * Y +
( cosThetaM) * Z;
if(!m_refractive) {
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 058c4b8..7568ced 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -72,8 +72,7 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I
float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
float phi = M_2PI_F * randv;
- cos_phi = cosf(phi);
- sin_phi = sinf(phi);
+ sincos(phi, &sin_phi, &cos_phi);
/* note that I points towards the viewer and so is used negated */
float3 T, B;
diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernel_avx2.cpp
index c6c4ba5..2ca4473 100644
--- a/intern/cycles/kernel/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernel_avx2.cpp
@@ -39,7 +39,7 @@
#include "kernel_globals.h"
#include "kernel_film.h"
#include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
CCL_NAMESPACE_BEGIN
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index af7b727..296acff 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -41,8 +41,10 @@ ccl_device void to_unit_disk(float *x, float *y)
float phi = M_2PI_F * (*x);
float r = sqrtf(*y);
- *x = r * cosf(phi);
- *y = r * sinf(phi);
+ float sin_phi, cos_phi;
+ sincos(phi, &sin_phi, &cos_phi);
+ *x = r * cos_phi;
+ *y = r * sin_phi;
}
/* return an orthogonal tangent and bitangent given a normal and tangent that
@@ -73,8 +75,11 @@ ccl_device_inline void sample_uniform_hemisphere(const float3 N,
float z = randu;
float r = sqrtf(max(0.0f, 1.0f - z*z));
float phi = M_2PI_F * randv;
- float x = r * cosf(phi);
- float y = r * sinf(phi);
+
+ float sin_phi, cos_phi;
+ sincos(phi, &sin_phi, &cos_phi);
+ float x = r * cos_phi;
+ float y = r * sin_phi;
float3 T, B;
make_orthonormals (N, &T, &B);
@@ -90,8 +95,11 @@ ccl_device_inline void sample_uniform_cone(const float3 N, float angle,
float z = cosf(angle*randu);
float r = sqrtf(max(0.0f, 1.0f - z*z));
float phi = M_2PI_F * randv;
- float x = r * cosf(phi);
- float y = r * sinf(phi);
+
+ float sin_phi, cos_phi;
+ sincos(phi, &sin_phi, &cos_phi);
+ float x = r * cos_phi;
+ float y = r * sin_phi;
float3 T, B;
make_orthonormals (N, &T, &B);
@@ -105,8 +113,11 @@ ccl_device float3 sample_uniform_sphere(float u1, float u2)
float z = 1.0f - 2.0f*u1;
float r = sqrtf(fmaxf(0.0f, 1.0f - z*z));
float phi = M_2PI_F*u2;
- float x = r*cosf(phi);
- float y = r*sinf(phi);
+
+ float sin_phi, cos_phi;
+ sincos(phi, &sin_phi, &cos_phi);
+ float x = r * cos_phi;
+ float y = r * sin_phi;
return make_float3(x, y, z);
}
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index fb927e8..e669dab 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -255,8 +255,10 @@ ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd,
float disk_height;
bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
-
- float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
+
+ float sin_phi, cos_phi;
+ sincos(phi, &sin_phi, &cos_phi);
+ float3 disk_P = (disk_r*cos_phi) * disk_T + (disk_r*sin_phi) * disk_B;
/* create ray */
Ray ray;
@@ -354,8 +356,11 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
float disk_height;
bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
+
+ float sin_phi, cos_phi;
+ sincos(phi, &sin_phi, &cos_phi);
- float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
+ float3 disk_P = (disk_r*cos_phi) * disk_T + (disk_r*sin_phi) * disk_B;
/* create ray */
Ray ray;
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index ded7576..15c022c 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -189,6 +189,15 @@ ccl_device_inline float nonzerof(float f, float eps)
return f;
}
+ccl_device_inline void sincos(float theta, float *sin, float *cos) {
+#if defined(__GNUC__) && !defined(__clang__)
+ sincosf(theta, sin, cos);
+#else
+ *sin = sinf(theta);
+ *cos = cosf(theta);
+#endif
+}
+
ccl_device_inline float smoothstepf(float f)
{
float ff = f*f;
@@ -622,11 +631,7 @@ ccl_device_inline bool is_zero(const float3 a)
ccl_device_inline float reduce_add(const float3 a)
{
-#ifdef __KERNEL_SSE__
- return (a.x + a.y + a.z);
-#else
return (a.x + a.y + a.z);
-#endif
}
ccl_device_inline float average(const float3 a)
More information about the Bf-blender-cvs
mailing list