[Bf-blender-cvs] [4f96edd] soc-2014-cycles: Cycles: Use a function to calculate sin and cos at the same time.

Thomas Dinges noreply at git.blender.org
Sat May 24 14:29:29 CEST 2014


Commit: 4f96edd254c6533ff7ddc56695c287b8c12c6540
Author: Thomas Dinges
Date:   Sat May 24 14:24:56 2014 +0200
https://developer.blender.org/rB4f96edd254c6533ff7ddc56695c287b8c12c6540

Cycles: Use a function to calculate sin and cos at the same time.

Currently only enabled for gcc; tested on an Ivy Bridge CPU with gcc 4.8.
This gives me about 4% speedup in simple scenes like the cornell_box.blend and color_ramp.blend.

This can probably also be enabled for clang and msvc later, since they have a sincos() function as well.

This commit also contains a small cleanup and a compile fix for the AVX2 kernel after the merge.

===================================================================

M	intern/cycles/kernel/closure/bsdf_microfacet.h
M	intern/cycles/kernel/closure/volume.h
M	intern/cycles/kernel/kernel_avx2.cpp
M	intern/cycles/kernel/kernel_montecarlo.h
M	intern/cycles/kernel/kernel_subsurface.h
M	intern/cycles/util/util_math.h

===================================================================

diff --git a/intern/cycles/kernel/closure/bsdf_microfacet.h b/intern/cycles/kernel/closure/bsdf_microfacet.h
index 1ec35e4..89fb227 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet.h
@@ -382,8 +382,11 @@ ccl_device int bsdf_microfacet_beckmann_sample(const ShaderClosure *sc, float3 N
 
 		float sinThetaM = cosThetaM * tanThetaM;
 		float phiM = M_2PI_F * randv;
-		float3 m = (cosf(phiM) * sinThetaM) * X +
-		           (sinf(phiM) * sinThetaM) * Y +
+		
+		float sin_phiM, cos_phiM;
+		sincos(phiM, &sin_phiM, &cos_phiM);
+		float3 m = (cos_phiM * sinThetaM) * X +
+		           (sin_phiM * sinThetaM) * Y +
 		           (             cosThetaM) * Z;
 
 		if(!m_refractive) {
diff --git a/intern/cycles/kernel/closure/volume.h b/intern/cycles/kernel/closure/volume.h
index 058c4b8..7568ced 100644
--- a/intern/cycles/kernel/closure/volume.h
+++ b/intern/cycles/kernel/closure/volume.h
@@ -72,8 +72,7 @@ ccl_device int volume_henyey_greenstein_sample(const ShaderClosure *sc, float3 I
 	float sin_theta = safe_sqrtf(1.0f - cos_theta * cos_theta);
 
 	float phi = M_2PI_F * randv;
-	cos_phi = cosf(phi);
-	sin_phi = sinf(phi);
+	sincos(phi, &sin_phi, &cos_phi);
 
 	/* note that I points towards the viewer and so is used negated */
 	float3 T, B;
diff --git a/intern/cycles/kernel/kernel_avx2.cpp b/intern/cycles/kernel/kernel_avx2.cpp
index c6c4ba5..2ca4473 100644
--- a/intern/cycles/kernel/kernel_avx2.cpp
+++ b/intern/cycles/kernel/kernel_avx2.cpp
@@ -39,7 +39,7 @@
 #include "kernel_globals.h"
 #include "kernel_film.h"
 #include "kernel_path.h"
-#include "kernel_displace.h"
+#include "kernel_bake.h"
 
 CCL_NAMESPACE_BEGIN
 
diff --git a/intern/cycles/kernel/kernel_montecarlo.h b/intern/cycles/kernel/kernel_montecarlo.h
index af7b727..296acff 100644
--- a/intern/cycles/kernel/kernel_montecarlo.h
+++ b/intern/cycles/kernel/kernel_montecarlo.h
@@ -41,8 +41,10 @@ ccl_device void to_unit_disk(float *x, float *y)
 	float phi = M_2PI_F * (*x);
 	float r = sqrtf(*y);
 
-	*x = r * cosf(phi);
-	*y = r * sinf(phi);
+	float sin_phi, cos_phi;
+	sincos(phi, &sin_phi, &cos_phi);
+	*x = r * cos_phi;
+	*y = r * sin_phi;
 }
 
 /* return an orthogonal tangent and bitangent given a normal and tangent that
@@ -73,8 +75,11 @@ ccl_device_inline void sample_uniform_hemisphere(const float3 N,
 	float z = randu;
 	float r = sqrtf(max(0.0f, 1.0f - z*z));
 	float phi = M_2PI_F * randv;
-	float x = r * cosf(phi);
-	float y = r * sinf(phi);
+	
+	float sin_phi, cos_phi;
+	sincos(phi, &sin_phi, &cos_phi);
+	float x = r * cos_phi;
+	float y = r * sin_phi;
 
 	float3 T, B;
 	make_orthonormals (N, &T, &B);
@@ -90,8 +95,11 @@ ccl_device_inline void sample_uniform_cone(const float3 N, float angle,
 	float z = cosf(angle*randu);
 	float r = sqrtf(max(0.0f, 1.0f - z*z));
 	float phi = M_2PI_F * randv;
-	float x = r * cosf(phi);
-	float y = r * sinf(phi);
+	
+	float sin_phi, cos_phi;
+	sincos(phi, &sin_phi, &cos_phi);	
+	float x = r * cos_phi;
+	float y = r * sin_phi;
 
 	float3 T, B;
 	make_orthonormals (N, &T, &B);
@@ -105,8 +113,11 @@ ccl_device float3 sample_uniform_sphere(float u1, float u2)
 	float z = 1.0f - 2.0f*u1;
 	float r = sqrtf(fmaxf(0.0f, 1.0f - z*z));
 	float phi = M_2PI_F*u2;
-	float x = r*cosf(phi);
-	float y = r*sinf(phi);
+	
+	float sin_phi, cos_phi;
+	sincos(phi, &sin_phi, &cos_phi);	
+	float x = r * cos_phi;
+	float y = r * sin_phi;
 
 	return make_float3(x, y, z);
 }
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index fb927e8..e669dab 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -255,8 +255,10 @@ ccl_device int subsurface_scatter_multi_step(KernelGlobals *kg, ShaderData *sd,
 	float disk_height;
 
 	bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
-
-	float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
+	
+	float sin_phi, cos_phi;
+	sincos(phi, &sin_phi, &cos_phi);
+	float3 disk_P = (disk_r*cos_phi) * disk_T + (disk_r*sin_phi) * disk_B;
 
 	/* create ray */
 	Ray ray;
@@ -354,8 +356,11 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
 	float disk_height;
 
 	bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
+	
+	float sin_phi, cos_phi;
+	sincos(phi, &sin_phi, &cos_phi);
 
-	float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
+	float3 disk_P = (disk_r*cos_phi) * disk_T + (disk_r*sin_phi) * disk_B;
 
 	/* create ray */
 	Ray ray;
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index ded7576..15c022c 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -189,6 +189,15 @@ ccl_device_inline float nonzerof(float f, float eps)
 		return f;
 }
 
+ccl_device_inline void sincos(float theta, float *sin, float *cos) {
+#if defined(__GNUC__) && !defined(__clang__)
+	sincosf(theta, sin, cos);
+#else
+	*sin = sinf(theta);
+	*cos = cosf(theta);
+#endif
+}
+
 ccl_device_inline float smoothstepf(float f)
 {
 	float ff = f*f;
@@ -622,11 +631,7 @@ ccl_device_inline bool is_zero(const float3 a)
 
 ccl_device_inline float reduce_add(const float3 a)
 {
-#ifdef __KERNEL_SSE__
-	return (a.x + a.y + a.z);
-#else
 	return (a.x + a.y + a.z);
-#endif
 }
 
 ccl_device_inline float average(const float3 a)




More information about the Bf-blender-cvs mailing list