[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [47133] trunk/blender/intern/cycles: Cycles: fixes to make CUDA 4.2 work, compiling gave errors in shadows and

Brecht Van Lommel brechtvanlommel at pandora.be
Mon May 28 21:21:14 CEST 2012


Revision: 47133
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=47133
Author:   blendix
Date:     2012-05-28 19:21:13 +0000 (Mon, 28 May 2012)
Log Message:
-----------
Cycles: fixes to make CUDA 4.2 work. Compiling with it gave errors in shadows
and other places, mainly because instancing was not working, but issues were
also found in procedural textures.

The problem was with --use_fast_math, which now seems to have much lower
precision for some operations. This flag is now disabled, and fast math
functions are used selectively instead. No performance regression was found
on a GTX 460 after doing this.

Modified Paths:
--------------
    trunk/blender/intern/cycles/blender/blender_mesh.cpp
    trunk/blender/intern/cycles/blender/blender_sync.cpp
    trunk/blender/intern/cycles/device/device_cuda.cpp
    trunk/blender/intern/cycles/kernel/CMakeLists.txt
    trunk/blender/intern/cycles/kernel/kernel_bvh.h
    trunk/blender/intern/cycles/kernel/kernel_compat_cuda.h
    trunk/blender/intern/cycles/kernel/kernel_projection.h
    trunk/blender/intern/cycles/util/util_transform.h

Modified: trunk/blender/intern/cycles/blender/blender_mesh.cpp
===================================================================
--- trunk/blender/intern/cycles/blender/blender_mesh.cpp	2012-05-28 19:10:57 UTC (rev 47132)
+++ trunk/blender/intern/cycles/blender/blender_mesh.cpp	2012-05-28 19:21:13 UTC (rev 47133)
@@ -304,7 +304,6 @@
 void BlenderSync::sync_mesh_motion(BL::Object b_ob, Mesh *mesh, int motion)
 {
 	/* todo: displacement, subdivision */
-	BL::ID b_ob_data = b_ob.data();
 	size_t size = mesh->verts.size();
 
 	/* skip objects without deforming modifiers. this is not a totally reliable,

Modified: trunk/blender/intern/cycles/blender/blender_sync.cpp
===================================================================
--- trunk/blender/intern/cycles/blender/blender_sync.cpp	2012-05-28 19:10:57 UTC (rev 47132)
+++ trunk/blender/intern/cycles/blender/blender_sync.cpp	2012-05-28 19:21:13 UTC (rev 47133)
@@ -135,7 +135,6 @@
 
 void BlenderSync::sync_integrator()
 {
-	BL::RenderSettings r = b_scene.render();
 	PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
 
 	experimental = (RNA_enum_get(&cscene, "feature_set") != 0);

Modified: trunk/blender/intern/cycles/device/device_cuda.cpp
===================================================================
--- trunk/blender/intern/cycles/device/device_cuda.cpp	2012-05-28 19:10:57 UTC (rev 47132)
+++ trunk/blender/intern/cycles/device/device_cuda.cpp	2012-05-28 19:21:13 UTC (rev 47133)
@@ -259,7 +259,7 @@
 
 		path_create_directories(cubin);
 
-		string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" --use_fast_math "
+		string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
 			"-o \"%s\" --ptxas-options=\"-v\" --maxrregcount=%d --opencc-options -OPT:Olimit=0 -I\"%s\" -DNVCC",
 			nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), maxreg, include.c_str());
 

Modified: trunk/blender/intern/cycles/kernel/CMakeLists.txt
===================================================================
--- trunk/blender/intern/cycles/kernel/CMakeLists.txt	2012-05-28 19:10:57 UTC (rev 47132)
+++ trunk/blender/intern/cycles/kernel/CMakeLists.txt	2012-05-28 19:21:13 UTC (rev 47133)
@@ -114,7 +114,7 @@
 
 		add_custom_command(
 			OUTPUT ${cuda_cubin}
-			COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu --use_fast_math -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" --maxrregcount=24 --opencc-options -OPT:Olimit=0 -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
+			COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" --maxrregcount=24 --opencc-options -OPT:Olimit=0 -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
 			DEPENDS ${cuda_sources})
 
 		delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)

Modified: trunk/blender/intern/cycles/kernel/kernel_bvh.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_bvh.h	2012-05-28 19:10:57 UTC (rev 47132)
+++ trunk/blender/intern/cycles/kernel/kernel_bvh.h	2012-05-28 19:21:13 UTC (rev 47133)
@@ -74,10 +74,10 @@
 
 __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax)
 {
-	Transform tfm = object_fetch_transform(kg, object, ray->time, OBJECT_TRANSFORM);
-
-	if(*t != FLT_MAX)
+	if(*t != FLT_MAX) {
+		Transform tfm = object_fetch_transform(kg, object, ray->time, OBJECT_TRANSFORM);
 		*t *= len(transform_direction(&tfm, 1.0f/(*idir)));
+	}
 
 	*P = ray->P;
 	*idir = bvh_inverse_direction(ray->D);

Modified: trunk/blender/intern/cycles/kernel/kernel_compat_cuda.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_compat_cuda.h	2012-05-28 19:10:57 UTC (rev 47132)
+++ trunk/blender/intern/cycles/kernel/kernel_compat_cuda.h	2012-05-28 19:21:13 UTC (rev 47133)
@@ -62,5 +62,15 @@
 
 #define kernel_data __data
 
+/* Use fast math functions */
+
+#define cosf(x) __cosf(((float)x))
+#define sinf(x) __sinf(((float)x))
+#define powf(x, y) __powf(((float)x), ((float)y))
+#define cosf(x) __cosf(((float)x))
+#define tanf(x) __tanf(((float)x))
+#define logf(x) __logf(((float)x))
+#define expf(x) __expf(((float)x))
+
 #endif /* __KERNEL_COMPAT_CUDA_H__ */
 

Modified: trunk/blender/intern/cycles/kernel/kernel_projection.h
===================================================================
--- trunk/blender/intern/cycles/kernel/kernel_projection.h	2012-05-28 19:10:57 UTC (rev 47132)
+++ trunk/blender/intern/cycles/kernel/kernel_projection.h	2012-05-28 19:21:13 UTC (rev 47133)
@@ -69,20 +69,20 @@
 	float theta = M_PI_F*(1.0f - v);
 
 	return make_float3(
-		sin(theta)*cos(phi),
-		sin(theta)*sin(phi),
-		cos(theta));
+		sinf(theta)*cosf(phi),
+		sinf(theta)*sinf(phi),
+		cosf(theta));
 }
 
 /* Fisheye <-> Cartesian direction */
 
 __device float2 direction_to_fisheye(float3 dir, float fov)
 {
-	float r = atan2f(sqrt(dir.y*dir.y +  dir.z*dir.z), dir.x) / fov;
-	float phi = atan2(dir.z, dir.y);
+	float r = atan2f(sqrtf(dir.y*dir.y +  dir.z*dir.z), dir.x) / fov;
+	float phi = atan2f(dir.z, dir.y);
 
-	float u = r * cos(phi) + 0.5f;
-	float v = r * sin(phi) + 0.5f;
+	float u = r * cosf(phi) + 0.5f;
+	float v = r * sinf(phi) + 0.5f;
 
 	return make_float2(u, v);
 }
@@ -92,7 +92,7 @@
 	u = (u - 0.5f) * 2.0f;
 	v = (v - 0.5f) * 2.0f;
 
-	float r = sqrt(u*u + v*v);
+	float r = sqrtf(u*u + v*v);
 
 	if(r > 1.0f)
 		return make_float3(0.0f, 0.0f, 0.0f);
@@ -127,7 +127,7 @@
 	v = (v - 0.5f) * height;
 
 	float rmax = 2.0f * lens * sinf(fov * 0.25f);
-	float r = sqrt(u*u + v*v);
+	float r = sqrtf(u*u + v*v);
 
 	if(r > rmax)
 		return make_float3(0.0f, 0.0f, 0.0f);
@@ -153,7 +153,7 @@
 
 	dir.x = 2.0f*u - 1.0f;
 	dir.z = 2.0f*v - 1.0f;
-	dir.y = -sqrt(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f));
+	dir.y = -sqrtf(max(1.0f - dir.x*dir.x - dir.z*dir.z, 0.0f));
 
 	/* reflection */
 	float3 I = make_float3(0.0f, -1.0f, 0.0f);
@@ -166,7 +166,7 @@
 	/* inverse of mirrorball_to_direction */
 	dir.y -= 1.0f;
 
-	float div = 2.0f*sqrt(max(-0.5f*dir.y, 0.0f));
+	float div = 2.0f*sqrtf(max(-0.5f*dir.y, 0.0f));
 	if(div > 0.0f)
 		dir /= div;
 

Modified: trunk/blender/intern/cycles/util/util_transform.h
===================================================================
--- trunk/blender/intern/cycles/util/util_transform.h	2012-05-28 19:10:57 UTC (rev 47132)
+++ trunk/blender/intern/cycles/util/util_transform.h	2012-05-28 19:21:13 UTC (rev 47133)
@@ -61,16 +61,20 @@
 
 __device_inline float3 transform_point(const Transform *t, const float3 a)
 {
-	float4 b = make_float4(a.x, a.y, a.z, 1.0f);
-	float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b));
+	float3 c = make_float3(
+		a.x*t->x.x + a.y*t->x.y + a.z*t->x.z + t->x.w,
+		a.x*t->y.x + a.y*t->y.y + a.z*t->y.z + t->y.w,
+		a.x*t->z.x + a.y*t->z.y + a.z*t->z.z + t->z.w);
 
 	return c;
 }
 
 __device_inline float3 transform_direction(const Transform *t, const float3 a)
 {
-	float4 b = make_float4(a.x, a.y, a.z, 0.0f);
-	float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b));
+	float3 c = make_float3(
+		a.x*t->x.x + a.y*t->x.y + a.z*t->x.z,
+		a.x*t->y.x + a.y*t->y.y + a.z*t->y.z,
+		a.x*t->z.x + a.y*t->z.y + a.z*t->z.z);
 
 	return c;
 }




More information about the Bf-blender-cvs mailing list