[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [39570] trunk/blender/source/blender: floats were being promoted to doubles in quite a few cases (using gcc's -Wdouble-promotion), went over render module and use float constants, gives small but consistent speedup - approx 3%.

Campbell Barton ideasman42 at gmail.com
Sat Aug 20 19:39:13 CEST 2011


Revision: 39570
          http://projects.blender.org/scm/viewvc.php?view=rev&root=bf-blender&revision=39570
Author:   campbellbarton
Date:     2011-08-20 17:39:13 +0000 (Sat, 20 Aug 2011)
Log Message:
-----------
floats were being promoted to doubles in quite a few cases (using gcc's -Wdouble-promotion), went over render module and use float constants, gives small but consistent speedup - approx 3%.

Modified Paths:
--------------
    trunk/blender/source/blender/blenloader/intern/readfile.c
    trunk/blender/source/blender/python/generic/noise_py_api.c
    trunk/blender/source/blender/render/intern/source/convertblender.c
    trunk/blender/source/blender/render/intern/source/envmap.c
    trunk/blender/source/blender/render/intern/source/gammaCorrectionTables.c
    trunk/blender/source/blender/render/intern/source/initrender.c
    trunk/blender/source/blender/render/intern/source/occlusion.c
    trunk/blender/source/blender/render/intern/source/pixelblending.c
    trunk/blender/source/blender/render/intern/source/pixelshading.c
    trunk/blender/source/blender/render/intern/source/rayshade.c
    trunk/blender/source/blender/render/intern/source/render_texture.c
    trunk/blender/source/blender/render/intern/source/rendercore.c
    trunk/blender/source/blender/render/intern/source/renderdatabase.c
    trunk/blender/source/blender/render/intern/source/shadbuf.c
    trunk/blender/source/blender/render/intern/source/sss.c
    trunk/blender/source/blender/render/intern/source/strand.c
    trunk/blender/source/blender/render/intern/source/sunsky.c
    trunk/blender/source/blender/render/intern/source/zbuf.c

Modified: trunk/blender/source/blender/blenloader/intern/readfile.c
===================================================================
--- trunk/blender/source/blender/blenloader/intern/readfile.c	2011-08-20 16:48:53 UTC (rev 39569)
+++ trunk/blender/source/blender/blenloader/intern/readfile.c	2011-08-20 17:39:13 UTC (rev 39570)
@@ -11670,8 +11670,8 @@
 			Tex *tex;
 			for(tex= main->tex.first; tex; tex= tex->id.next) {
 				if(tex->pd) {
-					if (tex->pd->falloff_speed_scale == 0.0)
-						tex->pd->falloff_speed_scale = 100.0;
+					if (tex->pd->falloff_speed_scale == 0.0f)
+						tex->pd->falloff_speed_scale = 100.0f;
 
 					if (!tex->pd->falloff_curve) {
 						tex->pd->falloff_curve = curvemapping_add(1, 0, 0, 1, 1);

Modified: trunk/blender/source/blender/python/generic/noise_py_api.c
===================================================================
--- trunk/blender/source/blender/python/generic/noise_py_api.c	2011-08-20 16:48:53 UTC (rev 39569)
+++ trunk/blender/source/blender/python/generic/noise_py_api.c	2011-08-20 17:39:13 UTC (rev 39570)
@@ -210,8 +210,8 @@
 	if((r = 1.f - v[2] * v[2]) > 0.f) {
 		float a = (float)(6.283185307f * frand());
 		r = (float)sqrt(r);
-		v[0] = (float)(r * cos(a));
-		v[1] = (float)(r * sin(a));
+		v[0] = (float)(r * cosf(a));
+		v[1] = (float)(r * sinf(a));
 	}
 	else {
 		v[2] = 1.f;
@@ -254,7 +254,7 @@
 	if(!PyArg_ParseTuple(args, "(fff)|i:noise", &x, &y, &z, &nb))
 		return NULL;
 
-	return PyFloat_FromDouble((2.0 * BLI_gNoise(1.0, x, y, z, 0, nb) - 1.0));
+	return PyFloat_FromDouble((2.0f * BLI_gNoise(1.0f, x, y, z, 0, nb) - 1.0f));
 }
 
 /*-------------------------------------------------------------------------*/
@@ -264,11 +264,11 @@
 static void noise_vector(float x, float y, float z, int nb, float v[3])
 {
 	/* Simply evaluate noise at 3 different positions */
-	v[0] = (float)(2.0 * BLI_gNoise(1.f, x + 9.321f, y - 1.531f, z - 7.951f, 0,
-				 nb) - 1.0);
-	v[1] = (float)(2.0 * BLI_gNoise(1.f, x, y, z, 0, nb) - 1.0);
-	v[2] = (float)(2.0 * BLI_gNoise(1.f, x + 6.327f, y + 0.1671f, z - 2.672f, 0,
-				 nb) - 1.0);
+	v[0]= (float)(2.0f * BLI_gNoise(1.f, x + 9.321f, y - 1.531f, z - 7.951f, 0,
+				 nb) - 1.0f);
+	v[1]= (float)(2.0f * BLI_gNoise(1.f, x, y, z, 0, nb) - 1.0f);
+	v[2]= (float)(2.0f * BLI_gNoise(1.f, x + 6.327f, y + 0.1671f, z - 2.672f, 0,
+				 nb) - 1.0f);
 }
 
 static PyObject *Noise_vector(PyObject *UNUSED(self), PyObject *args)
@@ -291,7 +291,7 @@
 	float amp, out, t;
 	int i;
 	amp = 1.f;
-	out = (float)(2.0 * BLI_gNoise(1.f, x, y, z, 0, nb) - 1.0);
+	out = (float)(2.0f * BLI_gNoise(1.f, x, y, z, 0, nb) - 1.0f);
 	if(hard)
 		out = (float)fabs(out);
 	for(i = 1; i < oct; i++) {
@@ -299,7 +299,7 @@
 		x *= freqscale;
 		y *= freqscale;
 		z *= freqscale;
-		t = (float)(amp * (2.0 * BLI_gNoise(1.f, x, y, z, 0, nb) - 1.0));
+		t = (float)(amp * (2.0f * BLI_gNoise(1.f, x, y, z, 0, nb) - 1.0f));
 		if(hard)
 			t = (float)fabs(t);
 		out += t;

Modified: trunk/blender/source/blender/render/intern/source/convertblender.c
===================================================================
--- trunk/blender/source/blender/render/intern/source/convertblender.c	2011-08-20 16:48:53 UTC (rev 39569)
+++ trunk/blender/source/blender/render/intern/source/convertblender.c	2011-08-20 17:39:13 UTC (rev 39570)
@@ -1046,9 +1046,9 @@
 		float fac;
 		if(ma->strand_ease!=0.0f) {
 			if(ma->strand_ease<0.0f)
-				fac= pow(sd->time, 1.0+ma->strand_ease);
+				fac= pow(sd->time, 1.0f+ma->strand_ease);
 			else
-				fac= pow(sd->time, 1.0/(1.0f-ma->strand_ease));
+				fac= pow(sd->time, 1.0f/(1.0f-ma->strand_ease));
 		}
 		else fac= sd->time;
 
@@ -1063,7 +1063,7 @@
 				width= w;
 
 			/*cross is the radius of the strand so we want it to be half of full width */
-			mul_v3_fl(cross,0.5/crosslen);
+			mul_v3_fl(cross,0.5f/crosslen);
 		}
 		else
 			width/=w;
@@ -1984,8 +1984,8 @@
 		else {
 			/* render normal particles */
 			if(part->trail_count > 1) {
-				float length = part->path_end * (1.0 - part->randlength * r_length);
-				int trail_count = part->trail_count * (1.0 - part->randlength * r_length);
+				float length = part->path_end * (1.0f - part->randlength * r_length);
+				int trail_count = part->trail_count * (1.0f - part->randlength * r_length);
 				float ct = (part->draw & PART_ABS_PATH_TIME) ? cfra : pa_time;
 				float dt = length / (trail_count ? (float)trail_count : 1.0f);
 
@@ -2159,7 +2159,7 @@
 				normalize_v3(view);
 
 				zn= nor[0]*view[0]+nor[1]*view[1]+nor[2]*view[2];
-				if(zn>=0.0) hasize= 0.0;
+				if(zn>=0.0f) hasize= 0.0f;
 				else hasize*= zn*zn*zn*zn;
 			}
 
@@ -3599,7 +3599,7 @@
 	
 	/* bias is percentage, made 2x larger because of correction for angle of incidence */
 	/* when a ray is closer to parallel of a face, bias value is increased during render */
-	shb->bias= (0.02*lar->bias)*0x7FFFFFFF;
+	shb->bias= (0.02f*lar->bias)*0x7FFFFFFF;
 	
 	/* halfway method (average of first and 2nd z) reduces bias issues */
 	if(ELEM(lar->buftype, LA_SHADBUF_HALFWAY, LA_SHADBUF_DEEP))
@@ -3610,7 +3610,7 @@
 
 static void area_lamp_vectors(LampRen *lar)
 {
-	float xsize= 0.5*lar->area_size, ysize= 0.5*lar->area_sizey, multifac;
+	float xsize= 0.5f*lar->area_size, ysize= 0.5f*lar->area_sizey, multifac;
 
 	/* make it smaller, so area light can be multisampled */
 	multifac= 1.0f/sqrt((float)lar->ray_totsamp);
@@ -3637,7 +3637,7 @@
 	lar->area[3][1]= lar->co[1] + xsize*lar->mat[0][1] - ysize*lar->mat[1][1];
 	lar->area[3][2]= lar->co[2] + xsize*lar->mat[0][2] - ysize*lar->mat[1][2];	
 	/* only for correction button size, matrix size works on energy */
-	lar->areasize= lar->dist*lar->dist/(4.0*xsize*ysize);
+	lar->areasize= lar->dist*lar->dist/(4.0f*xsize*ysize);
 }
 
 /* If lar takes more lamp data, the decoupling will be better. */
@@ -3791,10 +3791,10 @@
 	
 	lar->spotsi= la->spotsize;
 	if(lar->mode & LA_HALO) {
-		if(lar->spotsi>170.0) lar->spotsi= 170.0;
+		if(lar->spotsi>170.0f) lar->spotsi= 170.0f;
 	}
-	lar->spotsi= cos( M_PI*lar->spotsi/360.0 );
-	lar->spotbl= (1.0-lar->spotsi)*la->spotblend;
+	lar->spotsi= cos( M_PI*lar->spotsi/360.0f );
+	lar->spotbl= (1.0f-lar->spotsi)*la->spotblend;
 
 	memcpy(lar->mtex, la->mtex, MAX_MTEX*sizeof(void *));
 
@@ -3813,7 +3813,7 @@
 
 		xn= saacos(lar->spotsi);
 		xn= sin(xn)/cos(xn);
-		lar->spottexfac= 1.0/(xn);
+		lar->spottexfac= 1.0f/(xn);
 
 		if(lar->mode & LA_ONLYSHADOW) {
 			if((lar->mode & (LA_SHAD_BUF|LA_SHAD_RAY))==0) lar->mode -= LA_ONLYSHADOW;
@@ -3823,7 +3823,7 @@
 
 	/* set flag for spothalo en initvars */
 	if(la->type==LA_SPOT && (la->mode & LA_HALO) && (la->buftype != LA_SHADBUF_DEEP)) {
-		if(la->haint>0.0) {
+		if(la->haint>0.0f) {
 			re->flag |= R_LAMPHALO;
 
 			/* camera position (0,0,0) rotate around lamp */
@@ -3990,9 +3990,9 @@
 		
 		cp= (char *)&re->wrld.fastcol;
 		
-		cp[0]= 255.0*re->wrld.horr;
-		cp[1]= 255.0*re->wrld.horg;
-		cp[2]= 255.0*re->wrld.horb;
+		cp[0]= 255.0f*re->wrld.horr;
+		cp[1]= 255.0f*re->wrld.horg;
+		cp[2]= 255.0f*re->wrld.horb;
 		cp[3]= 1;
 		
 		VECCOPY(re->grvec, re->viewmat[2]);
@@ -4047,25 +4047,25 @@
 		if(vlr->flag & R_SMOOTH) {
 			dot= INPR(vlr->n, vlr->v1->n);
 			dot= ABS(dot);
-			if(dot>0.9) {
+			if(dot>0.9f) {
 				thresh+= dot; tot++;
 			}
 			dot= INPR(vlr->n, vlr->v2->n);
 			dot= ABS(dot);
-			if(dot>0.9) {
+			if(dot>0.9f) {
 				thresh+= dot; tot++;
 			}
 
 			dot= INPR(vlr->n, vlr->v3->n);
 			dot= ABS(dot);
-			if(dot>0.9) {
+			if(dot>0.9f) {
 				thresh+= dot; tot++;
 			}
 
 			if(vlr->v4) {
 				dot= INPR(vlr->n, vlr->v4->n);
 				dot= ABS(dot);
-				if(dot>0.9) {
+				if(dot>0.9f) {
 					thresh+= dot; tot++;
 				}
 			}
@@ -4105,7 +4105,7 @@
 				else if((mode & MA_RAYMIRROR) || ((mode & MA_TRANSP) && (mode & MA_RAYTRANSP))) {
 					/* for blurry reflect/refract, better to take more samples 
 					 * inside the raytrace than as OSA samples */
-					if ((vlr->mat->gloss_mir == 1.0) && (vlr->mat->gloss_tra == 1.0)) 
+					if ((vlr->mat->gloss_mir == 1.0f) && (vlr->mat->gloss_tra == 1.0f))
 						vlr->flag |= R_FULL_OSA;
 				}
 			}
@@ -4221,11 +4221,11 @@
 				
 				/* render normals are inverted in render! we calculate normal of single tria here */
 				flen= normal_tri_v3( nor,vlr->v4->co, vlr->v3->co, vlr->v1->co);
-				if(flen==0.0) normal_tri_v3( nor,vlr->v4->co, vlr->v2->co, vlr->v1->co);
+				if(flen==0.0f) normal_tri_v3( nor,vlr->v4->co, vlr->v2->co, vlr->v1->co);
 				
 				xn= nor[0]*vlr->n[0] + nor[1]*vlr->n[1] + nor[2]*vlr->n[2];
 
-				if(ABS(xn) < 0.999995 ) {	// checked on noisy fractal grid
+				if(ABS(xn) < 0.999995f ) {	// checked on noisy fractal grid
 					
 					float d1, d2;
 
@@ -5461,7 +5461,7 @@
 		for(j=0;j<3;j++) fsvec[j] = velarray[a].vel[j];
 		
 		/* (bad) HACK insert average velocity if none is there (see previous comment) */
-		if((fsvec[0] == 0.0) && (fsvec[1] == 0.0) && (fsvec[2] == 0.0))
+		if((fsvec[0] == 0.0f) && (fsvec[1] == 0.0f) && (fsvec[2] == 0.0f))
 		{
 			fsvec[0] = avgvel[0];
 			fsvec[1] = avgvel[1];

Modified: trunk/blender/source/blender/render/intern/source/envmap.c
===================================================================
--- trunk/blender/source/blender/render/intern/source/envmap.c	2011-08-20 16:48:53 UTC (rev 39569)
+++ trunk/blender/source/blender/render/intern/source/envmap.c	2011-08-20 17:39:13 UTC (rev 39570)
@@ -595,7 +595,7 @@
 	if(env->type==ENV_PLANE) {
 		face= 1;
 		
-		labda= 1.0/vec[2];
+		labda= 1.0f/vec[2];
 		answ[0]= env->viewscale*labda*vec[0];
 		answ[1]= -env->viewscale*labda*vec[1];
 	}
@@ -603,44 +603,44 @@
 		/* which face */
 		if( vec[2]<=-fabs(vec[0]) && vec[2]<=-fabs(vec[1]) ) {
 			face= 0;
-			labda= -1.0/vec[2];
+			labda= -1.0f/vec[2];
 			answ[0]= labda*vec[0];
 			answ[1]= labda*vec[1];
 		}
 		else if( vec[2]>=fabs(vec[0]) && vec[2]>=fabs(vec[1]) ) {
 			face= 1;
-			labda= 1.0/vec[2];
+			labda= 1.0f/vec[2];
 			answ[0]= labda*vec[0];
 			answ[1]= -labda*vec[1];
 		}
 		else if( vec[1]>=fabs(vec[0]) ) {
 			face= 2;
-			labda= 1.0/vec[1];
+			labda= 1.0f/vec[1];
 			answ[0]= labda*vec[0];
 			answ[1]= labda*vec[2];
 		}
 		else if( vec[0]<=-fabs(vec[1]) ) {
 			face= 3;
-			labda= -1.0/vec[0];
+			labda= -1.0f/vec[0];
 			answ[0]= labda*vec[1];
 			answ[1]= labda*vec[2];
 		}
 		else if( vec[1]<=-fabs(vec[0]) ) {
 			face= 4;
-			labda= -1.0/vec[1];
+			labda= -1.0f/vec[1];
 			answ[0]= -labda*vec[0];
 			answ[1]= labda*vec[2];
 		}
 		else {
 			face= 5;
-			labda= 1.0/vec[0];
+			labda= 1.0f/vec[0];
 			answ[0]= -labda*vec[1];
 			answ[1]= labda*vec[2];

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list