[Bf-blender-cvs] [4537e855846] master: Fix T53001: more workarounds for crash in AMD compiler with recent drivers.

Brecht Van Lommel noreply at git.blender.org
Thu Oct 5 18:01:07 CEST 2017


Commit: 4537e8558468c71a03bf53f59c60f888b3412de2
Author: Brecht Van Lommel
Date:   Thu Oct 5 16:41:41 2017 +0200
Branches: master
https://developer.blender.org/rB4537e8558468c71a03bf53f59c60f888b3412de2

Fix T53001: more workarounds for crash in AMD compiler with recent drivers.

===================================================================

M	intern/cycles/kernel/kernel_compat_cuda.h
M	intern/cycles/kernel/kernel_compat_opencl.h
M	intern/cycles/kernel/svm/svm_blackbody.h
M	intern/cycles/kernel/svm/svm_math_util.h
M	intern/cycles/kernel/svm/svm_wavelength.h
M	intern/cycles/util/util_defines.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 1e2af9de8b3..2e8ca48c413 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -50,7 +50,8 @@
 #endif
 #define ccl_device_noinline  __device__ __noinline__
 #define ccl_global
-#define ccl_constant
+#define ccl_static_constant __constant__
+#define ccl_constant const
 #define ccl_local __shared__
 #define ccl_local_param
 #define ccl_private
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index 36d6031d042..7f81523791b 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -36,6 +36,7 @@
 #define ccl_device_forceinline ccl_device
 #define ccl_device_noinline ccl_device ccl_noinline
 #define ccl_may_alias
+#define ccl_static_constant static __constant
 #define ccl_constant __constant
 #define ccl_global __global
 #define ccl_local __local
diff --git a/intern/cycles/kernel/svm/svm_blackbody.h b/intern/cycles/kernel/svm/svm_blackbody.h
index b750ad87b7f..51590b18505 100644
--- a/intern/cycles/kernel/svm/svm_blackbody.h
+++ b/intern/cycles/kernel/svm/svm_blackbody.h
@@ -41,8 +41,7 @@ ccl_device void svm_node_blackbody(KernelGlobals *kg, ShaderData *sd, float *sta
 
 	float3 color_rgb = svm_math_blackbody_color(temperature);
 
-	if(stack_valid(col_offset))
-		stack_store_float3(stack, col_offset, color_rgb);
+	stack_store_float3(stack, col_offset, color_rgb);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index f8aeeba4a21..1ce7777aac3 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -100,40 +100,42 @@ ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
 	return Fac;
 }
 
-ccl_device float3 svm_math_blackbody_color(float t) {
-	/* Calculate color in range 800..12000 using an approximation
-	 * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B
-	 * Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
-	 * which is enough to get the same 8 bit/channel color.
-	 */
-
-	const float rc[6][3] = {
-		{  2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
-		{  3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
-		{  4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
-		{  4.66849800e+03f,  2.85655028e-05f, 1.29075375e-01f },
-		{  4.60124770e+03f,  2.89727618e-05f, 1.48001316e-01f },
-		{  3.78765709e+03f,  9.36026367e-06f, 3.98995841e-01f },
-	};
-
-	const float gc[6][3] = {
-		{ -7.50343014e+02f,  3.15679613e-04f, 4.73464526e-01f },
-		{ -1.00402363e+03f,  1.29189794e-04f, 9.08181524e-01f },
-		{ -1.22075471e+03f,  2.56245413e-05f, 1.20753416e+00f },
-		{ -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
-		{ -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
-		{ -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
-	};
-
-	const float bc[6][4] = {
-		{ 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
-		{ 0.0f, 0.0f, 0.0f, 0.0f },
-		{ 0.0f, 0.0f, 0.0f, 0.0f },
-		{ -2.02524603e-11f,  1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
-		{ -2.22463426e-13f, -1.55078698e-08f,  3.81675160e-04f, -7.30646033e-01f },
-		{  6.72595954e-13f, -2.73059993e-08f,  4.24068546e-04f, -7.52204323e-01f },
-	};
+/* Calculate color in range 800..12000 using an approximation
+ * a/t + b*t + c for R and G and ((a*t + b)*t + c)*t + d for B.
+ * Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
+ * which is enough to get the same 8 bit/channel color.
+ */
 
+ccl_static_constant float blackbody_table_r[6][3] = {
+	{  2.52432244e+03f, -1.06185848e-03f, 3.11067539e+00f },
+	{  3.37763626e+03f, -4.34581697e-04f, 1.64843306e+00f },
+	{  4.10671449e+03f, -8.61949938e-05f, 6.41423749e-01f },
+	{  4.66849800e+03f,  2.85655028e-05f, 1.29075375e-01f },
+	{  4.60124770e+03f,  2.89727618e-05f, 1.48001316e-01f },
+	{  3.78765709e+03f,  9.36026367e-06f, 3.98995841e-01f },
+};
+
+ccl_static_constant float blackbody_table_g[6][3] = {
+	{ -7.50343014e+02f,  3.15679613e-04f, 4.73464526e-01f },
+	{ -1.00402363e+03f,  1.29189794e-04f, 9.08181524e-01f },
+	{ -1.22075471e+03f,  2.56245413e-05f, 1.20753416e+00f },
+	{ -1.42546105e+03f, -4.01730887e-05f, 1.44002695e+00f },
+	{ -1.18134453e+03f, -2.18913373e-05f, 1.30656109e+00f },
+	{ -5.00279505e+02f, -4.59745390e-06f, 1.09090465e+00f },
+};
+
+ccl_static_constant float blackbody_table_b[6][4] = {
+	{ 0.0f, 0.0f, 0.0f, 0.0f }, /* zeros should be optimized by compiler */
+	{ 0.0f, 0.0f, 0.0f, 0.0f },
+	{ 0.0f, 0.0f, 0.0f, 0.0f },
+	{ -2.02524603e-11f,  1.79435860e-07f, -2.60561875e-04f, -1.41761141e-02f },
+	{ -2.22463426e-13f, -1.55078698e-08f,  3.81675160e-04f, -7.30646033e-01f },
+	{  6.72595954e-13f, -2.73059993e-08f,  4.24068546e-04f, -7.52204323e-01f },
+};
+
+
+ccl_device float3 svm_math_blackbody_color(float t)
+{
 	if(t >= 12000.0f) {
 		return make_float3(0.826270103f, 0.994478524f, 1.56626022f);
 	}
@@ -148,10 +150,14 @@ ccl_device float3 svm_math_blackbody_color(float t) {
 		(t >= 1449.0f)? 2:
 		(t >= 1167.0f)? 1: 0;
 
+	ccl_constant float *r = blackbody_table_r[i];
+	ccl_constant float *g = blackbody_table_g[i];
+	ccl_constant float *b = blackbody_table_b[i];
+
 	const float t_inv = 1.0f / t;
-	return make_float3(rc[i][0] * t_inv + rc[i][1] * t + rc[i][2],
-	                   gc[i][0] * t_inv + gc[i][1] * t + gc[i][2],
-	                   ((bc[i][0] * t + bc[i][1]) * t + bc[i][2]) * t + bc[i][3]);
+	return make_float3(r[0] * t_inv + r[1] * t + r[2],
+	                   g[0] * t_inv + g[1] * t + g[2],
+	                   ((b[0] * t + b[1]) * t + b[2]) * t + b[3]);
 }
 
 ccl_device_inline float3 svm_math_gamma_color(float3 color, float gamma)
diff --git a/intern/cycles/kernel/svm/svm_wavelength.h b/intern/cycles/kernel/svm/svm_wavelength.h
index 57030f3979d..855b356b397 100644
--- a/intern/cycles/kernel/svm/svm_wavelength.h
+++ b/intern/cycles/kernel/svm/svm_wavelength.h
@@ -34,44 +34,44 @@ CCL_NAMESPACE_BEGIN
 
 /* Wavelength to RGB */
 
+// CIE colour matching functions xBar, yBar, and zBar for
+// wavelengths from 380 through 780 nanometers, every 5
+// nanometers (81 entries). For a wavelength lambda in this range:
+//   cie_colour_match[(lambda - 380) / 5][0] = xBar
+//   cie_colour_match[(lambda - 380) / 5][1] = yBar
+//   cie_colour_match[(lambda - 380) / 5][2] = zBar
+ccl_static_constant float cie_colour_match[81][3] = {
+	{0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
+	{0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
+	{0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
+	{0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
+	{0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
+	{0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
+	{0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
+	{0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
+	{0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
+	{0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
+	{0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
+	{0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0000f,0.0057f},
+	{0.5945f,0.9950f,0.0039f}, {0.6784f,0.9786f,0.0027f}, {0.7621f,0.9520f,0.0021f},
+	{0.8425f,0.9154f,0.0018f}, {0.9163f,0.8700f,0.0017f}, {0.9786f,0.8163f,0.0014f},
+	{1.0263f,0.7570f,0.0011f}, {1.0567f,0.6949f,0.0010f}, {1.0622f,0.6310f,0.0008f},
+	{1.0456f,0.5668f,0.0006f}, {1.0026f,0.5030f,0.0003f}, {0.9384f,0.4412f,0.0002f},
+	{0.8544f,0.3810f,0.0002f}, {0.7514f,0.3210f,0.0001f}, {0.6424f,0.2650f,0.0000f},
+	{0.5419f,0.2170f,0.0000f}, {0.4479f,0.1750f,0.0000f}, {0.3608f,0.1382f,0.0000f},
+	{0.2835f,0.1070f,0.0000f}, {0.2187f,0.0816f,0.0000f}, {0.1649f,0.0610f,0.0000f},
+	{0.1212f,0.0446f,0.0000f}, {0.0874f,0.0320f,0.0000f}, {0.0636f,0.0232f,0.0000f},
+	{0.0468f,0.0170f,0.0000f}, {0.0329f,0.0119f,0.0000f}, {0.0227f,0.0082f,0.0000f},
+	{0.0158f,0.0057f,0.0000f}, {0.0114f,0.0041f,0.0000f}, {0.0081f,0.0029f,0.0000f},
+	{0.0058f,0.0021f,0.0000f}, {0.0041f,0.0015f,0.0000f}, {0.0029f,0.0010f,0.0000f},
+	{0.0020f,0.0007f,0.0000f}, {0.0014f,0.0005f,0.0000f}, {0.0010f,0.0004f,0.0000f},
+	{0.0007f,0.0002f,0.0000f}, {0.0005f,0.0002f,0.0000f}, {0.0003f,0.0001f,0.0000f},
+	{0.0002f,0.0001f,0.0000f}, {0.0002f,0.0001f,0.0000f}, {0.0001f,0.0000f,0.0000f},
+	{0.0001f,0.0000f,0.0000f}, {0.0001f,0.0000f,0.0000f}, {0.0000f,0.0000f,0.0000f}
+};
+
 ccl_device void svm_node_wavelength(ShaderData *sd, float *stack, uint wavelength, uint color_out)
 {	
-	// CIE colour matching functions xBar, yBar, and zBar for
-	//	 wavelengths from 380 through 780 nanometers, every 5
-	//	 nanometers.  For a wavelength lambda in this range:
-	//		  cie_colour_match[(lambda - 380) / 5][0] = xBar
-	//		  cie_colour_match[(lambda - 380) / 5][1] = yBar
-	//		  cie_colour_match[(lambda - 380) / 5][2] = zBar
-	const float cie_colour_match[81][3] = {
-		{0.0014f,0.0000f,0.0065f}, {0.0022f,0.0001f,0.0105f}, {0.0042f,0.0001f,0.0201f},
-		{0.0076f,0.0002f,0.0362f}, {0.0143f,0.0004f,0.0679f}, {0.0232f,0.0006f,0.1102f},
-		{0.0435f,0.0012f,0.2074f}, {0.0776f,0.0022f,0.3713f}, {0.1344f,0.0040f,0.6456f},
-		{0.2148f,0.0073f,1.0391f}, {0.2839f,0.0116f,1.3856f}, {0.3285f,0.0168f,1.6230f},
-		{0.3483f,0.0230f,1.7471f}, {0.3481f,0.0298f,1.7826f}, {0.3362f,0.0380f,1.7721f},
-		{0.3187f,0.0480f,1.7441f}, {0.2908f,0.0600f,1.6692f}, {0.2511f,0.0739f,1.5281f},
-		{0.1954f,0.0910f,1.2876f}, {0.1421f,0.1126f,1.0419f}, {0.0956f,0.1390f,0.8130f},
-		{0.0580f,0.1693f,0.6162f}, {0.0320f,0.2080f,0.4652f}, {0.0147f,0.2586f,0.3533f},
-		{0.0049f,0.3230f,0.2720f}, {0.0024f,0.4073f,0.2123f}, {0.0093f,0.5030f,0.1582f},
-		{0.0291f,0.6082f,0.1117f}, {0.0633f,0.7100f,0.0782f}, {0.1096f,0.7932f,0.0573f},
-		{0.1655f,0.8620f,0.0422f}, {0.2257f,0.9149f,0.0298f}, {0.2904f,0.9540f,0.0203f},
-		{0.3597f,0.9803f,0.0134f}, {0.4334f,0.9950f,0.0087f}, {0.5121f,1.0

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list