[Bf-blender-cvs] [500e0e9] master: Cycles: Some more inline policy tweaks for CUDA 8

Sergey Sharybin noreply at git.blender.org
Tue Aug 2 15:39:18 CEST 2016


Commit: 500e0e9a3d5a9e7982d4df31a1477b8732c76e71
Author: Sergey Sharybin
Date:   Tue Aug 2 15:04:34 2016 +0200
Branches: master
https://developer.blender.org/rB500e0e9a3d5a9e7982d4df31a1477b8732c76e71

Cycles: Some more inline policy tweaks for CUDA 8

Makes it so toolkit does exactly the same decision about what to inline,
but unfortunately it has really barely visible difference on GTX-980.

===================================================================

M	intern/cycles/kernel/closure/bsdf.h
M	intern/cycles/kernel/kernel_path_branched.h
M	intern/cycles/kernel/kernel_shader.h
M	intern/cycles/kernel/svm/svm_attribute.h
M	intern/cycles/kernel/svm/svm_wireframe.h

===================================================================

diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 55bdf3e..86e1a7f 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -144,7 +144,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
 	return label;
 }
 
-ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_inline
+#endif
+float3 bsdf_eval(KernelGlobals *kg,
+                 ShaderData *sd,
+                 const ShaderClosure *sc,
+                 const float3 omega_in,
+                 float *pdf)
 {
 	float3 eval;
 
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 5651696..64f1468 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BRANCHED_PATH__
 
-ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
-                                        ShaderData *sd,
-                                        ShaderData *emission_sd,
-                                        PathRadiance *L,
-                                        PathState *state,
-                                        RNG *rng,
-                                        float3 throughput)
+ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
+                                               ShaderData *sd,
+                                               ShaderData *emission_sd,
+                                               PathRadiance *L,
+                                               PathState *state,
+                                               RNG *rng,
+                                               float3 throughput)
 {
 	int num_samples = kernel_data.integrator.ao_samples;
 	float num_samples_inv = 1.0f/num_samples;
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index bb3fe93..98d321c 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -149,7 +149,12 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
 /* ShaderData setup from BSSRDF scatter */
 
 #ifdef __SUBSURFACE__
-ccl_device void shader_setup_from_subsurface(
+#  ifndef __KERNEL_CUDS__
+ccl_device
+#  else
+ccl_device_inline
+#  endif
+void shader_setup_from_subsurface(
         KernelGlobals *kg,
         ShaderData *sd,
         const Intersection *isect,
@@ -533,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
 }
 #endif
 
-ccl_device void shader_bsdf_eval(KernelGlobals *kg,
-                                 ShaderData *sd,
-                                 const float3 omega_in,
-                                 BsdfEval *eval,
-                                 float light_pdf,
-                                 bool use_mis)
+
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_inline
+#endif
+void shader_bsdf_eval(KernelGlobals *kg,
+                      ShaderData *sd,
+                      const float3 omega_in,
+                      BsdfEval *eval,
+                      float light_pdf,
+                      bool use_mis)
 {
 	bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
 
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index ff92920..bd6013e 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
 	}
 }
 
-ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_noinline
+#endif
+void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
 	NodeAttributeType type, mesh_type;
 	AttributeElement elem;
@@ -123,10 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
 	}
 }
 
-ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg,
-                                      ShaderData *sd,
-                                      float *stack,
-                                      uint4 node)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_noinline
+#endif
+void svm_node_attr_bump_dy(KernelGlobals *kg,
+                           ShaderData *sd,
+                           float *stack,
+                           uint4 node)
 {
 	NodeAttributeType type, mesh_type;
 	AttributeElement elem;
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 30ccd52..6eed9bc 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Wireframe Node */
 
-ccl_device float wireframe(KernelGlobals *kg,
-                           ShaderData *sd,
-                           float size,
-                           int pixel_size,
-                           float3 *P)
+ccl_device_inline float wireframe(KernelGlobals *kg,
+                                  ShaderData *sd,
+                                  float size,
+                                  int pixel_size,
+                                  float3 *P)
 {
 #ifdef __HAIR__
 	if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)




More information about the Bf-blender-cvs mailing list