[Bf-blender-cvs] [2b999c6a68f] master: Cycles: change svm node decoding for more efficient code generation on GPU
Patrick Mours
noreply at git.blender.org
Mon Aug 26 10:35:23 CEST 2019
Commit: 2b999c6a68f85523c46f39bb6a877baba2343d9b
Author: Patrick Mours
Date: Wed Aug 21 11:59:57 2019 +0200
Branches: master
https://developer.blender.org/rB2b999c6a68f85523c46f39bb6a877baba2343d9b
Cycles: change svm node decoding for more efficient code generation on GPU
These functions no longer accept NULL. They were renamed for clarity and to
avoid hidden merge issues.
Ref D5363
===================================================================
M intern/cycles/kernel/svm/svm.h
M intern/cycles/kernel/svm/svm_ao.h
M intern/cycles/kernel/svm/svm_bevel.h
M intern/cycles/kernel/svm/svm_brick.h
M intern/cycles/kernel/svm/svm_brightness.h
M intern/cycles/kernel/svm/svm_checker.h
M intern/cycles/kernel/svm/svm_clamp.h
M intern/cycles/kernel/svm/svm_closure.h
M intern/cycles/kernel/svm/svm_displace.h
M intern/cycles/kernel/svm/svm_fresnel.h
M intern/cycles/kernel/svm/svm_gradient.h
M intern/cycles/kernel/svm/svm_hsv.h
M intern/cycles/kernel/svm/svm_ies.h
M intern/cycles/kernel/svm/svm_image.h
M intern/cycles/kernel/svm/svm_light_path.h
M intern/cycles/kernel/svm/svm_magic.h
M intern/cycles/kernel/svm/svm_map_range.h
M intern/cycles/kernel/svm/svm_math.h
M intern/cycles/kernel/svm/svm_musgrave.h
M intern/cycles/kernel/svm/svm_noisetex.h
M intern/cycles/kernel/svm/svm_ramp.h
M intern/cycles/kernel/svm/svm_tex_coord.h
M intern/cycles/kernel/svm/svm_vector_transform.h
M intern/cycles/kernel/svm/svm_voronoi.h
M intern/cycles/kernel/svm/svm_voxel.h
M intern/cycles/kernel/svm/svm_wave.h
M intern/cycles/kernel/svm/svm_white_noise.h
M intern/cycles/kernel/svm/svm_wireframe.h
===================================================================
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 8f8451b364d..ab8570618ab 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -132,16 +132,25 @@ ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
__uint_as_float(node.w));
}
-ccl_device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
+ccl_device_forceinline void svm_unpack_node_uchar2(uint i, uint *x, uint *y)
{
- if (x)
- *x = (i & 0xFF);
- if (y)
- *y = ((i >> 8) & 0xFF);
- if (z)
- *z = ((i >> 16) & 0xFF);
- if (w)
- *w = ((i >> 24) & 0xFF);
+ *x = (i & 0xFF);
+ *y = ((i >> 8) & 0xFF);
+}
+
+ccl_device_forceinline void svm_unpack_node_uchar3(uint i, uint *x, uint *y, uint *z)
+{
+ *x = (i & 0xFF);
+ *y = ((i >> 8) & 0xFF);
+ *z = ((i >> 16) & 0xFF);
+}
+
+ccl_device_forceinline void svm_unpack_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
+{
+ *x = (i & 0xFF);
+ *y = ((i >> 8) & 0xFF);
+ *z = ((i >> 16) & 0xFF);
+ *w = ((i >> 24) & 0xFF);
}
CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 3a8f32ac9d2..4cb986b897a 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -85,10 +85,10 @@ ccl_device void svm_node_ao(
KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
{
uint flags, dist_offset, normal_offset, out_ao_offset;
- decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
+ svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
uint color_offset, out_color_offset, samples;
- decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL);
+ svm_unpack_node_uchar3(node.z, &color_offset, &out_color_offset, &samples);
float dist = stack_load_float_default(stack, dist_offset, node.w);
float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index 6045268918b..434502f31f9 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -202,7 +202,7 @@ ccl_device void svm_node_bevel(
KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
{
uint num_samples, radius_offset, normal_offset, out_offset;
- decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
+ svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
float radius = stack_load_float(stack, radius_offset);
float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples);
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index 77f697a78cb..f1d74b7df96 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -87,13 +87,13 @@ ccl_device void svm_node_tex_brick(
/* RNA properties */
uint offset_frequency, squash_frequency;
- decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
- decode_node_uchar4(
+ svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
+ svm_unpack_node_uchar4(
node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
- decode_node_uchar4(
+ svm_unpack_node_uchar4(
node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset);
- decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL);
+ svm_unpack_node_uchar2(node2.x, &offset_frequency, &squash_frequency);
float3 co = stack_load_float3(stack, co_offset);
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index dcd75a2fe8f..9554b5946fb 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -22,7 +22,7 @@ ccl_device void svm_node_brightness(
uint bright_offset, contrast_offset;
float3 color = stack_load_float3(stack, in_color);
- decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL);
+ svm_unpack_node_uchar2(node, &bright_offset, &contrast_offset);
float brightness = stack_load_float(stack, bright_offset);
float contrast = stack_load_float(stack, contrast_offset);
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index 04a7b690e50..d54cb73df91 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -37,8 +37,8 @@ ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *s
uint co_offset, color1_offset, color2_offset, scale_offset;
uint color_offset, fac_offset;
- decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
- decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+ svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
+ svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset);
float3 co = stack_load_float3(stack, co_offset);
float3 color1 = stack_load_float3(stack, color1_offset);
diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h
index 5ff4a599028..a45e70a3f15 100644
--- a/intern/cycles/kernel/svm/svm_clamp.h
+++ b/intern/cycles/kernel/svm/svm_clamp.h
@@ -27,7 +27,7 @@ ccl_device void svm_node_clamp(KernelGlobals *kg,
int *offset)
{
uint min_stack_offset, max_stack_offset;
- decode_node_uchar4(parameters_stack_offsets, &min_stack_offset, &max_stack_offset, NULL, NULL);
+ svm_unpack_node_uchar2(parameters_stack_offsets, &min_stack_offset, &max_stack_offset);
uint4 defaults = read_node(kg, offset);
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 270fe4c8615..1511fc65835 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -85,7 +85,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
uint type, param1_offset, param2_offset;
uint mix_weight_offset;
- decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
+ svm_unpack_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
1.0f);
@@ -122,21 +122,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
uint4 data_node2 = read_node(kg, offset);
float3 T = stack_load_float3(stack, data_node.y);
- decode_node_uchar4(data_node.z,
- &specular_offset,
- &roughness_offset,
- &specular_tint_offset,
- &anisotropic_offset);
- decode_node_uchar4(data_node.w,
- &sheen_offset,
- &sheen_tint_offset,
- &clearcoat_offset,
- &clearcoat_roughness_offset);
- decode_node_uchar4(data_node2.x,
- &eta_offset,
- &transmission_offset,
- &anisotropic_rotation_offset,
- &transmission_roughness_offset);
+ svm_unpack_node_uchar4(data_node.z,
+ &specular_offset,
+ &roughness_offset,
+ &specular_tint_offset,
+ &anisotropic_offset);
+ svm_unpack_node_uchar4(data_node.w,
+ &sheen_offset,
+ &sheen_tint_offset,
+ &clearcoat_offset,
+ &clearcoat_roughness_offset);
+ svm_unpack_node_uchar4(data_node2.x,
+ &eta_offset,
+ &transmission_offset,
+ &anisotropic_rotation_offset,
+ &transmission_roughness_offset);
// get Disney principled parameters
float metallic = param1;
@@ -793,19 +793,19 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
float3 weight = sd->svm_closure_weight * mix_weight;
uint offset_ofs, ior_ofs, color_ofs, parametrization;
- decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
+ svm_unpack_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
float alpha = stack_load_float_default(stack, offset_ofs, data_node.z);
float ior = stack_load_float_default(stack, ior_ofs, data_node.w);
uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs;
- decode_node_uchar4(data_node2.x,
- &coat_ofs,
- &melanin_ofs,
- &melanin_redness_ofs,
- &absorption_coefficient_ofs);
+ svm_unpack_node_uchar4(data_node2.x,
+ &coat_ofs,
+ &melanin_ofs,
+ &melanin_redness_ofs,
+ &absorption_coefficient_ofs);
uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs;
- decode_node_uchar4(
+ svm_unpack_node_uchar4(
data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs);
const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y);
@@ -982,7 +982,7 @@ ccl_device void svm_node_closure_volume(
uint type, density_offset, anisotropy_offset;
uint mix_weight_offset;
- decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
+ svm_unpack_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list