[Bf-blender-cvs] [b2933ff] temp-cycles-microdisplacement: Reduce number of calls to patch_eval_*
Mai Lavelle
noreply at git.blender.org
Thu Jul 14 02:50:06 CEST 2016
Commit: b2933ff1412fab809bc639cdc99c6b74750323ee
Author: Mai Lavelle
Date: Wed Jul 13 20:23:42 2016 -0400
Branches: temp-cycles-microdisplacement
https://developer.blender.org/rBb2933ff1412fab809bc639cdc99c6b74750323ee
Reduce number of calls to patch_eval_*
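This alleviates a ~5% performance regression caused by calling patch_eval_*
functions multiple times to calculate differentials. Differentials for
subdivided attributes are now calculated directly from the limit: a single
patch_eval_* call yields the attribute value together with its derivatives,
which are then mapped to the dx/dy differentials with the chain rule.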
Thanks to Lukas Stockner for checking my math.
===================================================================
M intern/cycles/kernel/geom/geom_patch.h
M intern/cycles/kernel/geom/geom_triangle.h
===================================================================
diff --git a/intern/cycles/kernel/geom/geom_patch.h b/intern/cycles/kernel/geom/geom_patch.h
index 65fa41c..6a0ff5a 100644
--- a/intern/cycles/kernel/geom/geom_patch.h
+++ b/intern/cycles/kernel/geom/geom_patch.h
@@ -213,6 +213,11 @@ ccl_device_inline void patch_eval_basis(KernelGlobals *kg, const PatchHandle *ha
uint patch_bits = kernel_tex_fetch(__patches, handle->patch_index + 1); /* read patch param */
float d_scale = 1 << patch_eval_depth(patch_bits);
+ bool non_quad_root = (patch_bits >> 4) & 0x1;
+ if(non_quad_root) {
+ d_scale *= 0.5f;
+ }
+
patch_eval_normalize_coords(patch_bits, &u, &v);
/* XXX: regular patches only for now. */
@@ -266,6 +271,8 @@ ccl_device float patch_eval_float(KernelGlobals *kg, const ShaderData *sd, int o
indices, weights, weights_du, weights_dv);
float val = 0.0f;
+ if(du) *du = 0.0f;
+ if(dv) *dv = 0.0f;
for(int i = 0; i < num_control; i++) {
float v = kernel_tex_fetch(__attributes_float, offset + indices[i]);
@@ -291,6 +298,8 @@ ccl_device float3 patch_eval_float3(KernelGlobals *kg, const ShaderData *sd, int
indices, weights, weights_du, weights_dv);
float3 val = make_float3(0.0f, 0.0f, 0.0f);
+ if(du) *du = make_float3(0.0f, 0.0f, 0.0f);
+ if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i < num_control; i++) {
float3 v = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + indices[i]));
@@ -316,6 +325,8 @@ ccl_device float3 patch_eval_uchar4(KernelGlobals *kg, const ShaderData *sd, int
indices, weights, weights_du, weights_dv);
float3 val = make_float3(0.0f, 0.0f, 0.0f);
+ if(du) *du = make_float3(0.0f, 0.0f, 0.0f);
+ if(dv) *dv = make_float3(0.0f, 0.0f, 0.0f);
for(int i = 0; i < num_control; i++) {
float3 v = color_byte_to_float(kernel_tex_fetch(__attributes_uchar4, offset + indices[i]));
diff --git a/intern/cycles/kernel/geom/geom_triangle.h b/intern/cycles/kernel/geom/geom_triangle.h
index e211534..9181d01 100644
--- a/intern/cycles/kernel/geom/geom_triangle.h
+++ b/intern/cycles/kernel/geom/geom_triangle.h
@@ -283,7 +283,50 @@ ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderDa
{
int patch = subd_triangle_patch(kg, sd);
- if(desc->element == ATTR_ELEMENT_FACE) {
+ if(desc->flags & ATTR_SUBDIVIDED) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ float2 dpdu = uv[0] - uv[2];
+ float2 dpdv = uv[1] - uv[2];
+
+ /* p is [s, t] */
+ float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2];
+
+ float a, dads, dadt;
+ a = patch_eval_float(kg, sd, desc->offset, patch, p.x, p.y, 0, &dads, &dadt);
+
+#ifdef __RAY_DIFFERENTIALS__
+ if(dx || dy) {
+ float dsdu = dpdu.x;
+ float dtdu = dpdu.y;
+ float dsdv = dpdv.x;
+ float dtdv = dpdv.y;
+
+ if(dx) {
+ float dudx = ccl_fetch(sd, du).dx;
+ float dvdx = ccl_fetch(sd, dv).dx;
+
+ float dsdx = dsdu*dudx + dsdv*dvdx;
+ float dtdx = dtdu*dudx + dtdv*dvdx;
+
+ *dx = dads*dsdx + dadt*dtdx;
+ }
+ if(dy) {
+ float dudy = ccl_fetch(sd, du).dy;
+ float dvdy = ccl_fetch(sd, dv).dy;
+
+ float dsdy = dsdu*dudy + dsdv*dvdy;
+ float dtdy = dtdu*dudy + dtdv*dvdy;
+
+ *dy = dads*dsdy + dadt*dtdy;
+ }
+ }
+#endif
+
+ return a;
+ }
+ else if(desc->element == ATTR_ELEMENT_FACE) {
if(dx) *dx = 0.0f;
if(dy) *dy = 0.0f;
@@ -293,31 +336,22 @@ ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderDa
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float a, b, c;
+ uint4 v = subd_triangle_patch_indices(kg, patch);
- if(desc->flags & ATTR_SUBDIVIDED) {
- a = patch_eval_float(kg, sd, desc->offset, patch, uv[0].x, uv[0].y, 0, NULL, NULL);
- b = patch_eval_float(kg, sd, desc->offset, patch, uv[1].x, uv[1].y, 0, NULL, NULL);
- c = patch_eval_float(kg, sd, desc->offset, patch, uv[2].x, uv[2].y, 0, NULL, NULL);
- }
- else {
- uint4 v = subd_triangle_patch_indices(kg, patch);
+ float f0 = kernel_tex_fetch(__attributes_float, desc->offset + v.x);
+ float f1 = kernel_tex_fetch(__attributes_float, desc->offset + v.y);
+ float f2 = kernel_tex_fetch(__attributes_float, desc->offset + v.z);
+ float f3 = kernel_tex_fetch(__attributes_float, desc->offset + v.w);
- float f0 = kernel_tex_fetch(__attributes_float, desc->offset + v.x);
- float f1 = kernel_tex_fetch(__attributes_float, desc->offset + v.y);
- float f2 = kernel_tex_fetch(__attributes_float, desc->offset + v.z);
- float f3 = kernel_tex_fetch(__attributes_float, desc->offset + v.w);
-
- if(subd_triangle_patch_num_corners(kg, patch) != 4) {
- f1 = (f1+f0)*0.5f;
- f3 = (f3+f0)*0.5f;
- }
-
- a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ if(subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1+f0)*0.5f;
+ f3 = (f3+f0)*0.5f;
}
+ float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c;
if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c;
@@ -329,32 +363,23 @@ ccl_device float subd_triangle_attribute_float(KernelGlobals *kg, const ShaderDa
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float a, b, c;
-
- if(desc->flags & ATTR_SUBDIVIDED) {
- a = patch_eval_float(kg, sd, desc->offset, patch, uv[0].x, uv[0].y, 0, NULL, NULL);
- b = patch_eval_float(kg, sd, desc->offset, patch, uv[1].x, uv[1].y, 0, NULL, NULL);
- c = patch_eval_float(kg, sd, desc->offset, patch, uv[2].x, uv[2].y, 0, NULL, NULL);
- }
- else {
- int corners[4];
- subd_triangle_patch_corners(kg, patch, corners);
-
- float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc->offset);
- float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc->offset);
- float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc->offset);
- float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc->offset);
+ int corners[4];
+ subd_triangle_patch_corners(kg, patch, corners);
- if(subd_triangle_patch_num_corners(kg, patch) != 4) {
- f1 = (f1+f0)*0.5f;
- f3 = (f3+f0)*0.5f;
- }
+ float f0 = kernel_tex_fetch(__attributes_float, corners[0] + desc->offset);
+ float f1 = kernel_tex_fetch(__attributes_float, corners[1] + desc->offset);
+ float f2 = kernel_tex_fetch(__attributes_float, corners[2] + desc->offset);
+ float f3 = kernel_tex_fetch(__attributes_float, corners[3] + desc->offset);
- a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
- b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
- c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+ if(subd_triangle_patch_num_corners(kg, patch) != 4) {
+ f1 = (f1+f0)*0.5f;
+ f3 = (f3+f0)*0.5f;
}
+ float a = mix(mix(f0, f1, uv[0].x), mix(f3, f2, uv[0].x), uv[0].y);
+ float b = mix(mix(f0, f1, uv[1].x), mix(f3, f2, uv[1].x), uv[1].y);
+ float c = mix(mix(f0, f1, uv[2].x), mix(f3, f2, uv[2].x), uv[2].y);
+
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = ccl_fetch(sd, du).dx*a + ccl_fetch(sd, dv).dx*b - (ccl_fetch(sd, du).dx + ccl_fetch(sd, dv).dx)*c;
if(dy) *dy = ccl_fetch(sd, du).dy*a + ccl_fetch(sd, dv).dy*b - (ccl_fetch(sd, du).dy + ccl_fetch(sd, dv).dy)*c;
@@ -374,7 +399,56 @@ ccl_device float3 subd_triangle_attribute_float3(KernelGlobals *kg, const Shader
{
int patch = subd_triangle_patch(kg, sd);
- if(desc->element == ATTR_ELEMENT_FACE) {
+ if(desc->flags & ATTR_SUBDIVIDED) {
+ float2 uv[3];
+ subd_triangle_patch_uv(kg, sd, uv);
+
+ float2 dpdu = uv[0] - uv[2];
+ float2 dpdv = uv[1] - uv[2];
+
+ /* p is [s, t] */
+ float2 p = dpdu * ccl_fetch(sd, u) + dpdv * ccl_fetch(sd, v) + uv[2];
+
+ float3 a, dads, dadt;
+
+ if(desc->element == ATTR_ELEMENT_CORNER_BYTE) {
+ a = patch_eval_uchar4(kg, sd, desc->offset, patch, p.x, p.y, 0, &dads, &dadt);
+ }
+ else {
+ a = patch_eval_float3(kg, sd, desc->offset, patch, p.x, p.y, 0, &dads, &dadt);
+ }
+
+#ifdef __RAY_DIFFERENTIALS__
+ if(dx || dy) {
+ float dsdu = dpdu.x;
+ float dtdu = dpdu.y;
+ float dsdv = dpdv.x;
+ float dtdv = dpdv.y;
+
+ if(dx) {
+ float dudx = ccl_fetch(sd, du).dx;
+ float dvdx = ccl_fetch(sd, dv).dx;
+
+ float dsdx = dsdu*dudx + dsdv*dvdx;
+ float dtdx = dtdu*dudx + dtdv*dvdx;
+
+ *dx = dads*dsdx + dadt*dtdx;
+ }
+ if(dy) {
+ float dudy = ccl_fetch(sd, du).dy;
+ float dvdy = ccl_fetch(sd, dv).dy;
+
+ float dsdy = dsdu*dudy + dsdv*dvdy;
+ float dtdy = dtdu*dudy + dtdv*dvdy;
+
+ *dy = dads*dsdy + dadt*dtdy;
+ }
+ }
+#endif
+
+ return a;
+ }
+ else if(desc->element == ATTR_ELEMENT_FACE) {
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
@@ -384,31 +458,22 @@ ccl_device float3 subd_triangle_attribute_float3(KernelGlobals *kg, const Shader
float2 uv[3];
subd_triangle_patch_uv(kg, sd, uv);
- float3 a, b, c;
+ uint4 v = subd_triangle_patch_indices(kg, patch);
- if(desc->flags & ATTR_SUBDIVIDED) {
- a = patch_eval_float3(kg, sd, desc->offset, patch, uv[0].x, uv[0].y, 0, NULL, NULL);
- b = patch_eval_float3(kg, sd, desc->offset, patch, uv[1].x, uv[1].y, 0, NULL, NULL);
- c = patch_eval_float3(kg, sd, desc->offset, patch, uv[2].x, uv[2].y, 0, NULL, NULL);
- }
- else {
- uint4 v = subd_triangle_patch_indices(kg, patch);
-
- float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc->offset + v.x));
- float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc->offset + v.y));
- float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc->offset + v.z));
- float3 f3 = float4_to_float3(kernel_tex_fetch(__attributes_float3, desc->offset + v.w));
-
- if(subd_triangle_patch_num_corners(kg, patch) !
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list