[Bf-blender-cvs] [61eab74] master: Cycles: Optimization for CMJ in CUDA kernels
Sergey Sharybin
noreply at git.blender.org
Fri Mar 13 08:38:51 CET 2015
Commit: 61eab743f1377fdfcf44f2e4928290a3fc4ccfea
Author: Sergey Sharybin
Date: Fri Mar 13 12:14:43 2015 +0500
Branches: master
https://developer.blender.org/rB61eab743f1377fdfcf44f2e4928290a3fc4ccfea
Cycles: Optimization for CMJ in CUDA kernels
Two things:
- Use intrinsics for clz/ctz (ctz is implemented via ffs()).
- Use a faster sqrt() function whose precision is sufficient for
integer values.
===================================================================
M intern/cycles/kernel/kernel_jitter.h
===================================================================
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index 6aa2931..6953f00 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -47,6 +47,8 @@ ccl_device_inline int cmj_fast_div_pow2(int a, int b)
# else
return a >> __builtin_ctz(b);
# endif
+#elif defined(__KERNEL_CUDA__)
+ return a >> (__ffs(b) - 1);
#else
return a/b;
#endif
@@ -63,6 +65,8 @@ ccl_device_inline uint cmj_w_mask(uint w)
# else
return ((1 << (32 - __builtin_clz(w))) - 1);
# endif
+#elif defined(__KERNEL_CUDA__)
+ return ((1 << (32 - __clz(w))) - 1);
#else
w |= w >> 1;
w |= w >> 2;
@@ -167,7 +171,11 @@ ccl_device void cmj_sample_2D(int s, int N, int p, float *fx, float *fy)
{
kernel_assert(s < N);
+#if defined(__KERNEL_CUDA__)
+ int m = float_to_int(__fsqrt_ru(N));
+#else
int m = float_to_int(sqrtf(N));
+#endif
int n = (N + m - 1)/m;
float invN = 1.0f/N;
float invm = 1.0f/m;
More information about the Bf-blender-cvs
mailing list