[Bf-blender-cvs] [aaf4b00ba12] functions: test impact of manual vectorization (2x speedup)
Jacques Lucke
noreply at git.blender.org
Sun Jun 30 16:00:46 CEST 2019
Commit: aaf4b00ba12eebd3cd8fc1d973807d338ab8a32e
Author: Jacques Lucke
Date: Sun Jun 30 10:40:31 2019 +0200
Branches: functions
https://developer.blender.org/rBaaf4b00ba12eebd3cd8fc1d973807d338ab8a32e
test impact of manual vectorization (2x speedup)
===================================================================
M source/blender/simulations/bparticles/simulate.cpp
===================================================================
diff --git a/source/blender/simulations/bparticles/simulate.cpp b/source/blender/simulations/bparticles/simulate.cpp
index c74d458666e..4d7c101bfc1 100644
--- a/source/blender/simulations/bparticles/simulate.cpp
+++ b/source/blender/simulations/bparticles/simulate.cpp
@@ -5,6 +5,8 @@
#include "BLI_task.h"
#include "BLI_timeit.hpp"
+#include "xmmintrin.h"
+
#define USE_THREADING false
namespace BParticles {
@@ -309,9 +311,39 @@ BLI_NOINLINE static void simulate_with_max_n_events(
r_unfinished_particle_indices = std::move(unfinished_particle_indices);
}
+BLI_NOINLINE static void add_float3_arrays(ArrayRef<float3> base, ArrayRef<float3> values)
+{
+ /* I'm just testing the impact of vectorization here.
+ * This should eventually be moved to another place. */
+ BLI_assert(base.size() == values.size());
+ BLI_assert(POINTER_AS_UINT(base.begin()) % 16 == 0);
+ BLI_assert(POINTER_AS_UINT(values.begin()) % 16 == 0);
+
+ float *base_start = (float *)base.begin();
+ float *values_start = (float *)values.begin();
+ uint total_size = base.size() * 3;
+ uint overshoot = total_size % 4;
+ uint vectorized_size = total_size - overshoot;
+
+ /* Twice as fast in my test than the normal loop.
+ * The compiler did not vectorize it, maybe for compatibility? */
+ for (uint i = 0; i < vectorized_size; i += 4) {
+ __m128 a = _mm_load_ps(base_start + i);
+ __m128 b = _mm_load_ps(values_start + i);
+ __m128 result = _mm_add_ps(a, b);
+ _mm_store_ps(base_start + i, result);
+ }
+
+ for (uint i = vectorized_size; i < total_size; i++) {
+ base_start[i] += values_start[i];
+ }
+}
+
BLI_NOINLINE static void apply_remaining_offsets(ParticleSet particles,
AttributeArrays attribute_offsets)
{
+ SCOPED_TIMER_STATS(__func__);
+
for (uint attribute_index : attribute_offsets.info().float3_attributes()) {
StringRef name = attribute_offsets.info().name_of(attribute_index);
@@ -319,9 +351,7 @@ BLI_NOINLINE static void apply_remaining_offsets(ParticleSet particles,
auto offsets = attribute_offsets.get_float3(attribute_index);
if (particles.indices_are_trivial()) {
- for (uint pindex : particles.range()) {
- values[pindex] += offsets[pindex];
- }
+ add_float3_arrays(values, offsets);
}
else {
for (uint pindex : particles.indices()) {
More information about the Bf-blender-cvs
mailing list