[Bf-blender-cvs] [f3059723771] master: OpenSubdiv: Initial implementation of batched evaluation

Wed Oct 30 15:18:09 CET 2019

Commit: f3059723771b90337b126a87a58b1ada981a21b1
Author: Sergey Sharybin
Date:   Wed Oct 16 14:59:18 2019 +0200
Branches: master
https://developer.blender.org/rBf3059723771b90337b126a87a58b1ada981a21b1

OpenSubdiv: Initial implementation of batched evaluation

The idea is to pass multiple coordinates to the evaluator and evaluate them
all at once, avoiding any possible per-call overhead.

===================================================================

M	intern/opensubdiv/internal/opensubdiv_evaluator.cc
M	intern/opensubdiv/internal/opensubdiv_evaluator_internal.cc
M	intern/opensubdiv/internal/opensubdiv_evaluator_internal.h
M	intern/opensubdiv/opensubdiv_capi_type.h
M	intern/opensubdiv/opensubdiv_evaluator_capi.h

===================================================================

diff --git a/intern/opensubdiv/internal/opensubdiv_evaluator.cc b/intern/opensubdiv/internal/opensubdiv_evaluator.cc
index 2500691885c..4f5a1db82ca 100644
--- a/intern/opensubdiv/internal/opensubdiv_evaluator.cc
+++ b/intern/opensubdiv/internal/opensubdiv_evaluator.cc
@@ -102,6 +102,17 @@ void evaluateLimit(OpenSubdiv_Evaluator *evaluator,
   evaluator->internal->eval_output->evaluateLimit(ptex_face_index, face_u, face_v, P, dPdu, dPdv);
 }
 
+void evaluatePatchesLimit(OpenSubdiv_Evaluator *evaluator,
+                          const OpenSubdiv_PatchCoord *patch_coords,
+                          const int num_patch_coords,
+                          float *P,
+                          float *dPdu,
+                          float *dPdv)
+{
+  auto &eval_output = *evaluator->internal->eval_output;
+  eval_output.evaluatePatchesLimit(patch_coords, num_patch_coords, P, dPdu, dPdv);
+}
+
 void evaluateVarying(OpenSubdiv_Evaluator *evaluator,
                      const int ptex_face_index,
                      float face_u,
@@ -137,6 +148,8 @@ void assignFunctionPointers(OpenSubdiv_Evaluator *evaluator)
   evaluator->evaluateLimit = evaluateLimit;
   evaluator->evaluateVarying = evaluateVarying;
   evaluator->evaluateFaceVarying = evaluateFaceVarying;
+
+  evaluator->evaluatePatchesLimit = evaluatePatchesLimit;
 }
 
 }  // namespace
diff --git a/intern/opensubdiv/internal/opensubdiv_evaluator_internal.cc b/intern/opensubdiv/internal/opensubdiv_evaluator_internal.cc
index acd8472b5f1..c5dd4509976 100644
--- a/intern/opensubdiv/internal/opensubdiv_evaluator_internal.cc
+++ b/intern/opensubdiv/internal/opensubdiv_evaluator_internal.cc
@@ -58,6 +58,88 @@ namespace opensubdiv_capi {
 
 namespace {
 
+// Array which stores small data in the object itself and larger data on the heap.
+// Storage only grows. Elements are relocated with memcpy(), so T must be trivially copyable.
+template<typename T, int kNumMaxElementsOnStack> class StackOrHeapArray {
+ public:
+  StackOrHeapArray()
+      : num_elements_(0), heap_elements_(NULL), num_heap_elements_(0), effective_elements_(NULL)
+  {
+  }
+
+  explicit StackOrHeapArray(int size) : StackOrHeapArray()
+  {
+    resize(size);
+  }
+
+  // Not copyable: a member-wise copy would make two objects delete the same heap buffer.
+  StackOrHeapArray(const StackOrHeapArray &) = delete;
+  StackOrHeapArray &operator=(const StackOrHeapArray &) = delete;
+
+  ~StackOrHeapArray()
+  {
+    delete[] heap_elements_;
+  }
+
+  int size() const
+  {
+    return num_elements_;
+  }
+
+  T *data()
+  {
+    return effective_elements_;
+  }
+
+  // Set the number of elements. The buffer in use is kept for as long as it is big enough.
+  void resize(int num_elements)
+  {
+    const int old_num_elements = num_elements_;
+    num_elements_ = num_elements;
+    const bool on_stack = (effective_elements_ == stack_elements_);
+    if (num_elements <= (on_stack ? kNumMaxElementsOnStack : num_heap_elements_)) {
+      return;
+    }
+    // A buffer in use is only ever outgrown by a new heap buffer, so old and new never alias.
+    T *old_buffer = effective_elements_;
+    effective_elements_ = allocate(num_elements);
+    if (old_buffer != NULL && old_num_elements > 0) {
+      memcpy(effective_elements_, old_buffer, sizeof(T) * old_num_elements);
+    }
+    if (old_buffer != stack_elements_) {
+      delete[] old_buffer;
+    }
+  }
+
+ protected:
+  // In-object storage if it fits, otherwise a new heap buffer which is stored in heap_elements_.
+  T *allocate(int num_elements)
+  {
+    if (num_elements <= kNumMaxElementsOnStack) {
+      return stack_elements_;
+    }
+    heap_elements_ = new T[num_elements];
+    num_heap_elements_ = num_elements;
+    return heap_elements_;
+  }
+
+  // Number of elements in the buffer.
+  int num_elements_;
+
+  // In-object storage, used for as long as the size does not exceed kNumMaxElementsOnStack.
+  T stack_elements_[kNumMaxElementsOnStack];
+
+  // Heap storage for buffer larger than kNumMaxElementsOnStack, and its size (0 if none).
+  T *heap_elements_;
+  int num_heap_elements_;
+
+  // Points to stack_elements_ or heap_elements_, whichever is in use. NULL before the first use.
+  T *effective_elements_;
+};
+
+// In-object capacity is 32 * 32: 32 inner vertices along the patch size at subdivision level 6.
+typedef StackOrHeapArray<PatchCoord, 32 * 32> StackOrHeapPatchCoordArray;
+
 // Buffer which implements API required by OpenSubdiv and uses an existing memory as an underlying
 // storage.
 template<typename T> class RawDataWrapperBuffer {
@@ -441,6 +523,19 @@ class VolatileEvalOutput {
   DEVICE_CONTEXT *device_context_;
 };
 
+void convertPatchCoordsToArray(const OpenSubdiv_PatchCoord *patch_coords,
+                               const int num_patch_coords,
+                               const OpenSubdiv::Far::PatchMap *patch_map,
+                               StackOrHeapPatchCoordArray *array)
+{
+  // Resolve every ptex coordinate to its patch. NOTE(review): FindPatch() result is not checked.
+  array->resize(num_patch_coords);
+  for (int i = 0; i < num_patch_coords; ++i) {
+    const OpenSubdiv_PatchCoord &in = patch_coords[i];
+    (array->data())[i] = PatchCoord(*patch_map->FindPatch(in.ptex_face, in.u, in.v), in.u, in.v);
+  }
+}
+
 }  // namespace
 
 // Note: Define as a class instead of typedcef to make it possible
@@ -620,6 +715,23 @@ void CpuEvalOutputAPI::evaluateFaceVarying(const int face_varying_channel,
   implementation_->evalPatchesFaceVarying(face_varying_channel, &patch_coord, 1, face_varying);
 }
 
+void CpuEvalOutputAPI::evaluatePatchesLimit(const OpenSubdiv_PatchCoord *patch_coords,
+                                            const int num_patch_coords,
+                                            float *P,
+                                            float *dPdu,
+                                            float *dPdv)
+{
+  // Resolve the ptex coordinates into OpenSubdiv patch coordinates before evaluating them.
+  StackOrHeapPatchCoordArray patch_coords_array;
+  convertPatchCoordsToArray(patch_coords, num_patch_coords, patch_map_, &patch_coords_array);
+  PatchCoord *coords = patch_coords_array.data();
+  if (dPdu == NULL && dPdv == NULL) {
+    implementation_->evalPatches(coords, num_patch_coords, P);
+    return;
+  }
+  implementation_->evalPatchesWithDerivatives(coords, num_patch_coords, P, dPdu, dPdv);
+}
+
 }  // namespace opensubdiv_capi
 
 OpenSubdiv_EvaluatorInternal::OpenSubdiv_EvaluatorInternal()
diff --git a/intern/opensubdiv/internal/opensubdiv_evaluator_internal.h b/intern/opensubdiv/internal/opensubdiv_evaluator_internal.h
index 7c963227d17..392633944c6 100644
--- a/intern/opensubdiv/internal/opensubdiv_evaluator_internal.h
+++ b/intern/opensubdiv/internal/opensubdiv_evaluator_internal.h
@@ -26,6 +26,7 @@
 #include <opensubdiv/far/patchMap.h>
 #include <opensubdiv/far/patchTable.h>
 
+struct OpenSubdiv_PatchCoord;
 struct OpenSubdiv_TopologyRefiner;
 
 namespace opensubdiv_capi {
@@ -114,6 +115,18 @@ class CpuEvalOutputAPI {
                            float face_v,
                            float face_varying[2]);
 
+  // Batched evaluation of multiple input coordinates.
+
+  // Evaluate limit surface at every given patch coordinate (ptex face and its u, v location).
+  // If both derivatives are NULL, they will not be evaluated.
+  //
+  // NOTE: Output arrays must point to a memory of size float[3]*num_patch_coords.
+  void evaluatePatchesLimit(const OpenSubdiv_PatchCoord *patch_coords,
+                            const int num_patch_coords,
+                            float *P,
+                            float *dPdu,
+                            float *dPdv);
+
+
  protected:
   CpuEvalOutput *implementation_;
   OpenSubdiv::Far::PatchMap *patch_map_;
diff --git a/intern/opensubdiv/opensubdiv_capi_type.h b/intern/opensubdiv/opensubdiv_capi_type.h
index 35eeb71dede..e759c5f43b0 100644
--- a/intern/opensubdiv/opensubdiv_capi_type.h
+++ b/intern/opensubdiv/opensubdiv_capi_type.h
@@ -58,6 +58,13 @@ typedef enum OpenSubdiv_FVarLinearInterpolation {
   OSD_FVAR_LINEAR_INTERPOLATION_ALL,
 } OpenSubdiv_FVarLinearInterpolation;
 
+typedef struct OpenSubdiv_PatchCoord {
+  // Ptex face the location is on.
+  int ptex_face;
+  // Parametric location on patch.
+  float u, v;
+} OpenSubdiv_PatchCoord;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/intern/opensubdiv/opensubdiv_evaluator_capi.h b/intern/opensubdiv/opensubdiv_evaluator_capi.h
index ceb0c58feba..1572d01b851 100644
--- a/intern/opensubdiv/opensubdiv_evaluator_capi.h
+++ b/intern/opensubdiv/opensubdiv_evaluator_capi.h
@@ -24,6 +24,7 @@ extern "C" {
 #endif
 
 struct OpenSubdiv_EvaluatorInternal;
+struct OpenSubdiv_PatchCoord;
 struct OpenSubdiv_TopologyRefiner;
 
 typedef struct OpenSubdiv_Evaluator {
@@ -108,6 +109,19 @@ typedef struct OpenSubdiv_Evaluator {
                               float face_v,
                               float face_varying[2]);
 
+  // Batched evaluation of multiple input coordinates.
+
+  // Evaluate limit surface at every given patch coordinate.
+  // If both derivatives are NULL, they will not be evaluated.
+  //
+  // NOTE: Output arrays must point to a memory of size float[3]*num_patch_coords.
+  void (*evaluatePatchesLimit)(struct OpenSubdiv_Evaluator *evaluator,
+                               const struct OpenSubdiv_PatchCoord *patch_coords,
+                               const int num_patch_coords,
+                               float *P,
+                               float *dPdu,
+                               float *dPdv);
+
   // Internal storage for the use in this module only.
   //
   // This is where actual OpenSubdiv's evaluator is living.