[Bf-blender-cvs] [70892ec1e19] tmp-drw-callbatching: DRW: Refactor to support draw call batching

Clément Foucault noreply at git.blender.org
Sat Aug 17 14:50:04 CEST 2019


Commit: 70892ec1e19b033b431d0d5af19f97d187cdfd6a
Author: Clément Foucault
Date:   Mon Jun 3 12:53:36 2019 +0200
Branches: tmp-drw-callbatching
https://developer.blender.org/rB70892ec1e19b033b431d0d5af19f97d187cdfd6a

DRW: Refactor to support draw call batching

This refactor improves the CPU/memory efficiency of the draw structures and lowers the
driver overhead of issuing many drawcalls.

- Model Matrix is now part of big UBOs that contain 1024 matrices.
- Object Infos follow the same improvement.
- Matrices are indexed by gl_BaseInstanceARB or a fallback uniform.
- All these resources are using a single 32bit identifier (DRWResourceHandle).
- DRWUniform & DRWCall are allocated in chunks to improve cache coherence & memory usage.
- DRWUniform now supports up to vec4_copy.
- Draw calls are batched together if their resource ids are consecutive.

This has a great impact on CPU usage when using lots of instances. Even if the biggest
bottleneck in these situations is the depsgraph iteration, the driver overhead when doing
thousands of drawcalls is still high.

This only improves situations where the CPU is the bottleneck: small geometry, lots of
instances.

The next step is to sort the drawcalls inside a DRWCallChunk to improve the batching process
when the instancing order is pretty random.

Reviewers: brecht, antoniov

Differential Revision: https://developer.blender.org/D4997

===================================================================

M	source/blender/draw/intern/draw_manager.h
M	source/blender/draw/intern/draw_manager_exec.c

===================================================================

diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h
index 047cab5a930..a1ab0934a3b 100644
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -44,6 +44,9 @@
 /* Use draw manager to call GPU_select, see: DRW_draw_select_loop */
 #define USE_GPU_SELECT
 
+/* Use drawcall batching using instanced rendering. */
+#define USE_BATCHING
+
 // #define DRW_DEBUG_CULLING
 #define DRW_DEBUG_USE_UNIFORM_NAME 0
 #define DRW_UNIFORM_BUFFER_NAME 64
diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c
index c8d0b143404..6fafc0bf1aa 100644
--- a/source/blender/draw/intern/draw_manager_exec.c
+++ b/source/blender/draw/intern/draw_manager_exec.c
@@ -986,6 +986,9 @@ static DRWCall *draw_call_iter_step(DRWCallIterator *iter)
 typedef struct DRWCommandsState {
   int callid;
   uint resource_chunk;
+  uint base_inst;  /* handle.id of the first call in the pending batched range. */
+  uint inst_count; /* Number of calls accumulated in the pending range (0 = nothing pending). */
+  GPUBatch *batch; /* GPUBatch of the pending range (or of the last non-batchable call seen). */
   bool neg_scale;
 } DRWCommandsState;
 
@@ -1013,6 +1016,56 @@ static void draw_call_resource_bind(DRWCommandsState *state,
   }
 }
 
+#ifdef USE_BATCHING
+/* Return true if the call was absorbed into the pending batch; false if the caller must draw it. */
+static bool draw_call_do_batching(DRWShadingGroup *shgroup,
+                                  DRWCommandsState *state,
+                                  DRWCall *call,
+                                  int obmats_loc,
+                                  int obinfos_loc,
+                                  int baseinst_loc,
+                                  int callid_loc)
+{
+  if (call->inst_count > 0 || call->vert_first > 0 || call->vert_count > 0 || callid_loc != -1 ||
+      obmats_loc == -1 || G.f & G_FLAG_PICKSEL) {
+    /* Safety guard. Batching should not happen in a shgroup
+     * where any of the above conditions is true. */
+    BLI_assert(state->inst_count == 0);
+    if (state->inst_count > 0) {
+      draw_geometry_execute(
+          shgroup, state->batch, 0, 0, state->base_inst, state->inst_count, baseinst_loc);
+    }
+    /* We cannot pack in this situation: clear the pending range and remember this batch. */
+    state->inst_count = 0;
+    state->base_inst = 0;
+    state->batch = call->batch;
+    return false;
+  }
+  else {
+    /* See if any condition requires interrupting the packing. */
+    if ((call->handle.id != state->base_inst + state->inst_count) || /* Is the id consecutive? */
+        (call->handle.negative_scale != state->neg_scale) ||         /* Same negative_scale? */
+        (call->handle.chunk != state->resource_chunk) ||             /* Same resource chunk? */
+        (call->batch != state->batch)                                /* Same GPUBatch? */
+    ) {
+      /* We need to draw the pending instances before starting a new range at this call. */
+      if (state->inst_count > 0) {
+        draw_geometry_execute(
+            shgroup, state->batch, 0, 0, state->base_inst, state->inst_count, baseinst_loc);
+      }
+      state->inst_count = 1;
+      state->base_inst = call->handle.id;
+      state->batch = call->batch;
+      draw_call_resource_bind(state, call->handle, obmats_loc, obinfos_loc);
+    }
+    else {
+      state->inst_count++;
+    }
+    return true;
+  }
+}
+#endif
+
 static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
 {
   BLI_assert(shgroup->shader);
@@ -1059,6 +1112,8 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
 
   /* Rendering Calls */
   {
+    GPUBatch *first_batch = (shgroup->calls.first) ? shgroup->calls.first->calls[0].batch : NULL;
+
     DRWCallIterator iter;
     draw_call_iter_begin(&iter, shgroup);
     DRWCall *call;
@@ -1066,7 +1121,10 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
     DRWCommandsState state = {
         .neg_scale = false,
         .resource_chunk = 0,
+        .base_inst = 0,
+        .inst_count = 0,
         .callid = 0,
+        .batch = first_batch,
     };
     while ((call = draw_call_iter_step(&iter))) {
 
@@ -1074,6 +1132,14 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
         continue;
       }
 
+#ifdef USE_BATCHING
+      /* Pack calls together if their handle.id are consecutive. */
+      if (draw_call_do_batching(
+              shgroup, &state, call, obmats_loc, obinfos_loc, baseinst_loc, callid_loc)) {
+        continue;
+      }
+#endif
+
       draw_call_resource_bind(&state, call->handle, obmats_loc, obinfos_loc);
 
       /* XXX small exception/optimisation for outline rendering. */
@@ -1100,6 +1166,14 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
                             baseinst_loc);
     }
 
+#ifdef USE_BATCHING
+    /* Flush the last pending drawcall batched together. */
+    if (state.inst_count > 0) {
+      draw_geometry_execute(
+          shgroup, state.batch, 0, 0, state.base_inst, state.inst_count, baseinst_loc);
+    }
+#endif
+
     /* Reset state */
     glFrontFace(GL_CCW);



More information about the Bf-blender-cvs mailing list