[Bf-blender-cvs] [d9a7ec3947e] tmp-drw-callbatching: DRW: Use new GPUDrawList to speedup instancing
Clément Foucault
noreply at git.blender.org
Sat Aug 17 14:50:33 CEST 2019
Commit: d9a7ec3947eb7056bb3662cd8ce4a9b420ab3065
Author: Clément Foucault
Date: Wed Jun 19 16:01:02 2019 +0200
Branches: tmp-drw-callbatching
https://developer.blender.org/rBd9a7ec3947eb7056bb3662cd8ce4a9b420ab3065
DRW: Use new GPUDrawList to speedup instancing
This improves the performance of random instancing even further.
Test 30K objects:
42 fps -> 52 fps
24 ms -> 19 ms
===================================================================
M source/blender/draw/intern/draw_manager.c
M source/blender/draw/intern/draw_manager.h
M source/blender/draw/intern/draw_manager_exec.c
===================================================================
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index 8a43ddbc4d5..6037d1e6435 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -724,6 +724,10 @@ static void drw_viewport_var_init(void)
G_draw.view_ubo = DRW_uniformbuffer_create(sizeof(DRWViewUboStorage), NULL);
}
+ if (DST.draw_list == NULL) {
+ DST.draw_list = GPU_draw_list_create(DRW_DRAWLIST_LEN);
+ }
+
memset(DST.object_instance_data, 0x0, sizeof(DST.object_instance_data));
}
@@ -2908,6 +2912,10 @@ void DRW_engines_free(void)
MEM_SAFE_FREE(DST.uniform_names.buffer);
+ if (DST.draw_list) {
+ GPU_draw_list_discard(DST.draw_list);
+ }
+
DRW_opengl_context_disable();
}
diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h
index 54e35c7e5c4..1b4cb06a59c 100644
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -286,14 +286,6 @@ struct DRWView {
void *user_data;
};
-/* TODO(fclem): Future awaits */
-#if 0
-typedef struct ModelUboStorage {
- float model[4][4];
- float modelinverse[4][4];
-} ModelUboStorage;
-#endif
-
/* ------------ Data Chunks --------------- */
/**
* In order to keep a cache friendly data structure,
@@ -347,6 +339,7 @@ typedef struct DRWDebugSphere {
#define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */
#define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */
#define STENCIL_UNDEFINED 256
+#define DRW_DRAWLIST_LEN 256
typedef struct DRWManager {
/* TODO clean up this struct a bit */
/* Cache generation */
@@ -431,6 +424,8 @@ typedef struct DRWManager {
/** Mutex to lock the drw manager and avoid concurrent context usage. */
TicketMutex *gl_context_mutex;
+ GPUDrawList *draw_list;
+
/** GPU Resource State: Memory storage between drawing. */
struct {
/* High end GPUs supports up to 32 binds per shader stage.
diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c
index 107c80d7619..e140d005796 100644
--- a/source/blender/draw/intern/draw_manager_exec.c
+++ b/source/blender/draw/intern/draw_manager_exec.c
@@ -597,6 +597,23 @@ BLI_INLINE void draw_legacy_matrix_update(DRWShadingGroup *shgroup,
}
}
+BLI_INLINE void draw_geometry_bind(DRWShadingGroup *shgroup, GPUBatch *geom)
+{
+ /* XXX hacking gawain. we don't want to call glUseProgram! (huge performance loss) */
+ if (DST.batch) {
+ DST.batch->program_in_use = false;
+ }
+
+ DST.batch = geom;
+
+ GPU_batch_program_set_no_use(
+ geom, GPU_shader_get_program(shgroup->shader), GPU_shader_get_interface(shgroup->shader));
+
+ geom->program_in_use = true;
+
+ GPU_batch_bind(geom);
+}
+
BLI_INLINE void draw_geometry_execute(DRWShadingGroup *shgroup,
GPUBatch *geom,
int vert_first,
@@ -617,20 +634,33 @@ BLI_INLINE void draw_geometry_execute(DRWShadingGroup *shgroup,
/* bind vertex array */
if (DST.batch != geom) {
- DST.batch = geom;
-
- GPU_batch_program_set_no_use(
- geom, GPU_shader_get_program(shgroup->shader), GPU_shader_get_interface(shgroup->shader));
-
- GPU_batch_bind(geom);
+ draw_geometry_bind(shgroup, geom);
}
- /* XXX hacking gawain. we don't want to call glUseProgram! (huge performance loss) */
- geom->program_in_use = true;
-
GPU_batch_draw_advanced(geom, vert_first, vert_count, inst_first, inst_count);
+}
- geom->program_in_use = false; /* XXX hacking gawain */
+BLI_INLINE void draw_indirect_call(DRWShadingGroup *shgroup,
+ GPUBatch *geom,
+ int vert_first,
+ int vert_count,
+ int inst_first,
+ int inst_count,
+ int baseinst_loc)
+{
+ if (baseinst_loc == -1) {
+ /* bind vertex array */
+ if (DST.batch != geom) {
+ GPU_draw_list_submit(DST.draw_list);
+ draw_geometry_bind(shgroup, geom);
+ }
+ GPU_draw_list_command_add(DST.draw_list, vert_first, vert_count, inst_first, inst_count);
+ }
+ /* Fallback when unsupported */
+ else if (inst_count > 0) {
+ draw_geometry_execute(
+ shgroup, geom, vert_first, vert_count, inst_first, inst_count, baseinst_loc);
+ }
}
enum {
@@ -992,6 +1022,8 @@ typedef struct DRWCommandsState {
int resource_chunk;
int base_inst;
int inst_count;
+ int v_first;
+ int v_count;
GPUBatch *batch;
bool neg_scale;
} DRWCommandsState;
@@ -1040,9 +1072,18 @@ static bool draw_call_do_batching(DRWShadingGroup *shgroup,
* where any of the above conditions are true. */
BLI_assert(state->inst_count == 0);
if (state->inst_count > 0) {
- draw_geometry_execute(
- shgroup, state->batch, 0, 0, state->base_inst, state->inst_count, baseinst_loc);
+ /* We need to draw the pending instances. */
+ draw_indirect_call(shgroup,
+ state->batch,
+ state->v_first,
+ state->v_count,
+ state->base_inst,
+ state->inst_count,
+ baseinst_loc);
}
+ /* Submit the pending commands. */
+ /* NOTE/TODO: We could allow command list usage in this case. */
+ GPU_draw_list_submit(DST.draw_list);
/* We cannot pack in this situation. */
state->inst_count = 0;
state->base_inst = 0;
@@ -1051,20 +1092,44 @@ static bool draw_call_do_batching(DRWShadingGroup *shgroup,
}
else {
/* See if any condition requires to interrupt the packing. */
- if ((call->handle.id != state->base_inst + state->inst_count) || /* Is the id consecutive? */
- (call->handle.negative_scale != state->neg_scale) || /* */
- (call->handle.chunk != state->resource_chunk) || /* */
- (call->batch != state->batch) /* */
+ if ((call->handle.negative_scale != state->neg_scale) || /* Need to change state. */
+ (call->handle.chunk != state->resource_chunk) || /* Need to change UBOs. */
+ (call->batch != state->batch) /* Need to change VAO. */
) {
/* We need to draw the pending instances. */
- if (state->inst_count > 0) {
- draw_geometry_execute(
- shgroup, state->batch, 0, 0, state->base_inst, state->inst_count, baseinst_loc);
- }
+ draw_indirect_call(shgroup,
+ state->batch,
+ state->v_first,
+ state->v_count,
+ state->base_inst,
+ state->inst_count,
+ baseinst_loc);
+ /* Submit the pending commands. */
+ GPU_draw_list_submit(DST.draw_list);
+
+ state->batch = call->batch;
+ state->v_first = 0;
+ state->v_count = (call->batch->elem) ? call->batch->elem->index_len :
+ call->batch->verts[0]->vertex_len;
state->inst_count = 1;
state->base_inst = call->handle.id;
- state->batch = call->batch;
+
draw_call_resource_bind(state, call->handle, obmats_loc, obinfos_loc, chunkid_loc);
+
+ GPU_draw_list_init(DST.draw_list, state->batch);
+ }
+ /* Is the id consecutive? */
+ else if (call->handle.id != state->base_inst + state->inst_count) {
+ /* We need to add a draw command for the pending instances. */
+ draw_indirect_call(shgroup,
+ state->batch,
+ state->v_first,
+ state->v_count,
+ state->base_inst,
+ state->inst_count,
+ baseinst_loc);
+ state->inst_count = 1;
+ state->base_inst = call->handle.id;
}
else {
state->inst_count++;
@@ -1072,6 +1137,25 @@ static bool draw_call_do_batching(DRWShadingGroup *shgroup,
return true;
}
}
+
+/* Flush remaining pending drawcalls. */
+static void draw_call_batching_finish(DRWShadingGroup *shgroup,
+ DRWCommandsState *state,
+ int baseinst_loc)
+{
+ if (state->inst_count > 0) {
+ /* Add last instance call if there was any in preparation. */
+ draw_indirect_call(shgroup,
+ state->batch,
+ state->v_first,
+ state->v_count,
+ state->base_inst,
+ state->inst_count,
+ baseinst_loc);
+ }
+ /* Flush the last pending drawcalls batched together. */
+ GPU_draw_list_submit(DST.draw_list);
+}
#endif
static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
@@ -1095,6 +1179,10 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
}
GPU_shader_bind(shgroup->shader);
DST.shader = shgroup->shader;
+ /* XXX hacking gawain */
+ if (DST.batch) {
+ DST.batch->program_in_use = false;
+ }
DST.batch = NULL;
}
@@ -1122,6 +1210,10 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
{
GPUBatch *first_batch = (shgroup->calls.first) ? shgroup->calls.first->calls[0].batch : NULL;
+ if (first_batch) {
+ GPU_draw_list_init(DST.draw_list, first_batch);
+ }
+
DRWCallIterator iter;
draw_call_iter_begin(&iter, shgroup);
DRWCall *call;
@@ -1132,6 +1224,10 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
.base_inst = 0,
.inst_count = 0,
.callid = 0,
+ .v_first = 0,
+ .v_count = (first_batch ? (first_batch->elem ? first_batch->elem->index_len :
+ first_batch->verts[0]->vertex_len) :
+ 0),
.batch = first_batch,
};
while ((call = draw_call_iter_step(&iter))) {
@@ -1167,13 +1263,8 @@ static void draw_shgroup(DRW
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list