[Bf-blender-cvs] [a53e560ca57] tmp-worbench-rewrite2-optimizations: GPU Debug Groups profiling (WIP)
Miguel Pozo
noreply at git.blender.org
Sun Jan 1 15:39:55 CET 2023
Commit: a53e560ca57cdcd7d79f9852c6da88f8eb1aaa01
Author: Miguel Pozo
Date: Fri Dec 30 19:53:55 2022 +0100
Branches: tmp-worbench-rewrite2-optimizations
https://developer.blender.org/rBa53e560ca57cdcd7d79f9852c6da88f8eb1aaa01
GPU Debug Groups profiling (WIP)
===================================================================
M source/blender/draw/intern/draw_manager.c
M source/blender/gpu/opengl/gl_context.cc
M source/blender/gpu/opengl/gl_context.hh
M source/blender/gpu/opengl/gl_debug.cc
===================================================================
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index 5c1f5dd0a4a..4a3611eb048 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -1007,8 +1007,10 @@ void DRW_cache_free_old_batches(Main *bmain)
static void drw_engines_init(void)
{
+ DRW_stats_group_start("drw_engines.engines_init");
DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
PROFILE_START(stime);
+ DRW_stats_group_start(engine->idname);
const DrawEngineDataSize *data_size = engine->vedata_size;
memset(data->psl->passes, 0, sizeof(*data->psl->passes) * data_size->psl_len);
@@ -1017,15 +1019,19 @@ static void drw_engines_init(void)
engine->engine_init(data);
}
+ DRW_stats_group_end();
PROFILE_END_UPDATE(data->init_time, stime);
}
+ DRW_stats_group_end();
}
static void drw_engines_cache_init(void)
{
+ DRW_stats_group_start("drw_engines.cache_init");
DRW_manager_begin_sync();
DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
+ DRW_stats_group_start(engine->idname);
if (data->text_draw_cache) {
DRW_text_cache_destroy(data->text_draw_cache);
data->text_draw_cache = NULL;
@@ -1037,7 +1043,9 @@ static void drw_engines_cache_init(void)
if (engine->cache_init) {
engine->cache_init(data);
}
+ DRW_stats_group_end();
}
+ DRW_stats_group_end();
}
static void drw_engines_world_update(Scene *scene)
@@ -1045,12 +1053,16 @@ static void drw_engines_world_update(Scene *scene)
if (scene->world == NULL) {
return;
}
+ DRW_stats_group_start("drw_engines.world_update");
DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
if (engine->id_update) {
+ DRW_stats_group_start(engine->idname);
engine->id_update(data, &scene->world->id);
+ DRW_stats_group_end();
}
}
+ DRW_stats_group_end();
}
static void drw_engines_cache_populate(Object *ob)
@@ -1091,17 +1103,22 @@ static void drw_engines_cache_populate(Object *ob)
static void drw_engines_cache_finish(void)
{
+ DRW_stats_group_start("drw_engines.cache_finish"); /* Outer stats group: spans every engine's cache_finish callback and DRW_manager_end_sync(). */
DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
if (engine->cache_finish) {
+ DRW_stats_group_start(engine->idname); /* Nested group named after the engine; opened only for engines that implement cache_finish. */
engine->cache_finish(data);
+ DRW_stats_group_end(); /* Closes the per-engine group. */
}
}
DRW_manager_end_sync();
+ DRW_stats_group_end(); /* Closes "drw_engines.cache_finish". */
}
static void drw_engines_draw_scene(void)
{
+ DRW_stats_group_start("drw_engines.draw_scene");
DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
PROFILE_START(stime);
if (engine->draw_scene) {
@@ -1117,6 +1134,7 @@ static void drw_engines_draw_scene(void)
}
/* Reset state after drawing */
DRW_state_reset();
+ DRW_stats_group_end();
}
static void drw_engines_draw_text(void)
@@ -1678,6 +1696,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
drw_context_state_init();
drw_manager_init(&DST, viewport, NULL);
+
DRW_viewport_colormanagement_set(viewport);
const int object_type_exclude_viewport = v3d->object_type_exclude_viewport;
@@ -1716,6 +1735,8 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
/* Only iterate over objects for internal engines or when overlays are enabled */
if (do_populate_loop) {
+ DRW_stats_group_start("drw_render.populate_loop");
+
DST.dupli_origin = NULL;
DST.dupli_origin_data = NULL;
DEGObjectIterSettings deg_iter_settings = {0};
@@ -1737,6 +1758,8 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
drw_engines_cache_populate(ob);
}
DEG_OBJECT_ITER_END;
+
+ DRW_stats_group_end();
}
drw_duplidata_free();
diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc
index 375194c09f3..f3107746a5c 100644
--- a/source/blender/gpu/opengl/gl_context.cc
+++ b/source/blender/gpu/opengl/gl_context.cc
@@ -141,6 +141,8 @@ void GLContext::activate()
bound_ubo_slots = 0;
immActivate();
+
+ process_frame_timings();
}
void GLContext::deactivate()
diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh
index 1d413750fd4..18efa4d151e 100644
--- a/source/blender/gpu/opengl/gl_context.hh
+++ b/source/blender/gpu/opengl/gl_context.hh
@@ -95,6 +95,21 @@ class GLContext : public Context {
/** #GLBackend owns this data. */
GLSharedOrphanLists &shared_orphan_list_;
+ struct TimeQuery { /* One timed debug group: created in debug_group_begin(), closed in debug_group_end(). */
+ std::string name; /* Debug group name as passed to debug_group_begin(). */
+ GLuint handles[2]; /* GL_TIMESTAMP query objects: [0] issued at group begin, [1] issued at group end. */
+ int stack_depth; /* debug_stack.size() sampled at group begin; used to indent the printed report. */
+ bool finished; /* Set to true once debug_group_end() has issued handles[1]. */
+ int64_t cpu_start; /* glGetInteger64v(GL_TIMESTAMP) value sampled at group begin. */
+ float cpu_time; /* (end - begin) GL_TIMESTAMP delta divided by 1e6; milliseconds, given GL timestamps are in nanoseconds. */
+ };
+ struct FrameQueries { /* All queries recorded between two process_frame_timings() calls. */
+ Vector<TimeQuery> queries; /* Appended in debug_group_begin() order. */
+ };
+ Vector<FrameQueries> frame_timings; /* Pending frames; process_frame_timings() removes reported ones and appends a fresh entry. */
+
+ void process_frame_timings(); /* Reports finished frames via printf; called from GLContext::activate(). */
+
public:
GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list);
~GLContext();
diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc
index ea9ca0ba115..bcb61b53a11 100644
--- a/source/blender/gpu/opengl/gl_debug.cc
+++ b/source/blender/gpu/opengl/gl_debug.cc
@@ -366,6 +366,9 @@ namespace blender::gpu {
* Useful for debugging through render-doc. This makes all the API calls grouped into "passes".
* \{ */
+#define PROFILE_DEBUG_GROUPS 0 /* Set to 1 to compile in the timestamp-query profiling of debug groups. */
+#define MAX_DEBUG_GROUPS_STACK_DEPTH 4 /* Groups at this stack depth or deeper are left out of the printed report. */
+
void GLContext::debug_group_begin(const char *name, int index)
{
if ((G.debug & G_DEBUG_GPU) &&
@@ -373,6 +376,23 @@ void GLContext::debug_group_begin(const char *name, int index)
/* Add 10 to avoid collision with other indices from other possible callback layers. */
index += 10;
glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, index, -1, name);
+
+#if PROFILE_DEBUG_GROUPS
+ if (frame_timings.is_empty()) {
+ frame_timings.append({});
+ }
+
+ TimeQuery query = {};
+ query.finished = false;
+ query.name = name;
+ query.stack_depth = debug_stack.size();
+ glGetInteger64v(GL_TIMESTAMP, &query.cpu_start);
+
+ /* Use GL_TIMESTAMP instead of GL_ELAPSED_TIME to support nested debug groups */
+ glGenQueries(2, query.handles);
+ glQueryCounter(query.handles[0], GL_TIMESTAMP);
+ frame_timings.last().queries.append(query);
+#endif
}
}
@@ -381,9 +401,96 @@ void GLContext::debug_group_end()
if ((G.debug & G_DEBUG_GPU) &&
(epoxy_gl_version() >= 43 || epoxy_has_gl_extension("GL_KHR_debug"))) {
glPopDebugGroup();
+
+#if PROFILE_DEBUG_GROUPS
+ Vector<TimeQuery> &queries = frame_timings.last().queries;
+ for (int i = queries.size() - 1; i >= 0; i--) {
+ TimeQuery &query = queries[i];
+ if (!query.finished) {
+ glQueryCounter(query.handles[1], GL_TIMESTAMP);
+ query.finished = true;
+ int64_t cpu_end;
+ glGetInteger64v(GL_TIMESTAMP, &cpu_end);
+ query.cpu_time = (cpu_end - query.cpu_start) / 1000000.0;
+ break;
+ }
+ BLI_assert(i != 0);
+ }
+#endif
}
}
+/**
+ * Print the timing report of every recorded frame whose GPU results are available, then open a
+ * new frame for the debug groups that follow. Called from #GLContext::activate().
+ *
+ * Does nothing unless `PROFILE_DEBUG_GROUPS` is enabled and `G.debug` has `G_DEBUG_GPU` set.
+ * Frames are processed oldest first; processing stops at the first frame whose results are not
+ * available yet, so reports are printed in order.
+ */
+void GLContext::process_frame_timings()
+{
+#if PROFILE_DEBUG_GROUPS
+  if (!(G.debug & G_DEBUG_GPU)) {
+    return;
+  }
+
+  for (int frame_i = 0; frame_i < frame_timings.size(); frame_i++) {
+    Vector<TimeQuery> &queries = frame_timings[frame_i].queries;
+
+    /* Every group must have been closed: an unfinished query never had its end timestamp
+     * issued, so reading it back would be invalid. */
+    bool all_finished = true;
+    for (const TimeQuery &query : queries) {
+      all_finished = all_finished && query.finished;
+    }
+
+    if (queries.is_empty() || !all_finished /* Group begin/end mismatch */) {
+      /* Release the GL query objects before dropping the frame, otherwise they leak. */
+      for (TimeQuery &query : queries) {
+        glDeleteQueries(2, query.handles);
+      }
+      frame_timings.remove(frame_i--);
+      continue;
+    }
+
+    GLint ready = 0;
+    glGetQueryObjectiv(queries.last().handles[1], GL_QUERY_RESULT_AVAILABLE, &ready);
+    if (!ready) {
+      /* Results of later frames cannot be available before this one; retry on the next call. */
+      break;
+    }
+
+    std::stringstream result;
+    result << "\n";
+    // clang-format off
+    result << " Group | GPU | CPU | Latency\n";
+    result << "--------------------------------|------|------|--------\n";
+    result << " Total | ";
+    // clang-format on
+
+    /* NOTE: queries are stored in begin order, so `last()` is the most recently *begun* group,
+     * which with nested groups is not necessarily the last one to end. */
+    GLuint64 frame_begin = 0;
+    GLuint64 frame_end = 0;
+    glGetQueryObjectui64v(queries.first().handles[0], GL_QUERY_RESULT, &frame_begin);
+    glGetQueryObjectui64v(queries.last().handles[1], GL_QUERY_RESULT, &frame_end);
+
+    /* GL timestamps are in nanoseconds, report milliseconds. */
+    const float gpu_total_time = (frame_end - frame_begin) / 1000000.0;
+    result << std::to_string(gpu_total_time).substr(0, 4) << " | ";
+
+    const float cpu_total_time = (queries.last().cpu_start - queries.first().cpu_start) /
+                                     1000000.0 +
+                                 queries.last().cpu_time;
+    result << std::to_string(cpu_total_time).substr(0, 4) << " | \n";
+
+    for (TimeQuery &query : queries) {
+      if (query.stack_depth >= MAX_DEBUG_GROUPS_STACK_DEPTH) {
+        /* Too deeply nested to report, but the query objects still have to be released. */
+        glDeleteQueries(2, query.handles);
+        continue;
+      }
+      GLuint64 group_begin = 0;
+      GLuint64 group_end = 0;
+      glGetQueryObjectui64v(query.handles[0], GL_QUERY_RESULT, &group_begin);
+      glGetQueryObjectui64v(query.handles[1], GL_QUERY_RESULT, &group_end);
+      glDeleteQueries(2, query.handles);
+
+      /* Compute the padding in signed arithmetic so long names clamp to zero padding instead of
+       * relying on an unsigned wrap-around. */
+      const int padding = max_ii(0, 30 - query.stack_depth - int(query.name.length()));
+
+      result << std::string(query.stack_depth, '.');
+      result << " " << query.name << std::string(padding, ' ') << " | ";
+
+      const float gpu_time = (group_end - group_begin) / 1000000.0;
+      /* Signed difference: the GPU begin timestamp may precede the CPU-side one. */
+      const double latency = (int64_t(group_begin) - query.cpu_start) / 1000000.0;
+
+      result << std::to_string(gpu_time).substr(0, 4) << " | ";
+      result << std::to_string(query.cpu_time).substr(0, 4) << " | ";
+      result << std::to_string(latency).substr(0, 4) << "\n";
+    }
+
+    const std::string print = result.str();
+    printf("%s", print.c_str());
+
+    frame_timings.remove(frame_i--);
+  }
+
+  /* Start collecting the queries of the next frame. */
+  frame_timings.append({});
+#endif
+}
+
+
/** \} */
} // namespace blender::gpu
More information about the Bf-blender-cvs
mailing list