[Bf-blender-cvs] [a53e560ca57] tmp-worbench-rewrite2-optimizations: GPU Debug Groups profiling (WIP)

Sun Jan 1 15:39:55 CET 2023

Commit: a53e560ca57cdcd7d79f9852c6da88f8eb1aaa01
Author: Miguel Pozo
Date:   Fri Dec 30 19:53:55 2022 +0100
Branches: tmp-worbench-rewrite2-optimizations
https://developer.blender.org/rBa53e560ca57cdcd7d79f9852c6da88f8eb1aaa01

GPU Debug Groups profiling (WIP)

===================================================================

M	source/blender/draw/intern/draw_manager.c
M	source/blender/gpu/opengl/gl_context.cc
M	source/blender/gpu/opengl/gl_context.hh
M	source/blender/gpu/opengl/gl_debug.cc

===================================================================

diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index 5c1f5dd0a4a..4a3611eb048 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -1007,8 +1007,10 @@ void DRW_cache_free_old_batches(Main *bmain)
 
 static void drw_engines_init(void)
 {
+  DRW_stats_group_start("drw_engines.engines_init");
   DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
     PROFILE_START(stime);
+    DRW_stats_group_start(engine->idname);
 
     const DrawEngineDataSize *data_size = engine->vedata_size;
     memset(data->psl->passes, 0, sizeof(*data->psl->passes) * data_size->psl_len);
@@ -1017,15 +1019,19 @@ static void drw_engines_init(void)
       engine->engine_init(data);
     }
 
+    DRW_stats_group_end();
     PROFILE_END_UPDATE(data->init_time, stime);
   }
+  DRW_stats_group_end();
 }
 
 static void drw_engines_cache_init(void)
 {
+  DRW_stats_group_start("drw_engines.cache_init");
   DRW_manager_begin_sync();
 
   DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
+    DRW_stats_group_start(engine->idname);
     if (data->text_draw_cache) {
       DRW_text_cache_destroy(data->text_draw_cache);
       data->text_draw_cache = NULL;
@@ -1037,7 +1043,9 @@ static void drw_engines_cache_init(void)
     if (engine->cache_init) {
       engine->cache_init(data);
     }
+    DRW_stats_group_end();
   }
+  DRW_stats_group_end();
 }
 
 static void drw_engines_world_update(Scene *scene)
@@ -1045,12 +1053,16 @@ static void drw_engines_world_update(Scene *scene)
   if (scene->world == NULL) {
     return;
   }
+  DRW_stats_group_start("drw_engines.world_update");
 
   DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
     if (engine->id_update) {
+      DRW_stats_group_start(engine->idname);
       engine->id_update(data, &scene->world->id);
+      DRW_stats_group_end();
     }
   }
+  DRW_stats_group_end();
 }
 
 static void drw_engines_cache_populate(Object *ob)
@@ -1091,17 +1103,22 @@ static void drw_engines_cache_populate(Object *ob)
 
 static void drw_engines_cache_finish(void)
 {
+  DRW_stats_group_start("drw_engines.cache_finish"); /* Outer group, ended after the sync. */
   DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
     if (engine->cache_finish) {
+      DRW_stats_group_start(engine->idname); /* Nested group named after the engine. */
       engine->cache_finish(data);
+      DRW_stats_group_end();
     }
   }
 
   DRW_manager_end_sync();
+  DRW_stats_group_end();
 }
 
 static void drw_engines_draw_scene(void)
 {
+  DRW_stats_group_start("drw_engines.draw_scene");
   DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) {
     PROFILE_START(stime);
     if (engine->draw_scene) {
@@ -1117,6 +1134,7 @@ static void drw_engines_draw_scene(void)
   }
   /* Reset state after drawing */
   DRW_state_reset();
+  DRW_stats_group_end();
 }
 
 static void drw_engines_draw_text(void)
@@ -1678,6 +1696,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
   drw_context_state_init();
 
   drw_manager_init(&DST, viewport, NULL);
+
   DRW_viewport_colormanagement_set(viewport);
 
   const int object_type_exclude_viewport = v3d->object_type_exclude_viewport;
@@ -1716,6 +1735,8 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
 
     /* Only iterate over objects for internal engines or when overlays are enabled */
     if (do_populate_loop) {
+      DRW_stats_group_start("drw_render.populate_loop");
+
       DST.dupli_origin = NULL;
       DST.dupli_origin_data = NULL;
       DEGObjectIterSettings deg_iter_settings = {0};
@@ -1737,6 +1758,8 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
         drw_engines_cache_populate(ob);
       }
       DEG_OBJECT_ITER_END;
+
+      DRW_stats_group_end();
     }
 
     drw_duplidata_free();
diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc
index 375194c09f3..f3107746a5c 100644
--- a/source/blender/gpu/opengl/gl_context.cc
+++ b/source/blender/gpu/opengl/gl_context.cc
@@ -141,6 +141,8 @@ void GLContext::activate()
   bound_ubo_slots = 0;
 
   immActivate();
+
+  process_frame_timings();
 }
 
 void GLContext::deactivate()
diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh
index 1d413750fd4..18efa4d151e 100644
--- a/source/blender/gpu/opengl/gl_context.hh
+++ b/source/blender/gpu/opengl/gl_context.hh
@@ -95,6 +95,21 @@ class GLContext : public Context {
   /** #GLBackend owns this data. */
   GLSharedOrphanLists &shared_orphan_list_;
 
+  struct TimeQuery { /* Timing record of one debug group. */
+    std::string name; /* Name passed to `debug_group_begin`. */
+    GLuint handles[2]; /* GL_TIMESTAMP query objects: [0] group begin, [1] group end. */
+    int stack_depth; /* `debug_stack.size()` when the group began. */
+    bool finished; /* Set once the end timestamp query has been issued. */
+    int64_t cpu_start; /* GL_TIMESTAMP value read with `glGetInteger64v` at group begin. */
+    float cpu_time; /* Difference of the values read at group end and begin, divided by 1e6. */
+  };
+  struct FrameQueries { /* All the groups recorded between two `process_frame_timings` calls. */
+    Vector<TimeQuery> queries; /* In the order the groups were begun. */
+  };
+  Vector<FrameQueries> frame_timings; /* Oldest first, the last one is being recorded. */
+
+  void process_frame_timings();
+
  public:
   GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list);
   ~GLContext();
diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc
index ea9ca0ba115..bcb61b53a11 100644
--- a/source/blender/gpu/opengl/gl_debug.cc
+++ b/source/blender/gpu/opengl/gl_debug.cc
@@ -366,6 +366,9 @@ namespace blender::gpu {
  * Useful for debugging through render-doc. This makes all the API calls grouped into "passes".
  * \{ */
 
+#define PROFILE_DEBUG_GROUPS 0 /* Set to 1 to compile the debug group timing code below. */
+#define MAX_DEBUG_GROUPS_STACK_DEPTH 4 /* Groups at this stack depth or deeper are not printed. */
+
 void GLContext::debug_group_begin(const char *name, int index)
 {
   if ((G.debug & G_DEBUG_GPU) &&
@@ -373,6 +376,23 @@ void GLContext::debug_group_begin(const char *name, int index)
     /* Add 10 to avoid collision with other indices from other possible callback layers. */
     index += 10;
     glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, index, -1, name);
+
+#if PROFILE_DEBUG_GROUPS
+    if (frame_timings.is_empty()) {
+      frame_timings.append({});
+    }
+
+    TimeQuery query = {};
+    query.finished = false;
+    query.name = name;
+    query.stack_depth = debug_stack.size();
+    glGetInteger64v(GL_TIMESTAMP, &query.cpu_start);
+
+    /* Use GL_TIMESTAMP instead of GL_ELAPSED_TIME to support nested debug groups */
+    glGenQueries(2, query.handles);
+    glQueryCounter(query.handles[0], GL_TIMESTAMP);
+    frame_timings.last().queries.append(query);
+#endif
   }
 }
 
@@ -381,9 +401,96 @@ void GLContext::debug_group_end()
   if ((G.debug & G_DEBUG_GPU) &&
       (epoxy_gl_version() >= 43 || epoxy_has_gl_extension("GL_KHR_debug"))) {
     glPopDebugGroup();
+
+#if PROFILE_DEBUG_GROUPS
+    Vector<TimeQuery> &queries = frame_timings.last().queries;
+    for (int i = queries.size() - 1; i >= 0; i--) {
+      TimeQuery &query = queries[i];
+      if (!query.finished) {
+        glQueryCounter(query.handles[1], GL_TIMESTAMP);
+        query.finished = true;
+        int64_t cpu_end;
+        glGetInteger64v(GL_TIMESTAMP, &cpu_end);
+        query.cpu_time = (cpu_end - query.cpu_start) / 1000000.0;
+        break;
+      }
+      BLI_assert(i != 0);
+    }
+#endif
   }
 }
 
+/* Print the timings of each recorded frame whose GPU queries completed (oldest first, stops
+ * at the first pending frame), then start a new frame. No-op without PROFILE_DEBUG_GROUPS. */
+void GLContext::process_frame_timings()
+{
+#if PROFILE_DEBUG_GROUPS
+  if (G.debug & G_DEBUG_GPU) {
+    for (int frame_i = 0; frame_i < frame_timings.size(); frame_i++) {
+      Vector<TimeQuery> &queries = frame_timings[frame_i].queries;
+      if (queries.is_empty() || !queries.last().finished /* Group begin/end mismatch */) {
+        /* Free the GL query objects of the dropped frame, they would leak otherwise. */
+        for (TimeQuery &query : queries) {
+          glDeleteQueries(2, query.handles);
+        }
+        frame_timings.remove(frame_i--);
+        continue;
+      }
+      /* Poll the end query of the last group: a pending frame stops the scan for now. */
+      GLint ready = 0;
+      glGetQueryObjectiv(queries.last().handles[1], GL_QUERY_RESULT_AVAILABLE, &ready);
+      if (!ready) {
+        break;
+      }
+      std::stringstream result;
+      result << "\n";
+      // clang-format off
+      result << " Group                          | GPU  | CPU  | Latency\n";
+      result << "--------------------------------|------|------|--------\n";
+      result << " Total                          | ";
+      // clang-format on
+      /* NOTE(review): `last()` is the last group begun; a parent group may end after it. */
+      GLuint64 begin_timestamp = 0;
+      GLuint64 end_timestamp = 0;
+      glGetQueryObjectui64v(queries.first().handles[0], GL_QUERY_RESULT, &begin_timestamp);
+      glGetQueryObjectui64v(queries.last().handles[1], GL_QUERY_RESULT, &end_timestamp);
+      /* GL timestamps are in nanoseconds, the table is printed in milliseconds. */
+      float gpu_total_time = (end_timestamp - begin_timestamp) / 1000000.0;
+      result << std::to_string(gpu_total_time).substr(0, 4) << " | ";
+      float cpu_total_time = (queries.last().cpu_start - queries.first().cpu_start) / 1000000.0 +
+                             queries.last().cpu_time;
+      result << std::to_string(cpu_total_time).substr(0, 4) << " | \n";
+
+      for (TimeQuery &query : queries) {
+        if (query.stack_depth >= MAX_DEBUG_GROUPS_STACK_DEPTH) {
+          glDeleteQueries(2, query.handles);
+          continue;
+        }
+        GLuint64 begin_timestamp = 0;
+        GLuint64 end_timestamp = 0;
+        glGetQueryObjectui64v(query.handles[0], GL_QUERY_RESULT, &begin_timestamp);
+        glGetQueryObjectui64v(query.handles[1], GL_QUERY_RESULT, &end_timestamp);
+        glDeleteQueries(2, query.handles);
+        /* Signed math: the unsigned `length()` would wrap around for names over 30 chars. */
+        const int padding = max_ii(0, 30 - query.stack_depth - int(query.name.length()));
+        result << std::string(query.stack_depth, '.');
+        result << " " << query.name << std::string(padding, ' ') << " | ";
+        float gpu_time = (end_timestamp - begin_timestamp) / 1000000.0;
+        result << std::to_string(gpu_time).substr(0, 4) << " | ";
+        result << std::to_string(query.cpu_time).substr(0, 4) << " | ";
+        /* Latency: GPU begin timestamp minus the timestamp read on the CPU at group begin. */
+        const double latency = (int64_t(begin_timestamp) - query.cpu_start) / 1000000.0;
+        result << std::to_string(latency).substr(0, 4) << "\n";
+      }
+      std::string print = result.str();
+      printf("%s", print.c_str());
+      frame_timings.remove(frame_i--);
+    }
+    frame_timings.append({});
+  }
+#endif
+}
+
 /** \} */
 
 }  // namespace blender::gpu