[Bf-blender-cvs] [7f8d05131a7] master: IDManagement: Speedup ID unique name assignment by tracking used names/basenames/suffixes

Wed Jul 20 13:28:20 CEST 2022

Commit: 7f8d05131a7738327ae125d065df44be492ff1f2
Author: Aras Pranckevicius
Date:   Wed Jul 20 14:27:14 2022 +0300
Branches: master
https://developer.blender.org/rB7f8d05131a7738327ae125d065df44be492ff1f2

IDManagement: Speedup ID unique name assignment by tracking used names/basenames/suffixes

An implementation of T73412, roughly as outlined there:

Track the names that are in use, as well as base names (the part before
the numeric suffix) plus a bitmap for each base name, indicating which
numeric suffixes are already used. This is done per Main/Library and
per ID type.

Timings (Windows, VS2022 Release build, AMD Ryzen 5950X):

- Scene with 10k cubes, Shift+D to duplicate them all: 8.7s -> 1.9s.
  Name map memory usage for resulting 20k objects: 4.3MB.
- Importing a 2.5GB .obj file of exported Blender 3.0 splash scene
  (24k objects), using the new C++ importer: 34.2s -> 22.0s. Name map
  memory usage for resulting scene: 8.6MB.
- Importing Disney Moana USD scene (almost half a million objects):
  56min -> 10min. Name map usage: ~100MB. Blender crashes later on
  when trying to render it, in the same place in both cases, but
  that's for another day.

Reviewed By: Bastien Montagne
Differential Revision: https://developer.blender.org/D14162

===================================================================

M	source/blender/blenkernel/BKE_lib_id.h
M	source/blender/blenkernel/BKE_main.h
A	source/blender/blenkernel/BKE_main_namemap.h
M	source/blender/blenkernel/CMakeLists.txt
M	source/blender/blenkernel/intern/lib_id.c
M	source/blender/blenkernel/intern/lib_id_delete.c
M	source/blender/blenkernel/intern/lib_id_test.cc
M	source/blender/blenkernel/intern/library.c
M	source/blender/blenkernel/intern/main.c
A	source/blender/blenkernel/intern/main_namemap.cc
M	source/blender/blenloader/intern/versioning_250.c
M	source/blender/blenloader/intern/versioning_280.c
M	source/blender/blenloader/intern/versioning_290.c
M	source/blender/blenloader/intern/versioning_300.c
M	source/blender/blenloader/intern/versioning_defaults.c
M	source/blender/editors/space_outliner/outliner_draw.cc
M	source/blender/makesdna/DNA_ID.h
M	source/blender/makesrna/intern/rna_ID.c

===================================================================

diff --git a/source/blender/blenkernel/BKE_lib_id.h b/source/blender/blenkernel/BKE_lib_id.h
index beac608a138..59c842d614e 100644
--- a/source/blender/blenkernel/BKE_lib_id.h
+++ b/source/blender/blenkernel/BKE_lib_id.h
@@ -478,10 +478,12 @@ void BKE_lib_id_expand_local(struct Main *bmain, struct ID *id, int flags);
  *
  * \return true if a new name had to be created.
  */
-bool BKE_id_new_name_validate(struct ListBase *lb,
+bool BKE_id_new_name_validate(struct Main *bmain,
+                              struct ListBase *lb,
                               struct ID *id,
                               const char *name,
-                              bool do_linked_data) ATTR_NONNULL(1, 2);
+                              bool do_linked_data) ATTR_NONNULL(1, 2, 3);
+
 /**
  * Pull an ID out of a library (make it local). Only call this for IDs that
  * don't have other library users.
@@ -526,7 +528,7 @@ void BKE_main_lib_objects_recalc_all(struct Main *bmain);
 /**
  * Only for repairing files via versioning, avoid for general use.
  */
-void BKE_main_id_repair_duplicate_names_listbase(struct ListBase *lb);
+void BKE_main_id_repair_duplicate_names_listbase(struct Main *bmain, struct ListBase *lb);
 
 #define MAX_ID_FULL_NAME (64 + 64 + 3 + 1)         /* 64 is MAX_ID_NAME - 2 */
 #define MAX_ID_FULL_NAME_UI (MAX_ID_FULL_NAME + 3) /* Adds 'keycode' two letters at beginning. */
diff --git a/source/blender/blenkernel/BKE_main.h b/source/blender/blenkernel/BKE_main.h
index 2c444f42c46..4d26ed11f1b 100644
--- a/source/blender/blenkernel/BKE_main.h
+++ b/source/blender/blenkernel/BKE_main.h
@@ -36,6 +36,7 @@ struct IDNameLib_Map;
 struct ImBuf;
 struct Library;
 struct MainLock;
+struct UniqueName_Map;
 
 /* Blender thumbnail, as written on file (width, height, and data as char RGBA). */
 /* We pack pixel data after that struct. */
@@ -193,6 +194,9 @@ typedef struct Main {
   /* IDMap of IDs. Currently used when reading (expanding) libraries. */
   struct IDNameLib_Map *id_map;
 
+  /* Used for efficient calculations of unique names. */
+  struct UniqueName_Map *name_map;
+
   struct MainLock *lock;
 } Main;
 
diff --git a/source/blender/blenkernel/BKE_main_namemap.h b/source/blender/blenkernel/BKE_main_namemap.h
new file mode 100644
index 00000000000..d201e45a2c9
--- /dev/null
+++ b/source/blender/blenkernel/BKE_main_namemap.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#pragma once
+
+/** \file
+ * \ingroup bke
+ *
+ * API to ensure name uniqueness.
+ *
+ * Main database contains the UniqueName_Map which is a cache that tracks names, base
+ * names and their suffixes currently in use. So that whenever a new name has to be
+ * assigned or validated, it can quickly ensure uniqueness and adjust the name in case
+ * of collisions.
+ *
+ * \section Function Names
+ *
+ * - `BKE_main_namemap_` Should be used for functions in this file.
+ */
+
+#include "BLI_compiler_attrs.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ID;
+struct Main;
+struct UniqueName_Map;
+
+struct UniqueName_Map *BKE_main_namemap_create(void) ATTR_WARN_UNUSED_RESULT;
+void BKE_main_namemap_destroy(struct UniqueName_Map **r_name_map) ATTR_NONNULL();
+
+/**
+ * Ensures the given name is unique within the given ID type.
+ *
+ * In case of name collisions, the name will be adjusted to be unique.
+ *
+ * \return true if the name had to be adjusted for uniqueness.
+ */
+bool BKE_main_namemap_get_name(struct Main *bmain, struct ID *id, char *name) ATTR_NONNULL();
+
+/**
+ * Remove a given name from usage.
+ *
+ * Call this whenever deleting or renaming an object.
+ */
+void BKE_main_namemap_remove_name(struct Main *bmain, struct ID *id, const char *name)
+    ATTR_NONNULL();
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/source/blender/blenkernel/CMakeLists.txt b/source/blender/blenkernel/CMakeLists.txt
index 044a306a1b9..45a9e85874d 100644
--- a/source/blender/blenkernel/CMakeLists.txt
+++ b/source/blender/blenkernel/CMakeLists.txt
@@ -185,6 +185,7 @@ set(SRC
   intern/linestyle.c
   intern/main.c
   intern/main_idmap.c
+  intern/main_namemap.cc
   intern/mask.c
   intern/mask_evaluate.c
   intern/mask_rasterize.c
@@ -412,6 +413,7 @@ set(SRC
   BKE_linestyle.h
   BKE_main.h
   BKE_main_idmap.h
+  BKE_main_namemap.h
   BKE_mask.h
   BKE_material.h
   BKE_mball.h
diff --git a/source/blender/blenkernel/intern/lib_id.c b/source/blender/blenkernel/intern/lib_id.c
index 90a4853fd3e..affa1e72ad0 100644
--- a/source/blender/blenkernel/intern/lib_id.c
+++ b/source/blender/blenkernel/intern/lib_id.c
@@ -53,6 +53,7 @@
 #include "BKE_lib_query.h"
 #include "BKE_lib_remap.h"
 #include "BKE_main.h"
+#include "BKE_main_namemap.h"
 #include "BKE_node.h"
 #include "BKE_rigidbody.h"
 
@@ -186,7 +187,7 @@ void BKE_lib_id_clear_library_data(Main *bmain, ID *id, const int flags)
   id->tag &= ~(LIB_TAG_INDIRECT | LIB_TAG_EXTERN);
   id->flag &= ~LIB_INDIRECT_WEAK_LINK;
   if (id_in_mainlist) {
-    if (BKE_id_new_name_validate(which_libbase(bmain, GS(id->name)), id, NULL, false)) {
+    if (BKE_id_new_name_validate(bmain, which_libbase(bmain, GS(id->name)), id, NULL, false)) {
       bmain->is_memfile_undo_written = false;
     }
   }
@@ -842,7 +843,7 @@ void BKE_libblock_management_main_add(Main *bmain, void *idv)
   BLI_addtail(lb, id);
   /* We need to allow adding extra datablocks into libraries too, e.g. to support generating new
    * overrides for recursive resync. */
-  BKE_id_new_name_validate(lb, id, NULL, true);
+  BKE_id_new_name_validate(bmain, lb, id, NULL, true);
   /* alphabetic insertion: is in new_id */
   id->tag &= ~(LIB_TAG_NO_MAIN | LIB_TAG_NO_USER_REFCOUNT);
   bmain->is_memfile_undo_written = false;
@@ -865,6 +866,7 @@ void BKE_libblock_management_main_remove(Main *bmain, void *idv)
   ListBase *lb = which_libbase(bmain, GS(id->name));
   BKE_main_lock(bmain);
   BLI_remlink(lb, id);
+  BKE_main_namemap_remove_name(bmain, id, id->name + 2);
   id->tag |= LIB_TAG_NO_MAIN;
   bmain->is_memfile_undo_written = false;
   BKE_main_unlock(bmain);
@@ -958,7 +960,7 @@ void BKE_main_id_flag_all(Main *bmain, const int flag, const bool value)
   }
 }
 
-void BKE_main_id_repair_duplicate_names_listbase(ListBase *lb)
+void BKE_main_id_repair_duplicate_names_listbase(Main *bmain, ListBase *lb)
 {
   int lb_len = 0;
   LISTBASE_FOREACH (ID *, id, lb) {
@@ -982,7 +984,7 @@ void BKE_main_id_repair_duplicate_names_listbase(ListBase *lb)
   }
   for (i = 0; i < lb_len; i++) {
     if (!BLI_gset_add(gset, id_array[i]->name + 2)) {
-      BKE_id_new_name_validate(lb, id_array[i], NULL, false);
+      BKE_id_new_name_validate(bmain, lb, id_array[i], NULL, false);
     }
   }
   BLI_gset_free(gset, NULL);
@@ -1073,7 +1075,7 @@ void *BKE_libblock_alloc(Main *bmain, short type, const char *name, const int fl
 
       BKE_main_lock(bmain);
       BLI_addtail(lb, id);
-      BKE_id_new_name_validate(lb, id, name, false);
+      BKE_id_new_name_validate(bmain, lb, id, name, false);
       bmain->is_memfile_undo_written = false;
       /* alphabetic insertion: is in new_id */
       BKE_main_unlock(bmain);
@@ -1415,255 +1417,8 @@ void id_sort_by_name(ListBase *lb, ID *id, ID *id_sorting_hint)
 #undef ID_SORT_STEP_SIZE
 }
 
-/* NOTE: this code assumes and ensures that the suffix number can never go beyond 1 billion. */
-#define MAX_NUMBER 1000000000
-/* We do not want to get "name.000", so minimal number is 1. */
-#define MIN_NUMBER 1
-/* The maximum value up to which we search for the actual smallest unused number. Beyond that
- * value, we will only use the first biggest unused number, without trying to 'fill the gaps'
- * in-between already used numbers... */
-#define MAX_NUMBERS_IN_USE 1024
-
-/**
- * Helper building final ID name from given base_name and number.
- *
- * If everything goes well and we do generate a valid final ID name in given name, we return
- * true. In case the final name would overflow the allowed ID name length, or given number is
- * bigger than maximum allowed value, we truncate further the base_name (and given name, which is
- * assumed to have the same 'base_name' part), and return false.
- */
-static bool id_name_final_build(char *name, char *base_name, size_t base_name_len, int number)
-{
-  char number_str[11]; /* Dot + nine digits + NULL terminator. */
-  size_t number_str_len = BLI_snprintf_rlen(number_str, ARRAY_SIZE(number_str), ".%.3d", number);
-
-  /* If the number would lead to an overflow of the maximum ID name length, we need to truncate
-   * the base name part and do all the number checks again. */
-  if (base_name_len + number_str_len >= MAX_ID_NAME - 2 || number >= MAX_NUMBER) {
-    if (base_name_len + number_str_len >= MAX_ID_NAME - 2) {
-      base_name_len = MAX_ID_NAME - 2 - number_str_len - 1;
-    }
-    else {
-      base_name_len--;
-    }
-    base_name[base_name_len] = '\0';
-
-    /* Code above may have generated invalid utf-8 string, due to raw truncation.
-     * Ensure we get a valid one now. */
-    base_name_len -= (size_t)BLI_str_utf8_invalid_strip(base_name, base_name_len);
-
-    /* Also truncate orig name, and start the whole check again. */
-    name[base_name_len] = '\0';
-    return false;
-  }
-
-  /* We have our final number, we can put it in name and exit the function. */
-  BLI_strncpy(name + base_name_len, number_str, number_str_len + 1);
-  return true;
-}
-
-/**
- * Check to see if an ID name is already used, and find a new one if so.
- * Return true if a new name was created (returned in name).
- *
- * Normally the ID that's being checked is already in the ListBase, so ID *id points at the new
- * entry. The Python Library module needs to know what the name of a data-block will be before it
- * is appended, in this case ID *id is NULL.
- */
-static bool check_for_dupid(ListBase *lb, ID *id, char *name, ID **r_id_sorting_hint)
-{
-  BLI_assert(strlen(name) < MAX_ID_NAME - 2);
-
-  *r_id_sorting_hint = NULL;
-
-  ID *id_test = lb->first;
-  bool is_name_changed = false;
-
-  if (id_test == NULL) {
-    return is_name_changed;
-  }
-
-  const short id_type = (short)GS(id_test->name);
-
-  /* Static storage of previous handled ID/name info, used to perform a quicker test and optimize
-   * creation of huge number 

@@ Diff output truncated at 10240 characters. @@