[Bf-blender-cvs] [89dae554f9d] master: Cleanup: utf8 stepping functions

Fri Aug 27 09:04:17 CEST 2021

Commit: 89dae554f9d5ae0204ad9c51c5ba00e14b16e858
Author: Campbell Barton
Date:   Fri Aug 27 16:42:31 2021 +1000
Branches: master
https://developer.blender.org/rB89dae554f9d5ae0204ad9c51c5ba00e14b16e858

Cleanup: utf8 stepping functions

Various changes to reduce risk of out of bounds errors in utf8 seeking.

- Remove BLI_str_prev_char_utf8
  This function could potentially scan past the beginning of a string.
  Use BLI_str_find_prev_char_utf8 instead which takes a limiting
  string start argument.

- Swap arguments for BLI_str_find_prev_char_utf8 so the stepping
  argument is first and the limiting argument is last.
  This matches BLI_str_find_next_char_utf8.

- Change behavior of these functions to return the start or end
  pointers instead of NULL, which complicated use of these functions
  to calculate offsets.

  Callers that need to check if the limits were reached can compare
  the return value with the start/end pointers.

- Return 'const char *' from these functions
  so they don't remove const from the input arguments.

===================================================================

M	source/blender/blenfont/intern/blf_font.c
M	source/blender/blenkernel/intern/text.c
M	source/blender/blenkernel/intern/unit.c
M	source/blender/blenlib/BLI_string_utf8.h
M	source/blender/blenlib/intern/string_cursor_utf8.c
M	source/blender/blenlib/intern/string_utf8.c
M	source/blender/editors/interface/interface_handlers.c
M	source/blender/editors/interface/interface_widgets.c
M	source/blender/editors/space_text/text_draw.c
M	source/blender/editors/space_text/text_ops.c

===================================================================

diff --git a/source/blender/blenfont/intern/blf_font.c b/source/blender/blenfont/intern/blf_font.c
index dbcd1d6016d..27478bd7f8e 100644
--- a/source/blender/blenfont/intern/blf_font.c
+++ b/source/blender/blenfont/intern/blf_font.c
@@ -673,23 +673,23 @@ size_t blf_font_width_to_rstrlen(
   GlyphBLF *g, *g_prev;
   int pen_x, width_new;
   size_t i, i_prev, i_tmp;
-  char *s, *s_prev;
+  const char *s, *s_prev;
 
   GlyphCacheBLF *gc = blf_glyph_cache_acquire(font);
   const int width_i = (int)width;
 
   i = BLI_strnlen(str, str_len);
-  s = BLI_str_find_prev_char_utf8(str, &str[i]);
-  i = (size_t)((s != NULL) ? s - str : 0);
-  s_prev = BLI_str_find_prev_char_utf8(str, s);
-  i_prev = (size_t)((s_prev != NULL) ? s_prev - str : 0);
+  s = BLI_str_find_prev_char_utf8(&str[i], str);
+  i = (size_t)(s - str);
+  s_prev = BLI_str_find_prev_char_utf8(s, str);
+  i_prev = (size_t)(s_prev - str);
 
   i_tmp = i;
   g = blf_utf8_next_fast(font, gc, str, str_len, &i_tmp, &c);
   for (width_new = pen_x = 0; (s != NULL);
        i = i_prev, s = s_prev, c = c_prev, g = g_prev, g_prev = NULL, width_new = pen_x) {
-    s_prev = BLI_str_find_prev_char_utf8(str, s);
-    i_prev = (size_t)((s_prev != NULL) ? s_prev - str : 0);
+    s_prev = BLI_str_find_prev_char_utf8(s, str);
+    i_prev = (size_t)(s_prev - str);
 
     if (s_prev != NULL) {
       i_tmp = i_prev;
diff --git a/source/blender/blenkernel/intern/text.c b/source/blender/blenkernel/intern/text.c
index 7f1f6590e48..bdc82fe626c 100644
--- a/source/blender/blenkernel/intern/text.c
+++ b/source/blender/blenkernel/intern/text.c
@@ -933,7 +933,7 @@ void txt_move_left(Text *text, const bool sel)
       (*charp) -= tabsize;
     }
     else {
-      const char *prev = BLI_str_prev_char_utf8((*linep)->line + *charp);
+      const char *prev = BLI_str_find_prev_char_utf8((*linep)->line + *charp, (*linep)->line);
       *charp = prev - (*linep)->line;
     }
   }
@@ -1938,7 +1938,8 @@ void txt_backspace_char(Text *text)
     txt_pop_sel(text);
   }
   else { /* Just backspacing a char */
-    const char *prev = BLI_str_prev_char_utf8(text->curl->line + text->curc);
+    const char *prev = BLI_str_find_prev_char_utf8(text->curl->line + text->curc,
+                                                   text->curl->line);
     size_t c_len = prev - text->curl->line;
     c = BLI_str_utf8_as_unicode_step(text->curl->line, text->curl->len, &c_len);
     c_len -= prev - text->curl->line;
diff --git a/source/blender/blenkernel/intern/unit.c b/source/blender/blenkernel/intern/unit.c
index 4581d410444..4e9a3c9fb2e 100644
--- a/source/blender/blenkernel/intern/unit.c
+++ b/source/blender/blenkernel/intern/unit.c
@@ -717,7 +717,7 @@ static const char *unit_find_str(const char *str, const char *substr, bool case_
       if (str_found == str ||
           /* Weak unicode support!, so "µm" won't match up be replaced by "m"
            * since non ascii utf8 values will NEVER return true */
-          isalpha_or_utf8(*BLI_str_prev_char_utf8(str_found)) == 0) {
+          isalpha_or_utf8(*BLI_str_find_prev_char_utf8(str_found, str)) == 0) {
         /* Next char cannot be alphanum. */
         int len_name = strlen(substr);
 
diff --git a/source/blender/blenlib/BLI_string_utf8.h b/source/blender/blenlib/BLI_string_utf8.h
index a9cb13a3277..937b36758f2 100644
--- a/source/blender/blenlib/BLI_string_utf8.h
+++ b/source/blender/blenlib/BLI_string_utf8.h
@@ -57,11 +57,10 @@ size_t BLI_str_utf32_as_utf8(char *__restrict dst,
                              const size_t maxncpy) ATTR_NONNULL(1, 2);
 size_t BLI_str_utf32_as_utf8_len(const char32_t *src) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
 
-char *BLI_str_find_prev_char_utf8(const char *str, const char *p) ATTR_WARN_UNUSED_RESULT
-    ATTR_NONNULL(1, 2);
-char *BLI_str_find_next_char_utf8(const char *p, const char *end) ATTR_WARN_UNUSED_RESULT
-    ATTR_NONNULL(1);
-char *BLI_str_prev_char_utf8(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
+const char *BLI_str_find_prev_char_utf8(const char *p, const char *str_start)
+    ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1, 2);
+const char *BLI_str_find_next_char_utf8(const char *p, const char *str_end)
+    ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1, 2);
 
 /* wchar_t functions, copied from blenders own font.c originally */
 size_t BLI_wstrlen_utf8(const wchar_t *src) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
diff --git a/source/blender/blenlib/intern/string_cursor_utf8.c b/source/blender/blenlib/intern/string_cursor_utf8.c
index f76a3114e09..eb49572f06c 100644
--- a/source/blender/blenlib/intern/string_cursor_utf8.c
+++ b/source/blender/blenlib/intern/string_cursor_utf8.c
@@ -117,7 +117,7 @@ bool BLI_str_cursor_step_next_utf8(const char *str, size_t maxlen, int *pos)
   const char *str_end = str + (maxlen + 1);
   const char *str_pos = str + (*pos);
   const char *str_next = BLI_str_find_next_char_utf8(str_pos, str_end);
-  if (str_next) {
+  if (str_next != str_end) {
     (*pos) += (str_next - str_pos);
     if ((*pos) > (int)maxlen) {
       (*pos) = (int)maxlen;
@@ -132,11 +132,9 @@ bool BLI_str_cursor_step_prev_utf8(const char *str, size_t UNUSED(maxlen), int *
 {
   if ((*pos) > 0) {
     const char *str_pos = str + (*pos);
-    const char *str_prev = BLI_str_find_prev_char_utf8(str, str_pos);
-    if (str_prev) {
-      (*pos) -= (str_pos - str_prev);
-      return true;
-    }
+    const char *str_prev = BLI_str_find_prev_char_utf8(str_pos, str);
+    (*pos) -= (str_pos - str_prev);
+    return true;
   }
 
   return false;
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c
index e35e2bcca3c..222b4df7c0e 100644
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -686,12 +686,7 @@ size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
     else {
       *dst_w = '?';
       const char *src_c_next = BLI_str_find_next_char_utf8(src_c + index, src_c_end);
-      if (src_c_next != NULL) {
-        index = (size_t)(src_c_next - src_c);
-      }
-      else {
-        index += 1;
-      }
+      index = (size_t)(src_c_next - src_c);
     }
     dst_w++;
     len++;
@@ -758,31 +753,33 @@ size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
  * \param p: pointer to some position within \a str
  *
  * Given a position \a p with a UTF-8 encoded string \a str, find the start
- * of the previous UTF-8 character starting before. \a p Returns %NULL if no
- * UTF-8 characters are present in \a str before \a p
+ * of the previous UTF-8 character starting before \a p. Returns \a str_start if no
+ * UTF-8 characters are present in \a str_start before \a p.
  *
  * \a p does not have to be at the beginning of a UTF-8 character. No check
  * is made to see if the character found is actually valid other than
  * it starts with an appropriate byte.
  *
- * Return value: a pointer to the found character or %NULL.
+ * \return A pointer to the found character.
  */
-char *BLI_str_find_prev_char_utf8(const char *str, const char *p)
+const char *BLI_str_find_prev_char_utf8(const char *p, const char *str_start)
 {
-  for (--p; p >= str; p--) {
-    if ((*p & 0xc0) != 0x80) {
-      return (char *)p;
+  BLI_assert(p >= str_start);
+  if (str_start < p) {
+    for (--p; p >= str_start; p--) {
+      if ((*p & 0xc0) != 0x80) {
+        return (char *)p;
+      }
     }
   }
-  return NULL;
+  return p;
 }
 
 /* was g_utf8_find_next_char */
 /**
  * BLI_str_find_next_char_utf8:
  * \param p: a pointer to a position within a UTF-8 encoded string
- * \param end: a pointer to the byte following the end of the string,
- * or %NULL to indicate that the string is nul-terminated.
+ * \param str_end: a pointer to the byte following the end of the string.
  *
  * Finds the start of the next UTF-8 character in the string after \a p
  *
@@ -790,50 +787,18 @@ char *BLI_str_find_prev_char_utf8(const char *str, const char *p)
  * is made to see if the character found is actually valid other than
  * it starts with an appropriate byte.
  *
- * Return value: a pointer to the found character or %NULL
- */
-char *BLI_str_find_next_char_utf8(const char *p, const char *end)
-{
-  if (*p) {
-    if (end) {
-      BLI_assert(end >= p);
-      for (++p; p < end && (*p & 0xc0) == 0x80; p++) {
-        /* do nothing */
-      }
-    }
-    else {
-      for (++p; (*p & 0xc0) == 0x80; p++) {
-        /* do nothing */
-      }
-    }
-  }
-  return (p == end) ? NULL : (char *)p;
-}
-
-/* was g_utf8_prev_char */
-/**
- * BLI_str_prev_char_utf8:
- * \param p: a pointer to a position within a UTF-8 encoded string
- *
- * Finds the previous UTF-8 character in the string before \a p
- *
- * \a p does not have to be at the beginning of a UTF-8 character. No check
- * is made to see if the character found is actually valid other than
- * it starts with an appropriate byte. If \a p might be the first
- * character of the string, you must use g_utf8_find_prev_char() instead.
- *
- * Return value: a pointer to the found character.
+ * \return a pointer to the found character or a pointer to the null terminating character '\0'.
  */
-char *BLI_str_prev_char_utf8(const char *p)
+const char *BLI_str_find_next_char_utf8(const char *p, const char *str_end)
 {
-  while (1) {
-    p--;
-    if ((*p & 0xc0) != 0x80) {
-      return (char *)p;
+  BLI_assert(p <= str_end);
+  if ((p < str_end) && (*p != '\0')) {
+    for (++p; p < str_end && (*p & 0xc0) == 0x80; p++) {
+      /* do nothing */
     }
   }
+  return p;
 }
-/* end glib copy */
 
 size_t BLI_str_partition_utf8(const char *str,
                               const uint delim[],
@@ -858,19 +823,21 @@ size_t BLI_str_partition_ex_utf8(const char *str,
                                  const char **suf,
                                  const bool from_right)
 {
-  const uint *d;
   const size_t str_len = end ? (size_t)(end - str) : strlen(str);
-  size_t index;
+  if (end == NULL) {
+    end = str + str_len;
+  }
 
   /* Note that here, we assume end points to a valid utf8 char! */
-  BLI_assert(end == NULL || (end >= str && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR)));
+  BLI_assert((end >= str) && (

@@ Diff output truncated at 10240 characters. @@