[Bf-blender-cvs] [8b55cda0481] master: Fix BLI_str_utf8_as_unicode_step reading past intended bounds

Campbell Barton noreply at git.blender.org
Tue Aug 24 06:29:03 CEST 2021


Commit: 8b55cda04812255922f488ad6bacd228d5d290a6
Author: Campbell Barton
Date:   Tue Aug 24 13:25:26 2021 +1000
Branches: master
https://developer.blender.org/rB8b55cda04812255922f488ad6bacd228d5d290a6

Fix BLI_str_utf8_as_unicode_step reading past intended bounds

Add a string length argument to BLI_str_utf8_as_unicode_step to prevent
reading past the buffer bounds or the intended range since some callers
of this function take a string length to operate on part of the string.

Font drawing for example didn't respect the length argument,
potentially causing a buffer over-read with multi-byte characters
that could read past the end of the string.

The following command would read 5 bytes past the end of the input.

`BLF_draw(font_id, (char[]){252}, 1);`

In practice strings are typically null terminated so this didn't crash
reading past buffer bounds.

Nevertheless, this wasn't correct and could cause bugs in the future.

Clamping by the length now has the same behavior as a null byte.

Add test to ensure this is working as intended.

===================================================================

M	source/blender/blenfont/intern/blf_font.c
M	source/blender/blenkernel/intern/text.c
M	source/blender/blenlib/BLI_string_utf8.h
M	source/blender/blenlib/intern/string_utf8.c
M	source/blender/blenlib/tests/BLI_string_utf8_test.cc
M	source/blender/editors/space_text/text_ops.c

===================================================================

diff --git a/source/blender/blenfont/intern/blf_font.c b/source/blender/blenfont/intern/blf_font.c
index 50b4bb09b7b..5ad48aa08d4 100644
--- a/source/blender/blenfont/intern/blf_font.c
+++ b/source/blender/blenfont/intern/blf_font.c
@@ -298,7 +298,7 @@ static void blf_batch_draw_end(void)
  */
 
 BLI_INLINE GlyphBLF *blf_utf8_next_fast(
-    FontBLF *font, GlyphCacheBLF *gc, const char *str, size_t *i_p, uint *r_c)
+    FontBLF *font, GlyphCacheBLF *gc, const char *str, size_t str_len, size_t *i_p, uint *r_c)
 {
   GlyphBLF *g;
   if ((*r_c = str[*i_p]) < GLYPH_ASCII_TABLE_SIZE) {
@@ -309,7 +309,7 @@ BLI_INLINE GlyphBLF *blf_utf8_next_fast(
     }
     (*i_p)++;
   }
-  else if ((*r_c = BLI_str_utf8_as_unicode_step(str, i_p)) != BLI_UTF8_ERR) {
+  else if ((*r_c = BLI_str_utf8_as_unicode_step(str, str_len, i_p)) != BLI_UTF8_ERR) {
     g = blf_glyph_search(gc, *r_c);
     if (UNLIKELY(g == NULL)) {
       g = blf_glyph_add(font, gc, FT_Get_Char_Index(font->face, *r_c), *r_c);
@@ -382,7 +382,7 @@ static void blf_font_draw_ex(FontBLF *font,
   blf_batch_draw_begin(font);
 
   while ((i < str_len) && str[i]) {
-    g = blf_utf8_next_fast(font, gc, str, &i, &c);
+    g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
 
     if (UNLIKELY(c == BLI_UTF8_ERR)) {
       break;
@@ -478,7 +478,7 @@ int blf_font_draw_mono(FontBLF *font, const char *str, const size_t str_len, int
   blf_batch_draw_begin(font);
 
   while ((i < str_len) && str[i]) {
-    g = blf_utf8_next_fast(font, gc, str, &i, &c);
+    g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
 
     if (UNLIKELY(c == BLI_UTF8_ERR)) {
       break;
@@ -535,7 +535,7 @@ static void blf_font_draw_buffer_ex(FontBLF *font,
   /* another buffer specific call for color conversion */
 
   while ((i < str_len) && str[i]) {
-    g = blf_utf8_next_fast(font, gc, str, &i, &c);
+    g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
 
     if (UNLIKELY(c == BLI_UTF8_ERR)) {
       break;
@@ -703,7 +703,7 @@ size_t blf_font_width_to_strlen(
 
   for (i_prev = i = 0, width_new = pen_x = 0, g_prev = NULL, c_prev = 0; (i < str_len) && str[i];
        i_prev = i, width_new = pen_x, c_prev = c, g_prev = g) {
-    g = blf_utf8_next_fast(font, gc, str, &i, &c);
+    g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
 
     if (blf_font_width_to_strlen_glyph_process(font, c_prev, c, g_prev, g, &pen_x, width_i)) {
       break;
@@ -737,7 +737,7 @@ size_t blf_font_width_to_rstrlen(
   i_prev = (size_t)((s_prev != NULL) ? s_prev - str : 0);
 
   i_tmp = i;
-  g = blf_utf8_next_fast(font, gc, str, &i_tmp, &c);
+  g = blf_utf8_next_fast(font, gc, str, str_len, &i_tmp, &c);
   for (width_new = pen_x = 0; (s != NULL);
        i = i_prev, s = s_prev, c = c_prev, g = g_prev, g_prev = NULL, width_new = pen_x) {
     s_prev = BLI_str_find_prev_char_utf8(str, s);
@@ -745,7 +745,7 @@ size_t blf_font_width_to_rstrlen(
 
     if (s_prev != NULL) {
       i_tmp = i_prev;
-      g_prev = blf_utf8_next_fast(font, gc, str, &i_tmp, &c_prev);
+      g_prev = blf_utf8_next_fast(font, gc, str, str_len, &i_tmp, &c_prev);
       BLI_assert(i_tmp == i);
     }
 
@@ -788,7 +788,7 @@ static void blf_font_boundbox_ex(FontBLF *font,
   box->ymax = -32000.0f;
 
   while ((i < str_len) && str[i]) {
-    g = blf_utf8_next_fast(font, gc, str, &i, &c);
+    g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
 
     if (UNLIKELY(c == BLI_UTF8_ERR)) {
       break;
@@ -961,7 +961,7 @@ static void blf_font_boundbox_foreach_glyph_ex(FontBLF *font,
 
   while ((i < str_len) && str[i]) {
     i_curr = i;
-    g = blf_utf8_next_fast(font, gc, str, &i, &c);
+    g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
 
     if (UNLIKELY(c == BLI_UTF8_ERR)) {
       break;
@@ -1051,7 +1051,7 @@ static void blf_font_wrap_apply(FontBLF *font,
     size_t i_curr = i;
     bool do_draw = false;
 
-    g = blf_utf8_next_fast(font, gc, str, &i, &c);
+    g = blf_utf8_next_fast(font, gc, str, str_len, &i, &c);
 
     if (UNLIKELY(c == BLI_UTF8_ERR)) {
       break;
@@ -1202,7 +1202,7 @@ int blf_font_count_missing_chars(FontBLF *font,
     if ((c = str[i]) < GLYPH_ASCII_TABLE_SIZE) {
       i++;
     }
-    else if ((c = BLI_str_utf8_as_unicode_step(str, &i)) != BLI_UTF8_ERR) {
+    else if ((c = BLI_str_utf8_as_unicode_step(str, str_len, &i)) != BLI_UTF8_ERR) {
       if (FT_Get_Char_Index((font)->face, c) == 0) {
         missing++;
       }
diff --git a/source/blender/blenkernel/intern/text.c b/source/blender/blenkernel/intern/text.c
index 06137f5d110..c2ab91251b6 100644
--- a/source/blender/blenkernel/intern/text.c
+++ b/source/blender/blenkernel/intern/text.c
@@ -1660,7 +1660,7 @@ void txt_insert_buf(Text *text, const char *in_buffer)
 
   /* Read the first line (or as close as possible */
   while (buffer[i] && buffer[i] != '\n') {
-    txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, &i));
+    txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, len, &i));
   }
 
   if (buffer[i] == '\n') {
@@ -1682,7 +1682,7 @@ void txt_insert_buf(Text *text, const char *in_buffer)
       }
       else {
         for (j = i - l; j < i && j < len;) {
-          txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, &j));
+          txt_add_raw_char(text, BLI_str_utf8_as_unicode_step(buffer, len, &j));
         }
         break;
       }
diff --git a/source/blender/blenlib/BLI_string_utf8.h b/source/blender/blenlib/BLI_string_utf8.h
index e1d7e2c58f7..b936e39731d 100644
--- a/source/blender/blenlib/BLI_string_utf8.h
+++ b/source/blender/blenlib/BLI_string_utf8.h
@@ -43,8 +43,10 @@ unsigned int BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t *
     ATTR_NONNULL();
 unsigned int BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p,
                                                    size_t *__restrict index) ATTR_NONNULL();
-unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index)
-    ATTR_NONNULL();
+unsigned int BLI_str_utf8_as_unicode_step(const char *__restrict p,
+                                          size_t p_len,
+                                          size_t *__restrict index) ATTR_NONNULL(1, 3);
+
 size_t BLI_str_utf8_from_unicode(unsigned int c, char *outbuf);
 size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
                              const char *__restrict src_c,
diff --git a/source/blender/blenlib/intern/string_utf8.c b/source/blender/blenlib/intern/string_utf8.c
index 5710bd6b150..dbde5221d7e 100644
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -582,49 +582,69 @@ uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__r
 
 /**
  * Another variant that steps over the index.
+ *
+ * \param p: The text to step over.
+ * \param p_len: The length of `p`.
+ * \param index: Index of `p` to step over.
+ *
  * \note currently this also falls back to latin1 for text drawing.
+ *
+ * \note The behavior for clipped text (where `p_len` limits decoding trailing bytes)
+ * must have the same behavior is encountering a nil byte,
+ * so functions that only use the first part of a string has matching behavior to functions
+ * that null terminate the text.
  */
-uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index)
+uint BLI_str_utf8_as_unicode_step(const char *__restrict p,
+                                  const size_t p_len,
+                                  size_t *__restrict index)
 {
   int i, len;
   uint mask = 0;
   uint result;
-  unsigned char c;
+  const char c = p[*index];
 
-  p += *index;
-  c = (unsigned char)*p;
+  BLI_assert(*index < p_len);
+  BLI_assert(c != '\0');
 
   UTF8_COMPUTE(c, mask, len, -1);
   if (UNLIKELY(len == -1)) {
-    /* when called with NULL end, result will never be NULL,
-     * checks for a NULL character */
-    const char *p_next = BLI_str_find_next_char_utf8(p, NULL);
-    /* will never return the same pointer unless '\0',
-     * eternal loop is prevented */
-    *index += (size_t)(p_next - p);
-    return BLI_UTF8_ERR;
+    const char *p_next = BLI_str_find_next_char_utf8(p + *index, p + p_len);
+    /* #BLI_str_find_next_char_utf8 ensures the nil byte will terminate.
+     * so there is no chance this sets the index past the nil byte (assert this is the case). */
+    BLI_assert(p_next || (memchr(p + *index, '\0', p_len - *index) == NULL));
+    len = (int)((p_next ? (size_t)(p_next - p) : p_len) - *index);
+    result = BLI_UTF8_ERR;
+  }
+  else if (UNLIKELY(*index + (size_t)len > p_len)) {
+    /* A multi-byte character reads past the buffer bounds,
+     * match the behavior of encountering an byte with invalid encoding below. */
+    len = 1;
+    result = (uint)c;
   }
-
-  /* this is tricky since there are a few ways we can bail out of bad unicode
-   * values, 3 possible solutions. */
+  else {
+    /* This is tricky since there are a few ways we can bail out of bad unicode
+     * values, 3 possible solutions. */
+    p += *index;
 #if 0
-  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+    UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
 #elif 1
-  /* WARNING: this is NOT part of glib, or supported by similar functions.
-   * this is added for text drawing because some filepaths can have latin1
-   * characters */
-  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
-  if (result == BLI_UTF8_ERR) {
-    len = 1;
-    result = *p;
-  }
-  /* end warning! */
+    /* WARNING: this is NOT part of glib, or supported by similar functions.
+     * this is added for text drawing because some filepaths can have latin1
+     * characters */
+    UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
+    if (result == BLI_UTF8_ERR) {
+      len = 1;
+      result = (uint)c;
+    }
+    /* end warning! */
 #else
-  /* without a fallback like '?', text drawing will stop on this value */
-  UTF8_GET(result, p, i, mask, len, '?');
+    /* Without a fallback like '?', text drawing will stop on this value. */
+    UTF8_GET(result, p, i, mask, len, '?');
 #endif
+  }
 
   *index += (size_t)len;
+  BLI_assert(*index <= p_len);
   return result;
 }
 
@@ -810,6 +830,7 @@ char *BLI_str_find_next_char_utf8(const 

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list