From 24a327427d7e7a1ef5c69a0f90bc6297ab17a9ab Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 9 Jul 2025 09:14:02 -0400 Subject: [PATCH] shared/util: Introduce strnlenutf8 This introduces strnlenutf8, which works similarly to strnlen but returns only the number of valid bytes of a UTF-8 encoded string, and replaces the other copies of similar code with calls to it. --- src/shared/util.c | 52 ++++++++++++++--------------------------------- src/shared/util.h | 2 ++ 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/src/shared/util.c b/src/shared/util.c index 4780f26b6..fa058170e 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -1909,7 +1909,8 @@ char *strstrip(char *str) return str; } -bool strisutf8(const char *str, size_t len) +size_t strnlenutf8(const char *str, size_t len) + { size_t i = 0; @@ -1930,22 +1931,28 @@ bool strisutf8(const char *str, size_t len) size = 4; else /* Invalid UTF-8 sequence */ - return false; + goto done; /* Check the following bytes to ensure they have the correct * format. */ for (size_t j = 1; j < size; ++j) { - if (i + j > len || (str[i + j] & 0xC0) != 0x80) + if (i + j >= len || (str[i + j] & 0xC0) != 0x80) /* Invalid UTF-8 sequence */ - return false; + goto done; } /* Move to the next character */ i += size; } - return true; +done: + return i; +} + +bool strisutf8(const char *str, size_t len) +{ + return strnlenutf8(str, len) == len; } bool argsisutf8(int argc, char *argv[]) @@ -1964,39 +1971,10 @@ char *strtoutf8(char *str, size_t len) { size_t i = 0; - while (i < len) { - unsigned char c = str[i]; - size_t size = 0; - - /* Check the first byte to determine the number of bytes in the - * UTF-8 character. - */ - if ((c & 0x80) == 0x00) - size = 1; - else if ((c & 0xE0) == 0xC0) - size = 2; - else if ((c & 0xF0) == 0xE0) - size = 3; - else if ((c & 0xF8) == 0xF0) - size = 4; - else - /* Invalid UTF-8 sequence */ - goto done; - - /* Check the following bytes to ensure they have the correct - * format.
- */ - for (size_t j = 1; j < size; ++j) { - if (i + j > len || (str[i + j] & 0xC0) != 0x80) - /* Invalid UTF-8 sequence */ - goto done; - } - - /* Move to the next character */ - i += size; - } + i = strnlenutf8(str, len); + if (i == len) + return str; -done: /* Truncate to the longest valid UTF-8 string */ memset(str + i, 0, len - i); return str; diff --git a/src/shared/util.h b/src/shared/util.h index 6fc02a9dc..c480351d6 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -90,6 +90,8 @@ do { \ char *strdelimit(char *str, char *del, char c); int strsuffix(const char *str, const char *suffix); char *strstrip(char *str); + +size_t strnlenutf8(const char *str, size_t len); bool strisutf8(const char *str, size_t length); bool argsisutf8(int argc, char *argv[]); char *strtoutf8(char *str, size_t len); -- 2.47.3