diff --git a/src/shared/util.c b/src/shared/util.c
index 4780f26..fa05817 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
return str;
}
-bool strisutf8(const char *str, size_t len)
+size_t strnlenutf8(const char *str, size_t len)
+
{
size_t i = 0;
size = 4;
else
/* Invalid UTF-8 sequence */
- return false;
+ goto done;
/* Check the following bytes to ensure they have the correct
* format.
*/
for (size_t j = 1; j < size; ++j) {
- if (i + j > len || (str[i + j] & 0xC0) != 0x80)
+ if (i + j >= len || (str[i + j] & 0xC0) != 0x80)
/* Invalid UTF-8 sequence */
- return false;
+ goto done;
}
/* Move to the next character */
i += size;
}
- return true;
+done:
+ return i;
+}
+
+/* Check whether the first len bytes of str are entirely valid UTF-8.
+ *
+ * Returns true only when strnlenutf8() reports a valid prefix of exactly
+ * len bytes, i.e. it did not stop early on an invalid sequence.
+ */
+bool strisutf8(const char *str, size_t len)
+{
+ return strnlenutf8(str, len) == len;
}
bool argsisutf8(int argc, char *argv[])
{
size_t i = 0;
- while (i < len) {
- unsigned char c = str[i];
- size_t size = 0;
-
- /* Check the first byte to determine the number of bytes in the
- * UTF-8 character.
- */
- if ((c & 0x80) == 0x00)
- size = 1;
- else if ((c & 0xE0) == 0xC0)
- size = 2;
- else if ((c & 0xF0) == 0xE0)
- size = 3;
- else if ((c & 0xF8) == 0xF0)
- size = 4;
- else
- /* Invalid UTF-8 sequence */
- goto done;
-
- /* Check the following bytes to ensure they have the correct
- * format.
- */
- for (size_t j = 1; j < size; ++j) {
- if (i + j > len || (str[i + j] & 0xC0) != 0x80)
- /* Invalid UTF-8 sequence */
- goto done;
- }
-
- /* Move to the next character */
- i += size;
- }
+ i = strnlenutf8(str, len);
+ if (i == len)
+ return str;
-done:
/* Truncate to the longest valid UTF-8 string */
memset(str + i, 0, len - i);
return str;
diff --git a/src/shared/util.h b/src/shared/util.h
index 6fc02a9..c480351 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
char *strdelimit(char *str, char *del, char c);
int strsuffix(const char *str, const char *suffix);
char *strstrip(char *str);
+
+/* Returns the length in bytes of the leading part of str, looking at no
+ * more than len bytes, that passes the UTF-8 lead-byte and
+ * continuation-byte checks; equals len when no invalid sequence is found.
+ */
+size_t strnlenutf8(const char *str, size_t len);
bool strisutf8(const char *str, size_t length);
bool argsisutf8(int argc, char *argv[]);
char *strtoutf8(char *str, size_t len);