Diff between 09212f9d110e268332041c6038895e581d73d4bd and 24a327427d7e7a1ef5c69a0f90bc6297ab17a9ab

Changed Files

File Additions Deletions Status
src/shared/util.c +15 -37 modified
src/shared/util.h +2 -0 modified

Full Patch

diff --git a/src/shared/util.c b/src/shared/util.c
index 4780f26..fa05817 100644
--- a/src/shared/util.c
+++ b/src/shared/util.c
@@ -1909,7 +1909,8 @@ char *strstrip(char *str)
 	return str;
 }
 
-bool strisutf8(const char *str, size_t len)
+size_t strnlenutf8(const char *str, size_t len)
+
 {
 	size_t i = 0;
 
@@ -1930,22 +1931,28 @@ bool strisutf8(const char *str, size_t len)
 			size = 4;
 		else
 			/* Invalid UTF-8 sequence */
-			return false;
+			goto done;
 
 		/* Check the following bytes to ensure they have the correct
 		 * format.
 		 */
 		for (size_t j = 1; j < size; ++j) {
-			if (i + j > len || (str[i + j] & 0xC0) != 0x80)
+			if (i + j >= len || (str[i + j] & 0xC0) != 0x80)
 				/* Invalid UTF-8 sequence */
-				return false;
+				goto done;
 		}
 
 		/* Move to the next character */
 		i += size;
 	}
 
-	return true;
+done:
+	return i;
+}
+
+bool strisutf8(const char *str, size_t len)
+{
+	return strnlenutf8(str, len) == len; /* valid only if strnlenutf8() consumed all len bytes without stopping early */
+}
 
 bool argsisutf8(int argc, char *argv[])
@@ -1964,39 +1971,10 @@ char *strtoutf8(char *str, size_t len)
 {
 	size_t i = 0;
 
-	while (i < len) {
-		unsigned char c = str[i];
-		size_t size = 0;
-
-		/* Check the first byte to determine the number of bytes in the
-		 * UTF-8 character.
-		 */
-		if ((c & 0x80) == 0x00)
-			size = 1;
-		else if ((c & 0xE0) == 0xC0)
-			size = 2;
-		else if ((c & 0xF0) == 0xE0)
-			size = 3;
-		else if ((c & 0xF8) == 0xF0)
-			size = 4;
-		else
-			/* Invalid UTF-8 sequence */
-			goto done;
-
-		/* Check the following bytes to ensure they have the correct
-		 * format.
-		 */
-		for (size_t j = 1; j < size; ++j) {
-			if (i + j > len || (str[i + j] & 0xC0) != 0x80)
-				/* Invalid UTF-8 sequence */
-				goto done;
-		}
-
-		/* Move to the next character */
-		i += size;
-	}
+	i = strnlenutf8(str, len);
+	if (i == len)
+		return str;
 
-done:
 	/* Truncate to the longest valid UTF-8 string */
 	memset(str + i, 0, len - i);
 	return str;
diff --git a/src/shared/util.h b/src/shared/util.h
index 6fc02a9..c480351 100644
--- a/src/shared/util.h
+++ b/src/shared/util.h
@@ -90,6 +90,8 @@ do {						\
 char *strdelimit(char *str, char *del, char c);
 int strsuffix(const char *str, const char *suffix);
 char *strstrip(char *str);
+
+size_t strnlenutf8(const char *str, size_t len);
 bool strisutf8(const char *str, size_t length);
 bool argsisutf8(int argc, char *argv[]);
 char *strtoutf8(char *str, size_t len);