summary refs log tree commit diff
path: root/src/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- src/utf8.c 71
1 files changed, 37 insertions, 34 deletions
diff --git a/src/utf8.c b/src/utf8.c
index 4b85714..a4449dd 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -54,11 +54,9 @@ static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len)
}
// Validate a single UTF-8 character according to RFC 3629.
-// Assumes a multi-byte UTF-8 sequence.
static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
{
int length = utf8proc_utf8class[str[0]];
- assert(length != 1);
if (!length)
return -1;
@@ -66,48 +64,53 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
if ((bufsize_t)length > str_len)
return -str_len;
- if ((str[1] & 0xC0) != 0x80)
- return -1;
-
- if (length == 2) {
+ switch (length) {
+ case 2:
+ if ((str[1] & 0xC0) != 0x80)
+ return -1;
if (str[0] < 0xC2) {
// Overlong
return -length;
}
- }
- else {
+ break;
+
+ case 3:
+ if ((str[1] & 0xC0) != 0x80)
+ return -1;
if ((str[2] & 0xC0) != 0x80)
return -2;
-
- if (length == 3) {
- if (str[0] == 0xE0) {
- if (str[1] < 0xA0) {
- // Overlong
- return -length;
- }
- } else if (str[0] == 0xED) {
- if (str[1] >= 0xA0) {
- // Surrogate
- return -length;
- }
+ if (str[0] == 0xE0) {
+ if (str[1] < 0xA0) {
+ // Overlong
+ return -length;
+ }
+ } else if (str[0] == 0xED) {
+ if (str[1] >= 0xA0) {
+ // Surrogate
+ return -length;
}
}
- else {
- if ((str[3] & 0xC0) != 0x80)
- return -3;
-
- if (str[0] == 0xF0) {
- if (str[1] < 0x90) {
- // Overlong
- return -length;
- }
- } else if (str[0] >= 0xF4) {
- if (str[0] > 0xF4 || str[1] >= 0x90) {
- // Above 0x10FFFF
- return -length;
- }
+ break;
+
+ case 4:
+ if ((str[1] & 0xC0) != 0x80)
+ return -1;
+ if ((str[2] & 0xC0) != 0x80)
+ return -2;
+ if ((str[3] & 0xC0) != 0x80)
+ return -3;
+ if (str[0] == 0xF0) {
+ if (str[1] < 0x90) {
+ // Overlong
+ return -length;
+ }
+ } else if (str[0] >= 0xF4) {
+ if (str[0] > 0xF4 || str[1] >= 0x90) {
+ // Above 0x10FFFF
+ return -length;
}
}
+ break;
}
return length;