diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-06-10 11:53:06 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-06-10 11:53:06 -0700 |
commit | a79b2ed9b7051b259f1ab874e5168a853723c314 (patch) | |
tree | 03f90d522e0aa1394c7978d095c0b2551175cd22 | |
parent | 52b2f3e8c7f8d039eb1ba509a75f98046afb4c5e (diff) |
Revert "Merge pull request #58 from nwellnhof/optimize_utf8proc_detab"
This reverts commit 54d1249c2caebf45a24d691dc765fb93c9a5e594, reversing
changes made to bc14d869323650e936c7143dcf941b28ccd5b57d.
-rw-r--r-- | src/utf8.c | 71 |
1 files changed, 37 insertions, 34 deletions
```diff
@@ -54,11 +54,9 @@ static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len)
 }
 
 // Validate a single UTF-8 character according to RFC 3629.
-// Assumes a multi-byte UTF-8 sequence.
 static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
 {
 	int length = utf8proc_utf8class[str[0]];
 
-	assert(length != 1);
 	if (!length)
 		return -1;
@@ -66,48 +64,53 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)
 	if ((bufsize_t)length > str_len)
 		return -str_len;
 
-	if ((str[1] & 0xC0) != 0x80)
-		return -1;
-
-	if (length == 2) {
+	switch (length) {
+	case 2:
+		if ((str[1] & 0xC0) != 0x80)
+			return -1;
 		if (str[0] < 0xC2) {
 			// Overlong
 			return -length;
 		}
-	}
-	else {
+		break;
+
+	case 3:
+		if ((str[1] & 0xC0) != 0x80)
+			return -1;
 		if ((str[2] & 0xC0) != 0x80)
 			return -2;
-
-		if (length == 3) {
-			if (str[0] == 0xE0) {
-				if (str[1] < 0xA0) {
-					// Overlong
-					return -length;
-				}
-			} else if (str[0] == 0xED) {
-				if (str[1] >= 0xA0) {
-					// Surrogate
-					return -length;
-				}
+		if (str[0] == 0xE0) {
+			if (str[1] < 0xA0) {
+				// Overlong
+				return -length;
+			}
+		} else if (str[0] == 0xED) {
+			if (str[1] >= 0xA0) {
+				// Surrogate
+				return -length;
 			}
 		}
-		else {
-			if ((str[3] & 0xC0) != 0x80)
-				return -3;
-
-			if (str[0] == 0xF0) {
-				if (str[1] < 0x90) {
-					// Overlong
-					return -length;
-				}
-			} else if (str[0] >= 0xF4) {
-				if (str[0] > 0xF4 || str[1] >= 0x90) {
-					// Above 0x10FFFF
-					return -length;
-				}
+		break;
+
+	case 4:
+		if ((str[1] & 0xC0) != 0x80)
+			return -1;
+		if ((str[2] & 0xC0) != 0x80)
+			return -2;
+		if ((str[3] & 0xC0) != 0x80)
+			return -3;
+		if (str[0] == 0xF0) {
+			if (str[1] < 0x90) {
+				// Overlong
+				return -length;
+			}
+		} else if (str[0] >= 0xF4) {
+			if (str[0] > 0xF4 || str[1] >= 0x90) {
+				// Above 0x10FFFF
+				return -length;
 			}
 		}
+		break;
 	}
 
 	return length;
```