diff options
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- | src/utf8.c | 31 |
1 files changed, 10 insertions, 21 deletions
@@ -116,53 +116,42 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) return length; } -void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) +void utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) { - static const uint8_t whitespace[] = " "; - - bufsize_t i = 0, tab = 0; + bufsize_t i = 0; while (i < size) { bufsize_t org = i; int charlen = 0; - while (i < size && line[i] != '\t') { - if (line[i] >= 0x80) { + while (i < size) { + if (line[i] < 0x80 && line[i] != 0) { + i++; + } else if (line[i] >= 0x80) { charlen = utf8proc_valid(line + i, size - i); if (charlen < 0) { charlen = -charlen; break; } i += charlen; - } else if (line[i] == '\0') { + } else if (line[i] == 0) { // ASCII NUL is technically valid but rejected // for security reasons. charlen = 1; break; - } else { - i++; } - - tab++; } - if (i > org) + if (i > org) { cmark_strbuf_put(ob, line + org, i - org); + } - if (i >= size) + if (i >= size) { break; - - if (line[i] == '\t') { - int numspaces = 4 - (tab % 4); - cmark_strbuf_put(ob, whitespace, numspaces); - i += 1; - tab += numspaces; } else { // Invalid UTF-8 encode_unknown(ob); - i += charlen; - tab += 1; } } } |