diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/blocks.c | 2 | ||||
| -rw-r--r-- | src/utf8.c | 31 | ||||
| -rw-r--r-- | src/utf8.h | 2 | 
3 files changed, 12 insertions, 23 deletions
| diff --git a/src/blocks.c b/src/blocks.c index 06f6dcb..08f2e63 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -619,7 +619,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  	cmark_chunk input;  	bool maybe_lazy; -	cmark_strbuf_put(parser->curline, buffer, bytes); +	utf8proc_check(parser->curline, buffer, bytes);  	parser->offset = 0;  	parser->column = 0;  	parser->blank = false; @@ -116,53 +116,42 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len)  	return length;  } -void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) +void utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size)  { -	static const uint8_t whitespace[] = "    "; - -	bufsize_t i = 0, tab = 0; +	bufsize_t i = 0;  	while (i < size) {  		bufsize_t org = i;  		int charlen = 0; -		while (i < size && line[i] != '\t') { -			if (line[i] >= 0x80) { +		while (i < size) { +			if (line[i] < 0x80 && line[i] != 0) { +				i++; +			} else if (line[i] >= 0x80) {  				charlen = utf8proc_valid(line + i, size - i);  				if (charlen < 0) {  					charlen = -charlen;  					break;  				}  				i += charlen; -			} else if (line[i] == '\0') { +			} else if (line[i] == 0) {  				// ASCII NUL is technically valid but rejected  				// for security reasons.  				charlen = 1;  				break; -			} else { -				i++;  			} - -			tab++;  		} -		if (i > org) +		if (i > org) {  			cmark_strbuf_put(ob, line + org, i - org); +		} -		if (i >= size) +		if (i >= size) {  			break; - -		if (line[i] == '\t') { -			int numspaces = 4 - (tab % 4); -			cmark_strbuf_put(ob, whitespace, numspaces); -			i += 1; -			tab += numspaces;  		} else {  			// Invalid UTF-8  			encode_unknown(ob); -  			i += charlen; -			tab += 1;  		}  	}  } @@ -11,7 +11,7 @@ extern "C" {  void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len);  void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);  int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); -void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); +void utf8proc_check(cmark_strbuf *dest, const uint8_t *line, bufsize_t size);  int utf8proc_is_space(int32_t uc);  int utf8proc_is_punctuation(int32_t uc); | 
