diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/blocks.c | 92 | ||||
| -rw-r--r-- | src/parser.h | 2 | 
2 files changed, 71 insertions, 23 deletions
diff --git a/src/blocks.c b/src/blocks.c index 8c3e18c..06f6dcb 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -16,6 +16,8 @@  #include "debug.h"  #define CODE_INDENT 4 +#define TAB_STOP 4 +  #define peek_at(i, n) (i)->data[n]  static inline bool @@ -70,7 +72,9 @@ cmark_parser *cmark_parser_new(int options)  	parser->current = document;  	parser->line_number = 0;  	parser->offset = 0; +	parser->column = 0;  	parser->first_nonspace = 0; +	parser->first_nonspace_column = 0;  	parser->indent = 0;  	parser->blank = false;  	parser->curline = line; @@ -555,16 +559,53 @@ static void chop_trailing_hashtags(cmark_chunk *ch)  static void  S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input)  { +	char c; +	int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP); +  	parser->first_nonspace = parser->offset; -	while (peek_at(input, parser->first_nonspace) == ' ') { -		parser->first_nonspace++; +	parser->first_nonspace_column = parser->column; +	while ((c = peek_at(input, parser->first_nonspace))) { +		if (c == ' ') { +			parser->first_nonspace += 1; +			parser->first_nonspace_column += 1; +			chars_to_tab = chars_to_tab - 1; +			if (chars_to_tab == 0) { +				chars_to_tab = TAB_STOP; +			} +		} else if (c == '\t') { +			parser->first_nonspace += 1; +			parser->first_nonspace_column += chars_to_tab; +			chars_to_tab = TAB_STOP; +		} else { +			break; +		}  	} -	parser->indent = parser->first_nonspace - parser->offset; +	parser->indent = parser->first_nonspace_column - parser->column;  	parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace));  }  static void +S_advance_offset(cmark_parser *parser, cmark_chunk *input, bufsize_t count, bool columns) +{ +	char c; +	int chars_to_tab; +	while (count > 0 && (c = peek_at(input, parser->offset))) { +		if (c == '\t') { +			chars_to_tab = 4 - (parser->column % TAB_STOP); +			parser->column += chars_to_tab; +			parser->offset += 1; +			count -= (columns ? chars_to_tab : 1); +		} else { +			parser->offset += 1; +			parser->column += 1; // assume ascii; block starts are ascii +			count -= 1; +		} +	} +} + + +static void  S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes)  {  	cmark_node* last_matched_container; @@ -578,8 +619,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  	cmark_chunk input;  	bool maybe_lazy; -	utf8proc_detab(parser->curline, buffer, bytes); +	cmark_strbuf_put(parser->curline, buffer, bytes);  	parser->offset = 0; +	parser->column = 0;  	parser->blank = false;  	input.data = parser->curline->ptr; @@ -601,7 +643,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  		if (container->type == NODE_BLOCK_QUOTE) {  			matched = parser->indent <= 3 && peek_at(&input, parser->first_nonspace) == '>';  			if (matched) { -				parser->offset = parser->first_nonspace + 1; +				S_advance_offset(parser, &input, parser->indent + 1, true);  				if (peek_at(&input, parser->offset) == ' ')  					parser->offset++;  			} else { @@ -609,13 +651,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  			}  		} else if (container->type == NODE_ITEM) { -  			if (parser->indent >= container->as.list.marker_offset +  			    container->as.list.padding) { -				parser->offset += container->as.list.marker_offset + -				                  container->as.list.padding; +				S_advance_offset(parser, &input, +						 container->as.list.marker_offset + +						 container->as.list.padding, true);  			} else if (parser->blank) { -				parser->offset = parser->first_nonspace; +				S_advance_offset(parser, &input, +						 parser->first_nonspace - parser->offset, false);  			} else {  				all_matched = false;  			} @@ -624,9 +667,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  			if (!container->as.code.fenced) { // indented  				if (parser->indent >= CODE_INDENT) { -					parser->offset += CODE_INDENT; +					S_advance_offset(parser, &input, CODE_INDENT, true);  				} else if (parser->blank) { -					parser->offset = parser->first_nonspace; +					S_advance_offset(parser, &input, +							 parser->first_nonspace - parser->offset, +							 false);  				} else {  					all_matched = false;  				} @@ -642,7 +687,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  					// closing fence - and since we're at  					// the end of a line, we can return:  					all_matched = false; -					parser->offset += matched; +					S_advance_offset(parser, &input, matched, false);  					parser->current = finalize(parser, container);  					goto finished;  				} else { @@ -650,7 +695,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  					i = container->as.code.fence_offset;  					while (i > 0 &&  					       peek_at(&input, parser->offset) == ' ') { -						parser->offset++; +						S_advance_offset(parser, &input, 1, false);  						i--;  					}  				} @@ -697,15 +742,16 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  		if (!indented && peek_at(&input, parser->first_nonspace) == '>') { -			parser->offset = parser->first_nonspace + 1; +			S_advance_offset(parser, &input, parser->first_nonspace + 1 - parser->offset, false);  			// optional following character  			if (peek_at(&input, parser->offset) == ' ') -				parser->offset++; +				S_advance_offset(parser, &input, 1, false);  			container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->offset + 1);  		} else if (!indented && (matched = scan_atx_header_start(&input, parser->first_nonspace))) { -			parser->offset = parser->first_nonspace + matched; +			S_advance_offset(parser, &input, +					 parser->first_nonspace + matched - parser->offset, false);  			container = add_child(parser, container, NODE_HEADER, parser->offset + 1);  			bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); @@ -726,7 +772,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  			container->as.code.fence_length = matched;  			container->as.code.fence_offset = parser->first_nonspace - parser->offset;  			container->as.code.info = cmark_chunk_literal(""); -			parser->offset = parser->first_nonspace + matched; +			S_advance_offset(parser, &input, parser->first_nonspace + matched - parser->offset, false);  		} else if (!indented && (matched = scan_html_block_tag(&input, parser->first_nonspace))) { @@ -743,7 +789,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  			container->type = NODE_HEADER;  			container->as.header.level = lev;  			container->as.header.setext = true; -			parser->offset = input.len - 1; +			S_advance_offset(parser, &input, input.len - 1 - parser->offset, false);  		} else if (!indented &&  		           !(container->type == NODE_PARAGRAPH && @@ -753,7 +799,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  			// it's only now that we know the line is not part of a setext header:  			container = add_child(parser, container, NODE_HRULE, parser->first_nonspace + 1);  			container = finalize(parser, container); -			parser->offset = input.len - 1; +		        S_advance_offset(parser, &input, input.len - 1 - parser->offset, false);  		} else if ((matched = parse_list_marker(&input, parser->first_nonspace, &data)) &&  		           (!indented || container->type == NODE_LIST)) { @@ -761,7 +807,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  			// spaces indent, as long as the list container is still open.  			// compute padding: -			parser->offset = parser->first_nonspace + matched; +			S_advance_offset(parser, &input, parser->first_nonspace + matched - parser->offset, false);  			i = 0;  			while (i <= 5 && peek_at(&input, parser->offset + i) == ' ') {  				i++; @@ -771,11 +817,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  			    S_is_line_end_char(peek_at(&input, parser->offset))) {  				data->padding = matched + 1;  				if (i > 0) { -					parser->offset += 1; +					S_advance_offset(parser, &input, 1, false);  				}  			} else {  				data->padding = matched + i; -				parser->offset += i; +				S_advance_offset(parser, &input, i, true);  			}  			// check container; if it's a list, see if this list item @@ -799,7 +845,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte  			free(data);  		} else if (indented && !maybe_lazy && !parser->blank) { -			parser->offset += CODE_INDENT; +			S_advance_offset(parser, &input, CODE_INDENT, true);  			container = add_child(parser, container, NODE_CODE_BLOCK, parser->offset + 1);  			container->as.code.fenced = false;  			container->as.code.fence_char = 0; diff --git a/src/parser.h b/src/parser.h index 6e18c67..01a7aeb 100644 --- a/src/parser.h +++ b/src/parser.h @@ -17,7 +17,9 @@ struct cmark_parser {  	struct cmark_node* current;  	int line_number;  	bufsize_t offset; +	bufsize_t column;  	bufsize_t first_nonspace; +	bufsize_t first_nonspace_column;  	int indent;  	bool blank;  	cmark_strbuf *curline;  | 
