diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/blocks.c | 92 | ||||
-rw-r--r-- | src/parser.h | 2 |
2 files changed, 71 insertions, 23 deletions
diff --git a/src/blocks.c b/src/blocks.c index 8c3e18c..06f6dcb 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -16,6 +16,8 @@ #include "debug.h" #define CODE_INDENT 4 +#define TAB_STOP 4 + #define peek_at(i, n) (i)->data[n] static inline bool @@ -70,7 +72,9 @@ cmark_parser *cmark_parser_new(int options) parser->current = document; parser->line_number = 0; parser->offset = 0; + parser->column = 0; parser->first_nonspace = 0; + parser->first_nonspace_column = 0; parser->indent = 0; parser->blank = false; parser->curline = line; @@ -555,16 +559,53 @@ static void chop_trailing_hashtags(cmark_chunk *ch) static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) { + char c; + int chars_to_tab = TAB_STOP - (parser->column % TAB_STOP); + parser->first_nonspace = parser->offset; - while (peek_at(input, parser->first_nonspace) == ' ') { - parser->first_nonspace++; + parser->first_nonspace_column = parser->column; + while ((c = peek_at(input, parser->first_nonspace))) { + if (c == ' ') { + parser->first_nonspace += 1; + parser->first_nonspace_column += 1; + chars_to_tab = chars_to_tab - 1; + if (chars_to_tab == 0) { + chars_to_tab = TAB_STOP; + } + } else if (c == '\t') { + parser->first_nonspace += 1; + parser->first_nonspace_column += chars_to_tab; + chars_to_tab = TAB_STOP; + } else { + break; + } } - parser->indent = parser->first_nonspace - parser->offset; + parser->indent = parser->first_nonspace_column - parser->column; parser->blank = S_is_line_end_char(peek_at(input, parser->first_nonspace)); } static void +S_advance_offset(cmark_parser *parser, cmark_chunk *input, bufsize_t count, bool columns) +{ + char c; + int chars_to_tab; + while (count > 0 && (c = peek_at(input, parser->offset))) { + if (c == '\t') { + chars_to_tab = 4 - (parser->column % TAB_STOP); + parser->column += chars_to_tab; + parser->offset += 1; + count -= (columns ? chars_to_tab : 1); + } else { + parser->offset += 1; + parser->column += 1; // assume ascii; block starts are ascii + count -= 1; + } + } +} + + +static void S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes) { cmark_node* last_matched_container; @@ -578,8 +619,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte cmark_chunk input; bool maybe_lazy; - utf8proc_detab(parser->curline, buffer, bytes); + cmark_strbuf_put(parser->curline, buffer, bytes); parser->offset = 0; + parser->column = 0; parser->blank = false; input.data = parser->curline->ptr; @@ -601,7 +643,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte if (container->type == NODE_BLOCK_QUOTE) { matched = parser->indent <= 3 && peek_at(&input, parser->first_nonspace) == '>'; if (matched) { - parser->offset = parser->first_nonspace + 1; + S_advance_offset(parser, &input, parser->indent + 1, true); if (peek_at(&input, parser->offset) == ' ') parser->offset++; } else { @@ -609,13 +651,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte } } else if (container->type == NODE_ITEM) { - if (parser->indent >= container->as.list.marker_offset + container->as.list.padding) { - parser->offset += container->as.list.marker_offset + - container->as.list.padding; + S_advance_offset(parser, &input, + container->as.list.marker_offset + + container->as.list.padding, true); } else if (parser->blank) { - parser->offset = parser->first_nonspace; + S_advance_offset(parser, &input, + parser->first_nonspace - parser->offset, false); } else { all_matched = false; } @@ -624,9 +667,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte if (!container->as.code.fenced) { // indented if (parser->indent >= CODE_INDENT) { - parser->offset += CODE_INDENT; + S_advance_offset(parser, &input, CODE_INDENT, true); } else if (parser->blank) { - parser->offset = parser->first_nonspace; + S_advance_offset(parser, &input, + parser->first_nonspace - parser->offset, + false); } else { all_matched = false; } @@ -642,7 +687,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte // closing fence - and since we're at // the end of a line, we can return: all_matched = false; - parser->offset += matched; + S_advance_offset(parser, &input, matched, false); parser->current = finalize(parser, container); goto finished; } else { @@ -650,7 +695,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte i = container->as.code.fence_offset; while (i > 0 && peek_at(&input, parser->offset) == ' ') { - parser->offset++; + S_advance_offset(parser, &input, 1, false); i--; } } @@ -697,15 +742,16 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte if (!indented && peek_at(&input, parser->first_nonspace) == '>') { - parser->offset = parser->first_nonspace + 1; + S_advance_offset(parser, &input, parser->first_nonspace + 1 - parser->offset, false); // optional following character if (peek_at(&input, parser->offset) == ' ') - parser->offset++; + S_advance_offset(parser, &input, 1, false); container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->offset + 1); } else if (!indented && (matched = scan_atx_header_start(&input, parser->first_nonspace))) { - parser->offset = parser->first_nonspace + matched; + S_advance_offset(parser, &input, + parser->first_nonspace + matched - parser->offset, false); container = add_child(parser, container, NODE_HEADER, parser->offset + 1); bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); @@ -726,7 +772,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte container->as.code.fence_length = matched; container->as.code.fence_offset = parser->first_nonspace - parser->offset; container->as.code.info = cmark_chunk_literal(""); - parser->offset = parser->first_nonspace + matched; + S_advance_offset(parser, &input, parser->first_nonspace + matched - parser->offset, false); } else if (!indented && (matched = scan_html_block_tag(&input, parser->first_nonspace))) { @@ -743,7 +789,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte container->type = NODE_HEADER; container->as.header.level = lev; container->as.header.setext = true; - parser->offset = input.len - 1; + S_advance_offset(parser, &input, input.len - 1 - parser->offset, false); } else if (!indented && !(container->type == NODE_PARAGRAPH && @@ -753,7 +799,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte // it's only now that we know the line is not part of a setext header: container = add_child(parser, container, NODE_HRULE, parser->first_nonspace + 1); container = finalize(parser, container); - parser->offset = input.len - 1; + S_advance_offset(parser, &input, input.len - 1 - parser->offset, false); } else if ((matched = parse_list_marker(&input, parser->first_nonspace, &data)) && (!indented || container->type == NODE_LIST)) { @@ -761,7 +807,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte // spaces indent, as long as the list container is still open. // compute padding: - parser->offset = parser->first_nonspace + matched; + S_advance_offset(parser, &input, parser->first_nonspace + matched - parser->offset, false); i = 0; while (i <= 5 && peek_at(&input, parser->offset + i) == ' ') { i++; @@ -771,11 +817,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte S_is_line_end_char(peek_at(&input, parser->offset))) { data->padding = matched + 1; if (i > 0) { - parser->offset += 1; + S_advance_offset(parser, &input, 1, false); } } else { data->padding = matched + i; - parser->offset += i; + S_advance_offset(parser, &input, i, true); } // check container; if it's a list, see if this list item @@ -799,7 +845,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t byte free(data); } else if (indented && !maybe_lazy && !parser->blank) { - parser->offset += CODE_INDENT; + S_advance_offset(parser, &input, CODE_INDENT, true); container = add_child(parser, container, NODE_CODE_BLOCK, parser->offset + 1); container->as.code.fenced = false; container->as.code.fence_char = 0; diff --git a/src/parser.h b/src/parser.h index 6e18c67..01a7aeb 100644 --- a/src/parser.h +++ b/src/parser.h @@ -17,7 +17,9 @@ struct cmark_parser { struct cmark_node* current; int line_number; bufsize_t offset; + bufsize_t column; bufsize_t first_nonspace; + bufsize_t first_nonspace_column; int indent; bool blank; cmark_strbuf *curline; |