From 1b6a4ce8ab921ddc98581abd395428e2cadd0c22 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 23 Nov 2014 07:08:57 -0800 Subject: Do not distinguish btw fenced and indented code in AST. Use a single CMARK_NODE_CODE_BLOCK tag for both. Distinguish them when needed for parsing by looking at the fence_length attribute, which is 0 for indented blocks. --- api_test/main.c | 9 ++---- commonmark.rb | 8 ++---- src/blocks.c | 88 +++++++++++++++++++++++++++++++-------------------------- src/cmark.h | 6 ++-- src/html/html.c | 12 ++++---- src/node.c | 15 ++++------ src/print.c | 10 ++----- 7 files changed, 68 insertions(+), 80 deletions(-) diff --git a/api_test/main.c b/api_test/main.c index 1a2f328..06d9be2 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -13,8 +13,7 @@ static const cmark_node_type node_types[] = { CMARK_NODE_BLOCK_QUOTE, CMARK_NODE_LIST, CMARK_NODE_LIST_ITEM, - CMARK_NODE_FENCED_CODE, - CMARK_NODE_INDENTED_CODE, + CMARK_NODE_CODE_BLOCK, CMARK_NODE_HTML, CMARK_NODE_PARAGRAPH, CMARK_NODE_HEADER, @@ -374,8 +373,7 @@ hierarchy(test_batch_runner *runner) int top_level_blocks = (1 << CMARK_NODE_BLOCK_QUOTE) | (1 << CMARK_NODE_LIST) | - (1 << CMARK_NODE_FENCED_CODE) | - (1 << CMARK_NODE_INDENTED_CODE) | + (1 << CMARK_NODE_CODE_BLOCK) | (1 << CMARK_NODE_HTML) | (1 << CMARK_NODE_PARAGRAPH) | (1 << CMARK_NODE_HEADER) | @@ -396,8 +394,7 @@ hierarchy(test_batch_runner *runner) test_content(runner, CMARK_NODE_BLOCK_QUOTE, top_level_blocks); test_content(runner, CMARK_NODE_LIST, list_item_flag); test_content(runner, CMARK_NODE_LIST_ITEM, top_level_blocks); - test_content(runner, CMARK_NODE_FENCED_CODE, 0); - test_content(runner, CMARK_NODE_INDENTED_CODE, 0); + test_content(runner, CMARK_NODE_CODE_BLOCK , 0); test_content(runner, CMARK_NODE_HTML, 0); test_content(runner, CMARK_NODE_PARAGRAPH, all_inlines); test_content(runner, CMARK_NODE_HEADER, all_inlines); diff --git a/commonmark.rb b/commonmark.rb index 94744dd..dead84c 100755 --- a/commonmark.rb +++ 
b/commonmark.rb @@ -10,7 +10,7 @@ module CMark ffi_lib ['libcmark', 'cmark'] typedef :pointer, :node enum :node_type, [:document, :blockquote, :list, :list_item, - :fenced_code, :indented_code, :html, :paragraph, + :code_block, :html, :paragraph, :header, :hrule, :reference_def, :str, :softbreak, :linebreak, :code, :inline_html, :emph, :strong, :link, :image] @@ -187,11 +187,7 @@ class Renderer self.out(node.children) end - def indented_code(node) - self.code_block(node) - end - - def fenced_code(node) + def code_block(node) self.code_block(node) end diff --git a/src/blocks.c b/src/blocks.c index 4a3f4fa..f916936 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -98,8 +98,7 @@ static inline bool accepts_lines(cmark_node_type block_type) { return (block_type == NODE_PARAGRAPH || block_type == NODE_HEADER || - block_type == NODE_INDENTED_CODE || - block_type == NODE_FENCED_CODE); + block_type == NODE_CODE_BLOCK); } static void add_line(cmark_node* cmark_node, chunk *ch, int offset) @@ -194,27 +193,28 @@ static void finalize(cmark_doc_parser *parser, cmark_node* b, int line_number) } break; - case NODE_INDENTED_CODE: - remove_trailing_blank_lines(&b->string_content); - strbuf_putc(&b->string_content, '\n'); - break; + case NODE_CODE_BLOCK: + if (b->as.code.fence_length == 0) { // indented code + remove_trailing_blank_lines(&b->string_content); + strbuf_putc(&b->string_content, '\n'); + break; + } else { - case NODE_FENCED_CODE: - // first line of contents becomes info - firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); + // first line of contents becomes info + firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); - strbuf_init(&b->as.code.info, 0); - houdini_unescape_html_f( - &b->as.code.info, - b->string_content.ptr, - firstlinelen - ); + houdini_unescape_html_f( + &b->as.code.info, + b->string_content.ptr, + firstlinelen + ); - strbuf_drop(&b->string_content, firstlinelen + 1); + strbuf_drop(&b->string_content, firstlinelen + 1); - 
strbuf_trim(&b->as.code.info); - strbuf_unescape(&b->as.code.info); - break; + strbuf_trim(&b->as.code.info); + strbuf_unescape(&b->as.code.info); + break; + } case NODE_LIST: // determine tight/loose status b->as.list.tight = true; // tight by default @@ -537,14 +537,23 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer, all_matched = false; } - } else if (container->type == NODE_INDENTED_CODE) { + } else if (container->type == NODE_CODE_BLOCK) { - if (indent >= CODE_INDENT) { - offset += CODE_INDENT; - } else if (blank) { - offset = first_nonspace; + if (container->as.code.fence_length == 0) { // indented + if (indent >= CODE_INDENT) { + offset += CODE_INDENT; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } } else { - all_matched = false; + // skip optional spaces of fence offset + i = container->as.code.fence_offset; + while (i > 0 && peek_at(&input, offset) == ' ') { + offset++; + i--; + } } } else if (container->type == NODE_HEADER) { @@ -555,15 +564,6 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer, container->last_line_blank = true; } - } else if (container->type == NODE_FENCED_CODE) { - - // skip optional spaces of fence offset - i = container->as.code.fence_offset; - while (i > 0 && peek_at(&input, offset) == ' ') { - offset++; - i--; - } - } else if (container->type == NODE_HTML) { if (blank) { @@ -594,7 +594,7 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer, } // unless last matched container is code cmark_node, try new container starts: - while (container->type != NODE_FENCED_CODE && container->type != NODE_INDENTED_CODE && + while (container->type != NODE_CODE_BLOCK && container->type != NODE_HTML) { first_nonspace = offset; @@ -607,7 +607,11 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer, if (indent >= CODE_INDENT) { if (cur->type != NODE_PARAGRAPH && !blank) { offset += CODE_INDENT; - container = add_child(parser, 
container, NODE_INDENTED_CODE, parser->line_number, offset + 1); + container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, offset + 1); + container->as.code.fence_char = 0; + container->as.code.fence_length = 0; + container->as.code.fence_offset = 0; + strbuf_init(&container->as.code.info, 0); } else { // indent > 4 in lazy line break; } @@ -636,10 +640,11 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer, } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { - container = add_child(parser, container, NODE_FENCED_CODE, parser->line_number, first_nonspace + 1); + container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, first_nonspace + 1); container->as.code.fence_char = peek_at(&input, first_nonspace); container->as.code.fence_length = matched; container->as.code.fence_offset = first_nonspace - offset; + strbuf_init(&container->as.code.info, 0); offset = first_nonspace + matched; } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { @@ -731,7 +736,8 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer, container->last_line_blank = (blank && container->type != NODE_BLOCK_QUOTE && container->type != NODE_HEADER && - container->type != NODE_FENCED_CODE && + (container->type != NODE_CODE_BLOCK && + container->as.code.fence_length != 0) && !(container->type == NODE_LIST_ITEM && container->first_child == NULL && container->start_line == parser->line_number)); @@ -759,11 +765,13 @@ void cmark_process_line(cmark_doc_parser *parser, const char *buffer, assert(cur != NULL); } - if (container->type == NODE_INDENTED_CODE) { + if (container->type == NODE_CODE_BLOCK && + container->as.code.fence_length == 0) { add_line(container, &input, offset); - } else if (container->type == NODE_FENCED_CODE) { + } else if (container->type == NODE_CODE_BLOCK && + container->as.code.fence_length != 0) { matched = 0; if (indent <= 3 && diff --git a/src/cmark.h b/src/cmark.h 
index b83f886..6e58f0b 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -16,8 +16,7 @@ typedef enum { CMARK_NODE_BLOCK_QUOTE, CMARK_NODE_LIST, CMARK_NODE_LIST_ITEM, - CMARK_NODE_FENCED_CODE, - CMARK_NODE_INDENTED_CODE, + CMARK_NODE_CODE_BLOCK, CMARK_NODE_HTML, CMARK_NODE_PARAGRAPH, CMARK_NODE_HEADER, @@ -199,8 +198,7 @@ char *cmark_markdown_to_html(const char *text, int len); #define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE #define NODE_LIST CMARK_NODE_LIST #define NODE_LIST_ITEM CMARK_NODE_LIST_ITEM - #define NODE_FENCED_CODE CMARK_NODE_FENCED_CODE - #define NODE_INDENTED_CODE CMARK_NODE_INDENTED_CODE + #define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK #define NODE_HTML CMARK_NODE_HTML #define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH #define NODE_HEADER CMARK_NODE_HEADER diff --git a/src/html/html.c b/src/html/html.c index e6971f8..27d1f0e 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -88,6 +88,7 @@ static void node_to_html(strbuf *html, cmark_node *node) char start_header[] = "<h0>"; bool tight = false; bool visit_children; + strbuf *info; if (node == NULL) { return; @@ -155,12 +156,11 @@ static void node_to_html(strbuf *html, cmark_node *node) strbuf_puts(html, start_header); break; - case NODE_INDENTED_CODE: - case NODE_FENCED_CODE: { - strbuf *info = &cur->as.code.info; + case NODE_CODE_BLOCK: + info = &cur->as.code.info; cr(html); - if (cur->type != NODE_FENCED_CODE + if (&cur->as.code.fence_length == 0 || strbuf_len(info) == 0) { strbuf_puts(html, "<pre><code>");
 			}
@@ -177,7 +177,6 @@ static void node_to_html(strbuf *html, cmark_node *node)
 
 			escape_html(html, cur->string_content.ptr, cur->string_content.size);
 			break;
-		}
 
 		case NODE_HTML:
 			cr(html);
@@ -320,8 +319,7 @@ finish_node(strbuf *html, cmark_node *node, bool tight)
 		strbuf_puts(html, end_header);
 		break;
 
-	case NODE_INDENTED_CODE:
-	case NODE_FENCED_CODE:
+	case NODE_CODE_BLOCK:
+		strbuf_puts(html, "</code></pre>\n"); break; diff --git a/src/node.c b/src/node.c index 3e51fec..cdf9d60 100644 --- a/src/node.c +++ b/src/node.c @@ -53,8 +53,7 @@ S_type_string(cmark_node *node) case CMARK_NODE_BLOCK_QUOTE: return "BLOCK_QUOTE"; case CMARK_NODE_LIST: return "LIST"; case CMARK_NODE_LIST_ITEM: return "LIST_ITEM"; - case CMARK_NODE_FENCED_CODE: return "FENCED_CODE"; - case CMARK_NODE_INDENTED_CODE: return "INDENTED_CODE"; + case CMARK_NODE_CODE_BLOCK: return "CODE_BLOCK"; case CMARK_NODE_HTML: return "HTML"; case CMARK_NODE_PARAGRAPH: return "PARAGRAPH"; case CMARK_NODE_HEADER: return "HEADER"; @@ -115,8 +114,7 @@ S_strdup(const char *str) { const char* cmark_node_get_string_content(cmark_node *node) { switch (node->type) { - case NODE_INDENTED_CODE: - case NODE_FENCED_CODE: + case NODE_CODE_BLOCK: case NODE_HTML: return cmark_strbuf_cstr(&node->string_content); @@ -135,8 +133,7 @@ cmark_node_get_string_content(cmark_node *node) { int cmark_node_set_string_content(cmark_node *node, const char *content) { switch (node->type) { - case NODE_INDENTED_CODE: - case NODE_FENCED_CODE: + case NODE_CODE_BLOCK: case NODE_HTML: cmark_strbuf_sets(&node->string_content, content); return 1; @@ -258,7 +255,7 @@ cmark_node_set_list_tight(cmark_node *node, int tight) { const char* cmark_node_get_fence_info(cmark_node *node) { - if (node->type == NODE_FENCED_CODE) { + if (node->type == NODE_CODE_BLOCK) { return cmark_strbuf_cstr(&node->as.code.info); } else { @@ -268,7 +265,7 @@ cmark_node_get_fence_info(cmark_node *node) { int cmark_node_set_fence_info(cmark_node *node, const char *info) { - if (node->type == NODE_FENCED_CODE) { + if (node->type == NODE_CODE_BLOCK) { cmark_strbuf_sets(&node->as.code.info, info); return 1; } @@ -622,7 +619,7 @@ void cmark_free_nodes(cmark_node *e) while (e != NULL) { strbuf_free(&e->string_content); switch (e->type){ - case NODE_FENCED_CODE: + case NODE_CODE_BLOCK: strbuf_free(&e->as.code.info); break; case NODE_STRING: diff --git a/src/print.c b/src/print.c index
2064276..1afa584 100644 --- a/src/print.c +++ b/src/print.c @@ -143,14 +143,8 @@ static void print_blocks(cmark_node* b, int indent) case NODE_HRULE: printf("hrule\n"); break; - case NODE_INDENTED_CODE: - printf("indented_code "); - print_str(b->string_content.ptr, -1); - putchar('\n'); - break; - case NODE_FENCED_CODE: - printf("fenced_code length=%d info=", - b->as.code.fence_length); + case NODE_CODE_BLOCK: + printf("code block info="); print_str(b->as.code.info.ptr, -1); putchar(' '); print_str(b->string_content.ptr, -1); -- cgit v1.2.3