diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-12-12 22:12:09 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-12-12 22:55:11 -0800 |
commit | c41bf11bb38ef513fa53f88b2c80afd1504aaeaf (patch) | |
tree | 87447007f83e40b627037e15108576113dc020c9 /src | |
parent | 5c20df20af6be9444f27a8c1bbfa5b027a1fa8d8 (diff) |
Rewrote HTML renderer using cmark_walk.
This version is shorter, more readable, and more regular.
It should serve as a template for creating new writers.
Performance is the same. All tests pass.
Diffstat (limited to 'src')
-rw-r--r-- | src/html.c | 418 | ||||
-rw-r--r-- | src/node.c | 10 |
2 files changed, 180 insertions, 248 deletions
@@ -11,9 +11,6 @@ // Functions to convert cmark_nodes to HTML strings. -static bool -finish_node(strbuf *html, cmark_node *node, bool tight); - static void escape_html(strbuf *dest, const unsigned char *source, int length) { if (length < 0) @@ -36,322 +33,251 @@ static inline void cr(strbuf *html) strbuf_putc(html, '\n'); } -// Convert the inline children of a node to a plain string. -static void inlines_to_plain_html(strbuf *html, cmark_node* node) +struct render_state { + strbuf* html; + cmark_node *plain; +}; + +static int +S_render_node(cmark_node *node, int entering, void *vstate) { - cmark_node* cur = node->first_child; + struct render_state *state = vstate; + cmark_node *parent; + cmark_node *grandparent; + strbuf *html = state->html; + char start_header[] = "<h0>"; + char end_header[] = "</h0>"; + strbuf *info; + bool tight; - if (cur == NULL) { - return; + if (state->plain == node) { // back at original node + state->plain = NULL; } - while (true) { - switch(cur->type) { - case NODE_TEXT: - case NODE_INLINE_CODE: - case NODE_INLINE_HTML: - escape_html(html, cur->as.literal.data, cur->as.literal.len); + if (state->plain != NULL) { + switch(node->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_INLINE_CODE: + case CMARK_NODE_INLINE_HTML: + escape_html(html, node->as.literal.data, + node->as.literal.len); break; - case NODE_LINEBREAK: - case NODE_SOFTBREAK: + case CMARK_NODE_LINEBREAK: + case CMARK_NODE_SOFTBREAK: strbuf_putc(html, ' '); break; default: break; } - - if (cur->first_child) { - cur = cur->first_child; - continue; - } - - next_sibling: - if (cur->next) { - cur = cur->next; - continue; - } - cur = cur->parent; - if (cur == node) { - break; - } - goto next_sibling; - } -} - - -// Convert a cmark_node to HTML. -static void node_to_html(strbuf *html, cmark_node *node) -{ - cmark_node *cur; - char start_header[] = "<h0>"; - bool tight = false; - bool visit_children; - strbuf *info; - - if (node == NULL) { - return; + return 1; } - cur = node; - while (true) { - // Only NODE_IMAGE wants to skip its children. - visit_children = true; - - switch(cur->type) { - case NODE_DOCUMENT: - break; - - case NODE_PARAGRAPH: - if (!tight) { - cr(html); - strbuf_puts(html, "<p>"); - } - break; - - case NODE_BLOCK_QUOTE: + switch (node->type) { + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { cr(html); strbuf_puts(html, "<blockquote>\n"); - // BLOCK_QUOTE doesn't use any of the 'as' structs, - // so the 'list' member can be used to store the - // current value of 'tight'. - cur->as.list.tight = tight; - tight = false; - break; - - case NODE_LIST_ITEM: + } else { cr(html); - strbuf_puts(html, "<li>"); - break; + strbuf_puts(html, "</blockquote>\n"); + } + break; - case NODE_LIST: { - cmark_list *list = &cur->as.list; - bool tmp; + case CMARK_NODE_LIST: { + cmark_list_type list_type = node->as.list.list_type; + int start = node->as.list.start; - // make sure a list starts at the beginning of the line: + if (entering) { cr(html); - - if (list->list_type == CMARK_BULLET_LIST) { + if (list_type == CMARK_BULLET_LIST) { strbuf_puts(html, "<ul>\n"); } - else if (list->start == 1) { + else if (start == 1) { strbuf_puts(html, "<ol>\n"); } else { strbuf_printf(html, "<ol start=\"%d\">\n", - list->start); + start); } - - // Store the current value of 'tight' by swapping. - tmp = list->tight; - list->tight = tight; - tight = tmp; - break; + } else { + strbuf_puts(html, + list_type == CMARK_BULLET_LIST ? + "</ul>\n" : "</ol>\n"); } + break; + } - case NODE_HEADER: - cr(html); - start_header[2] = '0' + cur->as.header.level; - strbuf_puts(html, start_header); - break; - - case NODE_CODE_BLOCK: - info = &cur->as.code.info; - cr(html); - - if (&cur->as.code.fence_length == 0 - || strbuf_len(info) == 0) { - strbuf_puts(html, "<pre><code>"); - } - else { - int first_tag = strbuf_strchr(info, ' ', 0); - if (first_tag < 0) - first_tag = strbuf_len(info); - - strbuf_puts(html, - "<pre><code class=\"language-"); - escape_html(html, info->ptr, first_tag); - strbuf_puts(html, "\">"); - } - - escape_html(html, cur->string_content.ptr, cur->string_content.size); - break; - - case NODE_HTML: + case CMARK_NODE_LIST_ITEM: + if (entering) { cr(html); - strbuf_put(html, cur->string_content.ptr, cur->string_content.size); - break; + strbuf_puts(html, "<li>"); + } else { + strbuf_puts(html, "</li>\n"); + } + break; - case NODE_HRULE: + case CMARK_NODE_HEADER: + if (entering) { cr(html); - strbuf_puts(html, "<hr />\n"); - break; - - case NODE_REFERENCE_DEF: - break; - - case NODE_TEXT: - escape_html(html, cur->as.literal.data, cur->as.literal.len); - break; - - case NODE_LINEBREAK: - strbuf_puts(html, "<br />\n"); - break; - - case NODE_SOFTBREAK: + start_header[2] = '0' + node->as.header.level; + strbuf_puts(html, start_header); + } else { + end_header[3] = '0' + node->as.header.level; + strbuf_puts(html, end_header); strbuf_putc(html, '\n'); - break; - - case NODE_INLINE_CODE: - strbuf_puts(html, "<code>"); - escape_html(html, cur->as.literal.data, cur->as.literal.len); - break; - - case NODE_INLINE_HTML: - strbuf_put(html, - cur->as.literal.data, - cur->as.literal.len); - break; + } + break; - case NODE_LINK: - strbuf_puts(html, "<a href=\""); - if (cur->as.link.url) - escape_href(html, cur->as.link.url, -1); + case CMARK_NODE_CODE_BLOCK: + info = &node->as.code.info; + cr(html); - if (cur->as.link.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, cur->as.link.title, -1); - } + if (&node->as.code.fence_length == 0 + || strbuf_len(info) == 0) { + strbuf_puts(html, "<pre><code>"); + } + else { + int first_tag = strbuf_strchr(info, ' ', 0); + if (first_tag < 0) + first_tag = strbuf_len(info); + strbuf_puts(html, "<pre><code class=\"language-"); + escape_html(html, info->ptr, first_tag); strbuf_puts(html, "\">"); - break; - - case NODE_IMAGE: - strbuf_puts(html, "<img src=\""); - if (cur->as.link.url) - escape_href(html, cur->as.link.url, -1); - - strbuf_puts(html, "\" alt=\""); - inlines_to_plain_html(html, cur); - - if (cur->as.link.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, cur->as.link.title, -1); - } - - strbuf_puts(html, "\" />"); - visit_children = false; - break; + } - case NODE_STRONG: - strbuf_puts(html, "<strong>"); - break; + escape_html(html, node->string_content.ptr, node->string_content.size); + strbuf_puts(html, "</code></pre>\n"); + break; - case NODE_EMPH: - strbuf_puts(html, "<em>"); - break; + case CMARK_NODE_HTML: + cr(html); + strbuf_put(html, node->string_content.ptr, + node->string_content.size); + break; - default: - assert(false); - } + case CMARK_NODE_HRULE: + cr(html); + strbuf_puts(html, "<hr />\n"); + break; - if (visit_children && cur->first_child) { - cur = cur->first_child; - continue; - } + case CMARK_NODE_REFERENCE_DEF: + break; - next_sibling: - tight = finish_node(html, cur, tight); - if (cur == node) { - break; - } - if (cur->next) { - cur = cur->next; - continue; + case CMARK_NODE_PARAGRAPH: + parent = cmark_node_parent(node); + grandparent = cmark_node_parent(parent); + if (grandparent != NULL && + grandparent->type == CMARK_NODE_LIST) { + tight = grandparent->as.list.tight; + } else { + tight = false; } - cur = cur->parent; - goto next_sibling; - } -} - -// Returns the restored value of 'tight'. -static bool -finish_node(strbuf *html, cmark_node *node, bool tight) -{ - char end_header[] = "</h0>\n"; - - switch (node->type) { - case NODE_PARAGRAPH: if (!tight) { - strbuf_puts(html, "</p>\n"); + if (entering) { + cr(html); + strbuf_puts(html, "<p>"); + } else { + strbuf_puts(html, "</p>\n"); + } } break; - case NODE_BLOCK_QUOTE: { - cmark_list *list = &node->as.list; - strbuf_puts(html, "</blockquote>\n"); - // Restore old 'tight' value. - tight = list->tight; - list->tight = false; + case CMARK_NODE_TEXT: + escape_html(html, node->as.literal.data, + node->as.literal.len); break; - } - case NODE_LIST_ITEM: - strbuf_puts(html, "</li>\n"); + case CMARK_NODE_LINEBREAK: + strbuf_puts(html, "<br />\n"); break; - case NODE_LIST: { - cmark_list *list = &node->as.list; - bool tmp; - strbuf_puts(html, - list->list_type == CMARK_BULLET_LIST ? - "</ul>\n" : "</ol>\n"); - // Restore old 'tight' value. - tmp = tight; - tight = list->tight; - list->tight = tmp; + case CMARK_NODE_SOFTBREAK: + strbuf_putc(html, '\n'); break; - } - case NODE_HEADER: - end_header[3] = '0' + node->as.header.level; - strbuf_puts(html, end_header); + case CMARK_NODE_INLINE_CODE: + strbuf_puts(html, "<code>"); + escape_html(html, node->as.literal.data, node->as.literal.len); + strbuf_puts(html, "</code>"); break; - case NODE_CODE_BLOCK: - strbuf_puts(html, "</code></pre>\n"); + case CMARK_NODE_INLINE_HTML: + strbuf_put(html, node->as.literal.data, node->as.literal.len); break; - case NODE_INLINE_CODE: - strbuf_puts(html, "</code>"); + case CMARK_NODE_STRONG: + if (entering) { + strbuf_puts(html, "<strong>"); + } else { + strbuf_puts(html, "</strong>"); + } break; - case NODE_LINK: - strbuf_puts(html, "</a>"); + case CMARK_NODE_EMPH: + if (entering) { + strbuf_puts(html, "<em>"); + } else { + strbuf_puts(html, "</em>"); + } break; - case NODE_STRONG: - strbuf_puts(html, "</strong>"); + case CMARK_NODE_LINK: + if (entering) { + strbuf_puts(html, "<a href=\""); + if (node->as.link.url) + escape_href(html, node->as.link.url, -1); + + if (node->as.link.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, node->as.link.title, -1); + } + + strbuf_puts(html, "\">"); + } else { + strbuf_puts(html, "</a>"); + } break; - case NODE_EMPH: - strbuf_puts(html, "</em>"); + case CMARK_NODE_IMAGE: + if (entering) { + strbuf_puts(html, "<img src=\""); + if (node->as.link.url) + escape_href(html, node->as.link.url, -1); + + strbuf_puts(html, "\" alt=\""); + state->plain = node; + } else { + if (node->as.link.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, node->as.link.title, -1); + } + + strbuf_puts(html, "\" />"); + } break; default: + assert(false); break; } - return tight; + // strbuf_putc(html, 'x'); + return 1; } char *cmark_render_html(cmark_node *root) { char *result; strbuf html = GH_BUF_INIT; - node_to_html(&html, root); - result = (char *)strbuf_detach(&html); - strbuf_free(&html); - return result; + struct render_state state = { &html, NULL }; + if (cmark_walk(root, S_render_node, &state)) { + result = (char *)strbuf_detach(&html); + strbuf_free(&html); + return result; + } else { + return NULL; + } } @@ -773,6 +773,7 @@ int S_is_leaf_node(cmark_node *current_node) switch (cmark_node_get_type(current_node)) { case CMARK_NODE_HTML: case CMARK_NODE_HRULE: + case CMARK_NODE_CODE_BLOCK: case CMARK_NODE_REFERENCE_DEF: case CMARK_NODE_TEXT: case CMARK_NODE_SOFTBREAK: @@ -815,8 +816,13 @@ int cmark_walk(cmark_node *root, cmark_node_handler handler, void *state) parent = current_node->parent; } if (next) { - begin = 1; - current_node = next; + // don't go past root: + if (current_node == root) { + return 1; + } else { + begin = 1; + current_node = next; + } } else { begin = 0; depth -= 1; |