diff options
-rw-r--r-- | api_test/main.c | 269 | ||||
-rw-r--r-- | src/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/blocks.c | 18 | ||||
-rw-r--r-- | src/cmark.c | 4 | ||||
-rw-r--r-- | src/cmark.h | 8 | ||||
-rw-r--r-- | src/html/html.c | 469 | ||||
-rw-r--r-- | src/main.c | 4 | ||||
-rw-r--r-- | src/node.c | 85 | ||||
-rw-r--r-- | src/node.h | 4 |
9 files changed, 544 insertions, 319 deletions
diff --git a/api_test/main.c b/api_test/main.c index f74dee2..b3b5b5a 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -2,15 +2,81 @@ #include <stdlib.h> #include <string.h> +#define CMARK_NO_SHORT_NAMES #include "cmark.h" #include "node.h" #include "harness.h" +static const cmark_node_type node_types[] = { + CMARK_NODE_DOCUMENT, + CMARK_NODE_BQUOTE, + CMARK_NODE_LIST, + CMARK_NODE_LIST_ITEM, + CMARK_NODE_FENCED_CODE, + CMARK_NODE_INDENTED_CODE, + CMARK_NODE_HTML, + CMARK_NODE_PARAGRAPH, + CMARK_NODE_ATX_HEADER, + CMARK_NODE_SETEXT_HEADER, + CMARK_NODE_HRULE, + CMARK_NODE_REFERENCE_DEF, + CMARK_NODE_STRING, + CMARK_NODE_SOFTBREAK, + CMARK_NODE_LINEBREAK, + CMARK_NODE_INLINE_CODE, + CMARK_NODE_INLINE_HTML, + CMARK_NODE_EMPH, + CMARK_NODE_STRONG, + CMARK_NODE_LINK, + CMARK_NODE_IMAGE +}; +static const int num_node_types = sizeof(node_types) / sizeof(*node_types); + +static void +test_content(test_batch_runner *runner, cmark_node_type type, + int allowed_content); + +static void +constructor(test_batch_runner *runner) +{ + for (int i = 0; i < num_node_types; ++i) { + cmark_node_type type = node_types[i]; + cmark_node *node = cmark_node_new(type); + OK(runner, node != NULL, "new type %d", type); + INT_EQ(runner, cmark_node_get_type(node), type, + "get_type %d", type); + + switch (node->type) { + case CMARK_NODE_ATX_HEADER: + case CMARK_NODE_SETEXT_HEADER: + INT_EQ(runner, cmark_node_get_header_level(node), 1, + "default header level is 1"); + node->as.header.level = 1; + break; + + case CMARK_NODE_LIST: + INT_EQ(runner, cmark_node_get_list_type(node), + CMARK_BULLET_LIST, + "default is list type is bullet"); + INT_EQ(runner, cmark_node_get_list_start(node), 1, + "default is list start is 1"); + INT_EQ(runner, cmark_node_get_list_tight(node), 0, + "default is list is loose"); + break; + + default: + break; + } + + cmark_node_destroy(node); + } +} + static void accessors(test_batch_runner *runner) { - static const unsigned char markdown[] = + static const char markdown[] = "## Header\n" "\n" "* Item 1\n" @@ -36,22 +102,16 @@ accessors(test_batch_runner *runner) // Getters cmark_node *header = cmark_node_first_child(doc); - INT_EQ(runner, cmark_node_get_type(header), CMARK_NODE_ATX_HEADER, - "get_type header"); INT_EQ(runner, cmark_node_get_header_level(header), 2, "get_header_level"); cmark_node *bullet_list = cmark_node_next(header); - INT_EQ(runner, cmark_node_get_type(bullet_list), CMARK_NODE_LIST, - "get_type bullet list"); INT_EQ(runner, cmark_node_get_list_type(bullet_list), CMARK_BULLET_LIST, "get_list_type bullet"); INT_EQ(runner, cmark_node_get_list_tight(bullet_list), 1, "get_list_tight tight"); cmark_node *ordered_list = cmark_node_next(bullet_list); - INT_EQ(runner, cmark_node_get_type(ordered_list), CMARK_NODE_LIST, - "get_type ordered list"); INT_EQ(runner, cmark_node_get_list_type(ordered_list), CMARK_ORDERED_LIST, "get_list_type ordered"); INT_EQ(runner, cmark_node_get_list_start(ordered_list), 2, @@ -60,28 +120,20 @@ accessors(test_batch_runner *runner) "get_list_tight loose"); cmark_node *code = cmark_node_next(ordered_list); - INT_EQ(runner, cmark_node_get_type(code), CMARK_NODE_INDENTED_CODE, - "get_type indented code"); STR_EQ(runner, cmark_node_get_string_content(code), "code\n", "get_string_content indented code"); cmark_node *fenced = cmark_node_next(code); - INT_EQ(runner, cmark_node_get_type(fenced), CMARK_NODE_FENCED_CODE, - "get_type fenced code"); STR_EQ(runner, cmark_node_get_string_content(fenced), "fenced\n", "get_string_content fenced code"); STR_EQ(runner, cmark_node_get_fence_info(fenced), "lang", "get_fence_info"); cmark_node *html = cmark_node_next(fenced); - INT_EQ(runner, cmark_node_get_type(html), CMARK_NODE_HTML, - "get_type html"); STR_EQ(runner, cmark_node_get_string_content(html), "<div>html</div>\n", "get_string_content html"); cmark_node *paragraph = cmark_node_next(html); - INT_EQ(runner, cmark_node_get_type(paragraph), CMARK_NODE_PARAGRAPH, - "get_type paragraph"); INT_EQ(runner, cmark_node_get_start_line(paragraph), 19, "get_start_line"); INT_EQ(runner, cmark_node_get_start_column(paragraph), 1, @@ -90,16 +142,12 @@ accessors(test_batch_runner *runner) "get_end_line"); cmark_node *link = cmark_node_first_child(paragraph); - INT_EQ(runner, cmark_node_get_type(link), CMARK_NODE_LINK, - "get_type link"); STR_EQ(runner, cmark_node_get_url(link), "url", "get_url"); STR_EQ(runner, cmark_node_get_title(link), "title", "get_title"); cmark_node *string = cmark_node_first_child(link); - INT_EQ(runner, cmark_node_get_type(string), CMARK_NODE_STRING, - "get_type string"); STR_EQ(runner, cmark_node_get_string_content(string), "link", "get_string_content string"); @@ -139,7 +187,7 @@ accessors(test_batch_runner *runner) OK(runner, cmark_node_set_string_content(string, "LINK"), "set_string_content string"); - char *rendered_html = (char *)cmark_render_html(doc); + char *rendered_html = cmark_render_html(doc); static const char expected_html[] = "<h3>Header</h3>\n" "<ol start=\"3\">\n" @@ -150,7 +198,7 @@ accessors(test_batch_runner *runner) "<p>Item 2</p>\n" "</li>\n" "</ol>\n" - "<ul start=\"2\">\n" + "<ul>\n" "<li>Item 1</li>\n" "<li>Item 2</li>\n" "</ul>\n" @@ -214,6 +262,22 @@ accessors(test_batch_runner *runner) } static void +node_check(test_batch_runner *runner) { + // Construct an incomplete tree. + cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT); + cmark_node *p1 = cmark_node_new(CMARK_NODE_PARAGRAPH); + cmark_node *p2 = cmark_node_new(CMARK_NODE_PARAGRAPH); + doc->first_child = p1; + p1->next = p2; + + INT_EQ(runner, cmark_node_check(doc, NULL), 4, "node_check works"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, + "node_check fixes tree"); + + cmark_node_destroy(doc); +} + +static void create_tree(test_batch_runner *runner) { char *html; @@ -221,66 +285,201 @@ create_tree(test_batch_runner *runner) cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH); OK(runner, cmark_node_append_child(doc, p), "append1"); - INT_EQ(runner, cmark_node_check(doc), 0, "append1 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append1 consistent"); + OK(runner, cmark_node_parent(p) == doc, "node_parent"); cmark_node *emph = cmark_node_new(CMARK_NODE_EMPH); OK(runner, cmark_node_prepend_child(p, emph), "prepend1"); - INT_EQ(runner, cmark_node_check(doc), 0, "prepend1 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend1 consistent"); cmark_node *str1 = cmark_node_new(CMARK_NODE_STRING); cmark_node_set_string_content(str1, "Hello, "); OK(runner, cmark_node_prepend_child(p, str1), "prepend2"); - INT_EQ(runner, cmark_node_check(doc), 0, "prepend2 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend2 consistent"); cmark_node *str3 = cmark_node_new(CMARK_NODE_STRING); cmark_node_set_string_content(str3, "!"); OK(runner, cmark_node_append_child(p, str3), "append2"); - INT_EQ(runner, cmark_node_check(doc), 0, "append2 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append2 consistent"); cmark_node *str2 = cmark_node_new(CMARK_NODE_STRING); cmark_node_set_string_content(str2, "world"); OK(runner, cmark_node_append_child(emph, str2), "append3"); - INT_EQ(runner, cmark_node_check(doc), 0, "append3 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent"); - html = (char *)cmark_render_html(doc); + html = cmark_render_html(doc); STR_EQ(runner, html, "<p>Hello, <em>world</em>!</p>\n", "render_html"); free(html); OK(runner, cmark_node_insert_before(str1, str3), "ins before1"); - INT_EQ(runner, cmark_node_check(doc), 0, "ins before1 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, + "ins before1 consistent"); // 31e OK(runner, cmark_node_first_child(p) == str3, "ins before1 works"); OK(runner, cmark_node_insert_before(str1, emph), "ins before2"); - INT_EQ(runner, cmark_node_check(doc), 0, "ins before2 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, + "ins before2 consistent"); // 3e1 OK(runner, cmark_node_last_child(p) == str1, "ins before2 works"); OK(runner, cmark_node_insert_after(str1, str3), "ins after1"); - INT_EQ(runner, cmark_node_check(doc), 0, "ins after1 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, + "ins after1 consistent"); // e13 - OK(runner, cmark_node_last_child(p) == str3, "ins after1 works"); + OK(runner, cmark_node_next(str1) == str3, "ins after1 works"); OK(runner, cmark_node_insert_after(str1, emph), "ins after2"); - INT_EQ(runner, cmark_node_check(doc), 0, "ins after2 consistent"); + INT_EQ(runner, cmark_node_check(doc, NULL), 0, + "ins after2 consistent"); // 1e3 - OK(runner, cmark_node_first_child(p) == str1, "ins after2 works"); + OK(runner, cmark_node_previous(emph) == str1, "ins after2 works"); - html = (char *)cmark_render_html(doc); - STR_EQ(runner, html, "<p>Hello, <em>world</em>!</p>\n", + cmark_node_unlink(emph); + + html = cmark_render_html(doc); + STR_EQ(runner, html, "<p>Hello, !</p>\n", "render_html after shuffling"); free(html); cmark_node_destroy(doc); + + // TODO: Test that the contents of an unlinked inline are valid + // after the parent block was destroyed. This doesn't work so far. + cmark_node_destroy(emph); +} + +void +hierarchy(test_batch_runner *runner) +{ + cmark_node *bquote1 = cmark_node_new(CMARK_NODE_BQUOTE); + cmark_node *bquote2 = cmark_node_new(CMARK_NODE_BQUOTE); + cmark_node *bquote3 = cmark_node_new(CMARK_NODE_BQUOTE); + + OK(runner, cmark_node_append_child(bquote1, bquote2), + "append bquote2"); + OK(runner, cmark_node_append_child(bquote2, bquote3), + "append bquote3"); + OK(runner, !cmark_node_append_child(bquote3, bquote3), + "adding a node as child of itself fails"); + OK(runner, !cmark_node_append_child(bquote3, bquote1), + "adding a parent as child fails"); + + cmark_node_destroy(bquote1); + + int max_node_type = CMARK_NODE_LAST_BLOCK > CMARK_NODE_LAST_INLINE + ? CMARK_NODE_LAST_BLOCK : CMARK_NODE_LAST_INLINE; + OK(runner, max_node_type < 32, "all node types < 32"); + + int list_item_flag = 1 << CMARK_NODE_LIST_ITEM; + int top_level_blocks = + (1 << CMARK_NODE_BQUOTE) | + (1 << CMARK_NODE_LIST) | + (1 << CMARK_NODE_FENCED_CODE) | + (1 << CMARK_NODE_INDENTED_CODE) | + (1 << CMARK_NODE_HTML) | + (1 << CMARK_NODE_PARAGRAPH) | + (1 << CMARK_NODE_ATX_HEADER) | + (1 << CMARK_NODE_SETEXT_HEADER) | + (1 << CMARK_NODE_HRULE) | + (1 << CMARK_NODE_REFERENCE_DEF); + int all_inlines = + (1 << CMARK_NODE_STRING) | + (1 << CMARK_NODE_SOFTBREAK) | + (1 << CMARK_NODE_LINEBREAK) | + (1 << CMARK_NODE_INLINE_CODE) | + (1 << CMARK_NODE_INLINE_HTML) | + (1 << CMARK_NODE_EMPH) | + (1 << CMARK_NODE_STRONG) | + (1 << CMARK_NODE_LINK) | + (1 << CMARK_NODE_IMAGE); + + test_content(runner, CMARK_NODE_DOCUMENT, top_level_blocks); + test_content(runner, CMARK_NODE_BQUOTE, top_level_blocks); + test_content(runner, CMARK_NODE_LIST, list_item_flag); + test_content(runner, CMARK_NODE_LIST_ITEM, top_level_blocks); + test_content(runner, CMARK_NODE_FENCED_CODE, 0); + test_content(runner, CMARK_NODE_INDENTED_CODE, 0); + test_content(runner, CMARK_NODE_HTML, 0); + test_content(runner, CMARK_NODE_PARAGRAPH, all_inlines); + test_content(runner, CMARK_NODE_ATX_HEADER, all_inlines); + test_content(runner, CMARK_NODE_SETEXT_HEADER, all_inlines); + test_content(runner, CMARK_NODE_HRULE, 0); + test_content(runner, CMARK_NODE_REFERENCE_DEF, 0); + test_content(runner, CMARK_NODE_STRING, 0); + test_content(runner, CMARK_NODE_SOFTBREAK, 0); + test_content(runner, CMARK_NODE_LINEBREAK, 0); + test_content(runner, CMARK_NODE_INLINE_CODE, 0); + test_content(runner, CMARK_NODE_INLINE_HTML, 0); + test_content(runner, CMARK_NODE_EMPH, all_inlines); + test_content(runner, CMARK_NODE_STRONG, all_inlines); + test_content(runner, CMARK_NODE_LINK, all_inlines); + test_content(runner, CMARK_NODE_IMAGE, all_inlines); +} + +static void +test_content(test_batch_runner *runner, cmark_node_type type, + int allowed_content) +{ + cmark_node *node = cmark_node_new(type); + + for (int i = 0; i < num_node_types; ++i) { + cmark_node_type child_type = node_types[i]; + cmark_node *child = cmark_node_new(child_type); + + int got = cmark_node_append_child(node, child); + int expected = (allowed_content >> child_type) & 1; + + INT_EQ(runner, got, expected, + "add %d as child of %d", child_type, type); + + cmark_node_destroy(child); + } + + cmark_node_destroy(node); +} + +static void +render_html(test_batch_runner *runner) +{ + char *html; + + static const char markdown[] = + "foo *bar*\n" + "\n" + "paragraph 2\n"; + cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1); + + cmark_node *paragraph = cmark_node_first_child(doc); + html = cmark_render_html(paragraph); + STR_EQ(runner, html, "<p>foo <em>bar</em></p>\n", + "render single paragraph"); + free(html); + + cmark_node *string = cmark_node_first_child(paragraph); + html = cmark_render_html(string); + STR_EQ(runner, html, "foo ", "render single inline"); + free(html); + + cmark_node *emph = cmark_node_next(string); + html = cmark_render_html(emph); + STR_EQ(runner, html, "<em>bar</em>", "render inline with children"); + free(html); + + cmark_node_destroy(doc); } int main() { int retval; test_batch_runner *runner = test_batch_runner_new(); + constructor(runner); accessors(runner); + node_check(runner); create_tree(runner); + hierarchy(runner); + render_html(runner); test_print_summary(runner); retval = test_ok(runner) ? 0 : 1; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dbe53a5..71f45a7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties(${PROGRAM} PROPERTIES COMPILE_FLAGS -DCMARK_STATIC_DEFINE) # Check integrity of node structure when compiled as debug: -set(CMAKE_C_FLAGS_DEBUG "-DCMARK_DEBUG_NODES") +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES") if (${CMAKE_MAJOR_VERSION} GREATER 1 AND ${CMAKE_MINOR_VERSION} GREATER 8) set(CMAKE_C_VISIBILITY_PRESET hidden) diff --git a/src/blocks.c b/src/blocks.c index 58162b5..a30560f 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -416,13 +416,13 @@ static cmark_node *finalize_document(cmark_doc_parser *parser) extern cmark_node *cmark_parse_file(FILE *f) { - unsigned char buffer[4096]; + char buffer[4096]; cmark_doc_parser *parser = cmark_new_doc_parser(); size_t offset; cmark_node *document; - while (fgets((char *)buffer, sizeof(buffer), f)) { - offset = strlen((char *)buffer); + while (fgets(buffer, sizeof(buffer), f)) { + offset = strlen(buffer); cmark_process_line(parser, buffer, offset); } @@ -431,16 +431,16 @@ extern cmark_node *cmark_parse_file(FILE *f) return document; } -extern cmark_node *cmark_parse_document(const unsigned char *buffer, size_t len) +extern cmark_node *cmark_parse_document(const char *buffer, size_t len) { int linenum = 1; - const unsigned char *end = buffer + len; + const char *end = buffer + len; size_t offset; cmark_doc_parser *parser = cmark_new_doc_parser(); cmark_node *document; while (buffer < end) { - const unsigned char *eol = (unsigned char *)memchr(buffer, '\n', end - buffer); + const char *eol = memchr(buffer, '\n', end - buffer); offset = eol ? (eol - buffer) + 1 : eol - buffer; cmark_process_line(parser, buffer, offset); buffer += offset; @@ -470,7 +470,7 @@ static void chop_trailing_hashtags(chunk *ch) } } -void cmark_process_line(cmark_doc_parser *parser, const unsigned char *buffer, +void cmark_process_line(cmark_doc_parser *parser, const char *buffer, size_t bytes) { cmark_node* last_matched_container; @@ -487,7 +487,7 @@ void cmark_process_line(cmark_doc_parser *parser, const unsigned char *buffer, int indent; chunk input; - utf8proc_detab(parser->curline, buffer, bytes); + utf8proc_detab(parser->curline, (unsigned char *)buffer, bytes); // Add a newline to the end if not present: // TODO this breaks abstraction: @@ -818,7 +818,7 @@ cmark_node *cmark_finish(cmark_doc_parser *parser) finalize_document(parser); strbuf_free(parser->curline); #if CMARK_DEBUG_NODES - if (cmark_node_check(parser->root)) { + if (cmark_node_check(parser->root, stderr)) { abort(); } #endif diff --git a/src/cmark.c b/src/cmark.c index b20b84b..273a37b 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -7,10 +7,10 @@ #include "cmark.h" #include "buffer.h" -unsigned char *cmark_markdown_to_html(unsigned char *text, int len) +char *cmark_markdown_to_html(const char *text, int len) { cmark_node *blocks; - unsigned char *result; + char *result; blocks = cmark_parse_document(text, len); diff --git a/src/cmark.h b/src/cmark.h index a74fe93..6131024 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -176,10 +176,10 @@ CMARK_EXPORT cmark_node *cmark_finish(cmark_doc_parser *parser); CMARK_EXPORT -void cmark_process_line(cmark_doc_parser *parser, const unsigned char *buffer, size_t bytes); +void cmark_process_line(cmark_doc_parser *parser, const char *buffer, size_t bytes); CMARK_EXPORT -cmark_node *cmark_parse_document(const unsigned char *buffer, size_t len); +cmark_node *cmark_parse_document(const char *buffer, size_t len); CMARK_EXPORT cmark_node *cmark_parse_file(FILE *f); @@ -190,10 +190,10 @@ CMARK_EXPORT void cmark_debug_print(cmark_node *root); CMARK_EXPORT -unsigned char *cmark_render_html(cmark_node *root); +char *cmark_render_html(cmark_node *root); CMARK_EXPORT -unsigned char *cmark_markdown_to_html(unsigned char *text, int len); +char *cmark_markdown_to_html(const char *text, int len); #ifndef CMARK_NO_SHORT_NAMES #define NODE_DOCUMENT CMARK_NODE_DOCUMENT diff --git a/src/html/html.c b/src/html/html.c index 76d488a..4fa79b1 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -7,58 +7,12 @@ #include "cmark.h" #include "node.h" #include "buffer.h" -#include "debug.h" #include "html/houdini.h" -typedef struct RenderStack { - struct RenderStack *previous; - const char* literal; - cmark_node* next_sibling; - bool tight; - bool trim; -} render_stack; +// Functions to convert cmark_nodes to HTML strings. -static void free_render_stack(render_stack * rstack) -{ - render_stack * tempstack; - while (rstack) { - tempstack = rstack; - rstack = rstack->previous; - free(tempstack); - } -} - -static render_stack* push_render_stack(render_stack* rstack, - cmark_node* node, - const char* literal) -{ - render_stack* newstack; - newstack = (render_stack*)malloc(sizeof(render_stack)); - if (newstack == NULL) { - return NULL; - } - newstack->previous = rstack; - newstack->next_sibling = node; - newstack->literal = literal; - newstack->tight = false; - newstack->trim = false; - return newstack; -} - -static render_stack* pop_render_stack(render_stack* rstack) -{ - render_stack* top = rstack; - - if (rstack == NULL) { - return NULL; - } - rstack = rstack->previous; - top->previous = NULL; - free_render_stack(top); - return rstack; -} - -// Functions to convert cmark_node and inline lists to HTML strings. +static bool +finish_node(strbuf *html, cmark_node *node, bool tight); static void escape_html(strbuf *dest, const unsigned char *source, int length) { @@ -82,65 +36,163 @@ static inline void cr(strbuf *html) strbuf_putc(html, '\n'); } -// Convert an inline list to HTML. Returns 0 on success, and sets result. -static void inlines_to_plain_html(strbuf *html, cmark_node* ils) +// Convert the inline children of a node to a plain string. +static void inlines_to_plain_html(strbuf *html, cmark_node* node) { - cmark_node* children; - bool visit_children; - render_stack* rstack = NULL; + cmark_node* cur = node->first_child; - while(ils != NULL) { - visit_children = false; - switch(ils->type) { + if (cur == NULL) { + return; + } + + while (true) { + switch(cur->type) { case NODE_STRING: case NODE_INLINE_CODE: case NODE_INLINE_HTML: - escape_html(html, ils->as.literal.data, ils->as.literal.len); + escape_html(html, cur->as.literal.data, cur->as.literal.len); break; case NODE_LINEBREAK: case NODE_SOFTBREAK: - strbuf_putc(html, '\n'); + strbuf_putc(html, ' '); break; - case NODE_LINK: - case NODE_IMAGE: - case NODE_STRONG: - case NODE_EMPH: - children = ils->first_child; - visit_children = true; - rstack = push_render_stack(rstack, ils->next, ""); - break; default: break; } - if (visit_children) { - ils = children; - } else { - ils = ils->next; + + if (cur->first_child) { + cur = cur->first_child; + continue; } - while (ils == NULL && rstack != NULL) { - strbuf_puts(html, rstack->literal); - ils = rstack->next_sibling; - rstack = pop_render_stack(rstack); + + next_sibling: + if (cur->next) { + cur = cur->next; + continue; } + cur = cur->parent; + if (cur == node) { + break; + } + goto next_sibling; } - - free_render_stack(rstack); } -// Convert an inline list to HTML. Returns 0 on success, and sets result. -static void inlines_to_html(strbuf *html, cmark_node* ils) +// Convert a cmark_node to HTML. +static void node_to_html(strbuf *html, cmark_node *node) { + cmark_node *cur; + char start_header[] = "<h0>"; + bool tight = false; bool visit_children; - render_stack* rstack = NULL; - while(ils != NULL) { - visit_children = false; - switch(ils->type) { + if (node == NULL) { + return; + } + + cur = node; + while (true) { + // Only NODE_IMAGE wants to skip its children. + visit_children = true; + + switch(cur->type) { + case NODE_DOCUMENT: + break; + + case NODE_PARAGRAPH: + if (!tight) { + cr(html); + strbuf_puts(html, "<p>"); + } + break; + + case NODE_BQUOTE: + cr(html); + strbuf_puts(html, "<blockquote>\n"); + // BQUOTE doesn't use any of the 'as' structs, + // so the 'list' member can be used to store the + // current value of 'tight'. + cur->as.list.tight = tight; + tight = false; + break; + + case NODE_LIST_ITEM: + cr(html); + strbuf_puts(html, "<li>"); + break; + + case NODE_LIST: { + cmark_list *list = &cur->as.list; + bool tmp; + + // make sure a list starts at the beginning of the line: + cr(html); + + if (list->list_type == CMARK_BULLET_LIST) { + strbuf_puts(html, "<ul>\n"); + } + else if (list->start == 1) { + strbuf_puts(html, "<ol>\n"); + } + else { + strbuf_printf(html, "<ol start=\"%d\">\n", + list->start); + } + + // Store the current value of 'tight' by swapping. + tmp = list->tight; + list->tight = tight; + tight = tmp; + break; + } + + case NODE_ATX_HEADER: + case NODE_SETEXT_HEADER: + cr(html); + start_header[2] = '0' + cur->as.header.level; + strbuf_puts(html, start_header); + break; + + case NODE_INDENTED_CODE: + case NODE_FENCED_CODE: { + strbuf *info = &cur->as.code.info; + cr(html); + + if (cur->type != NODE_FENCED_CODE + || strbuf_len(info) == 0) { + strbuf_puts(html, "<pre><code>"); + } + else { + int first_tag = strbuf_strchr(info, ' ', 0); + if (first_tag < 0) + first_tag = strbuf_len(info); + + strbuf_puts(html, + "<pre><code class=\"language-"); + escape_html(html, info->ptr, first_tag); + strbuf_puts(html, "\">"); + } + + escape_html(html, cur->string_content.ptr, cur->string_content.size); + break; + } + + case NODE_HTML: + strbuf_put(html, cur->string_content.ptr, cur->string_content.size); + break; + + case NODE_HRULE: + strbuf_puts(html, "<hr />\n"); + break; + + case NODE_REFERENCE_DEF: + break; + case NODE_STRING: - escape_html(html, ils->as.literal.data, ils->as.literal.len); + escape_html(html, cur->as.literal.data, cur->as.literal.len); break; case NODE_LINEBREAK: @@ -153,218 +205,155 @@ static void inlines_to_html(strbuf *html, cmark_node* ils) case NODE_INLINE_CODE: strbuf_puts(html, "<code>"); - escape_html(html, ils->as.literal.data, ils->as.literal.len); - strbuf_puts(html, "</code>"); + escape_html(html, cur->as.literal.data, cur->as.literal.len); break; case NODE_INLINE_HTML: strbuf_put(html, - ils->as.literal.data, - ils->as.literal.len); + cur->as.literal.data, + cur->as.literal.len); break; case NODE_LINK: strbuf_puts(html, "<a href=\""); - if (ils->as.link.url) - escape_href(html, ils->as.link.url, -1); + if (cur->as.link.url) + escape_href(html, cur->as.link.url, -1); - if (ils->as.link.title) { + if (cur->as.link.title) { strbuf_puts(html, "\" title=\""); - escape_html(html, ils->as.link.title, -1); + escape_html(html, cur->as.link.title, -1); } strbuf_puts(html, "\">"); - visit_children = true; - rstack = push_render_stack(rstack, ils->next, "</a>"); break; case NODE_IMAGE: strbuf_puts(html, "<img src=\""); - if (ils->as.link.url) - escape_href(html, ils->as.link.url, -1); + if (cur->as.link.url) + escape_href(html, cur->as.link.url, -1); strbuf_puts(html, "\" alt=\""); - inlines_to_plain_html(html, ils->first_child); + inlines_to_plain_html(html, cur); - if (ils->as.link.title) { + if (cur->as.link.title) { strbuf_puts(html, "\" title=\""); - escape_html(html, ils->as.link.title, -1); + escape_html(html, cur->as.link.title, -1); } strbuf_puts(html, "\" />"); + visit_children = false; break; case NODE_STRONG: strbuf_puts(html, "<strong>"); - visit_children = true; - rstack = push_render_stack(rstack, ils->next, "</strong>"); break; case NODE_EMPH: strbuf_puts(html, "<em>"); - visit_children = true; - rstack = push_render_stack(rstack, ils->next, "</em>"); break; + default: - break; + assert(false); } - if (visit_children) { - ils = ils->first_child; - } else { - ils = ils->next; + + if (visit_children && cur->first_child) { + cur = cur->first_child; + continue; + } + + next_sibling: + tight = finish_node(html, cur, tight); + if (cur == node) { + break; } - while (ils == NULL && rstack != NULL) { - strbuf_puts(html, rstack->literal); - ils = rstack->next_sibling; - rstack = pop_render_stack(rstack); + if (cur->next) { + cur = cur->next; + continue; } + cur = cur->parent; + goto next_sibling; } - - free_render_stack(rstack); } -// Convert a cmark_node list to HTML. Returns 0 on success, and sets result. -static void blocks_to_html(strbuf *html, cmark_node *b) +// Returns the restored value of 'tight'. +static bool +finish_node(strbuf *html, cmark_node *node, bool tight) { - cmark_list *data; - render_stack* rstack = NULL; - bool visit_children = false; - bool tight = false; - - while(b != NULL) { - visit_children = false; - switch(b->type) { - case NODE_DOCUMENT: - rstack = push_render_stack(rstack, b->next, ""); - rstack->tight = false; - rstack->trim = false; - visit_children = true; - break; - - case NODE_PARAGRAPH: - if (tight) { - inlines_to_html(html, b->first_child); - } else { - cr(html); - strbuf_puts(html, "<p>"); - inlines_to_html(html, b->first_child); - strbuf_puts(html, "</p>\n"); - } - break; + char end_header[] = "</h0>\n"; - case NODE_BQUOTE: - cr(html); - strbuf_puts(html, "<blockquote>\n"); - rstack = push_render_stack(rstack, b->next, "</blockquote>\n"); - rstack->tight = tight; - rstack->trim = false; - tight = false; - visit_children = true; - break; - - case NODE_LIST_ITEM: - cr(html); - strbuf_puts(html, "<li>"); - rstack = push_render_stack(rstack, b->next, "</li>\n"); - rstack->tight = tight; - rstack->trim = true; - visit_children = true; - break; - - case NODE_LIST: - // make sure a list starts at the beginning of the line: - cr(html); - data = &(b->as.list); - - if (data->start > 1) { - strbuf_printf(html, "<%s start=\"%d\">\n", - data->list_type == CMARK_BULLET_LIST ? "ul" : "ol", - data->start); - } else { - strbuf_puts(html, data->list_type == CMARK_BULLET_LIST ? "<ul>\n" : "<ol>\n"); - } - - rstack = push_render_stack(rstack, b->next, - data->list_type == CMARK_BULLET_LIST ? - "\n</ul>\n" : "\n</ol>\n"); - rstack->tight = tight; - rstack->trim = false; - tight = data->tight; - visit_children = true; - break; - - case NODE_ATX_HEADER: - case NODE_SETEXT_HEADER: - cr(html); - strbuf_printf(html, "<h%d>", b->as.header.level); - inlines_to_html(html, b->first_child); - strbuf_printf(html, "</h%d>\n", b->as.header.level); - break; - - case NODE_INDENTED_CODE: - case NODE_FENCED_CODE: - cr(html); - - strbuf_puts(html, "<pre><code"); + switch (node->type) { + case NODE_PARAGRAPH: + if (!tight) { + strbuf_puts(html, "</p>\n"); + } + break; + + case NODE_BQUOTE: { + cmark_list *list = &node->as.list; + strbuf_puts(html, "</blockquote>\n"); + // Restore old 'tight' value. + tight = list->tight; + list->tight = false; + break; + } - if (b->type == NODE_FENCED_CODE) { - strbuf *info = &b->as.code.info; + case NODE_LIST_ITEM: + strbuf_puts(html, "</li>\n"); + break; + + case NODE_LIST: { + cmark_list *list = &node->as.list; + bool tmp; + strbuf_puts(html, + list->list_type == CMARK_BULLET_LIST ? + "</ul>\n" : "</ol>\n"); + // Restore old 'tight' value. + tmp = tight; + tight = list->tight; + list->tight = tmp; + break; + } - if (strbuf_len(info) > 0) { - int first_tag = strbuf_strchr(info, ' ', 0); - if (first_tag < 0) - first_tag = strbuf_len(info); + case NODE_ATX_HEADER: + case NODE_SETEXT_HEADER: + end_header[3] = '0' + node->as.header.level; + strbuf_puts(html, end_header); + break; - strbuf_puts(html, " class=\"language-"); - escape_html(html, info->ptr, first_tag); - strbuf_putc(html, '"'); - } - } + case NODE_INDENTED_CODE: + case NODE_FENCED_CODE: + strbuf_puts(html, "</code></pre>\n"); + break; - strbuf_putc(html, '>'); - escape_html(html, b->string_content.ptr, b->string_content.size); - strbuf_puts(html, "</code></pre>\n"); - break; + case NODE_INLINE_CODE: + strbuf_puts(html, "</code>"); + break; - case NODE_HTML: - strbuf_put(html, b->string_content.ptr, b->string_content.size); - break; + case NODE_LINK: + strbuf_puts(html, "</a>"); + break; - case NODE_HRULE: - strbuf_puts(html, "<hr />\n"); - break; + case NODE_STRONG: + strbuf_puts(html, "</strong>"); + break; - case NODE_REFERENCE_DEF: - break; + case NODE_EMPH: + strbuf_puts(html, "</em>"); + break; - default: - assert(false); - } - if (visit_children) { - b = b->first_child; - } else { - b = b->next; - } - while (b == NULL && rstack != NULL) { - strbuf_puts(html, rstack->literal); - if (rstack->trim) { - strbuf_rtrim(html); - } - tight = rstack->tight; - b = rstack->next_sibling; - rstack = pop_render_stack(rstack); - } + default: + break; } - free_render_stack(rstack); + return tight; } -unsigned char *cmark_render_html(cmark_node *root) +char *cmark_render_html(cmark_node *root) { - unsigned char *result; + char *result; strbuf html = GH_BUF_INIT; - blocks_to_html(&html, root); - result = strbuf_detach(&html); + node_to_html(&html, root); + result = (char *)strbuf_detach(&html); strbuf_free(&html); return result; } @@ -17,7 +17,7 @@ void print_usage() static void print_document(cmark_node *document, bool ast) { - unsigned char *result; + char *result; if (ast) { cmark_debug_print(document); } else { @@ -33,7 +33,7 @@ int main(int argc, char *argv[]) int i, numfps = 0; bool ast = false; int *files; - unsigned char buffer[4096]; + char buffer[4096]; cmark_doc_parser *parser; size_t offset; cmark_node *document; @@ -11,6 +11,25 @@ cmark_node* cmark_node_new(cmark_node_type type) { cmark_node *node = (cmark_node *)calloc(1, sizeof(*node)); node->type = type; + + switch (node->type) { + case CMARK_NODE_ATX_HEADER: + case CMARK_NODE_SETEXT_HEADER: + node->as.header.level = 1; + break; + + case CMARK_NODE_LIST: { + cmark_list *list = &node->as.list; + list->list_type = CMARK_BULLET_LIST; + list->start = 1; + list->tight = false; + break; + } + + default: + break; + } + return node; } @@ -348,6 +367,17 @@ S_is_inline(cmark_node *node) { static bool S_can_contain(cmark_node *node, cmark_node *child) { + cmark_node *cur; + + // Verify that child is not an ancestor of node or equal to node. + cur = node; + do { + if (cur == child) { + return false; + } + cur = cur->parent; + } while (cur != NULL); + if (child->type == CMARK_NODE_DOCUMENT) { return false; } @@ -528,58 +558,63 @@ cmark_node_append_child(cmark_node *node, cmark_node *child) } static void -S_print_error(cmark_node *node, const char *elem) +S_print_error(FILE *out, cmark_node *node, const char *elem) { - fprintf(stderr, "Invalid '%s' in node type %s at %d:%d\n", elem, + if (out == NULL) { + return; + } + fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem, S_type_string(node), node->start_line, node->start_column); } int -cmark_node_check(cmark_node *node) +cmark_node_check(cmark_node *node, FILE *out) { - cmark_node *cur = node; + cmark_node *cur; int errors = 0; - while (cur) { + if (!node) { + return 0; + } + + cur = node; + while (true) { if (cur->first_child) { if (cur->first_child->parent != cur) { - S_print_error(cur->first_child, "parent"); + S_print_error(out, cur->first_child, "parent"); cur->first_child->parent = cur; ++errors; } cur = cur->first_child; + continue; } - else if (cur->next) { + + next_sibling: + if (cur == node) { + break; + } + if (cur->next) { if (cur->next->prev != cur) { - S_print_error(cur->next, "prev"); + S_print_error(out, cur->next, "prev"); cur->next->prev = cur; ++errors; } if (cur->next->parent != cur->parent) { - S_print_error(cur->next, "parent"); + S_print_error(out, cur->next, "parent"); cur->next->parent = cur->parent; ++errors; } cur = cur->next; + continue; } - else { - if (cur->parent->last_child != cur) { - S_print_error(cur->parent, "last_child"); - cur->parent->last_child = cur; - ++errors; - } - - cmark_node *ancestor = cur->parent; - cur = NULL; - while (ancestor != node->parent) { - if (ancestor->next) { - cur = ancestor->next; - break; - } - ancestor = ancestor->parent; - } + if (cur->parent->last_child != cur) { + S_print_error(out, cur->parent, "last_child"); + cur->parent->last_child = cur; + ++errors; } + cur = cur->parent; + goto next_sibling; } return errors; @@ -5,6 +5,8 @@ extern "C" { #endif +#include <stdio.h> + #include "cmark.h" #include "buffer.h" #include "chunk.h" @@ -62,7 +64,7 @@ struct cmark_node { }; CMARK_EXPORT int -cmark_node_check(cmark_node *node); +cmark_node_check(cmark_node *node, FILE *out); #ifdef __cplusplus } |