diff options
author | Nguyễn Thái Ngọc Duy <pclouds@gmail.com> | 2017-08-01 19:28:16 +0700 |
---|---|---|
committer | Nguyễn Thái Ngọc Duy <pclouds@gmail.com> | 2018-07-07 19:30:34 +0200 |
commit | 55afc6fd496de4e75c27d725c2bf1470cc3cf5aa (patch) | |
tree | 9b28765be135f080a680bca08b33c017013af1b9 | |
parent | 26f5144fa925d5d7275819ff86388a47e6063f7e (diff) |
Add xml:space="preserve" in XML output when appropriate
With current HTML escaping, sometimes we may produce an XML tag like
<code_block> </code_block>
Many XML parsers consider these spaces insignificant and strip them
out, but we need them preserved. There's actually a test case like this in
spec.txt (search "A code block can have all empty lines as its
content:") and without proper hinting, an external xml->html converter
will fail the spec.
The XML standard covers this case. If xml:space is "preserve", then
whitespace is significant and should be kept. Add this hint for
text, code, code_block, html_inline and html_block tags.
-rw-r--r-- | api_test/main.c | 50 | ||||
-rw-r--r-- | src/xml.c | 4 |
2 files changed, 29 insertions, 25 deletions
diff --git a/api_test/main.c b/api_test/main.c index c30dc71..a843530 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -527,7 +527,9 @@ static void render_xml(test_batch_runner *runner) { static const char markdown[] = "foo *bar*\n" "\n" - "paragraph 2\n"; + "paragraph 2\n" + "\n" + "```\ncode\n```\n"; cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); @@ -536,14 +538,16 @@ static void render_xml(test_batch_runner *runner) { "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" "<document xmlns=\"http://commonmark.org/xml/1.0\">\n" " <paragraph>\n" - " <text>foo </text>\n" + " <text xml:space=\"preserve\">foo </text>\n" " <emph>\n" - " <text>bar</text>\n" + " <text xml:space=\"preserve\">bar</text>\n" " </emph>\n" " </paragraph>\n" " <paragraph>\n" - " <text>paragraph 2</text>\n" + " <text xml:space=\"preserve\">paragraph 2</text>\n" " </paragraph>\n" + " <code_block xml:space=\"preserve\">code\n" + "</code_block>\n" "</document>\n", "render document"); free(xml); @@ -552,9 +556,9 @@ static void render_xml(test_batch_runner *runner) { STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" "<paragraph sourcepos=\"1:1-1:9\">\n" - " <text sourcepos=\"1:1-1:4\">foo </text>\n" + " <text sourcepos=\"1:1-1:4\" xml:space=\"preserve\">foo </text>\n" " <emph sourcepos=\"1:5-1:9\">\n" - " <text sourcepos=\"1:6-1:8\">bar</text>\n" + " <text sourcepos=\"1:6-1:8\" xml:space=\"preserve\">bar</text>\n" " </emph>\n" "</paragraph>\n", "render first paragraph with source pos"); @@ -902,41 +906,41 @@ static void source_pos(test_batch_runner *runner) { "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" "<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n" " <heading sourcepos=\"1:1-1:13\" level=\"1\">\n" - " <text sourcepos=\"1:3-1:5\">Hi </text>\n" + " <text sourcepos=\"1:3-1:5\" xml:space=\"preserve\">Hi </text>\n" " <emph sourcepos=\"1:6-1:12\">\n" - " <text 
sourcepos=\"1:7-1:11\">there</text>\n" + " <text sourcepos=\"1:7-1:11\" xml:space=\"preserve\">there</text>\n" " </emph>\n" - " <text sourcepos=\"1:13-1:13\">.</text>\n" + " <text sourcepos=\"1:13-1:13\" xml:space=\"preserve\">.</text>\n" " </heading>\n" " <paragraph sourcepos=\"3:1-4:42\">\n" - " <text sourcepos=\"3:1-3:14\">Hello “ </text>\n" + " <text sourcepos=\"3:1-3:14\" xml:space=\"preserve\">Hello “ </text>\n" " <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n" - " <text sourcepos=\"3:16-3:36\">http://www.google.com</text>\n" + " <text sourcepos=\"3:16-3:36\" xml:space=\"preserve\">http://www.google.com</text>\n" " </link>\n" " <softbreak />\n" - " <text sourcepos=\"4:1-4:6\">there </text>\n" - " <code sourcepos=\"4:8-4:9\">hi</code>\n" - " <text sourcepos=\"4:11-4:14\"> -- </text>\n" + " <text sourcepos=\"4:1-4:6\" xml:space=\"preserve\">there </text>\n" + " <code sourcepos=\"4:8-4:9\" xml:space=\"preserve\">hi</code>\n" + " <text sourcepos=\"4:11-4:14\" xml:space=\"preserve\"> -- </text>\n" " <link sourcepos=\"4:15-4:41\" destination=\"www.google.com\" title=\"ok\">\n" - " <text sourcepos=\"4:16-4:19\">okay</text>\n" + " <text sourcepos=\"4:16-4:19\" xml:space=\"preserve\">okay</text>\n" " </link>\n" - " <text sourcepos=\"4:42-4:42\">.</text>\n" + " <text sourcepos=\"4:42-4:42\" xml:space=\"preserve\">.</text>\n" " </paragraph>\n" " <block_quote sourcepos=\"6:1-10:20\">\n" " <list sourcepos=\"6:3-10:20\" type=\"ordered\" start=\"1\" delim=\"period\" tight=\"false\">\n" " <item sourcepos=\"6:3-8:1\">\n" " <paragraph sourcepos=\"6:6-7:10\">\n" - " <text sourcepos=\"6:6-6:10\">Okay.</text>\n" + " <text sourcepos=\"6:6-6:10\" xml:space=\"preserve\">Okay.</text>\n" " <softbreak />\n" - " <text sourcepos=\"7:6-7:10\">Sure.</text>\n" + " <text sourcepos=\"7:6-7:10\" xml:space=\"preserve\">Sure.</text>\n" " </paragraph>\n" " </item>\n" " <item sourcepos=\"9:3-10:20\">\n" " <paragraph sourcepos=\"9:6-10:20\">\n" - " <text 
sourcepos=\"9:6-9:15\">Yes, okay.</text>\n" + " <text sourcepos=\"9:6-9:15\" xml:space=\"preserve\">Yes, okay.</text>\n" " <softbreak />\n" " <image sourcepos=\"10:6-10:20\" destination=\"hi\" title=\"yes\">\n" - " <text sourcepos=\"10:8-10:9\">ok</text>\n" + " <text sourcepos=\"10:8-10:9\" xml:space=\"preserve\">ok</text>\n" " </image>\n" " </paragraph>\n" " </item>\n" @@ -960,11 +964,11 @@ static void ref_source_pos(test_batch_runner *runner) { "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n" "<document sourcepos=\"1:1-3:40\" xmlns=\"http://commonmark.org/xml/1.0\">\n" " <paragraph sourcepos=\"1:1-1:28\">\n" - " <text sourcepos=\"1:1-1:10\">Let's try </text>\n" + " <text sourcepos=\"1:1-1:10\" xml:space=\"preserve\">Let's try </text>\n" " <link sourcepos=\"1:11-1:21\" destination=\"https://github.com\" title=\"GitHub\">\n" - " <text sourcepos=\"1:12-1:20\">reference</text>\n" + " <text sourcepos=\"1:12-1:20\" xml:space=\"preserve\">reference</text>\n" " </link>\n" - " <text sourcepos=\"1:22-1:28\"> links.</text>\n" + " <text sourcepos=\"1:22-1:28\" xml:space=\"preserve\"> links.</text>\n" " </paragraph>\n" "</document>\n", "sourcepos are as expected"); @@ -60,7 +60,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_CODE: case CMARK_NODE_HTML_BLOCK: case CMARK_NODE_HTML_INLINE: - cmark_strbuf_puts(xml, ">"); + cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); escape_xml(xml, node->as.literal.data, node->as.literal.len); cmark_strbuf_puts(xml, "</"); cmark_strbuf_puts(xml, cmark_node_get_type_string(node)); @@ -100,7 +100,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, escape_xml(xml, node->as.code.info.data, node->as.code.info.len); cmark_strbuf_putc(xml, '"'); } - cmark_strbuf_puts(xml, ">"); + cmark_strbuf_puts(xml, " xml:space=\"preserve\">"); escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len); cmark_strbuf_puts(xml, "</"); cmark_strbuf_puts(xml, 
cmark_node_get_type_string(node)); |