summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> 2017-08-01 19:28:16 +0700
committer: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> 2018-07-07 19:30:34 +0200
commit: 55afc6fd496de4e75c27d725c2bf1470cc3cf5aa (patch)
tree: 9b28765be135f080a680bca08b33c017013af1b9
parent: 26f5144fa925d5d7275819ff86388a47e6063f7e (diff)
Add xml:space="preserve" in XML output when appropriate
With the current HTML escaping, we may sometimes produce an XML tag like <code_block> </code_block>. Many XML parsers consider these spaces insignificant and strip them out, but we need them to be preserved. There is actually a test case like this in spec.txt (search for "A code block can have all empty lines as its content:"), and without proper hinting an external xml->html converter will fail the spec. The XML standard covers this case: if xml:space is "preserve", then whitespace is significant and must be kept. Add this hint to the text, code, code_block, html_inline and html_block tags.
-rw-r--r-- api_test/main.c 50
-rw-r--r-- src/xml.c 4
2 files changed, 29 insertions, 25 deletions
diff --git a/api_test/main.c b/api_test/main.c
index c30dc71..a843530 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -527,7 +527,9 @@ static void render_xml(test_batch_runner *runner) {
static const char markdown[] = "foo *bar*\n"
"\n"
- "paragraph 2\n";
+ "paragraph 2\n"
+ "\n"
+ "```\ncode\n```\n";
cmark_node *doc =
cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT);
@@ -536,14 +538,16 @@ static void render_xml(test_batch_runner *runner) {
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
"<document xmlns=\"http://commonmark.org/xml/1.0\">\n"
" <paragraph>\n"
- " <text>foo </text>\n"
+ " <text xml:space=\"preserve\">foo </text>\n"
" <emph>\n"
- " <text>bar</text>\n"
+ " <text xml:space=\"preserve\">bar</text>\n"
" </emph>\n"
" </paragraph>\n"
" <paragraph>\n"
- " <text>paragraph 2</text>\n"
+ " <text xml:space=\"preserve\">paragraph 2</text>\n"
" </paragraph>\n"
+ " <code_block xml:space=\"preserve\">code\n"
+ "</code_block>\n"
"</document>\n",
"render document");
free(xml);
@@ -552,9 +556,9 @@ static void render_xml(test_batch_runner *runner) {
STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
"<paragraph sourcepos=\"1:1-1:9\">\n"
- " <text sourcepos=\"1:1-1:4\">foo </text>\n"
+ " <text sourcepos=\"1:1-1:4\" xml:space=\"preserve\">foo </text>\n"
" <emph sourcepos=\"1:5-1:9\">\n"
- " <text sourcepos=\"1:6-1:8\">bar</text>\n"
+ " <text sourcepos=\"1:6-1:8\" xml:space=\"preserve\">bar</text>\n"
" </emph>\n"
"</paragraph>\n",
"render first paragraph with source pos");
@@ -902,41 +906,41 @@ static void source_pos(test_batch_runner *runner) {
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
"<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n"
" <heading sourcepos=\"1:1-1:13\" level=\"1\">\n"
- " <text sourcepos=\"1:3-1:5\">Hi </text>\n"
+ " <text sourcepos=\"1:3-1:5\" xml:space=\"preserve\">Hi </text>\n"
" <emph sourcepos=\"1:6-1:12\">\n"
- " <text sourcepos=\"1:7-1:11\">there</text>\n"
+ " <text sourcepos=\"1:7-1:11\" xml:space=\"preserve\">there</text>\n"
" </emph>\n"
- " <text sourcepos=\"1:13-1:13\">.</text>\n"
+ " <text sourcepos=\"1:13-1:13\" xml:space=\"preserve\">.</text>\n"
" </heading>\n"
" <paragraph sourcepos=\"3:1-4:42\">\n"
- " <text sourcepos=\"3:1-3:14\">Hello “ </text>\n"
+ " <text sourcepos=\"3:1-3:14\" xml:space=\"preserve\">Hello “ </text>\n"
" <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n"
- " <text sourcepos=\"3:16-3:36\">http://www.google.com</text>\n"
+ " <text sourcepos=\"3:16-3:36\" xml:space=\"preserve\">http://www.google.com</text>\n"
" </link>\n"
" <softbreak />\n"
- " <text sourcepos=\"4:1-4:6\">there </text>\n"
- " <code sourcepos=\"4:8-4:9\">hi</code>\n"
- " <text sourcepos=\"4:11-4:14\"> -- </text>\n"
+ " <text sourcepos=\"4:1-4:6\" xml:space=\"preserve\">there </text>\n"
+ " <code sourcepos=\"4:8-4:9\" xml:space=\"preserve\">hi</code>\n"
+ " <text sourcepos=\"4:11-4:14\" xml:space=\"preserve\"> -- </text>\n"
" <link sourcepos=\"4:15-4:41\" destination=\"www.google.com\" title=\"ok\">\n"
- " <text sourcepos=\"4:16-4:19\">okay</text>\n"
+ " <text sourcepos=\"4:16-4:19\" xml:space=\"preserve\">okay</text>\n"
" </link>\n"
- " <text sourcepos=\"4:42-4:42\">.</text>\n"
+ " <text sourcepos=\"4:42-4:42\" xml:space=\"preserve\">.</text>\n"
" </paragraph>\n"
" <block_quote sourcepos=\"6:1-10:20\">\n"
" <list sourcepos=\"6:3-10:20\" type=\"ordered\" start=\"1\" delim=\"period\" tight=\"false\">\n"
" <item sourcepos=\"6:3-8:1\">\n"
" <paragraph sourcepos=\"6:6-7:10\">\n"
- " <text sourcepos=\"6:6-6:10\">Okay.</text>\n"
+ " <text sourcepos=\"6:6-6:10\" xml:space=\"preserve\">Okay.</text>\n"
" <softbreak />\n"
- " <text sourcepos=\"7:6-7:10\">Sure.</text>\n"
+ " <text sourcepos=\"7:6-7:10\" xml:space=\"preserve\">Sure.</text>\n"
" </paragraph>\n"
" </item>\n"
" <item sourcepos=\"9:3-10:20\">\n"
" <paragraph sourcepos=\"9:6-10:20\">\n"
- " <text sourcepos=\"9:6-9:15\">Yes, okay.</text>\n"
+ " <text sourcepos=\"9:6-9:15\" xml:space=\"preserve\">Yes, okay.</text>\n"
" <softbreak />\n"
" <image sourcepos=\"10:6-10:20\" destination=\"hi\" title=\"yes\">\n"
- " <text sourcepos=\"10:8-10:9\">ok</text>\n"
+ " <text sourcepos=\"10:8-10:9\" xml:space=\"preserve\">ok</text>\n"
" </image>\n"
" </paragraph>\n"
" </item>\n"
@@ -960,11 +964,11 @@ static void ref_source_pos(test_batch_runner *runner) {
"<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
"<document sourcepos=\"1:1-3:40\" xmlns=\"http://commonmark.org/xml/1.0\">\n"
" <paragraph sourcepos=\"1:1-1:28\">\n"
- " <text sourcepos=\"1:1-1:10\">Let's try </text>\n"
+ " <text sourcepos=\"1:1-1:10\" xml:space=\"preserve\">Let's try </text>\n"
" <link sourcepos=\"1:11-1:21\" destination=\"https://github.com\" title=\"GitHub\">\n"
- " <text sourcepos=\"1:12-1:20\">reference</text>\n"
+ " <text sourcepos=\"1:12-1:20\" xml:space=\"preserve\">reference</text>\n"
" </link>\n"
- " <text sourcepos=\"1:22-1:28\"> links.</text>\n"
+ " <text sourcepos=\"1:22-1:28\" xml:space=\"preserve\"> links.</text>\n"
" </paragraph>\n"
"</document>\n",
"sourcepos are as expected");
diff --git a/src/xml.c b/src/xml.c
index 4898cd2..48674cc 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -60,7 +60,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_CODE:
case CMARK_NODE_HTML_BLOCK:
case CMARK_NODE_HTML_INLINE:
- cmark_strbuf_puts(xml, ">");
+ cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
escape_xml(xml, node->as.literal.data, node->as.literal.len);
cmark_strbuf_puts(xml, "</");
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
@@ -100,7 +100,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
escape_xml(xml, node->as.code.info.data, node->as.code.info.len);
cmark_strbuf_putc(xml, '"');
}
- cmark_strbuf_puts(xml, ">");
+ cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
escape_xml(xml, node->as.code.literal.data, node->as.code.literal.len);
cmark_strbuf_puts(xml, "</");
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));