diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-02-14 16:36:43 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-02-14 17:52:55 -0800 |
commit | bb26b18173df983c57459809e8b1691b89907a58 (patch) | |
tree | a7878c0960755c79bf79a310363f20a91c2c87d4 | |
parent | c6417fc0b9cd240eb175501d44f68ea9d4406ec4 (diff) | |
Added CMARK_OPT_SMARTPUNCT and --smart option.
So far this is only implemented for the HTML renderer.
Ultimately some of this should be factored out into a form that
can be used in other renderers.
-rw-r--r-- | man/man3/cmark.3 | 14 |
-rw-r--r-- | src/cmark.h | 4 |
-rw-r--r-- | src/html.c | 88 |
-rw-r--r-- | src/main.c | 3 |
4 files changed, 106 insertions, 3 deletions
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 2c4dd14..c8fc4c7 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -1,4 +1,4 @@ -.TH cmark 3 "January 28, 2015" "LOCAL" "Library Functions Manual" +.TH cmark 3 "February 14, 2015" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -520,6 +520,18 @@ Render \f[C]softbreak\f[] elements as hard line breaks. .PP Normalize tree by consolidating adjacent text nodes. +.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_SMARTPUNCT 8 +.RE +\f[] +.fi + +.PP +Convert straight quotes to curly, \-\-\- to em dashes, \-\- to en dashes. + .SS Version information diff --git a/src/cmark.h b/src/cmark.h index 9f312bc..f106371 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -496,6 +496,10 @@ char *cmark_render_man(cmark_node *root, long options); */ #define CMARK_OPT_NORMALIZE 4 +/** Convert straight quotes to curly, --- to em dashes, -- to en dashes. + */ +#define CMARK_OPT_SMARTPUNCT 8 + /** * ## Version information */ @@ -6,6 +6,7 @@ #include "config.h" #include "cmark.h" #include "node.h" +#include "utf8.h" #include "buffer.h" #include "houdini.h" @@ -60,6 +61,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, char start_header[] = "<h0"; char end_header[] = "</h0"; bool tight; + int lastout, i; + cmark_chunk lit; + char before_char, after_char, c; + bool left_flanking, right_flanking; bool entering = (ev_type == CMARK_EVENT_ENTER); @@ -217,8 +222,87 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, break; case CMARK_NODE_TEXT: - escape_html(html, node->as.literal.data, - node->as.literal.len); + if (options & CMARK_OPT_SMARTPUNCT) { + lastout = 0; + i = 0; + lit = node->as.literal; + while (i < lit.len) { + c = lit.data[i]; + // replace with efficient lookup table: + if (c != '"' && c != '-' && c != '\'' && c != '.') { + i++; + continue; + } + escape_html(html, lit.data + lastout, + i - lastout); + if (c == '\'' || c == '"') { + before_char = i == 0 ? ',' : lit.data[i - 1]; + after_char = i == lit.len - 1 ? 
',' : lit.data[i + 1]; + left_flanking = !utf8proc_is_space(after_char) && + !(utf8proc_is_punctuation(after_char) && + !utf8proc_is_space(before_char) && + !utf8proc_is_punctuation(before_char)); + right_flanking = !utf8proc_is_space(before_char) && + !(utf8proc_is_punctuation(before_char) && + !utf8proc_is_space(after_char) && + !utf8proc_is_punctuation(after_char)); + } + switch (lit.data[i]) { + case '"': + if (right_flanking) { + cmark_strbuf_puts(html, "”"); + } else { + cmark_strbuf_puts(html, "“"); + } + i += 1; + break; + case '\'': + if (left_flanking && !right_flanking) { + cmark_strbuf_puts(html, "‘"); + } else { + cmark_strbuf_puts(html, "’"); + } + i += 1; + break; + case '-': + if (i < lit.len - 1 && lit.data[i + 1] == '-') { + if (lit.data[i + 2] == '-') { + cmark_strbuf_puts(html, + "—"); + i += 3; + } else { + cmark_strbuf_puts(html, "–"); + i += 2; + } + } else { + cmark_strbuf_putc(html, c); + i += 1; + } + break; + case '.': + if (i < lit.len - 2 && lit.data[i + 1] == '.' 
&& + lit.data[i + 2] == '.') { + cmark_strbuf_puts(html, + "…"); + i += 3; + } else { + cmark_strbuf_putc(html, c); + i += 1; + } + break; + default: + cmark_strbuf_putc(html, c); + i++; + } + lastout = i; + } + escape_html(html, node->as.literal.data + lastout, + i - lastout); + + } else { + escape_html(html, node->as.literal.data, + node->as.literal.len); + } break; case CMARK_NODE_LINEBREAK: @@ -26,6 +26,7 @@ void print_usage() printf(" --to, -t FORMAT Specify output format (html, xml, man)\n"); printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); + printf(" --smart Use smart punctuation\n"); printf(" --normalize Consolidate adjacent text nodes\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); @@ -80,6 +81,8 @@ int main(int argc, char *argv[]) options |= CMARK_OPT_SOURCEPOS; } else if (strcmp(argv[i], "--hardbreaks") == 0) { options |= CMARK_OPT_HARDBREAKS; + } else if (strcmp(argv[i], "--smart") == 0) { + options |= CMARK_OPT_SMARTPUNCT; } else if (strcmp(argv[i], "--normalize") == 0) { options |= CMARK_OPT_NORMALIZE; } else if ((strcmp(argv[i], "--help") == 0) || |