summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-01-17 14:28:53 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2016-01-17 14:28:53 -0800
commit01cb5c9563cc257e14a0093843d87621563d961f (patch)
tree6b53401cef0dca299fa1b9e3b437a7669fe96c12 /src
parent1f8ea828409287b7901bf32d01f8ec662ffdc9ba (diff)
Improved escaping in commonmark renderer.
We try not to escape punctuation unless we absolutely have to. So, `)` and `.` are no longer escaped whenever they occur after digits; now they are only escaped if they are genuinely in a position where they'd cause a list item. This required a couple of changes to render.c. - `renderer->begin_content` is only set to false AFTER a string of digits at the beginning of the line. (This is slightly unprincipled.) - We never break before a numeral (also slightly unprincipled).
Diffstat (limited to 'src')
-rw-r--r--src/commonmark.c11
-rwxr-xr-xsrc/render.c18
2 files changed, 23 insertions, 6 deletions
diff --git a/src/commonmark.c b/src/commonmark.c
index 3eac076..4fb9cec 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -24,6 +24,8 @@ static inline void outc(cmark_renderer *renderer, cmark_escaping escape,
int32_t c, unsigned char nextc) {
bool needs_escaping = false;
char encoded[20];
+ bool follows_digit = renderer->buffer->size > 0 &&
+ cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
needs_escaping =
escape != LITERAL &&
@@ -31,9 +33,12 @@ static inline void outc(cmark_renderer *renderer, cmark_escaping escape,
(c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
c == '>' || c == '\\' || c == '`' || c == '!' ||
(c == '&' && isalpha(nextc)) || (c == '!' && nextc == '[') ||
- (renderer->begin_content && (c == '-' || c == '+' || c == '=')) ||
- ((c == '.' || c == ')') &&
- isdigit(renderer->buffer->ptr[renderer->buffer->size - 1])))) ||
+ (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
+ // begin_content doesn't get set to false til we've passed digits
+ // at the beginning of line, so...
+ !follows_digit) ||
+ (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
+ (nextc == 0 || cmark_isspace(nextc))))) ||
(escape == URL && (c == '`' || c == '<' || c == '>' || isspace(c) ||
c == '\\' || c == ')' || c == '(')) ||
(escape == TITLE &&
diff --git a/src/render.c b/src/render.c
index 898a9e2..2c941bf 100755
--- a/src/render.c
+++ b/src/render.c
@@ -23,6 +23,7 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
unsigned char nextc;
int32_t c;
int i = 0;
+ int last_nonspace;
int len;
cmark_chunk remainder = cmark_chunk_literal("");
int k = renderer->buffer->size - 1;
@@ -63,15 +64,20 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
nextc = source[i + len];
if (c == 32 && wrap) {
if (!renderer->begin_line) {
+ last_nonspace = renderer->buffer->size;
cmark_strbuf_putc(renderer->buffer, ' ');
renderer->column += 1;
renderer->begin_line = false;
renderer->begin_content = false;
- renderer->last_breakable = renderer->buffer->size - 1;
// skip following spaces
while (source[i + 1] == ' ') {
i++;
}
+ // We don't allow breaks that make a digit the first character
+ // because this causes problems with commonmark output.
+ if (!cmark_isdigit(source[i + 1])) {
+ renderer->last_breakable = last_nonspace;
+ }
}
} else if (c == 10) {
@@ -83,11 +89,17 @@ static void S_out(cmark_renderer *renderer, const char *source, bool wrap,
} else if (escape == LITERAL) {
cmark_render_code_point(renderer, c);
renderer->begin_line = false;
- renderer->begin_content = false;
+ // we don't set 'begin_content' to false til we've
+ // finished parsing a digit. Reason: in commonmark
+ // we need to escape a potential list marker after
+ // a digit:
+ renderer->begin_content = renderer->begin_content &&
+ cmark_isdigit(c) == 1;
} else {
(renderer->outc)(renderer, escape, c, nextc);
renderer->begin_line = false;
- renderer->begin_content = false;
+ renderer->begin_content = renderer->begin_content &&
+ cmark_isdigit(c) == 1;
}
// If adding the character went beyond width, look for an