From 2361ad944c65ad9118b76cb3d223129fb34f21a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Rodr=C3=ADguez?= %s ");
+ inlines_to_html(html, b->inline_content);
+ gh_buf_puts(html, " ");
inlines_to_html(html, b->inline_content);
- gh_buf_puts(html, "\n%s
", contents->data);
- cr(html);
- bdestroy(contents);
- break;
- case list_item:
- check(blocks_to_html(b->children, &contents, tight) == 0,
- "error converting blocks to html");
- brtrimws(contents);
- cr(html);
- bformata(html, "
", escaped->data);
- cr(html);
- bdestroy(escaped);
- break;
- case fenced_code:
- escaped = escape_html(b->string_content, false);
- cr(html);
- bformata(html, "%s
", escaped->data);
- cr(html);
- bdestroy(escaped);
- break;
- case html_block:
- bformata(html, "%s", b->string_content->data);
- break;
- case hrule:
- bformata(html, "attributes.fenced_code_data.info) > 0) {
- escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
- info_words = bsplit(escaped2, ' ');
- bformata(html, " class=\"language-%s\"", info_words->entry[0]->data);
- bdestroy(escaped2);
- bstrListDestroy(info_words);
- }
- bformata(html, ">%s
");
- cr(html);
- break;
- case reference_def:
- break;
- default:
- log_warn("block type %d not implemented\n", b->tag);
- break;
- }
- b = b->next;
- }
- *result = html;
- return 0;
- error:
- return -1;
-}
-
-// Convert an inline list to HTML. Returns 0 on success, and sets result.
-extern int inlines_to_html(inl* ils, bstring* result)
-{
- bstring contents = NULL;
- bstring html = blk2bstr("", 0);
- bstring mbtitle, escaped, escaped2;
-
- while(ils != NULL) {
- switch(ils->tag) {
- case str:
- escaped = escape_html(ils->content.literal, false);
- bformata(html, "%s", escaped->data);
- bdestroy(escaped);
- break;
- case linebreak:
- bformata(html, "
\n");
- break;
- case softbreak:
- bformata(html, "\n");
- break;
- case code:
- escaped = escape_html(ils->content.literal, false);
- bformata(html, "%s", escaped->data);
- bdestroy(escaped);
- break;
- case raw_html:
- case entity:
- bformata(html, "%s", ils->content.literal->data);
- break;
- case link:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- if (blength(ils->content.linkable.title) > 0) {
- escaped = escape_html(ils->content.linkable.title, true);
- mbtitle = bformat(" title=\"%s\"", escaped->data);
- bdestroy(escaped);
- } else {
- mbtitle = blk2bstr("",0);
- }
- escaped = escape_html(ils->content.linkable.url, true);
- bformata(html, "%s",
- escaped->data,
- mbtitle->data,
- contents->data);
- bdestroy(escaped);
- bdestroy(mbtitle);
- bdestroy(contents);
- break;
- case image:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- escaped = escape_html(ils->content.linkable.url, true);
- escaped2 = escape_html(contents, false);
- bdestroy(contents);
- bformata(html, "data, escaped2->data);
- bdestroy(escaped);
- bdestroy(escaped2);
- if (blength(ils->content.linkable.title) > 0) {
- escaped = escape_html(ils->content.linkable.title, true);
- bformata(html, " title=\"%s\"", escaped->data);
- bdestroy(escaped);
- }
- bformata(html, " />");
- break;
- case strong:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- bformata(html, "%s", contents->data);
- bdestroy(contents);
- break;
- case emph:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- bformata(html, "%s", contents->data);
- bdestroy(contents);
- break;
- }
- ils = ils->next;
- }
- *result = html;
- return 0;
- error:
- return -1;
-}
diff --git a/src/inlines.c b/src/inlines.c
index 4ff45ad..82c7219 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -6,9 +6,7 @@
#include "stmd.h"
#include "uthash.h"
-#include "debug.h"
#include "scanners.h"
-#include "utf8.h"
typedef struct Subject {
const gh_buf *buffer;
@@ -119,7 +117,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
e->tag = t;
e->content.linkable.label = label;
e->content.linkable.url = chunk_to_cstr(&url);
- e->content.linkable.title = chunk_to_cstr(&title);
+ e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL;
e->next = NULL;
return e;
}
diff --git a/src/main.c b/src/main.c
index 9e0a3c8..e1abedc 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,99 +1,77 @@
#include
");
+ blocks_to_html(html, b->children, false);
+ gh_buf_puts(html, "
");
+ cr(html);
+ break;
+
+ case list_item:
+ cr(html);
+ gh_buf_puts(html, "\n" : "
" : "");
+ cr(html);
+ break;
+
+ case atx_header:
+ case setext_header:
+ cr(html);
+ gh_buf_printf(html, "\n");
+ }
+
+ blocks_to_html(html, b->children, data->tight);
+ gh_buf_puts(html, data->list_type == bullet ? "
");
+ cr(html);
+ break;
+
+ case html_block:
+ gh_buf_put(html, b->string_content.ptr, b->string_content.size);
+ break;
+
+ case hrule:
+ gh_buf_puts(html, "");
+ escape_html(html, b->string_content.ptr, b->string_content.size);
+ gh_buf_puts(html, "
");
+ cr(html);
+ break;
+
+ case reference_def:
+ break;
+
+ default:
+ assert(false);
+ }
+
+ b = b->next;
+ }
+}
+
+// Convert an inline list to HTML, appending the output to the html buffer.
+void inlines_to_html(gh_buf *html, inl* ils)
+{
+ gh_buf scrap = GH_BUF_INIT;
+
+ while(ils != NULL) {
+ switch(ils->tag) {
+ case INL_STRING:
+ escape_html(html, ils->content.literal.data, ils->content.literal.len);
+ break;
+
+ case INL_LINEBREAK:
+ gh_buf_puts(html, "
\n");
+ break;
+
+ case INL_SOFTBREAK:
+ gh_buf_putc(html, '\n');
+ break;
+
+ case INL_CODE:
+ gh_buf_puts(html, "");
+ escape_html(html, ils->content.literal.data, ils->content.literal.len);
+ gh_buf_puts(html, "");
+ break;
+
+ case INL_RAW_HTML:
+ case INL_ENTITY:
+ gh_buf_put(html,
+ ils->content.literal.data,
+ ils->content.literal.len);
+ break;
+
+ case INL_LINK:
+ gh_buf_puts(html, "content.linkable.url, -1);
+
+ if (ils->content.linkable.title) {
+ gh_buf_puts(html, "\" title=\"");
+ escape_html(html, ils->content.linkable.title, -1);
+ }
+
+ gh_buf_puts(html, "\">");
+ inlines_to_html(html, ils->content.inlines);
+ gh_buf_puts(html, "");
+ break;
+
+ case INL_IMAGE:
+ gh_buf_puts(html, "content.linkable.url, -1);
+
+ inlines_to_html(&scrap, ils->content.inlines);
+ if (scrap.size) {
+ gh_buf_puts(html, "\" alt=\"");
+ escape_html(html, scrap.ptr, scrap.size);
+ }
+ gh_buf_clear(&scrap);
+
+ if (ils->content.linkable.title) {
+ gh_buf_puts(html, "\" title=\"");
+ escape_html(html, ils->content.linkable.title, -1);
+ }
+
+ gh_buf_puts(html, "\"/>");
+ break;
+
+ case INL_STRONG:
+ gh_buf_puts(html, "");
+ inlines_to_html(html, ils->content.inlines);
+ gh_buf_puts(html, "");
+ break;
+
+ case INL_EMPH:
+ gh_buf_puts(html, "");
+ inlines_to_html(html, ils->content.inlines);
+ gh_buf_puts(html, "");
+ break;
+ }
+ ils = ils->next;
+ }
+}
diff --git a/src/stmd.h b/src/stmd.h
index 1e490d6..3e284bd 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -1,4 +1,5 @@
#include
");
+ gh_buf_puts(html, "");
cr(html);
break;
diff --git a/src/inlines.c b/src/inlines.c
index 7b48ad9..ef27a24 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -9,10 +9,10 @@
#include "scanners.h"
typedef struct Subject {
- const gh_buf *buffer;
- int pos;
- reference** reference_map;
- int label_nestlevel;
+ chunk input;
+ int pos;
+ int label_nestlevel;
+ reference** reference_map;
} subject;
reference* lookup_reference(reference** refmap, chunk *label);
@@ -27,12 +27,16 @@ inline static void chunk_trim(chunk *c);
inline static chunk chunk_literal(const char *data);
inline static chunk chunk_buf_detach(gh_buf *buf);
-inline static chunk chunk_buf(const gh_buf *buf, int pos, int len);
+inline static chunk chunk_dup(const chunk *ch, int pos, int len);
static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, inl ** last);
+static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
+static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
+static int subject_find_special_char(subject *subj);
+
extern void free_reference(reference *ref) {
free(ref->label);
free(ref->url);
@@ -101,10 +105,12 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)
extern void add_reference(reference** refmap, reference* ref)
{
reference * t = NULL;
- HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t);
+ const char *label = (const char *)ref->label;
+
+ HASH_FIND(hh, *refmap, label, strlen(label), t);
if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref);
+ HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
} else {
free_reference(ref); // we free this now since it won't be in the refmap
}
@@ -210,87 +216,49 @@ inline static inl* append_inlines(inl* a, inl* b)
return a;
}
-// Make a 'subject' from an input string.
-static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap)
+static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap)
{
- e->buffer = buffer;
- e->pos = input_pos;
+ e->input.data = buffer->ptr;
+ e->input.len = buffer->size;
+ e->input.alloc = 0;
+ e->pos = 0;
e->label_nestlevel = 0;
e->reference_map = refmap;
-}
-
-inline static int isbacktick(int c)
-{
- return (c == '`');
-}
-
-inline static void chunk_free(chunk *c)
-{
- if (c->alloc)
- free((char *)c->data);
-
- c->data = NULL;
- c->alloc = 0;
- c->len = 0;
-}
-
-inline static void chunk_trim(chunk *c)
-{
- while (c->len && isspace(c->data[0])) {
- c->data++;
- c->len--;
- }
-
- while (c->len > 0) {
- if (!isspace(c->data[c->len - 1]))
- break;
- c->len--;
- }
+ chunk_rtrim(&e->input);
}
-inline static unsigned char *chunk_to_cstr(chunk *c)
+static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
{
- unsigned char *str;
-
- str = malloc(c->len + 1);
- memcpy(str, c->data, c->len);
- str[c->len] = 0;
+ e->input.data = chunk->data;
+ e->input.len = chunk->len;
+ e->input.alloc = 0;
+ e->pos = 0;
+ e->label_nestlevel = 0;
+ e->reference_map = refmap;
- return str;
+ chunk_rtrim(&e->input);
}
-inline static chunk chunk_literal(const char *data)
+inline static int isbacktick(int c)
{
- chunk c = {data, data ? strlen(data) : 0, 0};
- return c;
+ return (c == '`');
}
-inline static chunk chunk_buf(const gh_buf *buf, int pos, int len)
+static inline unsigned char peek_char(subject *subj)
{
- chunk c = {buf->ptr + pos, len, 0};
- return c;
+ return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
}
-inline static chunk chunk_buf_detach(gh_buf *buf)
+static inline unsigned char peek_at(subject *subj, int pos)
{
- chunk c;
-
- c.len = buf->size;
- c.data = gh_buf_detach(buf);
- c.alloc = 1;
-
- return c;
+ return subj->input.data[pos];
}
-// Return the next character in the subject, without advancing.
-// Return 0 if at the end of the subject.
-#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos)
-
// Return true if there are more characters in the subject.
inline static int is_eof(subject* subj)
{
- return (subj->pos >= gh_buf_len(subj->buffer));
+ return (subj->pos >= subj->input.len);
}
// Advance the subject. Doesn't check for eof.
@@ -308,7 +276,7 @@ inline static chunk take_while(subject* subj, int (*f)(int))
len++;
}
- return chunk_buf(subj->buffer, startpos, len);
+ return chunk_dup(&subj->input, startpos, len);
}
// Try to process a backtick code span that began with a
@@ -388,7 +356,7 @@ static inl* handle_backticks(subject *subj)
} else {
gh_buf buf = GH_BUF_INIT;
- gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len);
+ gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
gh_buf_trim(&buf);
normalize_whitespace(&buf);
@@ -404,7 +372,7 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
char char_before, char_after;
int startpos = subj->pos;
- char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1);
+ char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
while (peek_char(subj) == c) {
numdelims++;
advance(subj);
@@ -439,7 +407,7 @@ static inl* handle_strong_emph(subject* subj, char c)
numdelims = scan_delims(subj, c, &can_open, &can_close);
subj->pos += numdelims;
- new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims));
+ new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
*last = new;
first_head = new;
result = new;
@@ -488,7 +456,7 @@ static inl* handle_strong_emph(subject* subj, char c)
numdelims = scan_delims(subj, c, &can_open, &can_close);
if (can_close && numdelims >= 1 && numdelims <= 3 &&
numdelims != first_close_delims) {
- new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims));
+ new = make_str(chunk_dup(&subj->input, subj->pos, numdelims));
append_inlines(*last, new);
*last = new;
if (first_close_delims == 1 && numdelims > 2) {
@@ -554,7 +522,7 @@ static inl* handle_backslash(subject *subj)
unsigned char nextchar = peek_char(subj);
if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
advance(subj);
- return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1));
+ return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));
} else if (nextchar == '\n') {
advance(subj);
return make_linebreak();
@@ -569,9 +537,9 @@ static inl* handle_entity(subject* subj)
{
int match;
inl *result;
- match = scan_entity(subj->buffer, subj->pos);
+ match = scan_entity(&subj->input, subj->pos);
if (match) {
- result = make_entity(chunk_buf(subj->buffer, subj->pos, match));
+ result = make_entity(chunk_dup(&subj->input, subj->pos, match));
subj->pos += match;
} else {
advance(subj);
@@ -584,15 +552,13 @@ static inl* handle_entity(subject* subj)
// Returns an inline sequence consisting of str and entity elements.
static inl *make_str_with_entities(chunk *content)
{
- inl * result = NULL;
- inl * new;
+ inl *result = NULL;
+ inl *new;
int searchpos;
char c;
subject subj;
- gh_buf content_buf = GH_BUF_INIT;
- gh_buf_set(&content_buf, content->data, content->len);
- init_subject(&subj, &content_buf, 0, NULL);
+ subject_from_chunk(&subj, content, NULL);
while ((c = peek_char(&subj))) {
switch (c) {
@@ -600,18 +566,13 @@ static inl *make_str_with_entities(chunk *content)
new = handle_entity(&subj);
break;
default:
- searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos);
- if (searchpos < 0) {
- searchpos = gh_buf_len(subj.buffer);
- }
-
- new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos));
+ searchpos = chunk_strchr(&subj.input, '&', subj.pos);
+ new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos));
subj.pos = searchpos;
}
result = append_inlines(result, new);
}
- gh_buf_free(&content_buf);
return result;
}
@@ -678,9 +639,9 @@ static inl* handle_pointy_brace(subject* subj)
advance(subj); // advance past first <
// first try to match a URL autolink
- matchlen = scan_autolink_uri(subj->buffer, subj->pos);
+ matchlen = scan_autolink_uri(&subj->input, subj->pos);
if (matchlen > 0) {
- contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
return make_link(
@@ -691,11 +652,11 @@ static inl* handle_pointy_brace(subject* subj)
}
// next try to match an email autolink
- matchlen = scan_autolink_email(subj->buffer, subj->pos);
+ matchlen = scan_autolink_email(&subj->input, subj->pos);
if (matchlen > 0) {
gh_buf mail_url = GH_BUF_INIT;
- contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
gh_buf_puts(&mail_url, "mailto:");
@@ -709,9 +670,9 @@ static inl* handle_pointy_brace(subject* subj)
}
// finally, try to match an html tag
- matchlen = scan_html_tag(subj->buffer, subj->pos);
+ matchlen = scan_html_tag(&subj->input, subj->pos);
if (matchlen > 0) {
- contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1);
+ contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
subj->pos += matchlen;
return make_raw_html(contents);
}
@@ -776,12 +737,7 @@ static int link_label(subject* subj, chunk *raw_label)
}
}
if (c == ']') {
- *raw_label = chunk_buf(
- subj->buffer,
- startpos + 1,
- subj->pos - (startpos + 1)
- );
-
+ *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
subj->label_nestlevel = 0;
advance(subj); // advance past ]
return 1;
@@ -813,25 +769,25 @@ static inl* handle_left_bracket(subject* subj)
if (found_label) {
if (peek_char(subj) == '(' &&
- ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
- ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+ ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
// try to parse an explicit link:
starturl = subj->pos + 1 + sps; // after (
endurl = starturl + n;
- starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+ starttitle = endurl + scan_spacechars(&subj->input, endurl);
// ensure there are spaces btw url and title
endtitle = (starttitle == endurl) ? starttitle :
- starttitle + scan_link_title(subj->buffer, starttitle);
+ starttitle + scan_link_title(&subj->input, starttitle);
- endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+ endall = endtitle + scan_spacechars(&subj->input, endtitle);
- if (gh_buf_at(subj->buffer, endall) == ')') {
+ if (peek_at(subj, endall) == ')') {
subj->pos = endall + 1;
- url = chunk_buf(subj->buffer, starturl, endurl - starturl);
- title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle);
+ url = chunk_dup(&subj->input, starturl, endurl - starturl);
+ title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
lab = parse_chunk_inlines(&rawlabel, NULL);
return make_link(lab, url, title);
@@ -850,7 +806,7 @@ static inl* handle_left_bracket(subject* subj)
// Check for reference link.
// First, see if there's another label:
- subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
+ subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel);
reflabel = rawlabel;
// if followed by a nonempty link label, we change reflabel to it:
@@ -892,8 +848,8 @@ static inl* handle_newline(subject *subj)
advance(subj);
}
if (nlpos > 1 &&
- gh_buf_at(subj->buffer, nlpos - 1) == ' ' &&
- gh_buf_at(subj->buffer, nlpos - 2) == ' ') {
+ peek_at(subj, nlpos - 1) == ' ' &&
+ peek_at(subj, nlpos - 2) == ' ') {
return make_linebreak();
} else {
return make_softbreak();
@@ -917,30 +873,22 @@ extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
{
- inl *result;
subject subj;
- gh_buf full_chunk = GH_BUF_INIT;
-
- gh_buf_set(&full_chunk, chunk->data, chunk->len);
- init_subject(&subj, &full_chunk, 0, refmap);
- result = parse_inlines_while(&subj, not_eof);
-
- gh_buf_free(&full_chunk);
- return result;
+ subject_from_chunk(&subj, chunk, refmap);
+ return parse_inlines_while(&subj, not_eof);
}
-static int find_special_char(subject *subj)
+static int subject_find_special_char(subject *subj)
{
int n = subj->pos + 1;
- int size = (int)gh_buf_len(subj->buffer);
- while (n < size) {
- if (strchr("\n\\`&_*[]buffer, n)))
+ while (n < subj->input.len) {
+ if (strchr("\n\\`&_*[]input.data[n]))
return n;
n++;
}
- return -1;
+ return subj->input.len;
}
// Parse an inline, advancing subject, and add it to last element.
@@ -973,11 +921,13 @@ static int parse_inline(subject* subj, inl ** last)
new = handle_pointy_brace(subj);
break;
case '_':
- if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
- gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {
- new = make_str(chunk_literal("_"));
- advance(subj);
- break;
+ if (subj->pos > 0) {
+ unsigned char prev = peek_at(subj, subj->pos - 1);
+ if (isalnum(prev) || prev == '_') {
+ new = make_str(chunk_literal("_"));
+ advance(subj);
+ break;
+ }
}
new = handle_strong_emph(subj, '_');
@@ -1002,18 +952,13 @@ static int parse_inline(subject* subj, inl ** last)
}
break;
default:
- text_literal:
- endpos = find_special_char(subj);
- if (endpos < 0) {
- endpos = gh_buf_len(subj->buffer);
- }
-
- contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos);
+ endpos = subject_find_special_char(subj);
+ contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
subj->pos = endpos;
// if we're at a newline, strip trailing spaces.
if (peek_char(subj) == '\n') {
- chunk_trim(&contents);
+ chunk_rtrim(&contents);
}
new = make_str(contents);
@@ -1026,10 +971,10 @@ static int parse_inline(subject* subj, inl ** last)
return 1;
}
-extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap)
+extern inl* parse_inlines(gh_buf *input, reference** refmap)
{
subject subj;
- init_subject(&subj, input, input_pos, refmap);
+ subject_from_buf(&subj, input, refmap);
return parse_inlines_while(&subj, not_eof);
}
@@ -1048,7 +993,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
+extern int parse_reference(gh_buf *input, reference** refmap)
{
subject subj;
@@ -1058,9 +1003,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
int matchlen = 0;
int beforetitle;
- reference * new = NULL;
+ reference *new = NULL;
- init_subject(&subj, input, input_pos, NULL);
+ subject_from_buf(&subj, input, NULL);
// parse label:
if (!link_label(&subj, &lab))
@@ -1075,9 +1020,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
// parse link url:
spnl(&subj);
- matchlen = scan_link_url(subj.buffer, subj.pos);
+ matchlen = scan_link_url(&subj.input, subj.pos);
if (matchlen) {
- url = chunk_buf(subj.buffer, subj.pos, matchlen);
+ url = chunk_dup(&subj.input, subj.pos, matchlen);
subj.pos += matchlen;
} else {
return 0;
@@ -1086,9 +1031,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
// parse optional link_title
beforetitle = subj.pos;
spnl(&subj);
- matchlen = scan_link_title(subj.buffer, subj.pos);
+ matchlen = scan_link_title(&subj.input, subj.pos);
if (matchlen) {
- title = chunk_buf(subj.buffer, subj.pos, matchlen);
+ title = chunk_dup(&subj.input, subj.pos, matchlen);
subj.pos += matchlen;
} else {
subj.pos = beforetitle;
diff --git a/src/print.c b/src/print.c
index 0a87925..c262995 100644
--- a/src/print.c
+++ b/src/print.c
@@ -9,7 +9,7 @@ static void print_str(const unsigned char *s, int len)
int i;
if (len < 0)
- len = strlen(s);
+ len = strlen((char *)s);
putchar('"');
for (i = 0; i < len; ++i) {
diff --git a/src/scanners.h b/src/scanners.h
index b6e586b..f96c42d 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -1,15 +1,15 @@
-#include "buffer.h"
+#include "stmd.h"
-int scan_autolink_uri(const gh_buf *s, int pos);
-int scan_autolink_email(const gh_buf *s, int pos);
-int scan_html_tag(const gh_buf *s, int pos);
-int scan_html_block_tag(const gh_buf *s, int pos);
-int scan_link_url(const gh_buf *s, int pos);
-int scan_link_title(const gh_buf *s, int pos);
-int scan_spacechars(const gh_buf *s, int pos);
-int scan_atx_header_start(const gh_buf *s, int pos);
-int scan_setext_header_line(const gh_buf *s, int pos);
-int scan_hrule(const gh_buf *s, int pos);
-int scan_open_code_fence(const gh_buf *s, int pos);
-int scan_close_code_fence(const gh_buf *s, int pos, int len);
-int scan_entity(const gh_buf *s, int pos);
+int scan_autolink_uri(chunk *c, int offset);
+int scan_autolink_email(chunk *c, int offset);
+int scan_html_tag(chunk *c, int offset);
+int scan_html_block_tag(chunk *c, int offset);
+int scan_link_url(chunk *c, int offset);
+int scan_link_title(chunk *c, int offset);
+int scan_spacechars(chunk *c, int offset);
+int scan_atx_header_start(chunk *c, int offset);
+int scan_setext_header_line(chunk *c, int offset);
+int scan_hrule(chunk *c, int offset);
+int scan_open_code_fence(chunk *c, int offset);
+int scan_close_code_fence(chunk *c, int offset, int len);
+int scan_entity(chunk *c, int offset);
diff --git a/src/scanners.re b/src/scanners.re
index 7323ef9..5ac7c15 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -1,8 +1,15 @@
-#include "buffer.h"
+#include "scanners.h"
+
+#define SCAN_DATA \
+ const unsigned char *marker = NULL; \
+ const unsigned char *p = c->data + offset; \
+ const unsigned char *start = p; \
+ const unsigned char *end = c->data + c->len
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
+ re2c:define:YYLIMIT = end;
re2c:define:YYMARKER = marker;
re2c:define:YYCTXMARKER = marker;
re2c:yyfill:enable = 0;
@@ -55,11 +62,9 @@
*/
// Try to match URI autolink after first <, returning number of chars matched.
-extern int scan_autolink_uri(const gh_buf *s, int pos)
+extern int scan_autolink_uri(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); }
.? { return 0; }
@@ -67,11 +72,9 @@ extern int scan_autolink_uri(const gh_buf *s, int pos)
}
// Try to match email autolink after first <, returning num of chars matched.
-extern int scan_autolink_email(const gh_buf *s, int pos)
+extern int scan_autolink_email(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
[@]
@@ -83,11 +86,9 @@ extern int scan_autolink_email(const gh_buf *s, int pos)
}
// Try to match an HTML tag after first <, returning num of chars matched.
-extern int scan_html_tag(const gh_buf *s, int pos)
+extern int scan_html_tag(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
htmltag { return (p - start); }
.? { return 0; }
@@ -96,11 +97,9 @@ extern int scan_html_tag(const gh_buf *s, int pos)
// Try to match an HTML block tag including first <,
// returning num of chars matched.
-extern int scan_html_block_tag(const gh_buf *s, int pos)
+extern int scan_html_block_tag(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[<] [/] blocktagname (spacechar | [>]) { return (p - start); }
[<] blocktagname (spacechar | [/>]) { return (p - start); }
@@ -113,11 +112,9 @@ extern int scan_html_block_tag(const gh_buf *s, int pos)
// This may optionally be contained in <..>; otherwise
// whitespace and unbalanced right parentheses aren't allowed.
// Newlines aren't ever allowed.
-extern int scan_link_url(const gh_buf *s, int pos)
+extern int scan_link_url(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
[ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
@@ -128,11 +125,9 @@ extern int scan_link_url(const gh_buf *s, int pos)
// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched. Allow one
// level of internal nesting (quotes within quotes).
-extern int scan_link_title(const gh_buf *s, int pos)
+extern int scan_link_title(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
["] (escaped_char|[^"\x00])* ["] { return (p - start); }
['] (escaped_char|[^'\x00])* ['] { return (p - start); }
@@ -142,10 +137,9 @@ extern int scan_link_title(const gh_buf *s, int pos)
}
// Match space characters, including newlines.
-extern int scan_spacechars(const gh_buf *s, int pos)
+extern int scan_spacechars(chunk *c, int offset)
{
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[ \t\n]* { return (p - start); }
. { return 0; }
@@ -153,11 +147,9 @@ extern int scan_spacechars(const gh_buf *s, int pos)
}
// Match ATX header start.
-extern int scan_atx_header_start(const gh_buf *s, int pos)
+extern int scan_atx_header_start(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[#]{1,6} ([ ]+|[\n]) { return (p - start); }
.? { return 0; }
@@ -166,10 +158,9 @@ extern int scan_atx_header_start(const gh_buf *s, int pos)
// Match sexext header line. Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
-extern int scan_setext_header_line(const gh_buf *s, int pos)
+extern int scan_setext_header_line(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
+ SCAN_DATA;
/*!re2c
[=]+ [ ]* [\n] { return 1; }
[-]+ [ ]* [\n] { return 2; }
@@ -180,11 +171,9 @@ extern int scan_setext_header_line(const gh_buf *s, int pos)
// Scan a horizontal rule line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
-extern int scan_hrule(const gh_buf *s, int pos)
+extern int scan_hrule(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
@@ -194,11 +183,9 @@ extern int scan_hrule(const gh_buf *s, int pos)
}
// Scan an opening code fence.
-extern int scan_open_code_fence(const gh_buf *s, int pos)
+extern int scan_open_code_fence(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
[~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
@@ -207,11 +194,9 @@ extern int scan_open_code_fence(const gh_buf *s, int pos)
}
// Scan a closing code fence with length at least len.
-extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
+extern int scan_close_code_fence(chunk *c, int offset, int len)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
([`]{3,} | [~]{3,}) / spacechar* [\n]
{ if (p - start > len) {
@@ -225,11 +210,9 @@ extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
// Scans an entity.
// Returns number of chars matched.
-extern int scan_entity(const gh_buf *s, int pos)
+extern int scan_entity(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
{ return (p - start); }
diff --git a/src/stmd.h b/src/stmd.h
index 3e284bd..4a3c399 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -1,17 +1,15 @@
+#ifndef _STDMD_H_
+#define _STDMD_H_
+
#include ");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "");
+
+ gh_buf_puts(html, "tag == fenced_code) {
+ gh_buf *info = &b->attributes.fenced_code_data.info;
+
+ if (gh_buf_len(info) > 0) {
+ int first_tag = gh_buf_strchr(info, ' ', 0);
+ if (first_tag < 0)
+ first_tag = gh_buf_len(info);
+
+
+ gh_buf_puts(html, " class=\"");
+ escape_html(html, info->ptr, first_tag);
+ gh_buf_putc(html, '"');
+ }
+ }
+
+ gh_buf_puts(html, ">");
cr(html);
--
cgit v1.2.3
From f5168c63ad305b3e331eb7d31efaf46b0541bba4 Mon Sep 17 00:00:00 2001
From: Vicent Marti ");
escape_html(html, b->string_content.ptr, b->string_content.size);
gh_buf_puts(html, "
"); + gh_buf_puts(html, "\n"); break; case list_item: @@ -68,8 +66,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) gh_buf_puts(html, "\n"); blocks_to_html(html, b->children, false); - gh_buf_puts(html, ""); - cr(html); + gh_buf_puts(html, "
");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "");
- cr(html);
+ gh_buf_puts(html, "\n");
break;
case html_block:
@@ -131,8 +126,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
break;
case hrule:
- gh_buf_puts(html, ""); + strbuf_puts(html, "
"); inlines_to_html(html, b->inline_content); - gh_buf_puts(html, "
\n"); + strbuf_puts(html, "\n"); } break; case block_quote: cr(html); - gh_buf_puts(html, "\n"); + strbuf_puts(html, "\n"); break; case list_item: cr(html); - gh_buf_puts(html, "\n"); blocks_to_html(html, b->children, false); - gh_buf_puts(html, "\n"); + strbuf_puts(html, "
tag == fenced_code) {
- gh_buf *info = &b->attributes.fenced_code_data.info;
+ strbuf *info = &b->attributes.fenced_code_data.info;
- if (gh_buf_len(info) > 0) {
- int first_tag = gh_buf_strchr(info, ' ', 0);
+ if (strbuf_len(info) > 0) {
+ int first_tag = strbuf_strchr(info, ' ', 0);
if (first_tag < 0)
- first_tag = gh_buf_len(info);
+ first_tag = strbuf_len(info);
- gh_buf_puts(html, " class=\"");
+ strbuf_puts(html, " class=\"");
escape_html(html, info->ptr, first_tag);
- gh_buf_putc(html, '"');
+ strbuf_putc(html, '"');
}
}
- gh_buf_puts(html, ">");
+ strbuf_puts(html, ">");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "\n");
+ strbuf_puts(html, "\n");
break;
case html_block:
- gh_buf_put(html, b->string_content.ptr, b->string_content.size);
+ strbuf_put(html, b->string_content.ptr, b->string_content.size);
break;
case hrule:
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
escape_html(html, ils->content.literal.data, ils->content.literal.len);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
break;
case INL_RAW_HTML:
case INL_ENTITY:
- gh_buf_put(html,
+ strbuf_put(html,
ils->content.literal.data,
ils->content.literal.len);
break;
case INL_LINK:
- gh_buf_puts(html, "content.linkable.url)
escape_href(html, ils->content.linkable.url, -1);
if (ils->content.linkable.title) {
- gh_buf_puts(html, "\" title=\"");
+ strbuf_puts(html, "\" title=\"");
escape_html(html, ils->content.linkable.title, -1);
}
- gh_buf_puts(html, "\">");
+ strbuf_puts(html, "\">");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "\n"); blocks_to_html(html, b->children, false); strbuf_puts(html, "\n"); break; - case list_item: + case BLOCK_LIST_ITEM: cr(html); strbuf_puts(html, "
tag == fenced_code) {
+ if (b->tag == BLOCK_FENCED_CODE) {
strbuf *info = &b->attributes.fenced_code_data.info;
if (strbuf_len(info) > 0) {
@@ -121,15 +121,15 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight)
strbuf_puts(html, "\n");
break;
- case html_block:
+ case BLOCK_HTML:
strbuf_put(html, b->string_content.ptr, b->string_content.size);
break;
- case hrule:
+ case BLOCK_HRULE:
strbuf_puts(html, "& © Æ Ď ¾ ℋ ⅆ ∲
+& © Æ Ď ¾ ℋ ⅆ ∲
. [Decimal entities](#decimal-entities) -consist of `` + a string of 1--8 arabic digits + `;`. +consist of `` + a string of 1--8 arabic digits + `;`. Again, these entities need to be recognised +and tranformed into their corresponding UTF8 codepoints. Invalid Unicode codepoints will be written +as the "unknown codepoint" character (`0xFFFD`) . - # Ӓ Ϡ +# Ӓ Ϡ . -# Ӓ Ϡ
+# Ӓ Ϡ �
. [Hexadecimal entities](#hexadecimal-entities) @@ -3767,7 +3779,7 @@ Here are some nonentities: . Although HTML5 does accept some entities without a trailing semicolon -(such as `©`), these are not recognized as entities here: +(such as `©`), these are not recognized as entities here, because it makes the grammar too ambiguous: . © @@ -3775,13 +3787,12 @@ Although HTML5 does accept some entities without a trailing semicolon©
. -On the other hand, many strings that are not on the list of HTML5 -named entities are recognized as entities here: +Strings that are not on the list of HTML5 named entities are not recognized as entities either: . &MadeUpEntity; . -&MadeUpEntity;
+&MadeUpEntity;
. Entities are recognized in any context besides code spans or diff --git a/src/inlines.c b/src/inlines.c index 7b27150..aa0e13e 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -20,8 +20,9 @@ typedef struct Subject { reference* lookup_reference(reference** refmap, chunk *label); reference* make_reference(chunk *label, chunk *url, chunk *title); -static unsigned char *clean_url(chunk *url, int is_email); +static unsigned char *clean_url(chunk *url); static unsigned char *clean_title(chunk *title); +static unsigned char *clean_autolink(chunk *url, int is_email); inline static void chunk_free(chunk *c); inline static void chunk_trim(chunk *c); @@ -91,7 +92,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title) reference *ref; ref = malloc(sizeof(reference)); ref->label = normalize_reference(label); - ref->url = clean_url(url, 0); + ref->url = clean_url(url); ref->title = clean_title(title); return ref; } @@ -123,27 +124,31 @@ static unsigned char *bufdup(const unsigned char *buf) return new; } -inline static node_inl* make_link_from_reference(node_inl* label, reference *ref) +static inline node_inl *make_link_(node_inl *label, unsigned char *url, unsigned char *title) { node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = INL_LINK; e->content.linkable.label = label; - e->content.linkable.url = bufdup(ref->url); - e->content.linkable.title = bufdup(ref->title); + e->content.linkable.url = url; + e->content.linkable.title = title; e->next = NULL; return e; } +inline static node_inl* make_ref_link(node_inl* label, reference *ref) +{ + return make_link_(label, bufdup(ref->url), bufdup(ref->title)); +} + +inline static node_inl* make_autolink(node_inl* label, chunk url, int is_email) +{ + return make_link_(label, clean_autolink(&url, is_email), NULL); +} + // Create an inline with a linkable string value. 
-inline static node_inl* make_link(node_inl* label, chunk url, chunk title, int is_email) +inline static node_inl* make_link(node_inl* label, chunk url, chunk title) { - node_inl* e = (node_inl*) malloc(sizeof(node_inl)); - e->tag = INL_LINK; - e->content.linkable.label = label; - e->content.linkable.url = clean_url(&url, is_email); - e->content.linkable.title = clean_title(&title); - e->next = NULL; - return e; + return make_link_(label, clean_url(&url), clean_title(&title)); } inline static node_inl* make_inlines(int t, node_inl* contents) @@ -587,7 +592,7 @@ extern void unescape_buffer(strbuf *buf) // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -static unsigned char *clean_url(chunk *url, int is_email) +static unsigned char *clean_url(chunk *url) { strbuf buf = GH_BUF_INIT; @@ -596,9 +601,6 @@ static unsigned char *clean_url(chunk *url, int is_email) if (url->len == 0) return NULL; - if (is_email) - strbuf_puts(&buf, "mailto:"); - if (url->data[0] == '<' && url->data[url->len - 1] == '>') { houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); } else { @@ -609,6 +611,22 @@ static unsigned char *clean_url(chunk *url, int is_email) return strbuf_detach(&buf); } +static unsigned char *clean_autolink(chunk *url, int is_email) +{ + strbuf buf = GH_BUF_INIT; + + chunk_trim(url); + + if (url->len == 0) + return NULL; + + if (is_email) + strbuf_puts(&buf, "mailto:"); + + houdini_unescape_html_f(&buf, url->data, url->len); + return strbuf_detach(&buf); +} + // Clean a title: remove surrounding quotes and remove \ that escape punctuation. 
static unsigned char *clean_title(chunk *title) { @@ -649,11 +667,9 @@ static node_inl* handle_pointy_brace(subject* subj) contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_link( + return make_autolink( make_str_with_entities(&contents), - contents, - chunk_literal(""), - 0 + contents, 0 ); } @@ -663,11 +679,9 @@ static node_inl* handle_pointy_brace(subject* subj) contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_link( + return make_autolink( make_str_with_entities(&contents), - contents, - chunk_literal(""), - 1 + contents, 1 ); } @@ -792,7 +806,7 @@ static node_inl* handle_left_bracket(subject* subj) title = chunk_dup(&subj->input, starttitle, endtitle - starttitle); lab = parse_chunk_inlines(&rawlabel, NULL); - return make_link(lab, url, title, 0); + return make_link(lab, url, title); } else { // if we get here, we matched a label but didn't get further: subj->pos = endlabel; @@ -823,7 +837,7 @@ static node_inl* handle_left_bracket(subject* subj) ref = lookup_reference(subj->reference_map, &reflabel); if (ref != NULL) { // found lab = parse_chunk_inlines(&rawlabel, NULL); - result = make_link_from_reference(lab, ref); + result = make_ref_link(lab, ref); } else { subj->pos = endlabel; lab = parse_chunk_inlines(&rawlabel, subj->reference_map); -- cgit v1.2.3 From 7426f9ae60272a19bd4611b8579647118033a1e6 Mon Sep 17 00:00:00 2001 From: Vicent Martitag == BLOCK_FENCED_CODE) {
- strbuf *info = &b->attributes.fenced_code_data.info;
+ strbuf *info = &b->as.code.info;
if (strbuf_len(info) > 0) {
int first_tag = strbuf_strchr(info, ' ', 0);
diff --git a/src/print.c b/src/print.c
index 9240dac..36140a8 100644
--- a/src/print.c
+++ b/src/print.c
@@ -54,12 +54,11 @@ extern void print_blocks(node_block* b, int indent)
print_blocks(b->children, indent + 2);
break;
case BLOCK_LIST_ITEM:
- data = &(b->attributes.list_data);
printf("list_item\n");
print_blocks(b->children, indent + 2);
break;
case BLOCK_LIST:
- data = &(b->attributes.list_data);
+ data = &(b->as.list);
if (data->list_type == ordered) {
printf("list (type=ordered tight=%s start=%d delim=%s)\n",
(data->tight ? "true" : "false"),
@@ -73,11 +72,11 @@ extern void print_blocks(node_block* b, int indent)
print_blocks(b->children, indent + 2);
break;
case BLOCK_ATX_HEADER:
- printf("atx_header (level=%d)\n", b->attributes.header_level);
+ printf("atx_header (level=%d)\n", b->as.header.level);
print_inlines(b->inline_content, indent + 2);
break;
case BLOCK_SETEXT_HEADER:
- printf("setext_header (level=%d)\n", b->attributes.header_level);
+ printf("setext_header (level=%d)\n", b->as.header.level);
print_inlines(b->inline_content, indent + 2);
break;
case BLOCK_PARAGRAPH:
@@ -94,8 +93,8 @@ extern void print_blocks(node_block* b, int indent)
break;
case BLOCK_FENCED_CODE:
printf("fenced_code length=%d info=",
- b->attributes.fenced_code_data.fence_length);
- print_str(b->attributes.fenced_code_data.info.ptr, -1);
+ b->as.code.fence_length);
+ print_str(b->as.code.info.ptr, -1);
putchar(' ');
print_str(b->string_content.ptr, -1);
putchar('\n');
diff --git a/src/stmd.h b/src/stmd.h
index c80eeda..21a86b0 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -96,14 +96,20 @@ struct node_block {
struct node_block* top;
strbuf string_content;
node_inl* inline_content;
+
union {
- struct ListData list_data;
- struct FencedCodeData fenced_code_data;
- int header_level;
- reference** refmap;
- } attributes;
- struct node_block * next;
- struct node_block * prev;
+ struct ListData list;
+ struct FencedCodeData code;
+ struct {
+ int level;
+ } header;
+ struct {
+ reference** refmap;
+ } document;
+ } as;
+
+ struct node_block *next;
+ struct node_block *prev;
};
typedef struct node_block node_block;
--
cgit v1.2.3
From 2c06fa95fd3059a099bbe403beaf62f2e033f5b7 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 9 Sep 2014 03:42:05 +0200
Subject: Fix the class attribute for code fences
---
src/html/html.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/html/html.c b/src/html/html.c
index 129335f..74f6791 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -99,7 +99,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight)
case BLOCK_FENCED_CODE:
cr(html);
- strbuf_puts(html, "tag == BLOCK_FENCED_CODE) {
strbuf *info = &b->as.code.info;
@@ -109,14 +109,13 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight)
if (first_tag < 0)
first_tag = strbuf_len(info);
-
- strbuf_puts(html, " class=\"");
+ strbuf_puts(html, " class=\"lang-");
escape_html(html, info->ptr, first_tag);
strbuf_putc(html, '"');
}
}
- strbuf_puts(html, ">");
+ strbuf_putc(html, '>');
escape_html(html, b->string_content.ptr, b->string_content.size);
strbuf_puts(html, "
\n");
break;
--
cgit v1.2.3
From d21ef7b5db11075e038e60732682dfd8a5cf6a13 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 9 Sep 2014 03:42:46 +0200
Subject: Oops
---
src/html/html.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/html/html.c b/src/html/html.c
index 74f6791..b48b10b 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -109,7 +109,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight)
if (first_tag < 0)
first_tag = strbuf_len(info);
- strbuf_puts(html, " class=\"lang-");
+ strbuf_puts(html, " class=\"language-");
escape_html(html, info->ptr, first_tag);
strbuf_putc(html, '"');
}
--
cgit v1.2.3
From 9d86d2f32303ae0048f6a5daa552bacceb9b12ea Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 9 Sep 2014 04:00:36 +0200
Subject: Update the spec with better entity handling
---
Makefile | 4 ++--
spec.txt | 22 ++++++++++++----------
src/html/houdini_html_u.c | 2 +-
3 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/Makefile b/Makefile
index b5e487d..5d13272 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-g -pg -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -pg -O3 -Wall -Werror
+CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-g -O3 -Wall -Werror
SRCDIR=src
DATADIR=data
diff --git a/spec.txt b/spec.txt
index ebd6d98..112dccc 100644
--- a/spec.txt
+++ b/spec.txt
@@ -3762,20 +3762,20 @@ as the "unknown codepoint" character (`0xFFFD`)
[Hexadecimal entities](#hexadecimal-entities)
consist of `&#` + either `X` or `x` + a string of 1-8 hexadecimal digits
-+ `;`.
++ `;`. They will also be parsed and turned into their corresponding UTF8 values in the AST.
.
- " ആ ಫ
+" ആ ಫ
.
- " ആ ಫ
+" ആ ಫ
.
Here are some nonentities:
.
-  &x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
+  &x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
.
-  &x; &#; &#x; � &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
+  &x; &#; &#x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
.
Although HTML5 does accept some entities without a trailing semicolon
@@ -3808,7 +3808,7 @@ code blocks, including raw HTML, URLs, [link titles](#link-title), and
.
[foo](/föö "föö")
.
-
+
.
.
@@ -3816,7 +3816,7 @@ code blocks, including raw HTML, URLs, [link titles](#link-title), and
[foo]: /föö "föö"
.
-
+
.
.
@@ -3824,7 +3824,7 @@ code blocks, including raw HTML, URLs, [link titles](#link-title), and
foo
```
.
-foo
+foo
.
@@ -4817,12 +4817,14 @@ in Markdown:
.
-URL-escaping and entities should be left alone inside the destination:
+URL-escaping should be left alone inside the destination, as all URL-escaped characters
+are also valid URL characters. HTML entities in the destination will be parsed into their UTF8
+codepoints, as usual, and optionally URL-escaped when written as HTML.
.
[link](foo%20bä)
.
-
+
.
Note that, because titles can often be parsed as destinations,
diff --git a/src/html/houdini_html_u.c b/src/html/houdini_html_u.c
index 762f980..b8e2d8d 100644
--- a/src/html/houdini_html_u.c
+++ b/src/html/houdini_html_u.c
@@ -24,7 +24,7 @@ houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size)
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
}
- if (i < size && src[i] == ';') {
+ if (i < size && src[i] == ';' && codepoint) {
utf8proc_encode_char(codepoint, ob);
return i + 1;
}
--
cgit v1.2.3
From df58eee1f127f5c24631032792672bfe5120e6a3 Mon Sep 17 00:00:00 2001
From: Artyom Kazak
Date: Tue, 9 Sep 2014 21:43:23 +0400
Subject: `code`, not `pre`.
---
spec.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/spec.txt b/spec.txt
index 82ae0b6..c06f750 100644
--- a/spec.txt
+++ b/spec.txt
@@ -1058,7 +1058,7 @@ a blank line either before or after.
The content of a code fence is treated as literal text, not parsed
as inlines. The first word of the info string is typically used to
specify the language of the code sample, and rendered in the `class`
-attribute of the `pre` tag. However, this spec does not mandate any
+attribute of the `code` tag. However, this spec does not mandate any
particular treatment of the info string.
Here is a simple example with backticks:
--
cgit v1.2.3
From 58380f379f3fceb87c03fc8553bc100d0eccaa76 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 9 Sep 2014 14:00:01 -0700
Subject: Use generic 'change' to trigger parseAndRender.
Modifies #98.
---
js/index.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/js/index.html b/js/index.html
index 994b147..3c75b5c 100644
--- a/js/index.html
+++ b/js/index.html
@@ -52,7 +52,7 @@ $(document).ready(function() {
render();
}, 0); // ms delay
};
- $("#text").keyup(parseAndRender);
+ $("#text").change(parseAndRender);
$(".option").change(render);
});
--
cgit v1.2.3
From 81de8a753da76566a52a1c8dd80e4d3784e78567 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 9 Sep 2014 13:59:07 -0700
Subject: Dingus: use bind('keyup paste cut mouseup'...
Modifies #98. (Generic 'change' by itself wasn't enough.)
---
js/index.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/js/index.html b/js/index.html
index 3c75b5c..05fc216 100644
--- a/js/index.html
+++ b/js/index.html
@@ -52,7 +52,7 @@ $(document).ready(function() {
render();
}, 0); // ms delay
};
- $("#text").change(parseAndRender);
+ $("#text").bind('keyup paste cut mouseup', parseAndRender);
$(".option").change(render);
});
--
cgit v1.2.3
From 94a79a605f3e76a43f1f87a5044f6761b99e5ca5 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 18:33:27 +0200
Subject: Cleanup reference implementation
---
Makefile | 2 +-
src/blocks.c | 16 ++---
src/buffer.c | 43 ++++++++++++++
src/buffer.h | 2 +
src/inlines.c | 176 +++++++------------------------------------------------
src/references.c | 109 ++++++++++++++++++++++++++++++++++
src/references.h | 27 +++++++++
src/stmd.h | 26 +++-----
src/utf8.c | 10 ++--
src/utf8.h | 5 +-
10 files changed, 225 insertions(+), 191 deletions(-)
create mode 100644 src/references.c
create mode 100644 src/references.h
diff --git a/Makefile b/Makefile
index 5d13272..11e2141 100644
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,7 @@ benchjs:
node js/bench.js ${BENCHINP}
HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
-STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
+STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c
$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
diff --git a/src/blocks.c b/src/blocks.c
index 72b2dc2..30a8284 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,7 +8,6 @@
#include "utf8.h"
#include "html/houdini.h"
#include "scanners.h"
-#include "uthash.h"
#define peek_at(i, n) (i)->data[n]
@@ -36,12 +35,7 @@ static node_block* make_block(int tag, int start_line, int start_column)
extern node_block* make_document()
{
node_block *e = make_block(BLOCK_DOCUMENT, 1, 1);
- reference *map = NULL;
- reference ** refmap;
-
- refmap = (reference**) malloc(sizeof(reference*));
- *refmap = map;
- e->as.document.refmap = refmap;
+ e->as.document.refmap = reference_map_new();
e->top = e;
return e;
@@ -164,7 +158,7 @@ static void finalize(node_block* b, int line_number)
case BLOCK_PARAGRAPH:
pos = 0;
while (strbuf_at(&b->string_content, 0) == '[' &&
- (pos = parse_reference(&b->string_content, b->top->as.document.refmap))) {
+ (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
strbuf_drop(&b->string_content, pos);
}
@@ -192,7 +186,7 @@ static void finalize(node_block* b, int line_number)
strbuf_drop(&b->string_content, firstlinelen + 1);
strbuf_trim(&b->as.code.info);
- unescape_buffer(&b->as.code.info);
+ strbuf_unescape(&b->as.code.info);
break;
case BLOCK_LIST: // determine tight/loose status
@@ -268,7 +262,7 @@ extern void free_blocks(node_block* e)
if (e->tag == BLOCK_FENCED_CODE) {
strbuf_free(&e->as.code.info);
} else if (e->tag == BLOCK_DOCUMENT) {
- free_reference_map(e->as.document.refmap);
+ reference_map_free(e->as.document.refmap);
}
free_blocks(e->children);
free(e);
@@ -278,7 +272,7 @@ extern void free_blocks(node_block* e)
// Walk through node_block and all children, recursively, parsing
// string content into inline content where appropriate.
-void process_inlines(node_block* cur, reference** refmap)
+void process_inlines(node_block* cur, reference_map *refmap)
{
switch (cur->tag) {
case BLOCK_PARAGRAPH:
diff --git a/src/buffer.c b/src/buffer.c
index 90c2186..cdf8ca0 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -308,3 +308,46 @@ void strbuf_trim(strbuf *buf)
buf->ptr[buf->size] = '\0';
}
+
+// Destructively modify the buffer in place, collapsing every run of consecutive
+// space and newline characters into a single space. Other bytes (tabs included) are copied unchanged.
+void strbuf_normalize_whitespace(strbuf *s)
+{
+ bool last_char_was_space = false; // true while we are inside a run of ' ' / '\n'
+ int r, w; // r: read index, w: write index; w never gets ahead of r
+
+ for (r = 0, w = 0; r < s->size; ++r) {
+ switch (s->ptr[r]) {
+ case ' ':
+ case '\n':
+ if (last_char_was_space) // already emitted the single space for this run
+ break;
+
+ s->ptr[w++] = ' '; // first char of a run: emit one space (a newline becomes a space too)
+ last_char_was_space = true;
+ break;
+
+ default:
+ s->ptr[w++] = s->ptr[r]; // ordinary byte: compact it down to the write position
+ last_char_was_space = false;
+ }
+ }
+
+ strbuf_truncate(s, w); // shrink the buffer to the number of bytes actually kept
+}
+
+// Destructively unescape a buffer in place: drop each backslash that precedes a punctuation char (per ispunct) and keep that char literally.
+extern void strbuf_unescape(strbuf *buf)
+{
+	int r, w; // r: read index, w: write index; w never gets ahead of r
+
+	for (r = 0, w = 0; r < buf->size; ++r) {
+		if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1])) // NOTE(review): at the last index this reads ptr[size]; presumably the NUL terminator - confirm
+			r++; // step onto the escaped char so it is copied as-is and never re-read as an escape: \\* must yield \*, not *
+
+		buf->ptr[w++] = buf->ptr[r];
+	}
+
+	strbuf_truncate(buf, w); // shrink the buffer to the number of bytes actually kept
+}
+
diff --git a/src/buffer.h b/src/buffer.h
index 6f45cbb..1bc1eee 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -108,5 +108,7 @@ int strbuf_strrchr(const strbuf *buf, int c, int pos);
void strbuf_drop(strbuf *buf, int n);
void strbuf_truncate(strbuf *buf, int len);
void strbuf_trim(strbuf *buf);
+void strbuf_normalize_whitespace(strbuf *s);
+void strbuf_unescape(strbuf *s);
#endif
diff --git a/src/inlines.c b/src/inlines.c
index aa0e13e..3040f09 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -7,110 +7,23 @@
#include "stmd.h"
#include "html/houdini.h"
#include "utf8.h"
-#include "uthash.h"
#include "scanners.h"
typedef struct Subject {
chunk input;
int pos;
- int label_nestlevel;
- reference** reference_map;
+ int label_nestlevel;
+ reference_map *refmap;
} subject;
-reference* lookup_reference(reference** refmap, chunk *label);
-reference* make_reference(chunk *label, chunk *url, chunk *title);
-
-static unsigned char *clean_url(chunk *url);
-static unsigned char *clean_title(chunk *title);
-static unsigned char *clean_autolink(chunk *url, int is_email);
-
-inline static void chunk_free(chunk *c);
-inline static void chunk_trim(chunk *c);
-
-inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(strbuf *buf);
-inline static chunk chunk_dup(const chunk *ch, int pos, int len);
-
-static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, node_inl ** last);
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
static int subject_find_special_char(subject *subj);
-static void normalize_whitespace(strbuf *s);
-
-extern void free_reference(reference *ref) {
- free(ref->label);
- free(ref->url);
- free(ref->title);
- free(ref);
-}
-
-extern void free_reference_map(reference **refmap) {
- /* free the hash table contents */
- reference *s;
- reference *tmp;
- if (refmap != NULL) {
- HASH_ITER(hh, *refmap, s, tmp) {
- HASH_DEL(*refmap, s);
- free_reference(s);
- }
- free(refmap);
- }
-}
-
-// normalize reference: collapse internal whitespace to single space,
-// remove leading/trailing whitespace, case fold
-static unsigned char *normalize_reference(chunk *ref)
-{
- strbuf normalized = GH_BUF_INIT;
-
- utf8proc_case_fold(&normalized, ref->data, ref->len);
- strbuf_trim(&normalized);
- normalize_whitespace(&normalized);
-
- return strbuf_detach(&normalized);
-}
-
-// Returns reference if refmap contains a reference with matching
-// label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, chunk *label)
-{
- reference *ref = NULL;
- unsigned char *norm = normalize_reference(label);
- if (refmap != NULL) {
- HASH_FIND_STR(*refmap, (char*)norm, ref);
- }
- free(norm);
- return ref;
-}
-
-extern reference* make_reference(chunk *label, chunk *url, chunk *title)
-{
- reference *ref;
- ref = malloc(sizeof(reference));
- ref->label = normalize_reference(label);
- ref->url = clean_url(url);
- ref->title = clean_title(title);
- return ref;
-}
-
-extern void add_reference(reference** refmap, reference* ref)
-{
- reference * t = NULL;
- const char *label = (const char *)ref->label;
-
- HASH_FIND(hh, *refmap, label, strlen(label), t);
-
- if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
- } else {
- free_reference(ref); // we free this now since it won't be in the refmap
- }
-}
-
static unsigned char *bufdup(const unsigned char *buf)
{
unsigned char *new = NULL;
@@ -236,26 +149,26 @@ inline static node_inl* append_inlines(node_inl* a, node_inl* b)
return a;
}
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
{
e->input.data = buffer->ptr;
e->input.len = buffer->size;
e->input.alloc = 0;
e->pos = 0;
e->label_nestlevel = 0;
- e->reference_map = refmap;
+ e->refmap = refmap;
chunk_rtrim(&e->input);
}
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
{
e->input.data = chunk->data;
e->input.len = chunk->len;
e->input.alloc = 0;
e->pos = 0;
e->label_nestlevel = 0;
- e->reference_map = refmap;
+ e->refmap = refmap;
chunk_rtrim(&e->input);
}
@@ -325,33 +238,6 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
return (subj->pos);
}
-// Destructively modify string, collapsing consecutive
-// space and newline characters into a single space.
-static void normalize_whitespace(strbuf *s)
-{
- bool last_char_was_space = false;
- int r, w;
-
- for (r = 0, w = 0; r < s->size; ++r) {
- switch (s->ptr[r]) {
- case ' ':
- case '\n':
- if (last_char_was_space)
- break;
-
- s->ptr[w++] = ' ';
- last_char_was_space = true;
- break;
-
- default:
- s->ptr[w++] = s->ptr[r];
- last_char_was_space = false;
- }
- }
-
- strbuf_truncate(s, w);
-}
-
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
static node_inl* handle_backticks(subject *subj)
@@ -368,7 +254,7 @@ static node_inl* handle_backticks(subject *subj)
strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
strbuf_trim(&buf);
- normalize_whitespace(&buf);
+ strbuf_normalize_whitespace(&buf);
return make_code(chunk_buf_detach(&buf));
}
@@ -575,24 +461,9 @@ static node_inl *make_str_with_entities(chunk *content)
}
}
-// Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(strbuf *buf)
-{
- int r, w;
-
- for (r = 0, w = 0; r < buf->size; ++r) {
- if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
- continue;
-
- buf->ptr[w++] = buf->ptr[r];
- }
-
- strbuf_truncate(buf, w);
-}
-
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url)
+unsigned char *clean_url(chunk *url)
{
strbuf buf = GH_BUF_INIT;
@@ -607,11 +478,11 @@ static unsigned char *clean_url(chunk *url)
houdini_unescape_html_f(&buf, url->data, url->len);
}
- unescape_buffer(&buf);
+ strbuf_unescape(&buf);
return strbuf_detach(&buf);
}
-static unsigned char *clean_autolink(chunk *url, int is_email)
+unsigned char *clean_autolink(chunk *url, int is_email)
{
strbuf buf = GH_BUF_INIT;
@@ -628,7 +499,7 @@ static unsigned char *clean_autolink(chunk *url, int is_email)
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static unsigned char *clean_title(chunk *title)
+unsigned char *clean_title(chunk *title)
{
strbuf buf = GH_BUF_INIT;
unsigned char first, last;
@@ -648,7 +519,7 @@ static unsigned char *clean_title(chunk *title)
houdini_unescape_html_f(&buf, title->data, title->len);
}
- unescape_buffer(&buf);
+ strbuf_unescape(&buf);
return strbuf_detach(&buf);
}
@@ -810,7 +681,7 @@ static node_inl* handle_left_bracket(subject* subj)
} else {
// if we get here, we matched a label but didn't get further:
subj->pos = endlabel;
- lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
append_inlines(lab,
make_str(chunk_literal("]"))));
@@ -834,13 +705,13 @@ static node_inl* handle_left_bracket(subject* subj)
}
// lookup rawlabel in subject->reference_map:
- ref = lookup_reference(subj->reference_map, &reflabel);
+ ref = reference_lookup(subj->refmap, &reflabel);
if (ref != NULL) { // found
lab = parse_chunk_inlines(&rawlabel, NULL);
result = make_ref_link(lab, ref);
} else {
subj->pos = endlabel;
- lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
append_inlines(lab, make_str(chunk_literal("]"))));
}
@@ -887,7 +758,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
return result;
}
-node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
{
subject subj;
subject_from_chunk(&subj, chunk, refmap);
@@ -987,7 +858,7 @@ static int parse_inline(subject* subj, node_inl ** last)
return 1;
}
-extern node_inl* parse_inlines(strbuf *input, reference** refmap)
+extern node_inl* parse_inlines(strbuf *input, reference_map *refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
@@ -1009,7 +880,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(strbuf *input, reference** refmap)
+int parse_reference_inline(strbuf *input, reference_map *refmap)
{
subject subj;
@@ -1019,7 +890,6 @@ extern int parse_reference(strbuf *input, reference** refmap)
int matchlen = 0;
int beforetitle;
- reference *new = NULL;
subject_from_buf(&subj, input, NULL);
@@ -1065,9 +935,7 @@ extern int parse_reference(strbuf *input, reference** refmap)
return 0;
}
// insert reference into refmap
- new = make_reference(&lab, &url, &title);
- add_reference(refmap, new);
-
+ reference_create(refmap, &lab, &url, &title);
return subj.pos;
}
diff --git a/src/references.c b/src/references.c
new file mode 100644
index 0000000..ff64b00
--- /dev/null
+++ b/src/references.c
@@ -0,0 +1,154 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "stmd.h"
+#include "utf8.h"
+#include "references.h"
+
+// Hash a NUL-terminated label. Each step is equivalent to
+// hash = hash * 65599 + byte, with unsigned wrap-around.
+static unsigned int
+refhash(const unsigned char *link_ref)
+{
+    unsigned int hash = 0;
+
+    while (*link_ref)
+        hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash;
+
+    return hash;
+}
+
+// Normalize a reference label: case fold, strip leading/trailing
+// whitespace and collapse internal whitespace runs.
+// The returned buffer is detached from the strbuf; the caller owns it
+// and must free() it.
+static unsigned char *normalize_reference(chunk *ref)
+{
+    strbuf normalized = GH_BUF_INIT;
+
+    utf8proc_case_fold(&normalized, ref->data, ref->len);
+    strbuf_trim(&normalized);
+    strbuf_normalize_whitespace(&normalized);
+
+    return strbuf_detach(&normalized);
+}
+
+// Release a reference and the strings it owns.
+static void reference_free(reference *ref)
+{
+    free(ref->label);
+    free(ref->url);
+    free(ref->title);
+    free(ref);
+}
+
+// Link ref into its bucket unless the map already holds a definition
+// with the same normalized label: the first definition of a label takes
+// precedence, so a later duplicate is freed instead of shadowing it.
+// Returns the entry that is in the map afterwards.
+static reference *add_reference(reference_map *map, reference *ref)
+{
+    reference **bucket = &map->table[ref->hash % REFMAP_SIZE];
+    reference *cur;
+
+    for (cur = *bucket; cur != NULL; cur = cur->next) {
+        if (cur->hash == ref->hash &&
+            !strcmp((char *)cur->label, (char *)ref->label)) {
+            reference_free(ref);
+            return cur;
+        }
+    }
+
+    ref->next = *bucket;
+    *bucket = ref;
+    return ref;
+}
+
+// Create a reference from label/url/title and register it in map.
+// Returns the reference stored in the map for this label (the existing
+// one if the label was already defined), or NULL if map is NULL or an
+// allocation fails. The map owns the returned reference.
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
+{
+    reference *ref;
+
+    if (map == NULL)
+        return NULL;
+
+    ref = calloc(1, sizeof(*ref));
+    if (ref == NULL)
+        return NULL;
+
+    ref->label = normalize_reference(label);
+    if (ref->label == NULL) {
+        free(ref);
+        return NULL;
+    }
+
+    ref->hash = refhash(ref->label);
+    ref->url = clean_url(url);
+    ref->title = clean_title(title);
+
+    return add_reference(map, ref);
+}
+
+// Returns reference if refmap contains a reference with matching
+// label, otherwise NULL.
+reference* reference_lookup(reference_map *map, chunk *label)
+{
+    reference *ref;
+    unsigned char *norm;
+    unsigned int hash;
+
+    if (map == NULL)
+        return NULL;
+
+    norm = normalize_reference(label);
+    if (norm == NULL)
+        return NULL;
+
+    hash = refhash(norm);
+
+    for (ref = map->table[hash % REFMAP_SIZE]; ref != NULL; ref = ref->next) {
+        if (ref->hash == hash &&
+            !strcmp((char *)ref->label, (char *)norm))
+            break;
+    }
+
+    free(norm);
+    return ref;
+}
+
+// Free every reference in the map, then the map itself.
+// Passing NULL is a no-op.
+void reference_map_free(reference_map *map)
+{
+    unsigned int i;
+
+    if (map == NULL)
+        return;
+
+    for (i = 0; i < REFMAP_SIZE; ++i) {
+        reference *ref = map->table[i];
+        reference *next;
+
+        while (ref) {
+            next = ref->next;
+            reference_free(ref);
+            ref = next;
+        }
+    }
+
+    // table is an array embedded in *map, not a separate allocation, so
+    // only the map is freed; calling free(map->table) as well would free
+    // the same block twice.
+    free(map);
+}
+
+// Allocate an empty (zero-filled) reference map.
+// Returns NULL if the allocation fails.
+reference_map *reference_map_new(void)
+{
+    return calloc(1, sizeof(reference_map));
+}
+
diff --git a/src/references.h b/src/references.h
new file mode 100644
index 0000000..78fffe7
--- /dev/null
+++ b/src/references.h
@@ -0,0 +1,37 @@
+#ifndef _REFERENCES_H_
+#define _REFERENCES_H_
+
+#include "chunk.h"
+
+// Number of buckets in a reference_map's hash table.
+#define REFMAP_SIZE 16
+
+// A link reference definition; entries that share a bucket are chained
+// through next.
+struct reference {
+    struct reference *next;
+    unsigned char *label;   // normalized label, used as the lookup key
+    unsigned char *url;
+    unsigned char *title;
+    unsigned int hash;      // hash of label
+};
+
+typedef struct reference reference;
+
+// Fixed-size chained hash table of references, indexed by hash % REFMAP_SIZE.
+struct reference_map {
+    reference *table[REFMAP_SIZE];
+};
+
+typedef struct reference_map reference_map;
+
+// Allocate an empty map.
+reference_map *reference_map_new(void);
+// Free a map and every reference stored in it.
+void reference_map_free(reference_map *map);
+// Return the reference whose label matches after normalization, or NULL.
+reference *reference_lookup(reference_map *map, chunk *label);
+// Create a reference and register it in map.
+reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title);
+
+#endif
diff --git a/src/stmd.h b/src/stmd.h
index 21a86b0..4e21e6c 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -5,7 +5,7 @@
#include
#include "buffer.h"
#include "chunk.h"
-#include "uthash.h"
+#include "references.h"
#define VERSION "0.1"
#define CODE_INDENT 4
@@ -36,17 +36,7 @@ struct node_inl {
typedef struct node_inl node_inl;
-struct reference {
- unsigned char *label;
- unsigned char *url;
- unsigned char *title;
- UT_hash_handle hh; // used by uthash
-};
-
-typedef struct reference reference;
-
// Types for blocks
-
struct ListData {
enum {
bullet,
@@ -104,7 +94,7 @@ struct node_block {
int level;
} header;
struct {
- reference** refmap;
+ reference_map *refmap;
} document;
} as;
@@ -114,14 +104,10 @@ struct node_block {
typedef struct node_block node_block;
-node_inl* parse_inlines(strbuf *input, reference** refmap);
+node_inl* parse_inlines(strbuf *input, reference_map *refmap);
void free_inlines(node_inl* e);
-int parse_reference(strbuf *input, reference** refmap);
-void free_reference(reference *ref);
-void free_reference_map(reference **refmap);
-
-void add_reference(reference** refmap, reference* ref);
+int parse_reference_inline(strbuf *input, reference_map *refmap);
void unescape_buffer(strbuf *buf);
extern node_block* make_document();
@@ -138,4 +124,8 @@ void print_blocks(node_block* blk, int indent);
void blocks_to_html(strbuf *html, node_block *b, bool tight);
void inlines_to_html(strbuf *html, node_inl *b);
+unsigned char *clean_url(chunk *url);
+unsigned char *clean_autolink(chunk *url, int is_email);
+unsigned char *clean_title(chunk *title);
+
#endif
diff --git a/src/utf8.c b/src/utf8.c
index 12d7ba5..c65aec6 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -25,7 +25,7 @@ static const int8_t utf8proc_utf8class[256] = {
static void encode_unknown(strbuf *buf)
{
- static const unsigned char repl[] = {239, 191, 189};
+ static const uint8_t repl[] = {239, 191, 189};
strbuf_put(buf, repl, 3);
}
@@ -52,9 +52,9 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
return length;
}
-void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size)
+void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
{
- static const unsigned char whitespace[] = " ";
+ static const uint8_t whitespace[] = " ";
size_t i = 0, tab = 0;
@@ -132,7 +132,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
void utf8proc_encode_char(int32_t uc, strbuf *buf)
{
- unsigned char dst[4];
+ uint8_t dst[4];
int len = 0;
assert(uc >= 0);
@@ -169,7 +169,7 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
strbuf_put(buf, dst, len);
}
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
{
int32_t c;
diff --git a/src/utf8.h b/src/utf8.h
index 1e4e556..9506b75 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -1,12 +1,13 @@
#ifndef _H_STMD_UTF8_
#define _H_STMD_UTF8_
+#include
#include "buffer.h"
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len);
void utf8proc_encode_char(int32_t uc, strbuf *buf);
ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst);
ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len);
-void utf8proc_detab(strbuf *dest, const unsigned char *line, size_t size);
+void utf8proc_detab(strbuf *dest, const uint8_t *line, size_t size);
#endif
--
cgit v1.2.3
From 0a0a87333083058e945a54fcf196a6f119302d6b Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 18:33:51 +0200
Subject: Remove old hash table
---
src/uthash.h | 948 -----------------------------------------------------------
1 file changed, 948 deletions(-)
delete mode 100644 src/uthash.h
diff --git a/src/uthash.h b/src/uthash.h
deleted file mode 100644
index b9bc7e9..0000000
--- a/src/uthash.h
+++ /dev/null
@@ -1,948 +0,0 @@
-/*
-Copyright (c) 2003-2013, Troy D. Hanson http://troydhanson.github.com/uthash/
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef UTHASH_H
-#define UTHASH_H
-
-#include /* memcmp,strlen */
-#include /* ptrdiff_t */
-#include /* exit() */
-
-/* These macros use decltype or the earlier __typeof GNU extension.
- As decltype is only available in newer compilers (VS2010 or gcc 4.3+
- when compiling c++ source) this code uses whatever method is needed
- or, for VS2008 where neither is available, uses casting workarounds. */
-#ifdef _MSC_VER /* MS compiler */
-#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */
-#define DECLTYPE(x) (decltype(x))
-#else /* VS2008 or older (or VS2010 in C mode) */
-#define NO_DECLTYPE
-#define DECLTYPE(x)
-#endif
-#else /* GNU, Sun and other compilers */
-#define DECLTYPE(x) (__typeof(x))
-#endif
-
-#ifdef NO_DECLTYPE
-#define DECLTYPE_ASSIGN(dst,src) \
-do { \
- char **_da_dst = (char**)(&(dst)); \
- *_da_dst = (char*)(src); \
-} while(0)
-#else
-#define DECLTYPE_ASSIGN(dst,src) \
-do { \
- (dst) = DECLTYPE(dst)(src); \
-} while(0)
-#endif
-
-/* a number of the hash function use uint32_t which isn't defined on win32 */
-#ifdef _MSC_VER
-typedef unsigned int uint32_t;
-typedef unsigned char uint8_t;
-#else
-#include /* uint32_t */
-#endif
-
-#define UTHASH_VERSION 1.9.8
-
-#ifndef uthash_fatal
-#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */
-#endif
-#ifndef uthash_malloc
-#define uthash_malloc(sz) malloc(sz) /* malloc fcn */
-#endif
-#ifndef uthash_free
-#define uthash_free(ptr,sz) free(ptr) /* free fcn */
-#endif
-
-#ifndef uthash_noexpand_fyi
-#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */
-#endif
-#ifndef uthash_expand_fyi
-#define uthash_expand_fyi(tbl) /* can be defined to log expands */
-#endif
-
-/* initial number of buckets */
-#define HASH_INITIAL_NUM_BUCKETS 32 /* initial number of buckets */
-#define HASH_INITIAL_NUM_BUCKETS_LOG2 5 /* lg2 of initial number of buckets */
-#define HASH_BKT_CAPACITY_THRESH 10 /* expand when bucket count reaches */
-
-/* calculate the element whose hash handle address is hhe */
-#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
-
-#define HASH_FIND(hh,head,keyptr,keylen,out) \
-do { \
- unsigned _hf_bkt,_hf_hashv; \
- out=NULL; \
- if (head) { \
- HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \
- if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \
- HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \
- keyptr,keylen,out); \
- } \
- } \
-} while (0)
-
-#ifdef HASH_BLOOM
-#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM)
-#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0)
-#define HASH_BLOOM_MAKE(tbl) \
-do { \
- (tbl)->bloom_nbits = HASH_BLOOM; \
- (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \
- if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
- memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
- (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
-} while (0)
-
-#define HASH_BLOOM_FREE(tbl) \
-do { \
- uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
-} while (0)
-
-#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
-#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
-
-#define HASH_BLOOM_ADD(tbl,hashv) \
- HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
-
-#define HASH_BLOOM_TEST(tbl,hashv) \
- HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
-
-#else
-#define HASH_BLOOM_MAKE(tbl)
-#define HASH_BLOOM_FREE(tbl)
-#define HASH_BLOOM_ADD(tbl,hashv)
-#define HASH_BLOOM_TEST(tbl,hashv) (1)
-#define HASH_BLOOM_BYTELEN 0
-#endif
-
-#define HASH_MAKE_TABLE(hh,head) \
-do { \
- (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \
- sizeof(UT_hash_table)); \
- if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \
- memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \
- (head)->hh.tbl->tail = &((head)->hh); \
- (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \
- (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \
- (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \
- (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \
- HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
- if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \
- memset((head)->hh.tbl->buckets, 0, \
- HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \
- HASH_BLOOM_MAKE((head)->hh.tbl); \
- (head)->hh.tbl->signature = HASH_SIGNATURE; \
-} while(0)
-
-#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
- HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
-
-#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \
-do { \
- replaced=NULL; \
- HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \
- if (replaced!=NULL) { \
- HASH_DELETE(hh,head,replaced); \
- }; \
- HASH_ADD(hh,head,fieldname,keylen_in,add); \
-} while(0)
-
-#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
-do { \
- unsigned _ha_bkt; \
- (add)->hh.next = NULL; \
- (add)->hh.key = (char*)(keyptr); \
- (add)->hh.keylen = (unsigned)(keylen_in); \
- if (!(head)) { \
- head = (add); \
- (head)->hh.prev = NULL; \
- HASH_MAKE_TABLE(hh,head); \
- } else { \
- (head)->hh.tbl->tail->next = (add); \
- (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \
- (head)->hh.tbl->tail = &((add)->hh); \
- } \
- (head)->hh.tbl->num_items++; \
- (add)->hh.tbl = (head)->hh.tbl; \
- HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \
- (add)->hh.hashv, _ha_bkt); \
- HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \
- HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \
- HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \
- HASH_FSCK(hh,head); \
-} while(0)
-
-#define HASH_TO_BKT( hashv, num_bkts, bkt ) \
-do { \
- bkt = ((hashv) & ((num_bkts) - 1)); \
-} while(0)
-
-/* delete "delptr" from the hash table.
- * "the usual" patch-up process for the app-order doubly-linked-list.
- * The use of _hd_hh_del below deserves special explanation.
- * These used to be expressed using (delptr) but that led to a bug
- * if someone used the same symbol for the head and deletee, like
- * HASH_DELETE(hh,users,users);
- * We want that to work, but by changing the head (users) below
- * we were forfeiting our ability to further refer to the deletee (users)
- * in the patch-up process. Solution: use scratch space to
- * copy the deletee pointer, then the latter references are via that
- * scratch pointer rather than through the repointed (users) symbol.
- */
-#define HASH_DELETE(hh,head,delptr) \
-do { \
- unsigned _hd_bkt; \
- struct UT_hash_handle *_hd_hh_del; \
- if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
- uthash_free((head)->hh.tbl->buckets, \
- (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
- HASH_BLOOM_FREE((head)->hh.tbl); \
- uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
- head = NULL; \
- } else { \
- _hd_hh_del = &((delptr)->hh); \
- if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
- (head)->hh.tbl->tail = \
- (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
- (head)->hh.tbl->hho); \
- } \
- if ((delptr)->hh.prev) { \
- ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
- (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
- } else { \
- DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
- } \
- if (_hd_hh_del->next) { \
- ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
- (head)->hh.tbl->hho))->prev = \
- _hd_hh_del->prev; \
- } \
- HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
- HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
- (head)->hh.tbl->num_items--; \
- } \
- HASH_FSCK(hh,head); \
-} while (0)
-
-
-/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */
-#define HASH_FIND_STR(head,findstr,out) \
- HASH_FIND(hh,head,findstr,strlen(findstr),out)
-#define HASH_ADD_STR(head,strfield,add) \
- HASH_ADD(hh,head,strfield,strlen(add->strfield),add)
-#define HASH_REPLACE_STR(head,strfield,add,replaced) \
- HASH_REPLACE(hh,head,strfield,strlen(add->strfield),add,replaced)
-#define HASH_FIND_INT(head,findint,out) \
- HASH_FIND(hh,head,findint,sizeof(int),out)
-#define HASH_ADD_INT(head,intfield,add) \
- HASH_ADD(hh,head,intfield,sizeof(int),add)
-#define HASH_REPLACE_INT(head,intfield,add,replaced) \
- HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
-#define HASH_FIND_PTR(head,findptr,out) \
- HASH_FIND(hh,head,findptr,sizeof(void *),out)
-#define HASH_ADD_PTR(head,ptrfield,add) \
- HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
-#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \
- HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
-#define HASH_DEL(head,delptr) \
- HASH_DELETE(hh,head,delptr)
-
-/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
- * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
- */
-#ifdef HASH_DEBUG
-#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
-#define HASH_FSCK(hh,head) \
-do { \
- unsigned _bkt_i; \
- unsigned _count, _bkt_count; \
- char *_prev; \
- struct UT_hash_handle *_thh; \
- if (head) { \
- _count = 0; \
- for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
- _bkt_count = 0; \
- _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
- _prev = NULL; \
- while (_thh) { \
- if (_prev != (char*)(_thh->hh_prev)) { \
- HASH_OOPS("invalid hh_prev %p, actual %p\n", \
- _thh->hh_prev, _prev ); \
- } \
- _bkt_count++; \
- _prev = (char*)(_thh); \
- _thh = _thh->hh_next; \
- } \
- _count += _bkt_count; \
- if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
- HASH_OOPS("invalid bucket count %d, actual %d\n", \
- (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
- } \
- } \
- if (_count != (head)->hh.tbl->num_items) { \
- HASH_OOPS("invalid hh item count %d, actual %d\n", \
- (head)->hh.tbl->num_items, _count ); \
- } \
- /* traverse hh in app order; check next/prev integrity, count */ \
- _count = 0; \
- _prev = NULL; \
- _thh = &(head)->hh; \
- while (_thh) { \
- _count++; \
- if (_prev !=(char*)(_thh->prev)) { \
- HASH_OOPS("invalid prev %p, actual %p\n", \
- _thh->prev, _prev ); \
- } \
- _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
- _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
- (head)->hh.tbl->hho) : NULL ); \
- } \
- if (_count != (head)->hh.tbl->num_items) { \
- HASH_OOPS("invalid app item count %d, actual %d\n", \
- (head)->hh.tbl->num_items, _count ); \
- } \
- } \
-} while (0)
-#else
-#define HASH_FSCK(hh,head)
-#endif
-
-/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
- * the descriptor to which this macro is defined for tuning the hash function.
- * The app can #include to get the prototype for write(2). */
-#ifdef HASH_EMIT_KEYS
-#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
-do { \
- unsigned _klen = fieldlen; \
- write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
- write(HASH_EMIT_KEYS, keyptr, fieldlen); \
-} while (0)
-#else
-#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
-#endif
-
-/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
-#ifdef HASH_FUNCTION
-#define HASH_FCN HASH_FUNCTION
-#else
-#define HASH_FCN HASH_JEN
-#endif
-
-/* The Bernstein hash function, used in Perl prior to v5.6 */
-#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
-do { \
- unsigned _hb_keylen=keylen; \
- char *_hb_key=(char*)(key); \
- (hashv) = 0; \
- while (_hb_keylen--) { (hashv) = ((hashv) * 33) + *_hb_key++; } \
- bkt = (hashv) & (num_bkts-1); \
-} while (0)
-
-
-/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
- * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
-#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
-do { \
- unsigned _sx_i; \
- char *_hs_key=(char*)(key); \
- hashv = 0; \
- for(_sx_i=0; _sx_i < keylen; _sx_i++) \
- hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
- bkt = hashv & (num_bkts-1); \
-} while (0)
-
-#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
-do { \
- unsigned _fn_i; \
- char *_hf_key=(char*)(key); \
- hashv = 2166136261UL; \
- for(_fn_i=0; _fn_i < keylen; _fn_i++) \
- hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
- bkt = hashv & (num_bkts-1); \
-} while(0)
-
-#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
-do { \
- unsigned _ho_i; \
- char *_ho_key=(char*)(key); \
- hashv = 0; \
- for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
- hashv += _ho_key[_ho_i]; \
- hashv += (hashv << 10); \
- hashv ^= (hashv >> 6); \
- } \
- hashv += (hashv << 3); \
- hashv ^= (hashv >> 11); \
- hashv += (hashv << 15); \
- bkt = hashv & (num_bkts-1); \
-} while(0)
-
-#define HASH_JEN_MIX(a,b,c) \
-do { \
- a -= b; a -= c; a ^= ( c >> 13 ); \
- b -= c; b -= a; b ^= ( a << 8 ); \
- c -= a; c -= b; c ^= ( b >> 13 ); \
- a -= b; a -= c; a ^= ( c >> 12 ); \
- b -= c; b -= a; b ^= ( a << 16 ); \
- c -= a; c -= b; c ^= ( b >> 5 ); \
- a -= b; a -= c; a ^= ( c >> 3 ); \
- b -= c; b -= a; b ^= ( a << 10 ); \
- c -= a; c -= b; c ^= ( b >> 15 ); \
-} while (0)
-
-#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
-do { \
- unsigned _hj_i,_hj_j,_hj_k; \
- unsigned char *_hj_key=(unsigned char*)(key); \
- hashv = 0xfeedbeef; \
- _hj_i = _hj_j = 0x9e3779b9; \
- _hj_k = (unsigned)(keylen); \
- while (_hj_k >= 12) { \
- _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
- + ( (unsigned)_hj_key[2] << 16 ) \
- + ( (unsigned)_hj_key[3] << 24 ) ); \
- _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
- + ( (unsigned)_hj_key[6] << 16 ) \
- + ( (unsigned)_hj_key[7] << 24 ) ); \
- hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
- + ( (unsigned)_hj_key[10] << 16 ) \
- + ( (unsigned)_hj_key[11] << 24 ) ); \
- \
- HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
- \
- _hj_key += 12; \
- _hj_k -= 12; \
- } \
- hashv += keylen; \
- switch ( _hj_k ) { \
- case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \
- case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \
- case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \
- case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \
- case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \
- case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \
- case 5: _hj_j += _hj_key[4]; \
- case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \
- case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \
- case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \
- case 1: _hj_i += _hj_key[0]; \
- } \
- HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
- bkt = hashv & (num_bkts-1); \
-} while(0)
-
-/* The Paul Hsieh hash function */
-#undef get16bits
-#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
- || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
-#define get16bits(d) (*((const uint16_t *) (d)))
-#endif
-
-#if !defined (get16bits)
-#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
- +(uint32_t)(((const uint8_t *)(d))[0]) )
-#endif
-#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \
-do { \
- unsigned char *_sfh_key=(unsigned char*)(key); \
- uint32_t _sfh_tmp, _sfh_len = keylen; \
- \
- int _sfh_rem = _sfh_len & 3; \
- _sfh_len >>= 2; \
- hashv = 0xcafebabe; \
- \
- /* Main loop */ \
- for (;_sfh_len > 0; _sfh_len--) { \
- hashv += get16bits (_sfh_key); \
- _sfh_tmp = (uint32_t)(get16bits (_sfh_key+2)) << 11 ^ hashv; \
- hashv = (hashv << 16) ^ _sfh_tmp; \
- _sfh_key += 2*sizeof (uint16_t); \
- hashv += hashv >> 11; \
- } \
- \
- /* Handle end cases */ \
- switch (_sfh_rem) { \
- case 3: hashv += get16bits (_sfh_key); \
- hashv ^= hashv << 16; \
- hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)] << 18); \
- hashv += hashv >> 11; \
- break; \
- case 2: hashv += get16bits (_sfh_key); \
- hashv ^= hashv << 11; \
- hashv += hashv >> 17; \
- break; \
- case 1: hashv += *_sfh_key; \
- hashv ^= hashv << 10; \
- hashv += hashv >> 1; \
- } \
- \
- /* Force "avalanching" of final 127 bits */ \
- hashv ^= hashv << 3; \
- hashv += hashv >> 5; \
- hashv ^= hashv << 4; \
- hashv += hashv >> 17; \
- hashv ^= hashv << 25; \
- hashv += hashv >> 6; \
- bkt = hashv & (num_bkts-1); \
-} while(0)
-
-#ifdef HASH_USING_NO_STRICT_ALIASING
-/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
- * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
- * MurmurHash uses the faster approach only on CPU's where we know it's safe.
- *
- * Note the preprocessor built-in defines can be emitted using:
- *
- * gcc -m64 -dM -E - < /dev/null (on gcc)
- * cc -## a.c (where a.c is a simple test file) (Sun Studio)
- */
-#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
-#define MUR_GETBLOCK(p,i) p[i]
-#else /* non intel */
-#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
-#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1)
-#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2)
-#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3)
-#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
-#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
-#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
-#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
-#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
-#else /* assume little endian non-intel */
-#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
-#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
-#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
-#endif
-#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
- (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
- (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
- MUR_ONE_THREE(p))))
-#endif
-#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
-#define MUR_FMIX(_h) \
-do { \
- _h ^= _h >> 16; \
- _h *= 0x85ebca6b; \
- _h ^= _h >> 13; \
- _h *= 0xc2b2ae35l; \
- _h ^= _h >> 16; \
-} while(0)
-
-#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \
-do { \
- const uint8_t *_mur_data = (const uint8_t*)(key); \
- const int _mur_nblocks = (keylen) / 4; \
- uint32_t _mur_h1 = 0xf88D5353; \
- uint32_t _mur_c1 = 0xcc9e2d51; \
- uint32_t _mur_c2 = 0x1b873593; \
- uint32_t _mur_k1 = 0; \
- const uint8_t *_mur_tail; \
- const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \
- int _mur_i; \
- for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \
- _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
- _mur_k1 *= _mur_c1; \
- _mur_k1 = MUR_ROTL32(_mur_k1,15); \
- _mur_k1 *= _mur_c2; \
- \
- _mur_h1 ^= _mur_k1; \
- _mur_h1 = MUR_ROTL32(_mur_h1,13); \
- _mur_h1 = _mur_h1*5+0xe6546b64; \
- } \
- _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \
- _mur_k1=0; \
- switch((keylen) & 3) { \
- case 3: _mur_k1 ^= _mur_tail[2] << 16; \
- case 2: _mur_k1 ^= _mur_tail[1] << 8; \
- case 1: _mur_k1 ^= _mur_tail[0]; \
- _mur_k1 *= _mur_c1; \
- _mur_k1 = MUR_ROTL32(_mur_k1,15); \
- _mur_k1 *= _mur_c2; \
- _mur_h1 ^= _mur_k1; \
- } \
- _mur_h1 ^= (keylen); \
- MUR_FMIX(_mur_h1); \
- hashv = _mur_h1; \
- bkt = hashv & (num_bkts-1); \
-} while(0)
-#endif /* HASH_USING_NO_STRICT_ALIASING */
-
-/* key comparison function; return 0 if keys equal */
-#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
-
-/* iterate over items in a known bucket to find desired item */
-#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
-do { \
- if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \
- else out=NULL; \
- while (out) { \
- if ((out)->hh.keylen == keylen_in) { \
- if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \
- } \
- if ((out)->hh.hh_next) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \
- else out = NULL; \
- } \
-} while(0)
-
-/* add an item to a bucket */
-#define HASH_ADD_TO_BKT(head,addhh) \
-do { \
- head.count++; \
- (addhh)->hh_next = head.hh_head; \
- (addhh)->hh_prev = NULL; \
- if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \
- (head).hh_head=addhh; \
- if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \
- && (addhh)->tbl->noexpand != 1) { \
- HASH_EXPAND_BUCKETS((addhh)->tbl); \
- } \
-} while(0)
-
-/* remove an item from a given bucket */
-#define HASH_DEL_IN_BKT(hh,head,hh_del) \
- (head).count--; \
- if ((head).hh_head == hh_del) { \
- (head).hh_head = hh_del->hh_next; \
- } \
- if (hh_del->hh_prev) { \
- hh_del->hh_prev->hh_next = hh_del->hh_next; \
- } \
- if (hh_del->hh_next) { \
- hh_del->hh_next->hh_prev = hh_del->hh_prev; \
- }
-
-/* Bucket expansion has the effect of doubling the number of buckets
- * and redistributing the items into the new buckets. Ideally the
- * items will distribute more or less evenly into the new buckets
- * (the extent to which this is true is a measure of the quality of
- * the hash function as it applies to the key domain).
- *
- * With the items distributed into more buckets, the chain length
- * (item count) in each bucket is reduced. Thus by expanding buckets
- * the hash keeps a bound on the chain length. This bounded chain
- * length is the essence of how a hash provides constant time lookup.
- *
- * The calculation of tbl->ideal_chain_maxlen below deserves some
- * explanation. First, keep in mind that we're calculating the ideal
- * maximum chain length based on the *new* (doubled) bucket count.
- * In fractions this is just n/b (n=number of items,b=new num buckets).
- * Since the ideal chain length is an integer, we want to calculate
- * ceil(n/b). We don't depend on floating point arithmetic in this
- * hash, so to calculate ceil(n/b) with integers we could write
- *
- * ceil(n/b) = (n/b) + ((n%b)?1:0)
- *
- * and in fact a previous version of this hash did just that.
- * But now we have improved things a bit by recognizing that b is
- * always a power of two. We keep its base 2 log handy (call it lb),
- * so now we can write this with a bit shift and logical AND:
- *
- * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
- *
- */
-#define HASH_EXPAND_BUCKETS(tbl) \
-do { \
- unsigned _he_bkt; \
- unsigned _he_bkt_i; \
- struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
- UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
- _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
- 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
- if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
- memset(_he_new_buckets, 0, \
- 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
- tbl->ideal_chain_maxlen = \
- (tbl->num_items >> (tbl->log2_num_buckets+1)) + \
- ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 1 : 0); \
- tbl->nonideal_items = 0; \
- for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
- { \
- _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
- while (_he_thh) { \
- _he_hh_nxt = _he_thh->hh_next; \
- HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \
- _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
- if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
- tbl->nonideal_items++; \
- _he_newbkt->expand_mult = _he_newbkt->count / \
- tbl->ideal_chain_maxlen; \
- } \
- _he_thh->hh_prev = NULL; \
- _he_thh->hh_next = _he_newbkt->hh_head; \
- if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \
- _he_thh; \
- _he_newbkt->hh_head = _he_thh; \
- _he_thh = _he_hh_nxt; \
- } \
- } \
- uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
- tbl->num_buckets *= 2; \
- tbl->log2_num_buckets++; \
- tbl->buckets = _he_new_buckets; \
- tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
- (tbl->ineff_expands+1) : 0; \
- if (tbl->ineff_expands > 1) { \
- tbl->noexpand=1; \
- uthash_noexpand_fyi(tbl); \
- } \
- uthash_expand_fyi(tbl); \
-} while(0)
-
-
-/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
-/* Note that HASH_SORT assumes the hash handle name to be hh.
- * HASH_SRT was added to allow the hash handle name to be passed in. */
-#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
-#define HASH_SRT(hh,head,cmpfcn) \
-do { \
- unsigned _hs_i; \
- unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
- struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
- if (head) { \
- _hs_insize = 1; \
- _hs_looping = 1; \
- _hs_list = &((head)->hh); \
- while (_hs_looping) { \
- _hs_p = _hs_list; \
- _hs_list = NULL; \
- _hs_tail = NULL; \
- _hs_nmerges = 0; \
- while (_hs_p) { \
- _hs_nmerges++; \
- _hs_q = _hs_p; \
- _hs_psize = 0; \
- for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
- _hs_psize++; \
- _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
- ((void*)((char*)(_hs_q->next) + \
- (head)->hh.tbl->hho)) : NULL); \
- if (! (_hs_q) ) break; \
- } \
- _hs_qsize = _hs_insize; \
- while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \
- if (_hs_psize == 0) { \
- _hs_e = _hs_q; \
- _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
- ((void*)((char*)(_hs_q->next) + \
- (head)->hh.tbl->hho)) : NULL); \
- _hs_qsize--; \
- } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \
- _hs_e = _hs_p; \
- if (_hs_p){ \
- _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
- ((void*)((char*)(_hs_p->next) + \
- (head)->hh.tbl->hho)) : NULL); \
- } \
- _hs_psize--; \
- } else if (( \
- cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
- DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
- ) <= 0) { \
- _hs_e = _hs_p; \
- if (_hs_p){ \
- _hs_p = (UT_hash_handle*)((_hs_p->next) ? \
- ((void*)((char*)(_hs_p->next) + \
- (head)->hh.tbl->hho)) : NULL); \
- } \
- _hs_psize--; \
- } else { \
- _hs_e = _hs_q; \
- _hs_q = (UT_hash_handle*)((_hs_q->next) ? \
- ((void*)((char*)(_hs_q->next) + \
- (head)->hh.tbl->hho)) : NULL); \
- _hs_qsize--; \
- } \
- if ( _hs_tail ) { \
- _hs_tail->next = ((_hs_e) ? \
- ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
- } else { \
- _hs_list = _hs_e; \
- } \
- if (_hs_e) { \
- _hs_e->prev = ((_hs_tail) ? \
- ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
- } \
- _hs_tail = _hs_e; \
- } \
- _hs_p = _hs_q; \
- } \
- if (_hs_tail){ \
- _hs_tail->next = NULL; \
- } \
- if ( _hs_nmerges <= 1 ) { \
- _hs_looping=0; \
- (head)->hh.tbl->tail = _hs_tail; \
- DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
- } \
- _hs_insize *= 2; \
- } \
- HASH_FSCK(hh,head); \
- } \
-} while (0)
-
-/* This function selects items from one hash into another hash.
- * The end result is that the selected items have dual presence
- * in both hashes. There is no copy of the items made; rather
- * they are added into the new hash through a secondary hash
- * hash handle that must be present in the structure. */
-#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
-do { \
- unsigned _src_bkt, _dst_bkt; \
- void *_last_elt=NULL, *_elt; \
- UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
- ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
- if (src) { \
- for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
- for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
- _src_hh; \
- _src_hh = _src_hh->hh_next) { \
- _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
- if (cond(_elt)) { \
- _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
- _dst_hh->key = _src_hh->key; \
- _dst_hh->keylen = _src_hh->keylen; \
- _dst_hh->hashv = _src_hh->hashv; \
- _dst_hh->prev = _last_elt; \
- _dst_hh->next = NULL; \
- if (_last_elt_hh) { _last_elt_hh->next = _elt; } \
- if (!dst) { \
- DECLTYPE_ASSIGN(dst,_elt); \
- HASH_MAKE_TABLE(hh_dst,dst); \
- } else { \
- _dst_hh->tbl = (dst)->hh_dst.tbl; \
- } \
- HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
- HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
- (dst)->hh_dst.tbl->num_items++; \
- _last_elt = _elt; \
- _last_elt_hh = _dst_hh; \
- } \
- } \
- } \
- } \
- HASH_FSCK(hh_dst,dst); \
-} while (0)
-
-#define HASH_CLEAR(hh,head) \
-do { \
- if (head) { \
- uthash_free((head)->hh.tbl->buckets, \
- (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
- HASH_BLOOM_FREE((head)->hh.tbl); \
- uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
- (head)=NULL; \
- } \
-} while(0)
-
-#define HASH_OVERHEAD(hh,head) \
- (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \
- ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \
- (sizeof(UT_hash_table)) + \
- (HASH_BLOOM_BYTELEN)))
-
-#ifdef NO_DECLTYPE
-#define HASH_ITER(hh,head,el,tmp) \
-for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
- el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
-#else
-#define HASH_ITER(hh,head,el,tmp) \
-for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
- el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL))
-#endif
-
-/* obtain a count of items in the hash */
-#define HASH_COUNT(head) HASH_CNT(hh,head)
-#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
-
-typedef struct UT_hash_bucket {
- struct UT_hash_handle *hh_head;
- unsigned count;
-
- /* expand_mult is normally set to 0. In this situation, the max chain length
- * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
- * the bucket's chain exceeds this length, bucket expansion is triggered).
- * However, setting expand_mult to a non-zero value delays bucket expansion
- * (that would be triggered by additions to this particular bucket)
- * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
- * (The multiplier is simply expand_mult+1). The whole idea of this
- * multiplier is to reduce bucket expansions, since they are expensive, in
- * situations where we know that a particular bucket tends to be overused.
- * It is better to let its chain length grow to a longer yet-still-bounded
- * value, than to do an O(n) bucket expansion too often.
- */
- unsigned expand_mult;
-
-} UT_hash_bucket;
-
-/* random signature used only to find hash tables in external analysis */
-#define HASH_SIGNATURE 0xa0111fe1
-#define HASH_BLOOM_SIGNATURE 0xb12220f2
-
-typedef struct UT_hash_table {
- UT_hash_bucket *buckets;
- unsigned num_buckets, log2_num_buckets;
- unsigned num_items;
- struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
- ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */
-
- /* in an ideal situation (all buckets used equally), no bucket would have
- * more than ceil(#items/#buckets) items. that's the ideal chain length. */
- unsigned ideal_chain_maxlen;
-
- /* nonideal_items is the number of items in the hash whose chain position
- * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
- * hash distribution; reaching them in a chain traversal takes >ideal steps */
- unsigned nonideal_items;
-
- /* ineffective expands occur when a bucket doubling was performed, but
- * afterward, more than half the items in the hash had nonideal chain
- * positions. If this happens on two consecutive expansions we inhibit any
- * further expansion, as it's not helping; this happens when the hash
- * function isn't a good fit for the key domain. When expansion is inhibited
- * the hash will still work, albeit no longer in constant time. */
- unsigned ineff_expands, noexpand;
-
- uint32_t signature; /* used only to find hash tables in external analysis */
-#ifdef HASH_BLOOM
- uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
- uint8_t *bloom_bv;
- char bloom_nbits;
-#endif
-
-} UT_hash_table;
-
-typedef struct UT_hash_handle {
- struct UT_hash_table *tbl;
- void *prev; /* prev element in app order */
- void *next; /* next element in app order */
- struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
- struct UT_hash_handle *hh_next; /* next hh in bucket order */
- void *key; /* ptr to enclosing struct's key */
- unsigned keylen; /* enclosing struct's key len */
- unsigned hashv; /* result of hash-fcn(key) */
-} UT_hash_handle;
-
-#endif /* UTHASH_H */
--
cgit v1.2.3
From c04e1e7aef06ce0836984b17e48a1d09bb83ce04 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 18:38:56 +0200
Subject: Fix misc bugs
---
src/references.c | 32 +++++++++++++++++++++-----------
1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/src/references.c b/src/references.c
index ff64b00..84cb773 100644
--- a/src/references.c
+++ b/src/references.c
@@ -13,6 +13,14 @@ refhash(const unsigned char *link_ref)
return hash;
}
+static void reference_free(reference *ref)
+{
+ free(ref->label);
+ free(ref->url);
+ free(ref->title);
+ free(ref);
+}
+
// normalize reference: collapse internal whitespace to single space,
// remove leading/trailing whitespace, case fold
static unsigned char *normalize_reference(chunk *ref)
@@ -28,7 +36,18 @@ static unsigned char *normalize_reference(chunk *ref)
static void add_reference(reference_map *map, reference* ref)
{
- ref->next = map->table[ref->hash % REFMAP_SIZE];
+ reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE];
+
+ while (t) {
+ if (t->hash == ref->hash &&
+ !strcmp((char *)t->label, (char *)ref->label)) {
+ reference_free(ref);
+ return;
+ }
+
+ t = t->next;
+ }
+
map->table[ref->hash % REFMAP_SIZE] = ref;
}
@@ -63,7 +82,7 @@ reference* reference_lookup(reference_map *map, chunk *label)
ref = map->table[hash % REFMAP_SIZE];
while (ref) {
- if (ref->label[0] == norm[0] &&
+ if (ref->hash == hash &&
!strcmp((char *)ref->label, (char *)norm))
break;
ref = ref->next;
@@ -73,14 +92,6 @@ reference* reference_lookup(reference_map *map, chunk *label)
return ref;
}
-static void reference_free(reference *ref)
-{
- free(ref->label);
- free(ref->url);
- free(ref->title);
- free(ref);
-}
-
void reference_map_free(reference_map *map)
{
unsigned int i;
@@ -96,7 +107,6 @@ void reference_map_free(reference_map *map)
}
}
- free(map->table);
free(map);
}
--
cgit v1.2.3
From c47e3a34adac00a262f72c6d17a1c87deefa33c4 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 19:39:03 +0200
Subject: Fix infinite loop when case folding invalid UTF8 chars
---
src/utf8.c | 24 ++++++++++++------------
src/utf8.h | 4 ++--
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/src/utf8.c b/src/utf8.c
index c65aec6..1b0224b 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -29,9 +29,9 @@ static void encode_unknown(strbuf *buf)
strbuf_put(buf, repl, 3);
}
-ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
+int utf8proc_charlen(const uint8_t *str, int str_len)
{
- ssize_t length, i;
+ int length, i;
if (!str_len)
return 0;
@@ -42,11 +42,11 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
return -1;
if (str_len >= 0 && length > str_len)
- return -1;
+ return -str_len;
for (i = 1; i < length; i++) {
if ((str[i] & 0xC0) != 0x80)
- return -1;
+ return -i;
}
return length;
@@ -77,7 +77,7 @@ void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
i += 1;
tab += numspaces;
} else {
- ssize_t charlen = utf8proc_charlen(line + i, size - i);
+ int charlen = utf8proc_charlen(line + i, size - i);
if (charlen < 0) {
encode_unknown(ob);
@@ -92,9 +92,9 @@ void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
}
}
-ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
+int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
{
- ssize_t length;
+ int length;
int32_t uc = -1;
*dst = -1;
@@ -177,15 +177,15 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
utf8proc_encode_char(x, dest)
while (len > 0) {
- ssize_t char_len = utf8proc_iterate(str, len, &c);
+ int char_len = utf8proc_iterate(str, len, &c);
- if (char_len < 0) {
+ if (char_len >= 0) {
+#include "case_fold_switch.inc"
+ } else {
encode_unknown(dest);
- continue;
+ char_len = -char_len;
}
-#include "case_fold_switch.inc"
-
str += char_len;
len -= char_len;
}
diff --git a/src/utf8.h b/src/utf8.h
index 9506b75..c971250 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -6,8 +6,8 @@
void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len);
void utf8proc_encode_char(int32_t uc, strbuf *buf);
-ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst);
-ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len);
+int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst);
+int utf8proc_charlen(const uint8_t *str, int str_len);
void utf8proc_detab(strbuf *dest, const uint8_t *line, size_t size);
#endif
--
cgit v1.2.3
From 79e7a4bbf7055e33b346564db769f03e85f98988 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 19:40:40 +0200
Subject: Improve invalid UTF8 codepoint skipping
---
src/utf8.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/utf8.c b/src/utf8.c
index 1b0224b..6b34831 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -79,14 +79,14 @@ void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
} else {
int charlen = utf8proc_charlen(line + i, size - i);
- if (charlen < 0) {
- encode_unknown(ob);
- i++;
- } else {
+ if (charlen >= 0) {
strbuf_put(ob, line + i, charlen);
- i += charlen;
+ } else {
+ encode_unknown(ob);
+ charlen = -charlen;
}
+ i += charlen;
tab += 1;
}
}
--
cgit v1.2.3
From 7c2a062cdf9c0514cdf32f4f8bd07cf52d183c8b Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 19:46:34 +0200
Subject: Do not use strchr for span searches
strchr() returns a valid (non-NULL) pointer when asked to search for '\0',
because the terminating NUL byte is considered part of the string.
---
src/inlines.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/inlines.c b/src/inlines.c
index 3040f09..cd2d124 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -767,10 +767,13 @@ node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
static int subject_find_special_char(subject *subj)
{
+ static const char CHARS[] = "\n\\`&_*[]pos + 1;
while (n < subj->input.len) {
- if (strchr("\n\\`&_*[]input.data[n]))
+ if (memchr(CHARS, subj->input.data[n], CHARS_SIZE))
return n;
n++;
}
--
cgit v1.2.3
From 8c028e1a88c2d2aac4a4086202568bee43678aa8 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 19:50:29 +0200
Subject: Do not create references with empty names
---
src/buffer.c | 7 ++++---
src/references.c | 31 ++++++++++++++++++++++++++-----
src/references.h | 2 +-
3 files changed, 31 insertions(+), 9 deletions(-)
diff --git a/src/buffer.c b/src/buffer.c
index cdf8ca0..7c2b86b 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -215,11 +215,12 @@ unsigned char *strbuf_detach(strbuf *buf)
{
unsigned char *data = buf->ptr;
- if (buf->asize == 0 || buf->ptr == strbuf__oom)
- return NULL;
+ if (buf->asize == 0 || buf->ptr == strbuf__oom) {
+ /* return an empty string */
+ return calloc(1, 1);
+ }
strbuf_init(buf, 0);
-
return data;
}
diff --git a/src/references.c b/src/references.c
index 84cb773..300bbcc 100644
--- a/src/references.c
+++ b/src/references.c
@@ -23,15 +23,29 @@ static void reference_free(reference *ref)
// normalize reference: collapse internal whitespace to single space,
// remove leading/trailing whitespace, case fold
+// Return NULL if the reference name is actually empty (i.e. composed
+// solely from whitespace)
static unsigned char *normalize_reference(chunk *ref)
{
strbuf normalized = GH_BUF_INIT;
+ unsigned char *result;
+
+ if (ref->len == 0)
+ return NULL;
utf8proc_case_fold(&normalized, ref->data, ref->len);
strbuf_trim(&normalized);
strbuf_normalize_whitespace(&normalized);
- return strbuf_detach(&normalized);
+ result = strbuf_detach(&normalized);
+ assert(result);
+
+ if (result[0] == '\0') {
+ free(result);
+ return NULL;
+ }
+
+ return result;
}
static void add_reference(reference_map *map, reference* ref)
@@ -51,19 +65,23 @@ static void add_reference(reference_map *map, reference* ref)
map->table[ref->hash % REFMAP_SIZE] = ref;
}
-extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
+extern void reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
{
reference *ref;
+ unsigned char *reflabel = normalize_reference(label);
+
+ /* empty reference name, or composed from only whitespace */
+ if (reflabel == NULL)
+ return;
+
ref = malloc(sizeof(reference));
- ref->label = normalize_reference(label);
+ ref->label = reflabel;
ref->hash = refhash(ref->label);
ref->url = clean_url(url);
ref->title = clean_title(title);
ref->next = NULL;
add_reference(map, ref);
-
- return ref;
}
// Returns reference if refmap contains a reference with matching
@@ -78,6 +96,9 @@ reference* reference_lookup(reference_map *map, chunk *label)
return NULL;
norm = normalize_reference(label);
+ if (norm == NULL)
+ return NULL;
+
hash = refhash(norm);
ref = map->table[hash % REFMAP_SIZE];
diff --git a/src/references.h b/src/references.h
index 78fffe7..28937f1 100644
--- a/src/references.h
+++ b/src/references.h
@@ -22,6 +22,6 @@ typedef struct reference_map reference_map;
reference_map *reference_map_new(void);
void reference_map_free(reference_map *map);
reference* reference_lookup(reference_map *map, chunk *label);
-extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title);
+extern void reference_create(reference_map *map, chunk *label, chunk *url, chunk *title);
#endif
--
cgit v1.2.3
From 0ae7f4f53720e867c92ac9465062285293568856 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 20:02:01 +0200
Subject: Handle overflows in the codepoint parser
---
src/html/houdini_html_u.c | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/src/html/houdini_html_u.c b/src/html/houdini_html_u.c
index b8e2d8d..49b4956 100644
--- a/src/html/houdini_html_u.c
+++ b/src/html/houdini_html_u.c
@@ -15,13 +15,25 @@ houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size)
int codepoint = 0;
if (_isdigit(src[1])) {
- for (i = 1; i < size && _isdigit(src[i]); ++i)
- codepoint = (codepoint * 10) + (src[i] - '0');
+ for (i = 1; i < size && _isdigit(src[i]); ++i) {
+ int cp = (codepoint * 10) + (src[i] - '0');
+
+ if (cp < codepoint)
+ return 0;
+
+ codepoint = cp;
+ }
}
else if (src[1] == 'x' || src[1] == 'X') {
- for (i = 2; i < size && _isxdigit(src[i]); ++i)
- codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
+ for (i = 2; i < size && _isxdigit(src[i]); ++i) {
+ int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
+
+ if (cp < codepoint)
+ return 0;
+
+ codepoint = cp;
+ }
}
if (i < size && src[i] == ';' && codepoint) {
--
cgit v1.2.3
From 5b16a88558f74eee5b4c93e43e895e98f4ea86d6 Mon Sep 17 00:00:00 2001
From: Artyom Kazak
Date: Thu, 11 Sep 2014 04:19:01 +0400
Subject: =?UTF-8?q?Fix=20a=20broken=20link=20to=20the=20=E2=80=9CA=20parsi?=
=?UTF-8?q?ng=20strategy=E2=80=9D=20section.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
(Line lengths changed so that the link wouldn't have to be broken.)
---
spec.txt | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/spec.txt b/spec.txt
index c06f750..c4e77b2 100644
--- a/spec.txt
+++ b/spec.txt
@@ -1994,11 +1994,11 @@ form of the definition is:
> transforming X in such-and-such a way is a container of type Y
> with these blocks as its content.
-So, we explain what counts as a block quote or list item by
-explaining how these can be *generated* from their contents.
-This should suffice to define the syntax, although it does not
-give a recipe for *parsing* these constructions. (A recipe is
-provided below in the section entitled [A parsing strategy].)
+So, we explain what counts as a block quote or list item by explaining
+how these can be *generated* from their contents. This should suffice
+to define the syntax, although it does not give a recipe for *parsing*
+these constructions. (A recipe is provided below in the section entitled
+[A parsing strategy](#appendix-a-a-parsing-strategy).)
## Block quotes
--
cgit v1.2.3
From a6722b8a737eaefdf3d757227036deb4f10492db Mon Sep 17 00:00:00 2001
From: Artyom Kazak
Date: Thu, 11 Sep 2014 04:30:08 +0400
Subject: Fix another broken link.
---
spec.txt | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/spec.txt b/spec.txt
index c4e77b2..4a9e9fd 100644
--- a/spec.txt
+++ b/spec.txt
@@ -2010,9 +2010,9 @@ The following rules define [block quotes](#block-quote):
1. **Basic case.** If a string of lines *Ls* constitute a sequence
- of blocks *Bs*, then the result of appending a [block quote marker]
- to the beginning of each line in *Ls* is a [block quote](#block-quote)
- containing *Bs*.
+ of blocks *Bs*, then the result of appending a [block quote
+ marker](#block-quote-marker) to the beginning of each line in *Ls*
+ is a [block quote](#block-quote) containing *Bs*.
2. **Laziness.** If a string of lines *Ls* constitute a [block
quote](#block-quote) with contents *Bs*, then the result of deleting
--
cgit v1.2.3
From 6d7d6cf150dedb53b7f0972b79313df3364ebbed Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 7 Sep 2014 15:20:41 -0700
Subject: stmd.js: Added memoization of inline parsing.
---
js/stmd.js | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/js/stmd.js b/js/stmd.js
index 15d7345..63234f6 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -672,6 +672,13 @@ var parseReference = function(s, refmap) {
// Parse the next inline element in subject, advancing subject position
// and adding the result to 'inlines'.
var parseInline = function(inlines) {
+ var startpos = this.pos;
+ var memoized = this.memo[startpos];
+ if (memoized) {
+ inlines.push(memoized.inlines);
+ this.pos += memoized.len;
+ return memoized.len;
+ }
var c = this.peek();
var res;
switch(c) {
@@ -703,7 +710,13 @@ var parseInline = function(inlines) {
break;
default:
}
- return res || this.parseString(inlines);
+ if (!res) {
+ res = this.parseString(inlines);
+ }
+ if (res > 0) {
+ this.memo[startpos] = { inlines: inlines[inlines.length - 1], len: res };
+ }
+ return res;
};
// Parse s as a list of inlines, using refmap to resolve references.
@@ -711,6 +724,7 @@ var parseInlines = function(s, refmap) {
this.subject = s;
this.pos = 0;
this.refmap = refmap || {};
+ this.memo = {};
var inlines = [];
while (this.parseInline(inlines)) ;
return inlines;
@@ -723,6 +737,7 @@ function InlineParser(){
label_nest_level: 0, // used by parseLinkLabel method
pos: 0,
refmap: {},
+ memo: {},
match: match,
peek: peek,
spnl: spnl,
--
cgit v1.2.3
From a56eca884caec58308387acffb9813b75241f0be Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 7 Sep 2014 22:12:44 -0700
Subject: New strategy: did parseNewlines, parseString.
---
js/stmd.js | 59 +++++++++++++++++++++++++++--------------------------------
1 file changed, 27 insertions(+), 32 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 63234f6..1de6315 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
// Matches a character with a special meaning in markdown,
// or a string of non-special characters.
-var reMain = /^(?:[\n`\[\]\\!<&*_]|[^\n`\[\]\\!<&*_]+)/m;
+var reMain = /^(?: +(?!\n)|[\n `\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m;
// UTILITY FUNCTIONS
@@ -438,7 +438,7 @@ var parseLinkLabel = function() {
this.parseBackticks([]);
break;
case '<':
- this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString([]);
+ this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString();
break;
case '[': // nested []
nest_level++;
@@ -452,7 +452,7 @@ var parseLinkLabel = function() {
this.parseEscaped([]);
break;
default:
- this.parseString([]);
+ this.parseString();
}
}
if (c === ']') {
@@ -559,34 +559,25 @@ var parseEntity = function(inlines) {
// Parse a run of ordinary characters, or a single character with
// a special meaning in markdown, as a plain string, adding to inlines.
-var parseString = function(inlines) {
+var parseString = function() {
var m;
if ((m = this.match(reMain))) {
- inlines.push({ t: 'Str', c: m });
- return m.length;
+ return { t: 'Str', c: m };
} else {
- return 0;
+ return null;
}
};
// Parse a newline. If it was preceded by two spaces, return a hard
// line break; otherwise a soft line break.
-var parseNewline = function(inlines) {
- if (this.peek() == '\n') {
- this.pos++;
- var last = inlines[inlines.length - 1];
- if (last && last.t == 'Str' && last.c.slice(-2) == ' ') {
- last.c = last.c.replace(/ *$/,'');
- inlines.push({ t: 'Hardbreak' });
- } else {
- if (last && last.t == 'Str' && last.c.slice(-1) == ' ') {
- last.c = last.c.slice(0, -1);
- }
- inlines.push({ t: 'Softbreak' });
- }
- return 1;
+var parseNewline = function() {
+ var m = this.match(/ *\n/);
+ if (m.length > 2) {
+ return { t: 'Hardbreak' };
+ } else if (m.length > 0) {
+ return { t: 'Softbreak' };
} else {
- return 0;
+ return null;
}
};
@@ -670,20 +661,20 @@ var parseReference = function(s, refmap) {
};
// Parse the next inline element in subject, advancing subject position
-// and adding the result to 'inlines'.
-var parseInline = function(inlines) {
+// and returning the inline parsed.
+var parseInline = function() {
var startpos = this.pos;
var memoized = this.memo[startpos];
if (memoized) {
- inlines.push(memoized.inlines);
- this.pos += memoized.len;
- return memoized.len;
+ this.pos = memoized.endpos;
+ return memoized.inline;
}
var c = this.peek();
var res;
switch(c) {
case '\n':
- res = this.parseNewline(inlines);
+ case ' ':
+ res = this.parseNewline();
break;
case '\\':
res = this.parseEscaped(inlines);
@@ -711,10 +702,11 @@ var parseInline = function(inlines) {
default:
}
if (!res) {
- res = this.parseString(inlines);
+ res = this.parseString();
}
- if (res > 0) {
- this.memo[startpos] = { inlines: inlines[inlines.length - 1], len: res };
+ if (res) {
+ this.memo[startpos] = { inline: res,
+ endpos: this.pos - startpos };
}
return res;
};
@@ -726,7 +718,10 @@ var parseInlines = function(s, refmap) {
this.refmap = refmap || {};
this.memo = {};
var inlines = [];
- while (this.parseInline(inlines)) ;
+ var next_inline;
+ while (next_inline = this.parseInline(inlines)) {
+ inlines.push(next_inline);
+ }
return inlines;
};
--
cgit v1.2.3
From 70976e9cfa26a83e1cf74cac79e36ba771567b0f Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 7 Sep 2014 22:15:41 -0700
Subject: Did parseBackslash (used to be parseEscaped).
---
js/stmd.js | 22 +++++++++-------------
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 1de6315..870a253 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -150,8 +150,7 @@ var spnl = function() {
// All of the parsers below try to match something at the current position
// in the subject. If they succeed in matching anything, they
-// push an inline element onto the 'inlines' list. They return the
-// number of characters parsed (possibly 0).
+// return the inline matched, advancing the subject.
// Attempt to parse backticks, adding either a backtick code span or a
// literal sequence of backticks to the 'inlines' list.
@@ -182,25 +181,22 @@ var parseBackticks = function(inlines) {
// Parse a backslash-escaped special character, adding either the escaped
// character, a hard line break (if the backslash is followed by a newline),
// or a literal backslash to the 'inlines' list.
-var parseEscaped = function(inlines) {
+var parseBackslash = function() {
var subj = this.subject,
pos = this.pos;
if (subj[pos] === '\\') {
if (subj[pos + 1] === '\n') {
- inlines.push({ t: 'Hardbreak' });
this.pos = this.pos + 2;
- return 2;
+ return { t: 'Hardbreak' };
} else if (reEscapable.test(subj[pos + 1])) {
- inlines.push({ t: 'Str', c: subj[pos + 1] });
this.pos = this.pos + 2;
- return 2;
+ return { t: 'Str', c: subj[pos + 1] };
} else {
this.pos++;
- inlines.push({t: 'Str', c: '\\'});
- return 1;
+ return {t: 'Str', c: '\\'};
}
} else {
- return 0;
+ return null;
}
};
@@ -449,7 +445,7 @@ var parseLinkLabel = function() {
this.pos++;
break;
case '\\':
- this.parseEscaped([]);
+ this.parseBackslash();
break;
default:
this.parseString();
@@ -677,7 +673,7 @@ var parseInline = function() {
res = this.parseNewline();
break;
case '\\':
- res = this.parseEscaped(inlines);
+ res = this.parseBackslash();
break;
case '`':
res = this.parseBackticks(inlines);
@@ -737,7 +733,7 @@ function InlineParser(){
peek: peek,
spnl: spnl,
parseBackticks: parseBackticks,
- parseEscaped: parseEscaped,
+ parseBackslash: parseBackslash,
parseAutolink: parseAutolink,
parseHtmlTag: parseHtmlTag,
scanDelims: scanDelims,
--
cgit v1.2.3
From cbd2da6c9585bb5070cbac8b964617140047456e Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 7 Sep 2014 23:18:56 -0700
Subject: Shell of parseEmphasis.
---
js/stmd.js | 81 +++++++++++++++++++++++++++++++-------------------------------
1 file changed, 40 insertions(+), 41 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 870a253..6d86c30 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
// Matches a character with a special meaning in markdown,
// or a string of non-special characters.
-var reMain = /^(?: +(?!\n)|[\n `\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m;
+var reMain = /^(?: +|[\n`\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m;
// UTILITY FUNCTIONS
@@ -262,59 +262,51 @@ var scanDelims = function(c) {
can_close: can_close };
};
-// Attempt to parse emphasis or strong emphasis in an efficient way,
-// with no backtracking.
-var parseEmphasis = function(inlines) {
+// Attempt to parse emphasis or strong emphasis.
+var parseEmphasis = function() {
var startpos = this.pos;
var c ;
var first_close = 0;
- var nxt = this.peek();
- if (nxt == '*' || nxt == '_') {
- c = nxt;
- } else {
- return 0;
+ var c = this.peek();
+ if (!(c === '*' || c === '_')) {
+ return null;
}
var numdelims;
var delimpos;
+ var inlines = [];
// Get opening delimiters.
res = this.scanDelims(c);
numdelims = res.numdelims;
- this.pos += numdelims;
- // We provisionally add a literal string. If we match appropriate
- // closing delimiters, we'll change this to Strong or Emph.
- inlines.push({t: 'Str',
- c: this.subject.substr(this.pos - numdelims, numdelims)});
- // Record the position of this opening delimiter:
- delimpos = inlines.length - 1;
if (!res.can_open || numdelims === 0) {
- return 0;
+ this.pos = startpos;
+ return null;
}
+ this.pos += numdelims;
+
var first_close_delims = 0;
+ var next_inline;
switch (numdelims) {
case 1: // we started with * or _
while (true) {
res = this.scanDelims(c);
if (res.numdelims >= 1 && res.can_close) {
- this.pos += 1;
- // Convert the inline at delimpos, currently a string with the delim,
- // into an Emph whose contents are the succeeding inlines
- inlines[delimpos].t = 'Emph';
- inlines[delimpos].c = inlines.slice(delimpos + 1);
- inlines.splice(delimpos + 1);
- break;
+ this.pos += 1;
+ return {t: 'Emph', c: inlines};
+ } else if (next_inline = this.parseInline(inlines)) {
+ inlines.push(next_inline);
} else {
- if (this.parseInline(inlines) === 0) {
- break;
- }
+ // didn't find closing delimiter
+ this.pos = startpos;
+ return null;
}
}
- return (this.pos - startpos);
+/*
case 2: // We started with ** or __
while (true) {
res = this.scanDelims(c);
@@ -373,7 +365,7 @@ var parseEmphasis = function(inlines) {
}
}
return (this.pos - startpos);
-
+*/
default:
return res;
}
@@ -557,7 +549,7 @@ var parseEntity = function(inlines) {
// a special meaning in markdown, as a plain string, adding to inlines.
var parseString = function() {
var m;
- if ((m = this.match(reMain))) {
+ if (m = this.match(reMain)) {
return { t: 'Str', c: m };
} else {
return null;
@@ -567,14 +559,15 @@ var parseString = function() {
// Parse a newline. If it was preceded by two spaces, return a hard
// line break; otherwise a soft line break.
var parseNewline = function() {
- var m = this.match(/ *\n/);
- if (m.length > 2) {
- return { t: 'Hardbreak' };
- } else if (m.length > 0) {
- return { t: 'Softbreak' };
- } else {
- return null;
+ var m = this.match(/^ *\n/);
+ if (m) {
+ if (m.length > 2) {
+ return { t: 'Hardbreak' };
+ } else if (m.length > 0) {
+ return { t: 'Softbreak' };
+ }
}
+ return null;
};
// Attempt to parse an image. If the opening '!' is not followed
@@ -666,6 +659,9 @@ var parseInline = function() {
return memoized.inline;
}
var c = this.peek();
+ if (!c) {
+ return null;
+ }
var res;
switch(c) {
case '\n':
@@ -680,7 +676,7 @@ var parseInline = function() {
break;
case '*':
case '_':
- res = this.parseEmphasis(inlines);
+ res = this.parseEmphasis();
break;
case '[':
res = this.parseLink(inlines);
@@ -696,13 +692,16 @@ var parseInline = function() {
res = this.parseEntity(inlines);
break;
default:
- }
- if (!res) {
res = this.parseString();
+ break;
+ }
+ if (res === null) {
+ this.pos += 1;
+ res = {t: 'Str', c: c};
}
if (res) {
this.memo[startpos] = { inline: res,
- endpos: this.pos - startpos };
+ endpos: this.pos };
}
return res;
};
--
cgit v1.2.3
From 0e9674cbe56810b4c15386b1fc091777e9c7026b Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 07:25:49 -0700
Subject: parseEmphasis: added Strong and shell for triples.
---
js/stmd.js | 84 ++++++++++++++++++++++++--------------------------------------
1 file changed, 33 insertions(+), 51 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 6d86c30..753eff8 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -305,72 +305,54 @@ var parseEmphasis = function() {
return null;
}
}
+ break;
-/*
case 2: // We started with ** or __
while (true) {
res = this.scanDelims(c);
if (res.numdelims >= 2 && res.can_close) {
- this.pos += 2;
- inlines[delimpos].t = 'Strong';
- inlines[delimpos].c = inlines.slice(delimpos + 1);
- inlines.splice(delimpos + 1);
- break;
+ this.pos += 2;
+ return {t: 'Strong', c: inlines};
+ } else if (next_inline = this.parseInline(inlines)) {
+ inlines.push(next_inline);
} else {
- if (this.parseInline(inlines) === 0) {
- break;
- }
+ // didn't find closing delimiter
+ this.pos = startpos;
+ return null;
}
}
- return (this.pos - startpos);
+ break;
- case 3: // We started with *** or ___
+ case 3: // We started with *** or ___
while (true) {
- res = this.scanDelims(c);
- if (res.numdelims >= 1 && res.numdelims <= 3 && res.can_close &&
- res.numdelims != first_close_delims) {
-
- if (first_close_delims === 1 && numdelims > 2) {
- res.numdelims = 2;
- } else if (first_close_delims === 2) {
- res.numdelims = 1;
- } else if (res.numdelims === 3) {
- // If we opened with ***, then we interpret *** as ** followed by *
- // giving us
- res.numdelims = 1;
- }
-
- this.pos += res.numdelims;
-
- if (first_close > 0) { // if we've already passed the first closer:
- inlines[delimpos].t = first_close_delims === 1 ? 'Strong' : 'Emph';
- inlines[delimpos].c = [
- { t: first_close_delims === 1 ? 'Emph' : 'Strong',
- c: inlines.slice(delimpos + 1, first_close)}
- ].concat(inlines.slice(first_close + 1));
- inlines.splice(delimpos + 1);
- break;
- } else { // this is the first closer; for now, add literal string;
- // we'll change this when he hit the second closer
- inlines.push({t: 'Str',
- c: this.subject.slice(this.pos - res.numdelims,
- this.pos) });
- first_close = inlines.length - 1;
- first_close_delims = res.numdelims;
- }
- } else { // parse another inline element, til we hit the end
- if (this.parseInline(inlines) === 0) {
- break;
+ res = this.scanDelims(c);
+ var numdelims = res.numdelims;
+ var can_close = res.can_close;
+ var first_delim === 0;
+ if (can_close && numdelims === 3 && first_delim === 0) {
+ // TODO - return Strong Emph with inlines
+ } else if (can_close && numdelims === 2 && first_delim === 0) {
+ // TODO - set first_delim, make inlines a Strong
+ } else if (can_close && numdelims === 1 && first_delim === 0) {
+ // TODO - set first_delim, make inlines an Emph
+ } else if (can_close && numdelims === 2 && first_delim === 1) {
+ // TODO - return Strong inlines
+ } else if (can_close && numdelims === 1 && first_delim === 2) {
+ // TODO - return Emph inlines
+ } else if (next_inline = this.parseInline(inlines)) {
+ inlines.push(next_inline);
+ } else {
+ // didn't find closing delimiter
+ this.pos = startpos;
+ return null;
}
- }
}
- return (this.pos - startpos);
-*/
+ break;
+
default:
- return res;
}
- return 0;
+ return null;
};
// Attempt to parse link title (sans quotes), returning the string
--
cgit v1.2.3
From 56f6b364c40563102779a84d1a1595226e1f1ccc Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 08:21:59 -0700
Subject: Finished parseEmphasis.
This seems to work properly. We now get proper results for
`***hi**`.
---
js/stmd.js | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 753eff8..d04fd04 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -324,21 +324,24 @@ var parseEmphasis = function() {
break;
case 3: // We started with *** or ___
+ var first_delim = 0;
while (true) {
res = this.scanDelims(c);
var numdelims = res.numdelims;
var can_close = res.can_close;
- var first_delim === 0;
+ this.pos += numdelims;
if (can_close && numdelims === 3 && first_delim === 0) {
- // TODO - return Strong Emph with inlines
+ return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
} else if (can_close && numdelims === 2 && first_delim === 0) {
- // TODO - set first_delim, make inlines a Strong
+ first_delim = 2;
+ inlines = [{t: 'Strong', c: inlines}];
} else if (can_close && numdelims === 1 && first_delim === 0) {
- // TODO - set first_delim, make inlines an Emph
+ first_delim = 1;
+ inlines = [{t: 'Emph', c: inlines}];
} else if (can_close && numdelims === 2 && first_delim === 1) {
- // TODO - return Strong inlines
+ return {t: 'Strong', c: inlines};
} else if (can_close && numdelims === 1 && first_delim === 2) {
- // TODO - return Emph inlines
+ return {t: 'Emph', c: inlines};
} else if (next_inline = this.parseInline(inlines)) {
inlines.push(next_inline);
} else {
--
cgit v1.2.3
From 0a345c93475fab82d7cd49ed84450a882bab4b14 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 09:07:23 -0700
Subject: Did parseBackticks.
---
js/stmd.js | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index d04fd04..524e99f 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -152,9 +152,9 @@ var spnl = function() {
// in the subject. If they succeed in matching anything, they
// return the inline matched, advancing the subject.
-// Attempt to parse backticks, adding either a backtick code span or a
-// literal sequence of backticks to the 'inlines' list.
-var parseBackticks = function(inlines) {
+// Attempt to parse backticks, returning either a backtick code span or a
+// literal sequence of backticks.
+var parseBackticks = function() {
var startpos = this.pos;
var ticks = this.match(/^`+/);
if (!ticks) {
@@ -165,17 +165,15 @@ var parseBackticks = function(inlines) {
var match;
while (!foundCode && (match = this.match(/`+/m))) {
if (match == ticks) {
- inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks,
+ return { t: 'Code', c: this.subject.slice(afterOpenTicks,
this.pos - ticks.length)
.replace(/[ \n]+/g,' ')
- .trim() });
- return (this.pos - startpos);
+ .trim() };
}
}
// If we got here, we didn't match a closing backtick sequence.
- inlines.push({ t: 'Str', c: ticks });
this.pos = afterOpenTicks;
- return (this.pos - startpos);
+ return { t: 'Str', c: ticks };
};
// Parse a backslash-escaped special character, adding either the escaped
@@ -657,7 +655,7 @@ var parseInline = function() {
res = this.parseBackslash();
break;
case '`':
- res = this.parseBackticks(inlines);
+ res = this.parseBackticks();
break;
case '*':
case '_':
--
cgit v1.2.3
From f9b9ed96c5e34a1a7224c6df825f52ef2ce2e368 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 09:08:19 -0700
Subject: Did parseEntity.
---
js/stmd.js | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 524e99f..394ad06 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -517,14 +517,13 @@ var parseLink = function(inlines) {
return 0;
};
-// Attempt to parse an entity, adding to inlines if successful.
-var parseEntity = function(inlines) {
+// Attempt to parse an entity, return Entity object if successful.
+var parseEntity = function() {
var m;
if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) {
- inlines.push({ t: 'Entity', c: m });
- return m.length;
+ return { t: 'Entity', c: m };
} else {
- return 0;
+ return null;
}
};
@@ -672,7 +671,7 @@ var parseInline = function() {
this.parseHtmlTag(inlines);
break;
case '&':
- res = this.parseEntity(inlines);
+ res = this.parseEntity();
break;
default:
res = this.parseString();
--
cgit v1.2.3
From 33a425b931b844691b5e4ca4b63101d8566ab159 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 09:09:14 -0700
Subject: Did parseHtmlTag.
---
js/stmd.js | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 394ad06..5fb0fb5 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -221,10 +221,9 @@ var parseAutolink = function(inlines) {
var parseHtmlTag = function(inlines) {
var m = this.match(reHtmlTag);
if (m) {
- inlines.push({ t: 'Html', c: m });
- return m.length;
+ return { t: 'Html', c: m };
} else {
- return 0;
+ return null;
}
};
@@ -668,7 +667,7 @@ var parseInline = function() {
break;
case '<':
res = this.parseAutolink(inlines) ||
- this.parseHtmlTag(inlines);
+ this.parseHtmlTag();
break;
case '&':
res = this.parseEntity();
--
cgit v1.2.3
From 9ead350be9302268214801ef966f4f50efc4996a Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 09:10:47 -0700
Subject: Did parseAutolink.
---
js/stmd.js | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 5fb0fb5..330ebef 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -199,21 +199,21 @@ var parseBackslash = function() {
};
// Attempt to parse an autolink (URL or email in pointy brackets).
-var parseAutolink = function(inlines) {
+var parseAutolink = function() {
var m;
var dest;
if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
dest = m.slice(1,-1);
- inlines.push({ t: 'Link', label: [{ t: 'Str', c: dest }],
- destination: 'mailto:' + dest });
- return m.length;
+ return {t: 'Link',
+ label: [{ t: 'Str', c: dest }],
+ destination: 'mailto:' + dest };
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
- inlines.push({ t: 'Link', label: [{ t: 'Str', c: dest }],
- destination: dest });
- return m.length;
+ return { t: 'Link',
+ label: [{ t: 'Str', c: dest }],
+ destination: dest };
} else {
- return 0;
+ return null;
}
};
@@ -666,7 +666,7 @@ var parseInline = function() {
res = this.parseImage(inlines);
break;
case '<':
- res = this.parseAutolink(inlines) ||
+ res = this.parseAutolink() ||
this.parseHtmlTag();
break;
case '&':
--
cgit v1.2.3
From 3810f76a5939023d01e7ab082a6693e4634f15ad Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 09:11:17 -0700
Subject: Cleanup.
---
js/stmd.js | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 330ebef..5b97666 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -405,10 +405,10 @@ var parseLinkLabel = function() {
while ((c = this.peek()) && (c != ']' || nest_level > 0)) {
switch (c) {
case '`':
- this.parseBackticks([]);
+ this.parseBackticks();
break;
case '<':
- this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString();
+ this.parseAutolink() || this.parseHtmlTag() || this.parseString();
break;
case '[': // nested []
nest_level++;
@@ -666,8 +666,7 @@ var parseInline = function() {
res = this.parseImage(inlines);
break;
case '<':
- res = this.parseAutolink() ||
- this.parseHtmlTag();
+ res = this.parseAutolink() || this.parseHtmlTag();
break;
case '&':
res = this.parseEntity();
--
cgit v1.2.3
From 2f718ac9a7e314ae1e195e040664b7478e93416d Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 09:17:41 -0700
Subject: Completed conversion to memoized strategy.
Test suite runs, but many failures.
---
js/stmd.js | 62 +++++++++++++++++++++++++++-----------------------------------
1 file changed, 27 insertions(+), 35 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 5b97666..8fc7f20 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -218,7 +218,7 @@ var parseAutolink = function() {
};
// Attempt to parse a raw HTML tag.
-var parseHtmlTag = function(inlines) {
+var parseHtmlTag = function() {
var m = this.match(reHtmlTag);
if (m) {
return { t: 'Html', c: m };
@@ -294,7 +294,7 @@ var parseEmphasis = function() {
if (res.numdelims >= 1 && res.can_close) {
this.pos += 1;
return {t: 'Emph', c: inlines};
- } else if (next_inline = this.parseInline(inlines)) {
+ } else if (next_inline = this.parseInline()) {
inlines.push(next_inline);
} else {
// didn't find closing delimiter
@@ -310,7 +310,7 @@ var parseEmphasis = function() {
if (res.numdelims >= 2 && res.can_close) {
this.pos += 2;
return {t: 'Strong', c: inlines};
- } else if (next_inline = this.parseInline(inlines)) {
+ } else if (next_inline = this.parseInline()) {
inlines.push(next_inline);
} else {
// didn't find closing delimiter
@@ -339,7 +339,7 @@ var parseEmphasis = function() {
return {t: 'Strong', c: inlines};
} else if (can_close && numdelims === 1 && first_delim === 2) {
return {t: 'Emph', c: inlines};
- } else if (next_inline = this.parseInline(inlines)) {
+ } else if (next_inline = this.parseInline()) {
inlines.push(next_inline);
} else {
// didn't find closing delimiter
@@ -446,9 +446,8 @@ var parseRawLabel = function(s) {
return new InlineParser().parse(s.substr(1, s.length - 2), {});
};
-// Attempt to parse a link. If successful, add the link to
-// inlines.
-var parseLink = function(inlines) {
+// Attempt to parse a link. If successful, return the link.
+var parseLink = function() {
var startpos = this.pos;
var reflabel;
var n;
@@ -474,11 +473,10 @@ var parseLink = function(inlines) {
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
- inlines.push({ t: 'Link',
- destination: dest,
- title: title,
- label: parseRawLabel(rawlabel) });
- return this.pos - startpos;
+ return { t: 'Link',
+ destination: dest,
+ title: title,
+ label: parseRawLabel(rawlabel) };
} else {
this.pos = startpos;
return 0;
@@ -502,18 +500,16 @@ var parseLink = function(inlines) {
// lookup rawlabel in refmap
var link = this.refmap[normalizeReference(reflabel)];
if (link) {
- inlines.push({t: 'Link',
- destination: link.destination,
- title: link.title,
- label: parseRawLabel(rawlabel) });
- return this.pos - startpos;
+ return {t: 'Link',
+ destination: link.destination,
+ title: link.title,
+ label: parseRawLabel(rawlabel) };
} else {
- this.pos = startpos;
- return 0;
+ return null;
}
// Nothing worked, rewind:
this.pos = startpos;
- return 0;
+ return null;
};
// Attempt to parse an entity, return Entity object if successful.
@@ -552,22 +548,18 @@ var parseNewline = function() {
};
// Attempt to parse an image. If the opening '!' is not followed
-// by a link, add a literal '!' to inlines.
-var parseImage = function(inlines) {
+// by a link, return a literal '!'.
+var parseImage = function() {
if (this.match(/^!/)) {
- var n = this.parseLink(inlines);
- if (n === 0) {
- inlines.push({ t: 'Str', c: '!' });
- return 1;
- } else if (inlines[inlines.length - 1] &&
- inlines[inlines.length - 1].t == 'Link') {
- inlines[inlines.length - 1].t = 'Image';
- return n+1;
+ var link = this.parseLink();
+ if (link) {
+ link.t = 'Image';
+ return link;
} else {
- throw "Shouldn't happen";
+ return { t: 'Str', c: '!' };
}
} else {
- return 0;
+ return null;
}
};
@@ -660,10 +652,10 @@ var parseInline = function() {
res = this.parseEmphasis();
break;
case '[':
- res = this.parseLink(inlines);
+ res = this.parseLink();
break;
case '!':
- res = this.parseImage(inlines);
+ res = this.parseImage();
break;
case '<':
res = this.parseAutolink() || this.parseHtmlTag();
@@ -694,7 +686,7 @@ var parseInlines = function(s, refmap) {
this.memo = {};
var inlines = [];
var next_inline;
- while (next_inline = this.parseInline(inlines)) {
+ while (next_inline = this.parseInline()) {
inlines.push(next_inline);
}
return inlines;
--
cgit v1.2.3
From a407869dfc062d6ec24f00482aae6019e083d8c7 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 09:21:07 -0700
Subject: Fixed rewind on parseLabel.
14 test failures now, all with emphasis. In most or all of these
cases, the examples in the spec seem to be mistakes, given what
the spec says.
More troubling, performance is down from around 220 to 83. This
needs investigation.
---
js/stmd.js | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 8fc7f20..7d0a532 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -456,7 +456,7 @@ var parseLink = function() {
n = this.parseLinkLabel();
if (n === 0) {
- return 0;
+ return null;
}
var afterlabel = this.pos;
var rawlabel = this.subject.substr(startpos, n);
@@ -479,7 +479,7 @@ var parseLink = function() {
label: parseRawLabel(rawlabel) };
} else {
this.pos = startpos;
- return 0;
+ return null;
}
}
// If we're here, it wasn't an explicit link. Try to parse a reference link.
@@ -505,6 +505,7 @@ var parseLink = function() {
title: link.title,
label: parseRawLabel(rawlabel) };
} else {
+ this.pos = startpos;
return null;
}
// Nothing worked, rewind:
--
cgit v1.2.3
From 9dde9c96a7b7fb9810a60ae65dd2623b03b83da8 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 09:51:40 -0700
Subject: Fixed reMain regex for better performance.
---
js/stmd.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/js/stmd.js b/js/stmd.js
index 7d0a532..cfd5051 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
// Matches a character with a special meaning in markdown,
// or a string of non-special characters.
-var reMain = /^(?: +|[\n`\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m;
+var reMain = /^(?:[\n`\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
// UTILITY FUNCTIONS
--
cgit v1.2.3
From e829aaf75ff5feb57c9c0f1a0cd260903116752a Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 8 Sep 2014 15:56:04 -0700
Subject: Handle case with 4+ delimiters in a row.
Spec says to skip these.
---
js/stmd.js | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/js/stmd.js b/js/stmd.js
index cfd5051..4b3d994 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -277,6 +277,11 @@ var parseEmphasis = function() {
res = this.scanDelims(c);
numdelims = res.numdelims;
+ if (numdelims >= 4) {
+ this.pos += numdelims;
+ return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
+ }
+
if (!res.can_open || numdelims === 0) {
this.pos = startpos;
return null;
@@ -349,7 +354,7 @@ var parseEmphasis = function() {
}
break;
- default:
+ default: // shouldn't happen
}
return null;
--
cgit v1.2.3
From 977d40f2789eb4e22ba8380e99eab77e5860c21b Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 9 Sep 2014 22:23:42 -0700
Subject: Simplified parseEmphasis.
---
js/stmd.js | 73 ++++++++++++++------------------------------------------------
1 file changed, 16 insertions(+), 57 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 4b3d994..aa21335 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -289,73 +289,32 @@ var parseEmphasis = function() {
this.pos += numdelims;
- var first_close_delims = 0;
var next_inline;
- switch (numdelims) {
- case 1: // we started with * or _
- while (true) {
- res = this.scanDelims(c);
- if (res.numdelims >= 1 && res.can_close) {
- this.pos += 1;
- return {t: 'Emph', c: inlines};
- } else if (next_inline = this.parseInline()) {
- inlines.push(next_inline);
- } else {
- // didn't find closing delimiter
- this.pos = startpos;
- return null;
- }
- }
- break;
-
- case 2: // We started with ** or __
- while (true) {
- res = this.scanDelims(c);
- if (res.numdelims >= 2 && res.can_close) {
- this.pos += 2;
- return {t: 'Strong', c: inlines};
- } else if (next_inline = this.parseInline()) {
- inlines.push(next_inline);
- } else {
- // didn't find closing delimiter
- this.pos = startpos;
- return null;
- }
- }
- break;
-
- case 3: // We started with *** or ___
- var first_delim = 0;
+ var delims_to_match = numdelims;
while (true) {
res = this.scanDelims(c);
- var numdelims = res.numdelims;
- var can_close = res.can_close;
- this.pos += numdelims;
- if (can_close && numdelims === 3 && first_delim === 0) {
- return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
- } else if (can_close && numdelims === 2 && first_delim === 0) {
- first_delim = 2;
- inlines = [{t: 'Strong', c: inlines}];
- } else if (can_close && numdelims === 1 && first_delim === 0) {
- first_delim = 1;
- inlines = [{t: 'Emph', c: inlines}];
- } else if (can_close && numdelims === 2 && first_delim === 1) {
- return {t: 'Strong', c: inlines};
- } else if (can_close && numdelims === 1 && first_delim === 2) {
- return {t: 'Emph', c: inlines};
+ if (res.can_close) {
+ if (res.numdelims >= 2 && delims_to_match >= 2) {
+ delims_to_match -= 2;
+ this.pos += 2;
+ inlines = [{t: 'Strong', c: inlines}];
+ } else if (res.numdelims >= 1 && delims_to_match >= 1) {
+ delims_to_match -= 1;
+ this.pos += 1;
+ inlines = [{t: 'Emph', c: inlines}];
+ }
+ if (delims_to_match === 0) {
+ return inlines[0];
+ }
} else if (next_inline = this.parseInline()) {
inlines.push(next_inline);
} else {
// didn't find closing delimiter
- this.pos = startpos;
- return null;
+ this.pos = startpos + numdelims;
+ return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
}
}
- break;
-
- default: // shouldn't happen
- }
return null;
};
--
cgit v1.2.3
From bd271515770a17f3c320eb394f2012ccd51a417b Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 9 Sep 2014 22:30:54 -0700
Subject: spec: change nesting order of strong/emph in ***a***.
---
spec.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/spec.txt b/spec.txt
index 4a9e9fd..88c8dea 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4392,13 +4392,13 @@ The rules are sufficient for the following nesting patterns:
.
***foo bar***
.
-<p><strong><em>foo bar</em></strong></p>
+<p><em><strong>foo bar</strong></em></p>
.
.
___foo bar___
.
-<p><strong><em>foo bar</em></strong></p>
+<p><em><strong>foo bar</strong></em></p>
.
.
--
cgit v1.2.3
From 905b5d4d11cf1e56137fea1e68eb503863f1b113 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 10 Sep 2014 08:42:39 -0700
Subject: Revert "spec: change nesting order of strong/emph in ***a***."
This reverts commit 49a03b7666e2901d1ab2813fc0bdd23968d22979.
---
spec.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/spec.txt b/spec.txt
index 88c8dea..4a9e9fd 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4392,13 +4392,13 @@ The rules are sufficient for the following nesting patterns:
.
***foo bar***
.
-<p><em><strong>foo bar</strong></em></p>
+<p><strong><em>foo bar</em></strong></p>
.
.
___foo bar___
.
-<p><em><strong>foo bar</strong></em></p>
+<p><strong><em>foo bar</em></strong></p>
.
.
--
cgit v1.2.3
From 6df247e24f2b12d6d1440001877967e2f7c90093 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 10 Sep 2014 08:45:24 -0700
Subject: Special-case ***xx*** as strong/em.
---
js/stmd.js | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index aa21335..7c7362e 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -294,12 +294,16 @@ var parseEmphasis = function() {
var delims_to_match = numdelims;
while (true) {
res = this.scanDelims(c);
+ numclosedelims = res.numdelims;
if (res.can_close) {
- if (res.numdelims >= 2 && delims_to_match >= 2) {
+ if (numclosedelims === 3 && delims_to_match === 3) {
+ this.pos += 3;
+ return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
+ } else if (numclosedelims >= 2 && delims_to_match >= 2) {
delims_to_match -= 2;
this.pos += 2;
inlines = [{t: 'Strong', c: inlines}];
- } else if (res.numdelims >= 1 && delims_to_match >= 1) {
+ } else if (numclosedelims >= 1 && delims_to_match >= 1) {
delims_to_match -= 1;
this.pos += 1;
inlines = [{t: 'Emph', c: inlines}];
--
cgit v1.2.3
From e245f1a2d5ec76807633806a5af1ebe52fe5bd6d Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 10 Sep 2014 08:56:20 -0700
Subject: Updated spec (but not yet examples) with new rules.
These reflect the current parsing algorithm.
We now get a symmetry that we lacked before:
**a* b*
*a *b**
are both emphasis within emphasis.
One asymmetry remains:
**a*
has no emphasis, while
*a**
has emphasis. Further tweaking of the algorithm could regularize
this.
---
spec.txt | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/spec.txt b/spec.txt
index 4a9e9fd..37f92c5 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4024,7 +4024,7 @@ for efficient parsing strategies that do not backtrack:
(a) it is not part of a sequence of four or more unescaped `*`s,
(b) it is not followed by whitespace, and
(c) either it is not followed by a `*` character or it is
- followed immediately by strong emphasis.
+ followed immediately by emphasis or strong emphasis.
2. A single `_` character [can open emphasis](#can-open-emphasis) iff
@@ -4032,7 +4032,7 @@ for efficient parsing strategies that do not backtrack:
(b) it is not followed by whitespace,
(c) is is not preceded by an ASCII alphanumeric character, and
(d) either it is not followed by a `_` character or it is
- followed immediately by strong emphasis.
+ followed immediately by emphasis or strong emphasis.
3. A single `*` character [can close emphasis](#can-close-emphasis)
iff
@@ -4088,6 +4088,11 @@ for efficient parsing strategies that do not backtrack:
emphasis](#can-close-strong-emphasis), and that uses the
same character (`_` or `*`) as the opening delimiter, is reached.
+11. In case of ambiguity, strong emphasis takes precedence. Thus,
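+ `**foo**` is `<strong>foo</strong>`, not `<em><em>foo</em></em>`,
+ and `***foo***` is `<strong><em>foo</em></strong>`, not
+ `<em><strong>foo</strong></em>` or `<em><em><em>foo</em></em></em>`.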
+
These rules can be illustrated through a series of examples.
Simple emphasis:
--
cgit v1.2.3
From 5cd513026fe49e83cfd544a7b375bf4fa1466b21 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 10 Sep 2014 09:00:40 -0700
Subject: Updated test cases in spec to reflect last change.
---
spec.txt | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/spec.txt b/spec.txt
index 37f92c5..e1aa502 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4612,17 +4612,11 @@ Note that there are some asymmetries here:
**foo* bar*
.
foo bar
-**foo* bar*
+foo bar
.
More cases with mismatched delimiters:
-.
-**foo* bar*
-.
-**foo* bar*
-.
-
.
*bar***
.
--
cgit v1.2.3
From 5f56a1988ff8edfc020c97e37dbf834b499157d6 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 10 Sep 2014 09:30:23 -0700
Subject: Fixed bug.
---
js/stmd.js | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 7c7362e..0cfb6b3 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -70,8 +70,9 @@ var reAllTab = /\t/g;
var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
// Matches a character with a special meaning in markdown,
-// or a string of non-special characters.
-var reMain = /^(?:[\n`\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
+// or a string of non-special characters. Note: we match
+// clumps of _ or * or `, because they need to be handled in groups.
+var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
// UTILITY FUNCTIONS
@@ -277,16 +278,16 @@ var parseEmphasis = function() {
res = this.scanDelims(c);
numdelims = res.numdelims;
- if (numdelims >= 4) {
- this.pos += numdelims;
- return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
- }
-
- if (!res.can_open || numdelims === 0) {
+ if (numdelims === 0) {
this.pos = startpos;
return null;
}
+ if (numdelims >= 4 || !res.can_open) {
+ this.pos += numdelims;
+ return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
+ }
+
this.pos += numdelims;
var next_inline;
--
cgit v1.2.3
From 23c24d88401a4dbb8319c8c1fc6bbb0c44fb29cb Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 10 Sep 2014 23:06:22 -0700
Subject: Added last_closer to Inline object.
This helps us avoid unneeded backtracking in pathological input
of the form:
*a
**a
*a
**a
*a
etc.
If we get to position k without finding a closing delimiter,
then backtrack to 1, we can assume we won't find a closing
delimiter when parsing forward again.
This could no doubt be polished up, e.g. by making it sensitive
to the kind of delimiter.
---
js/stmd.js | 25 +++++++++++++++++++------
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 0cfb6b3..fdbc188 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -291,14 +291,19 @@ var parseEmphasis = function() {
this.pos += numdelims;
var next_inline;
+ var last_closer = null;
- var delims_to_match = numdelims;
- while (true) {
+ var delims_to_match = numdelims;
+ while (this.last_closer === null || this.last_closer >= this.pos) {
res = this.scanDelims(c);
numclosedelims = res.numdelims;
if (res.can_close) {
+ if (last_closer < this.pos) {
+ last_closer = this.pos;
+ }
if (numclosedelims === 3 && delims_to_match === 3) {
this.pos += 3;
+ this.last_closer = null;
return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
} else if (numclosedelims >= 2 && delims_to_match >= 2) {
delims_to_match -= 2;
@@ -310,18 +315,24 @@ var parseEmphasis = function() {
inlines = [{t: 'Emph', c: inlines}];
}
if (delims_to_match === 0) {
+ this.last_closer = null;
return inlines[0];
}
} else if (next_inline = this.parseInline()) {
inlines.push(next_inline);
} else {
- // didn't find closing delimiter
- this.pos = startpos + numdelims;
- return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
+ break;
}
}
- return null;
+ // didn't find closing delimiter
+ this.pos = startpos + numdelims;
+ if (last_closer === null) {
+ this.last_closer = startpos;
+ } else {
+ this.last_closer = last_closer;
+ }
+ return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
};
// Attempt to parse link title (sans quotes), returning the string
@@ -654,6 +665,7 @@ var parseInlines = function(s, refmap) {
this.pos = 0;
this.refmap = refmap || {};
this.memo = {};
+ this.last_closer = null;
var inlines = [];
var next_inline;
while (next_inline = this.parseInline()) {
@@ -670,6 +682,7 @@ function InlineParser(){
pos: 0,
refmap: {},
memo: {},
+ last_closer: null,
match: match,
peek: peek,
spnl: spnl,
--
cgit v1.2.3
From c11c900b618f6ca48f37ff1bdd2b9602317ec177 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 11 Sep 2014 09:04:09 -0700
Subject: Renamed last_closer -> last_emphasis_closer.
---
js/stmd.js | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index fdbc188..fab3a51 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -291,19 +291,20 @@ var parseEmphasis = function() {
this.pos += numdelims;
var next_inline;
- var last_closer = null;
+ var last_emphasis_closer = null;
var delims_to_match = numdelims;
- while (this.last_closer === null || this.last_closer >= this.pos) {
+ while (this.last_emphasis_closer === null ||
+ this.last_emphasis_closer >= this.pos) {
res = this.scanDelims(c);
numclosedelims = res.numdelims;
if (res.can_close) {
- if (last_closer < this.pos) {
- last_closer = this.pos;
+ if (last_emphasis_closer < this.pos) {
+ last_emphasis_closer = this.pos;
}
if (numclosedelims === 3 && delims_to_match === 3) {
this.pos += 3;
- this.last_closer = null;
+ this.last_emphasis_closer = null;
return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
} else if (numclosedelims >= 2 && delims_to_match >= 2) {
delims_to_match -= 2;
@@ -315,7 +316,7 @@ var parseEmphasis = function() {
inlines = [{t: 'Emph', c: inlines}];
}
if (delims_to_match === 0) {
- this.last_closer = null;
+ this.last_emphasis_closer = null;
return inlines[0];
}
} else if (next_inline = this.parseInline()) {
@@ -327,10 +328,10 @@ var parseEmphasis = function() {
// didn't find closing delimiter
this.pos = startpos + numdelims;
- if (last_closer === null) {
- this.last_closer = startpos;
+ if (last_emphasis_closer === null) {
+ this.last_emphasis_closer = startpos;
} else {
- this.last_closer = last_closer;
+ this.last_emphasis_closer = last_emphasis_closer;
}
return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
};
@@ -665,7 +666,7 @@ var parseInlines = function(s, refmap) {
this.pos = 0;
this.refmap = refmap || {};
this.memo = {};
- this.last_closer = null;
+ this.last_emphasis_closer = null;
var inlines = [];
var next_inline;
while (next_inline = this.parseInline()) {
@@ -679,10 +680,10 @@ function InlineParser(){
return {
subject: '',
label_nest_level: 0, // used by parseLinkLabel method
+ last_emphasis_closer: null, // used by parseEmphasis method
pos: 0,
refmap: {},
memo: {},
- last_closer: null,
match: match,
peek: peek,
spnl: spnl,
--
cgit v1.2.3
From 2fc6c0d06f4199f4e7ee6fb0e46337bfc6749d24 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 11 Sep 2014 09:10:03 -0700
Subject: Add check for null in last_emphasis_closer.
---
js/stmd.js | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/js/stmd.js b/js/stmd.js
index fab3a51..1b82fd5 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -299,7 +299,8 @@ var parseEmphasis = function() {
res = this.scanDelims(c);
numclosedelims = res.numdelims;
if (res.can_close) {
- if (last_emphasis_closer < this.pos) {
+ if (last_emphasis_closer === null ||
+ last_emphasis_closer < this.pos) {
last_emphasis_closer = this.pos;
}
if (numclosedelims === 3 && delims_to_match === 3) {
--
cgit v1.2.3
From 9c218c305e175183abd577c07daec5daf230801c Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 11 Sep 2014 09:24:24 -0700
Subject: Clarified code logic for last_emphasis_closer.
---
js/stmd.js | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 1b82fd5..250814e 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -294,8 +294,12 @@ var parseEmphasis = function() {
var last_emphasis_closer = null;
var delims_to_match = numdelims;
- while (this.last_emphasis_closer === null ||
- this.last_emphasis_closer >= this.pos) {
+
+ // We need not look for closers if we have already recorded that
+ // there are no closers past this point.
+ if (this.last_emphasis_closer === null ||
+ this.last_emphasis_closer >= this.pos) {
+ while (true) {
res = this.scanDelims(c);
numclosedelims = res.numdelims;
if (res.can_close) {
@@ -325,11 +329,13 @@ var parseEmphasis = function() {
} else {
break;
}
+ }
}
// didn't find closing delimiter
this.pos = startpos + numdelims;
if (last_emphasis_closer === null) {
+ // we know there are no closers after startpos, so:
this.last_emphasis_closer = startpos;
} else {
this.last_emphasis_closer = last_emphasis_closer;
--
cgit v1.2.3
From e6c06dbb715f59b5b9dd4ad7fb7090f83e3ad90d Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 11 Sep 2014 09:26:23 -0700
Subject: Reindented source with js2-mode.
---
js/stmd.js | 2936 ++++++++++++++++++++++++++++++------------------------------
1 file changed, 1468 insertions(+), 1468 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 250814e..6cf65d4 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -11,1505 +11,1505 @@
(function(exports) {
-// Some regexps used in inline parser:
-
-var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]';
-var ESCAPED_CHAR = '\\\\' + ESCAPABLE;
-var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"';
-var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'';
-var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)';
-var REG_CHAR = '[^\\\\()\\x00-\\x20]';
-var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)';
-var TAGNAME = '[A-Za-z][A-Za-z0-9]*';
-var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
-var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
-var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+";
-var SINGLEQUOTEDVALUE = "'[^']*'";
-var DOUBLEQUOTEDVALUE = '"[^"]*"';
-var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")";
-var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")";
-var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)";
-var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>";
-var CLOSETAG = "</" + TAGNAME + "\\s*[>]";
-var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>";
-var CLOSEBLOCKTAG = "</" + BLOCKTAGNAME + "\\s*[>]";
-var HTMLCOMMENT = "<!--([^-]+|[-][^-]+)*-->";
-var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
-var DECLARATION = "<![A-Z]+" + "\\s+[^>]*>";
-var CDATA = "<!\\[CDATA\\[([^\\]]+|\\][^\\]]|\\]\\][^>])*\\]\\]>";
-var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" +
- PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")";
-var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
- "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
-
-var reHtmlTag = new RegExp('^' + HTMLTAG, 'i');
-
-var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
-
-var reLinkTitle = new RegExp(
- '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' +
- '|' +
- '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' +
- '|' +
- '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))');
-
-var reLinkDestinationBraces = new RegExp(
- '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])');
-
-var reLinkDestination = new RegExp(
- '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*');
-
-var reEscapable = new RegExp(ESCAPABLE);
-
-var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g');
-
-var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')');
-
-var reAllTab = /\t/g;
-
-var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
-
-// Matches a character with a special meaning in markdown,
-// or a string of non-special characters. Note: we match
-// clumps of _ or * or `, because they need to be handled in groups.
-var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
-
-// UTILITY FUNCTIONS
-
-// Replace backslash escapes with literal characters.
-var unescape = function(s) {
- return s.replace(reAllEscapedChar, '$1');
-};
-
-// Returns true if string contains only space characters.
-var isBlank = function(s) {
- return /^\s*$/.test(s);
-};
-
-// Normalize reference label: collapse internal whitespace
-// to single space, remove leading/trailing whitespace, case fold.
-var normalizeReference = function(s) {
- return s.trim()
- .replace(/\s+/,' ')
- .toUpperCase();
-};
-
-// Attempt to match a regex in string s at offset offset.
-// Return index of match or null.
-var matchAt = function(re, s, offset) {
- var res = s.slice(offset).match(re);
- if (res) {
- return offset + res.index;
- } else {
- return null;
- }
-};
-
-// Convert tabs to spaces on each line using a 4-space tab stop.
-var detabLine = function(text) {
- if (text.indexOf('\t') == -1) {
- return text;
- } else {
- var lastStop = 0;
- return text.replace(reAllTab, function(match, offset) {
- var result = '    '.slice((offset - lastStop) % 4);
- lastStop = offset + 1;
- return result;
- });
- }
-};
-
-// INLINE PARSER
-
-// These are methods of an InlineParser object, defined below.
-// An InlineParser keeps track of a subject (a string to be
-// parsed) and a position in that subject.
-
-// If re matches at current position in the subject, advance
-// position in subject and return the match; otherwise return null.
-var match = function(re) {
- var match = re.exec(this.subject.slice(this.pos));
- if (match) {
- this.pos += match.index + match[0].length;
- return match[0];
- } else {
- return null;
- }
-};
-
-// Returns the character at the current subject position, or null if
-// there are no more characters.
-var peek = function() {
- return this.subject[this.pos] || null;
-};
-
-// Parse zero or more space characters, including at most one newline
-var spnl = function() {
- this.match(/^ *(?:\n *)?/);
- return 1;
-};
-
-// All of the parsers below try to match something at the current position
-// in the subject. If they succeed in matching anything, they
-// return the inline matched, advancing the subject.
-
-// Attempt to parse backticks, returning either a backtick code span or a
-// literal sequence of backticks.
-var parseBackticks = function() {
- var startpos = this.pos;
- var ticks = this.match(/^`+/);
- if (!ticks) {
- return 0;
- }
- var afterOpenTicks = this.pos;
- var foundCode = false;
- var match;
- while (!foundCode && (match = this.match(/`+/m))) {
- if (match == ticks) {
- return { t: 'Code', c: this.subject.slice(afterOpenTicks,
- this.pos - ticks.length)
- .replace(/[ \n]+/g,' ')
- .trim() };
- }
- }
- // If we got here, we didn't match a closing backtick sequence.
- this.pos = afterOpenTicks;
- return { t: 'Str', c: ticks };
-};
-
-// Parse a backslash-escaped special character, adding either the escaped
-// character, a hard line break (if the backslash is followed by a newline),
-// or a literal backslash to the 'inlines' list.
-var parseBackslash = function() {
- var subj = this.subject,
- pos = this.pos;
- if (subj[pos] === '\\') {
- if (subj[pos + 1] === '\n') {
- this.pos = this.pos + 2;
- return { t: 'Hardbreak' };
- } else if (reEscapable.test(subj[pos + 1])) {
- this.pos = this.pos + 2;
- return { t: 'Str', c: subj[pos + 1] };
- } else {
- this.pos++;
- return {t: 'Str', c: '\\'};
- }
- } else {
- return null;
- }
-};
-
-// Attempt to parse an autolink (URL or email in pointy brackets).
-var parseAutolink = function() {
- var m;
- var dest;
- if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
- dest = m.slice(1,-1);
- return {t: 'Link',
- label: [{ t: 'Str', c: dest }],
- destination: 'mailto:' + dest };
- } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
- dest = m.slice(1,-1);
- return { t: 'Link',
- label: [{ t: 'Str', c: dest }],
- destination: dest };
- } else {
- return null;
- }
-};
-
-// Attempt to parse a raw HTML tag.
-var parseHtmlTag = function() {
- var m = this.match(reHtmlTag);
- if (m) {
- return { t: 'Html', c: m };
- } else {
- return null;
- }
-};
-
-// Scan a sequence of characters == c, and return information about
-// the number of delimiters and whether they are positioned such that
-// they can open and/or close emphasis or strong emphasis. A utility
-// function for strong/emph parsing.
-var scanDelims = function(c) {
- var numdelims = 0;
- var first_close_delims = 0;
- var char_before, char_after;
- var startpos = this.pos;
-
- char_before = this.pos === 0 ? '\n' :
- this.subject[this.pos - 1];
-
- while (this.peek() === c) {
- numdelims++;
- this.pos++;
- }
-
- char_after = this.peek() || '\n';
-
- var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after));
- var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before));
- if (c === '_') {
- can_open = can_open && !((/[a-z0-9]/i).test(char_before));
- can_close = can_close && !((/[a-z0-9]/i).test(char_after));
- }
- this.pos = startpos;
- return { numdelims: numdelims,
- can_open: can_open,
- can_close: can_close };
-};
-
-// Attempt to parse emphasis or strong emphasis.
-var parseEmphasis = function() {
- var startpos = this.pos;
- var c ;
- var first_close = 0;
- var c = this.peek();
- if (!(c === '*' || c === '_')) {
- return null;
- }
-
- var numdelims;
- var delimpos;
- var inlines = [];
-
- // Get opening delimiters.
- res = this.scanDelims(c);
- numdelims = res.numdelims;
-
- if (numdelims === 0) {
- this.pos = startpos;
- return null;
- }
-
- if (numdelims >= 4 || !res.can_open) {
- this.pos += numdelims;
- return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
- }
-
- this.pos += numdelims;
-
- var next_inline;
- var last_emphasis_closer = null;
-
- var delims_to_match = numdelims;
-
- // We need not look for closers if we have already recorded that
- // there are no closers past this point.
- if (this.last_emphasis_closer === null ||
- this.last_emphasis_closer >= this.pos) {
- while (true) {
- res = this.scanDelims(c);
- numclosedelims = res.numdelims;
- if (res.can_close) {
- if (last_emphasis_closer === null ||
- last_emphasis_closer < this.pos) {
- last_emphasis_closer = this.pos;
+ // Some regexps used in inline parser:
+
+ var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]';
+ var ESCAPED_CHAR = '\\\\' + ESCAPABLE;
+ var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"';
+ var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'';
+ var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)';
+ var REG_CHAR = '[^\\\\()\\x00-\\x20]';
+ var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)';
+ var TAGNAME = '[A-Za-z][A-Za-z0-9]*';
+ var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
+ var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
+ var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+";
+ var SINGLEQUOTEDVALUE = "'[^']*'";
+ var DOUBLEQUOTEDVALUE = '"[^"]*"';
+ var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")";
+ var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")";
+ var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)";
+ var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>";
+ var CLOSETAG = "</" + TAGNAME + "\\s*[>]";
+ var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>";
+ var CLOSEBLOCKTAG = "</" + BLOCKTAGNAME + "\\s*[>]";
+ var HTMLCOMMENT = "<!--([^-]+|[-][^-]+)*-->";
+ var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";
+ var DECLARATION = "<![A-Z]+" + "\\s+[^>]*>";
+ var CDATA = "<!\\[CDATA\\[([^\\]]+|\\][^\\]]|\\]\\][^>])*\\]\\]>";
+ var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" +
+ PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")";
+ var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
+ "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
+
+ var reHtmlTag = new RegExp('^' + HTMLTAG, 'i');
+
+ var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i');
+
+ var reLinkTitle = new RegExp(
+ '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' +
+ '|' +
+ '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' +
+ '|' +
+ '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))');
+
+ var reLinkDestinationBraces = new RegExp(
+ '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])');
+
+ var reLinkDestination = new RegExp(
+ '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*');
+
+ var reEscapable = new RegExp(ESCAPABLE);
+
+ var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g');
+
+ var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')');
+
+ var reAllTab = /\t/g;
+
+ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
+
+ // Matches a character with a special meaning in markdown,
+ // or a string of non-special characters. Note: we match
+ // clumps of _ or * or `, because they need to be handled in groups.
+ var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
+
+ // UTILITY FUNCTIONS
+
+ // Replace backslash escapes with literal characters.
+ var unescape = function(s) {
+ return s.replace(reAllEscapedChar, '$1');
+ };
+
+ // Returns true if string contains only space characters.
+ var isBlank = function(s) {
+ return /^\s*$/.test(s);
+ };
+
+ // Normalize reference label: collapse internal whitespace
+ // to single space, remove leading/trailing whitespace, case fold.
+ var normalizeReference = function(s) {
+ return s.trim()
+ .replace(/\s+/,' ')
+ .toUpperCase();
+ };
+
+ // Attempt to match a regex in string s at offset offset.
+ // Return index of match or null.
+ var matchAt = function(re, s, offset) {
+ var res = s.slice(offset).match(re);
+ if (res) {
+ return offset + res.index;
+ } else {
+ return null;
+ }
+ };
+
+ // Convert tabs to spaces on each line using a 4-space tab stop.
+ var detabLine = function(text) {
+ if (text.indexOf('\t') == -1) {
+ return text;
+ } else {
+ var lastStop = 0;
+ return text.replace(reAllTab, function(match, offset) {
+ var result = '    '.slice((offset - lastStop) % 4);
+ lastStop = offset + 1;
+ return result;
+ });
+ }
+ };
+
+ // INLINE PARSER
+
+ // These are methods of an InlineParser object, defined below.
+ // An InlineParser keeps track of a subject (a string to be
+ // parsed) and a position in that subject.
+
+ // If re matches at current position in the subject, advance
+ // position in subject and return the match; otherwise return null.
+ var match = function(re) {
+ var match = re.exec(this.subject.slice(this.pos));
+ if (match) {
+ this.pos += match.index + match[0].length;
+ return match[0];
+ } else {
+ return null;
+ }
+ };
+
+ // Returns the character at the current subject position, or null if
+ // there are no more characters.
+ var peek = function() {
+ return this.subject[this.pos] || null;
+ };
+
+ // Parse zero or more space characters, including at most one newline
+ var spnl = function() {
+ this.match(/^ *(?:\n *)?/);
+ return 1;
+ };
+
+ // All of the parsers below try to match something at the current position
+ // in the subject. If they succeed in matching anything, they
+ // return the inline matched, advancing the subject.
+
+ // Attempt to parse backticks, returning either a backtick code span or a
+ // literal sequence of backticks.
+ var parseBackticks = function() {
+ var startpos = this.pos;
+ var ticks = this.match(/^`+/);
+ if (!ticks) {
+ return 0;
+ }
+ var afterOpenTicks = this.pos;
+ var foundCode = false;
+ var match;
+ while (!foundCode && (match = this.match(/`+/m))) {
+ if (match == ticks) {
+ return { t: 'Code', c: this.subject.slice(afterOpenTicks,
+ this.pos - ticks.length)
+ .replace(/[ \n]+/g,' ')
+ .trim() };
}
- if (numclosedelims === 3 && delims_to_match === 3) {
- this.pos += 3;
- this.last_emphasis_closer = null;
- return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
- } else if (numclosedelims >= 2 && delims_to_match >= 2) {
- delims_to_match -= 2;
- this.pos += 2;
- inlines = [{t: 'Strong', c: inlines}];
- } else if (numclosedelims >= 1 && delims_to_match >= 1) {
- delims_to_match -= 1;
- this.pos += 1;
- inlines = [{t: 'Emph', c: inlines}];
+ }
+ // If we got here, we didn't match a closing backtick sequence.
+ this.pos = afterOpenTicks;
+ return { t: 'Str', c: ticks };
+ };
+
+ // Parse a backslash-escaped special character, adding either the escaped
+ // character, a hard line break (if the backslash is followed by a newline),
+ // or a literal backslash to the 'inlines' list.
+ var parseBackslash = function() {
+ var subj = this.subject,
+ pos = this.pos;
+ if (subj[pos] === '\\') {
+ if (subj[pos + 1] === '\n') {
+ this.pos = this.pos + 2;
+ return { t: 'Hardbreak' };
+ } else if (reEscapable.test(subj[pos + 1])) {
+ this.pos = this.pos + 2;
+ return { t: 'Str', c: subj[pos + 1] };
+ } else {
+ this.pos++;
+ return {t: 'Str', c: '\\'};
}
- if (delims_to_match === 0) {
- this.last_emphasis_closer = null;
- return inlines[0];
+ } else {
+ return null;
+ }
+ };
+
+ // Attempt to parse an autolink (URL or email in pointy brackets).
+ var parseAutolink = function() {
+ var m;
+ var dest;
+ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
+ dest = m.slice(1,-1);
+ return {t: 'Link',
+ label: [{ t: 'Str', c: dest }],
+ destination: 'mailto:' + dest };
+ } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
+ dest = m.slice(1,-1);
+ return { t: 'Link',
+ label: [{ t: 'Str', c: dest }],
+ destination: dest };
+ } else {
+ return null;
+ }
+ };
+
+ // Attempt to parse a raw HTML tag.
+ var parseHtmlTag = function() {
+ var m = this.match(reHtmlTag);
+ if (m) {
+ return { t: 'Html', c: m };
+ } else {
+ return null;
+ }
+ };
+
+ // Scan a sequence of characters == c, and return information about
+ // the number of delimiters and whether they are positioned such that
+ // they can open and/or close emphasis or strong emphasis. A utility
+ // function for strong/emph parsing.
+ var scanDelims = function(c) {
+ var numdelims = 0;
+ var first_close_delims = 0;
+ var char_before, char_after;
+ var startpos = this.pos;
+
+ char_before = this.pos === 0 ? '\n' :
+ this.subject[this.pos - 1];
+
+ while (this.peek() === c) {
+ numdelims++;
+ this.pos++;
+ }
+
+ char_after = this.peek() || '\n';
+
+ var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after));
+ var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before));
+ if (c === '_') {
+ can_open = can_open && !((/[a-z0-9]/i).test(char_before));
+ can_close = can_close && !((/[a-z0-9]/i).test(char_after));
+ }
+ this.pos = startpos;
+ return { numdelims: numdelims,
+ can_open: can_open,
+ can_close: can_close };
+ };
+
+ // Attempt to parse emphasis or strong emphasis.
+ var parseEmphasis = function() {
+ var startpos = this.pos;
+ var c ;
+ var first_close = 0;
+ var c = this.peek();
+ if (!(c === '*' || c === '_')) {
+ return null;
+ }
+
+ var numdelims;
+ var delimpos;
+ var inlines = [];
+
+ // Get opening delimiters.
+ res = this.scanDelims(c);
+ numdelims = res.numdelims;
+
+ if (numdelims === 0) {
+ this.pos = startpos;
+ return null;
+ }
+
+ if (numdelims >= 4 || !res.can_open) {
+ this.pos += numdelims;
+ return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
+ }
+
+ this.pos += numdelims;
+
+ var next_inline;
+ var last_emphasis_closer = null;
+
+ var delims_to_match = numdelims;
+
+ // We need not look for closers if we have already recorded that
+ // there are no closers past this point.
+ if (this.last_emphasis_closer === null ||
+ this.last_emphasis_closer >= this.pos) {
+ while (true) {
+ res = this.scanDelims(c);
+ numclosedelims = res.numdelims;
+ if (res.can_close) {
+ if (last_emphasis_closer === null ||
+ last_emphasis_closer < this.pos) {
+ last_emphasis_closer = this.pos;
+ }
+ if (numclosedelims === 3 && delims_to_match === 3) {
+ this.pos += 3;
+ this.last_emphasis_closer = null;
+ return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
+ } else if (numclosedelims >= 2 && delims_to_match >= 2) {
+ delims_to_match -= 2;
+ this.pos += 2;
+ inlines = [{t: 'Strong', c: inlines}];
+ } else if (numclosedelims >= 1 && delims_to_match >= 1) {
+ delims_to_match -= 1;
+ this.pos += 1;
+ inlines = [{t: 'Emph', c: inlines}];
+ }
+ if (delims_to_match === 0) {
+ this.last_emphasis_closer = null;
+ return inlines[0];
+ }
+ } else if (next_inline = this.parseInline()) {
+ inlines.push(next_inline);
+ } else {
+ break;
+ }
}
- } else if (next_inline = this.parseInline()) {
- inlines.push(next_inline);
+ }
+
+ // didn't find closing delimiter
+ this.pos = startpos + numdelims;
+ if (last_emphasis_closer === null) {
+ // we know there are no closers after startpos, so:
+ this.last_emphasis_closer = startpos;
} else {
- break;
+ this.last_emphasis_closer = last_emphasis_closer;
}
- }
- }
+ return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
+ };
+
+ // Attempt to parse link title (sans quotes), returning the string
+ // or null if no match.
+ var parseLinkTitle = function() {
+ var title = this.match(reLinkTitle);
+ if (title) {
+ // chop off quotes from title and unescape:
+ return unescape(title.substr(1, title.length - 2));
+ } else {
+ return null;
+ }
+ };
+
+ // Attempt to parse link destination, returning the string or
+ // null if no match.
+ var parseLinkDestination = function() {
+ var res = this.match(reLinkDestinationBraces);
+ if (res) { // chop off surrounding <..>:
+ return unescape(res.substr(1, res.length - 2));
+ } else {
+ res = this.match(reLinkDestination);
+ if (res !== null) {
+ return unescape(res);
+ } else {
+ return null;
+ }
+ }
+ };
- // didn't find closing delimiter
- this.pos = startpos + numdelims;
- if (last_emphasis_closer === null) {
- // we know there are no closers after startpos, so:
- this.last_emphasis_closer = startpos;
- } else {
- this.last_emphasis_closer = last_emphasis_closer;
- }
- return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
-};
-
-// Attempt to parse link title (sans quotes), returning the string
-// or null if no match.
-var parseLinkTitle = function() {
- var title = this.match(reLinkTitle);
- if (title) {
- // chop off quotes from title and unescape:
- return unescape(title.substr(1, title.length - 2));
- } else {
- return null;
- }
-};
-
-// Attempt to parse link destination, returning the string or
-// null if no match.
-var parseLinkDestination = function() {
- var res = this.match(reLinkDestinationBraces);
- if (res) { // chop off surrounding <..>:
- return unescape(res.substr(1, res.length - 2));
- } else {
- res = this.match(reLinkDestination);
- if (res !== null) {
- return unescape(res);
- } else {
- return null;
- }
- }
-};
-
-// Attempt to parse a link label, returning number of characters parsed.
-var parseLinkLabel = function() {
- if (this.peek() != '[') {
- return 0;
- }
- var startpos = this.pos;
- var nest_level = 0;
- if (this.label_nest_level > 0) {
- // If we've already checked to the end of this subject
- // for a label, even with a different starting [, we
- // know we won't find one here and we can just return.
- // This avoids lots of backtracking.
- // Note: nest level 1 would be: [foo [bar]
- // nest level 2 would be: [foo [bar [baz]
- this.label_nest_level--;
- return 0;
- }
- this.pos++; // advance past [
- var c;
- while ((c = this.peek()) && (c != ']' || nest_level > 0)) {
- switch (c) {
- case '`':
- this.parseBackticks();
- break;
- case '<':
- this.parseAutolink() || this.parseHtmlTag() || this.parseString();
- break;
- case '[': // nested []
- nest_level++;
- this.pos++;
- break;
- case ']': // nested []
- nest_level--;
- this.pos++;
- break;
- case '\\':
- this.parseBackslash();
- break;
- default:
- this.parseString();
- }
- }
- if (c === ']') {
- this.label_nest_level = 0;
- this.pos++; // advance past ]
- return this.pos - startpos;
- } else {
- if (!c) {
- this.label_nest_level = nest_level;
- }
- this.pos = startpos;
- return 0;
- }
-};
-
-// Parse raw link label, including surrounding [], and return
-// inline contents. (Note: this is not a method of InlineParser.)
-var parseRawLabel = function(s) {
- // note: parse without a refmap; we don't want links to resolve
- // in nested brackets!
- return new InlineParser().parse(s.substr(1, s.length - 2), {});
-};
-
-// Attempt to parse a link. If successful, return the link.
-var parseLink = function() {
- var startpos = this.pos;
- var reflabel;
- var n;
- var dest;
- var title;
-
- n = this.parseLinkLabel();
- if (n === 0) {
- return null;
- }
- var afterlabel = this.pos;
- var rawlabel = this.subject.substr(startpos, n);
-
- // if we got this far, we've parsed a label.
- // Try to parse an explicit link: [label](url "title")
- if (this.peek() == '(') {
- this.pos++;
- if (this.spnl() &&
- ((dest = this.parseLinkDestination()) !== null) &&
- this.spnl() &&
- // make sure there's a space before the title:
- (/^\s/.test(this.subject[this.pos - 1]) &&
- (title = this.parseLinkTitle() || '') || true) &&
- this.spnl() &&
- this.match(/^\)/)) {
- return { t: 'Link',
- destination: dest,
- title: title,
- label: parseRawLabel(rawlabel) };
- } else {
+ // Attempt to parse a link label, returning number of characters parsed.
+ var parseLinkLabel = function() {
+ if (this.peek() != '[') {
+ return 0;
+ }
+ var startpos = this.pos;
+ var nest_level = 0;
+ if (this.label_nest_level > 0) {
+ // If we've already checked to the end of this subject
+ // for a label, even with a different starting [, we
+ // know we won't find one here and we can just return.
+ // This avoids lots of backtracking.
+ // Note: nest level 1 would be: [foo [bar]
+ // nest level 2 would be: [foo [bar [baz]
+ this.label_nest_level--;
+ return 0;
+ }
+ this.pos++; // advance past [
+ var c;
+ while ((c = this.peek()) && (c != ']' || nest_level > 0)) {
+ switch (c) {
+ case '`':
+ this.parseBackticks();
+ break;
+ case '<':
+ this.parseAutolink() || this.parseHtmlTag() || this.parseString();
+ break;
+ case '[': // nested []
+ nest_level++;
+ this.pos++;
+ break;
+ case ']': // nested []
+ nest_level--;
+ this.pos++;
+ break;
+ case '\\':
+ this.parseBackslash();
+ break;
+ default:
+ this.parseString();
+ }
+ }
+ if (c === ']') {
+ this.label_nest_level = 0;
+ this.pos++; // advance past ]
+ return this.pos - startpos;
+ } else {
+ if (!c) {
+ this.label_nest_level = nest_level;
+ }
+ this.pos = startpos;
+ return 0;
+ }
+ };
+
+ // Parse raw link label, including surrounding [], and return
+ // inline contents. (Note: this is not a method of InlineParser.)
+ var parseRawLabel = function(s) {
+ // note: parse without a refmap; we don't want links to resolve
+ // in nested brackets!
+ return new InlineParser().parse(s.substr(1, s.length - 2), {});
+ };
+
+ // Attempt to parse a link. If successful, return the link.
+ var parseLink = function() {
+ var startpos = this.pos;
+ var reflabel;
+ var n;
+ var dest;
+ var title;
+
+ n = this.parseLinkLabel();
+ if (n === 0) {
+ return null;
+ }
+ var afterlabel = this.pos;
+ var rawlabel = this.subject.substr(startpos, n);
+
+ // if we got this far, we've parsed a label.
+ // Try to parse an explicit link: [label](url "title")
+ if (this.peek() == '(') {
+ this.pos++;
+ if (this.spnl() &&
+ ((dest = this.parseLinkDestination()) !== null) &&
+ this.spnl() &&
+ // make sure there's a space before the title:
+ (/^\s/.test(this.subject[this.pos - 1]) &&
+ (title = this.parseLinkTitle() || '') || true) &&
+ this.spnl() &&
+ this.match(/^\)/)) {
+ return { t: 'Link',
+ destination: dest,
+ title: title,
+ label: parseRawLabel(rawlabel) };
+ } else {
+ this.pos = startpos;
+ return null;
+ }
+ }
+ // If we're here, it wasn't an explicit link. Try to parse a reference link.
+ // first, see if there's another label
+ var savepos = this.pos;
+ this.spnl();
+ var beforelabel = this.pos;
+ n = this.parseLinkLabel();
+ if (n == 2) {
+ // empty second label
+ reflabel = rawlabel;
+ } else if (n > 0) {
+ reflabel = this.subject.slice(beforelabel, beforelabel + n);
+ } else {
+ this.pos = savepos;
+ reflabel = rawlabel;
+ }
+ // lookup rawlabel in refmap
+ var link = this.refmap[normalizeReference(reflabel)];
+ if (link) {
+ return {t: 'Link',
+ destination: link.destination,
+ title: link.title,
+ label: parseRawLabel(rawlabel) };
+ } else {
+ this.pos = startpos;
+ return null;
+ }
+ // Nothing worked, rewind:
this.pos = startpos;
return null;
- }
- }
- // If we're here, it wasn't an explicit link. Try to parse a reference link.
- // first, see if there's another label
- var savepos = this.pos;
- this.spnl();
- var beforelabel = this.pos;
- n = this.parseLinkLabel();
- if (n == 2) {
- // empty second label
- reflabel = rawlabel;
- } else if (n > 0) {
- reflabel = this.subject.slice(beforelabel, beforelabel + n);
- } else {
- this.pos = savepos;
- reflabel = rawlabel;
- }
- // lookup rawlabel in refmap
- var link = this.refmap[normalizeReference(reflabel)];
- if (link) {
- return {t: 'Link',
- destination: link.destination,
- title: link.title,
- label: parseRawLabel(rawlabel) };
- } else {
- this.pos = startpos;
- return null;
- }
- // Nothing worked, rewind:
- this.pos = startpos;
- return null;
-};
-
-// Attempt to parse an entity, return Entity object if successful.
-var parseEntity = function() {
- var m;
- if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) {
- return { t: 'Entity', c: m };
- } else {
- return null;
- }
-};
-
-// Parse a run of ordinary characters, or a single character with
-// a special meaning in markdown, as a plain string, adding to inlines.
-var parseString = function() {
- var m;
- if (m = this.match(reMain)) {
- return { t: 'Str', c: m };
- } else {
- return null;
- }
-};
-
-// Parse a newline. If it was preceded by two spaces, return a hard
-// line break; otherwise a soft line break.
-var parseNewline = function() {
- var m = this.match(/^ *\n/);
- if (m) {
- if (m.length > 2) {
- return { t: 'Hardbreak' };
- } else if (m.length > 0) {
- return { t: 'Softbreak' };
- }
- }
- return null;
-};
-
-// Attempt to parse an image. If the opening '!' is not followed
-// by a link, return a literal '!'.
-var parseImage = function() {
- if (this.match(/^!/)) {
- var link = this.parseLink();
- if (link) {
- link.t = 'Image';
- return link;
- } else {
- return { t: 'Str', c: '!' };
- }
- } else {
- return null;
- }
-};
-
-// Attempt to parse a link reference, modifying refmap.
-var parseReference = function(s, refmap) {
- this.subject = s;
- this.pos = 0;
- var rawlabel;
- var dest;
- var title;
- var matchChars;
- var startpos = this.pos;
- var match;
-
- // label:
- matchChars = this.parseLinkLabel();
- if (matchChars === 0) {
- return 0;
- } else {
- rawlabel = this.subject.substr(0, matchChars);
- }
-
- // colon:
- if (this.peek() === ':') {
- this.pos++;
- } else {
- this.pos = startpos;
- return 0;
- }
-
- // link url
- this.spnl();
-
- dest = this.parseLinkDestination();
- if (dest === null || dest.length === 0) {
- this.pos = startpos;
- return 0;
- }
-
- var beforetitle = this.pos;
- this.spnl();
- title = this.parseLinkTitle();
- if (title === null) {
- title = '';
- // rewind before spaces
- this.pos = beforetitle;
- }
-
- // make sure we're at line end:
- if (this.match(/^ *(?:\n|$)/) === null) {
- this.pos = startpos;
- return 0;
- }
-
- var normlabel = normalizeReference(rawlabel);
-
- if (!refmap[normlabel]) {
- refmap[normlabel] = { destination: dest, title: title };
- }
- return this.pos - startpos;
-};
-
-// Parse the next inline element in subject, advancing subject position
-// and returning the inline parsed.
-var parseInline = function() {
- var startpos = this.pos;
- var memoized = this.memo[startpos];
- if (memoized) {
- this.pos = memoized.endpos;
- return memoized.inline;
- }
- var c = this.peek();
- if (!c) {
- return null;
- }
- var res;
- switch(c) {
- case '\n':
- case ' ':
- res = this.parseNewline();
- break;
- case '\\':
- res = this.parseBackslash();
- break;
- case '`':
- res = this.parseBackticks();
- break;
- case '*':
- case '_':
- res = this.parseEmphasis();
- break;
- case '[':
- res = this.parseLink();
- break;
- case '!':
- res = this.parseImage();
- break;
- case '<':
- res = this.parseAutolink() || this.parseHtmlTag();
- break;
- case '&':
- res = this.parseEntity();
- break;
- default:
- res = this.parseString();
- break;
- }
- if (res === null) {
- this.pos += 1;
- res = {t: 'Str', c: c};
- }
- if (res) {
- this.memo[startpos] = { inline: res,
- endpos: this.pos };
- }
- return res;
-};
-
-// Parse s as a list of inlines, using refmap to resolve references.
-var parseInlines = function(s, refmap) {
- this.subject = s;
- this.pos = 0;
- this.refmap = refmap || {};
- this.memo = {};
- this.last_emphasis_closer = null;
- var inlines = [];
- var next_inline;
- while (next_inline = this.parseInline()) {
- inlines.push(next_inline);
- }
- return inlines;
-};
-
-// The InlineParser object.
-function InlineParser(){
- return {
- subject: '',
- label_nest_level: 0, // used by parseLinkLabel method
- last_emphasis_closer: null, // used by parseEmphasis method
- pos: 0,
- refmap: {},
- memo: {},
- match: match,
- peek: peek,
- spnl: spnl,
- parseBackticks: parseBackticks,
- parseBackslash: parseBackslash,
- parseAutolink: parseAutolink,
- parseHtmlTag: parseHtmlTag,
- scanDelims: scanDelims,
- parseEmphasis: parseEmphasis,
- parseLinkTitle: parseLinkTitle,
- parseLinkDestination: parseLinkDestination,
- parseLinkLabel: parseLinkLabel,
- parseLink: parseLink,
- parseEntity: parseEntity,
- parseString: parseString,
- parseNewline: parseNewline,
- parseImage: parseImage,
- parseReference: parseReference,
- parseInline: parseInline,
- parse: parseInlines
- };
-}
-
-// DOC PARSER
-
-// These are methods of a DocParser object, defined below.
-
-var makeBlock = function(tag, start_line, start_column) {
- return { t: tag,
- open: true,
- last_line_blank: false,
- start_line: start_line,
- start_column: start_column,
- end_line: start_line,
- children: [],
- parent: null,
- // string_content is formed by concatenating strings, in finalize:
- string_content: "",
- strings: [],
- inline_content: []
+ };
+
+ // Attempt to parse an entity, return Entity object if successful.
+ var parseEntity = function() {
+ var m;
+ if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) {
+ return { t: 'Entity', c: m };
+ } else {
+ return null;
+ }
+ };
+
+ // Parse a run of ordinary characters, or a single character with
+ // a special meaning in markdown, as a plain string, adding to inlines.
+ var parseString = function() {
+ var m;
+ if (m = this.match(reMain)) {
+ return { t: 'Str', c: m };
+ } else {
+ return null;
+ }
+ };
+
+ // Parse a newline. If it was preceded by two spaces, return a hard
+ // line break; otherwise a soft line break.
+ var parseNewline = function() {
+ var m = this.match(/^ *\n/);
+ if (m) {
+ if (m.length > 2) {
+ return { t: 'Hardbreak' };
+ } else if (m.length > 0) {
+ return { t: 'Softbreak' };
+ }
+ }
+ return null;
+ };
+
+ // Attempt to parse an image. If the opening '!' is not followed
+ // by a link, return a literal '!'.
+ var parseImage = function() {
+ if (this.match(/^!/)) {
+ var link = this.parseLink();
+ if (link) {
+ link.t = 'Image';
+ return link;
+ } else {
+ return { t: 'Str', c: '!' };
+ }
+ } else {
+ return null;
+ }
+ };
+
+ // Attempt to parse a link reference, modifying refmap.
+ var parseReference = function(s, refmap) {
+ this.subject = s;
+ this.pos = 0;
+ var rawlabel;
+ var dest;
+ var title;
+ var matchChars;
+ var startpos = this.pos;
+ var match;
+
+ // label:
+ matchChars = this.parseLinkLabel();
+ if (matchChars === 0) {
+ return 0;
+ } else {
+ rawlabel = this.subject.substr(0, matchChars);
+ }
+
+ // colon:
+ if (this.peek() === ':') {
+ this.pos++;
+ } else {
+ this.pos = startpos;
+ return 0;
+ }
+
+ // link url
+ this.spnl();
+
+ dest = this.parseLinkDestination();
+ if (dest === null || dest.length === 0) {
+ this.pos = startpos;
+ return 0;
+ }
+
+ var beforetitle = this.pos;
+ this.spnl();
+ title = this.parseLinkTitle();
+ if (title === null) {
+ title = '';
+ // rewind before spaces
+ this.pos = beforetitle;
+ }
+
+ // make sure we're at line end:
+ if (this.match(/^ *(?:\n|$)/) === null) {
+ this.pos = startpos;
+ return 0;
+ }
+
+ var normlabel = normalizeReference(rawlabel);
+
+ if (!refmap[normlabel]) {
+ refmap[normlabel] = { destination: dest, title: title };
+ }
+ return this.pos - startpos;
+ };
+
+ // Parse the next inline element in subject, advancing subject position
+ // and returning the inline parsed.
+ var parseInline = function() {
+ var startpos = this.pos;
+ var memoized = this.memo[startpos];
+ if (memoized) {
+ this.pos = memoized.endpos;
+ return memoized.inline;
+ }
+ var c = this.peek();
+ if (!c) {
+ return null;
+ }
+ var res;
+ switch(c) {
+ case '\n':
+ case ' ':
+ res = this.parseNewline();
+ break;
+ case '\\':
+ res = this.parseBackslash();
+ break;
+ case '`':
+ res = this.parseBackticks();
+ break;
+ case '*':
+ case '_':
+ res = this.parseEmphasis();
+ break;
+ case '[':
+ res = this.parseLink();
+ break;
+ case '!':
+ res = this.parseImage();
+ break;
+ case '<':
+ res = this.parseAutolink() || this.parseHtmlTag();
+ break;
+ case '&':
+ res = this.parseEntity();
+ break;
+ default:
+ res = this.parseString();
+ break;
+ }
+ if (res === null) {
+ this.pos += 1;
+ res = {t: 'Str', c: c};
+ }
+ if (res) {
+ this.memo[startpos] = { inline: res,
+ endpos: this.pos };
+ }
+ return res;
+ };
+
+ // Parse s as a list of inlines, using refmap to resolve references.
+ var parseInlines = function(s, refmap) {
+ this.subject = s;
+ this.pos = 0;
+ this.refmap = refmap || {};
+ this.memo = {};
+ this.last_emphasis_closer = null;
+ var inlines = [];
+ var next_inline;
+ while (next_inline = this.parseInline()) {
+ inlines.push(next_inline);
+ }
+ return inlines;
+ };
+
+ // The InlineParser object.
+ function InlineParser(){
+ return {
+ subject: '',
+ label_nest_level: 0, // used by parseLinkLabel method
+ last_emphasis_closer: null, // used by parseEmphasis method
+ pos: 0,
+ refmap: {},
+ memo: {},
+ match: match,
+ peek: peek,
+ spnl: spnl,
+ parseBackticks: parseBackticks,
+ parseBackslash: parseBackslash,
+ parseAutolink: parseAutolink,
+ parseHtmlTag: parseHtmlTag,
+ scanDelims: scanDelims,
+ parseEmphasis: parseEmphasis,
+ parseLinkTitle: parseLinkTitle,
+ parseLinkDestination: parseLinkDestination,
+ parseLinkLabel: parseLinkLabel,
+ parseLink: parseLink,
+ parseEntity: parseEntity,
+ parseString: parseString,
+ parseNewline: parseNewline,
+ parseImage: parseImage,
+ parseReference: parseReference,
+ parseInline: parseInline,
+ parse: parseInlines
};
-};
-
-// Returns true if parent block can contain child block.
-var canContain = function(parent_type, child_type) {
- return ( parent_type == 'Document' ||
- parent_type == 'BlockQuote' ||
- parent_type == 'ListItem' ||
- (parent_type == 'List' && child_type == 'ListItem') );
-};
-
-// Returns true if block type can accept lines of text.
-var acceptsLines = function(block_type) {
- return ( block_type == 'Paragraph' ||
- block_type == 'IndentedCode' ||
- block_type == 'FencedCode' );
-};
-
-// Returns true if block ends with a blank line, descending if needed
-// into lists and sublists.
-var endsWithBlankLine = function(block) {
- if (block.last_line_blank) {
- return true;
- }
- if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) {
- return endsWithBlankLine(block.children[block.children.length - 1]);
- } else {
- return false;
- }
-};
-
-// Break out of all containing lists, resetting the tip of the
-// document to the parent of the highest list, and finalizing
-// all the lists. (This is used to implement the "two blank lines
-// break of of all lists" feature.)
-var breakOutOfLists = function(block, line_number) {
- var b = block;
- var last_list = null;
- do {
- if (b.t === 'List') {
- last_list = b;
}
- b = b.parent;
- } while (b);
- if (last_list) {
- while (block != last_list) {
- this.finalize(block, line_number);
- block = block.parent;
- }
- this.finalize(last_list, line_number);
- this.tip = last_list.parent;
- }
-};
-
-// Add a line to the block at the tip. We assume the tip
-// can accept lines -- that check should be done before calling this.
-var addLine = function(ln, offset) {
- var s = ln.slice(offset);
- if (!(this.tip.open)) {
- throw({ msg: "Attempted to add line (" + ln + ") to closed container." });
- }
- this.tip.strings.push(s);
-};
-
-// Add block of type tag as a child of the tip. If the tip can't
-// accept children, close and finalize it and try its parent,
-// and so on til we find a block that can accept children.
-var addChild = function(tag, line_number, offset) {
- while (!canContain(this.tip.t, tag)) {
- this.finalize(this.tip, line_number);
- }
-
- var column_number = offset + 1; // offset 0 = column 1
- var newBlock = makeBlock(tag, line_number, column_number);
- this.tip.children.push(newBlock);
- newBlock.parent = this.tip;
- this.tip = newBlock;
- return newBlock;
-};
-
-// Parse a list marker and return data on the marker (type,
-// start, delimiter, bullet character, padding) or null.
-var parseListMarker = function(ln, offset) {
- var rest = ln.slice(offset);
- var match;
- var spaces_after_marker;
- var data = {};
- if (rest.match(reHrule)) {
- return null;
- }
- if ((match = rest.match(/^[*+-]( +|$)/))) {
- spaces_after_marker = match[1].length;
- data.type = 'Bullet';
- data.bullet_char = match[0][0];
-
- } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) {
- spaces_after_marker = match[3].length;
- data.type = 'Ordered';
- data.start = parseInt(match[1]);
- data.delimiter = match[2];
- } else {
- return null;
- }
- var blank_item = match[0].length === rest.length;
- if (spaces_after_marker >= 5 ||
- spaces_after_marker < 1 ||
- blank_item) {
- data.padding = match[0].length - spaces_after_marker + 1;
- } else {
- data.padding = match[0].length;
- }
- return data;
-};
-
-// Returns true if the two list items are of the same type,
-// with the same delimiter and bullet character. This is used
-// in agglomerating list items into lists.
-var listsMatch = function(list_data, item_data) {
- return (list_data.type === item_data.type &&
- list_data.delimiter === item_data.delimiter &&
- list_data.bullet_char === item_data.bullet_char);
-};
-
-// Analyze a line of text and update the document appropriately.
-// We parse markdown text by calling this on each line of input,
-// then finalizing the document.
-var incorporateLine = function(ln, line_number) {
-
- var all_matched = true;
- var last_child;
- var first_nonspace;
- var offset = 0;
- var match;
- var data;
- var blank;
- var indent;
- var last_matched_container;
- var i;
- var CODE_INDENT = 4;
-
- var container = this.doc;
- var oldtip = this.tip;
-
- // Convert tabs to spaces:
- ln = detabLine(ln);
-
- // For each containing block, try to parse the associated line start.
- // Bail out on failure: container will point to the last matching block.
- // Set all_matched to false if not all containers match.
- while (container.children.length > 0) {
- last_child = container.children[container.children.length - 1];
- if (!last_child.open) {
- break;
- }
- container = last_child;
-
- match = matchAt(/[^ ]/, ln, offset);
- if (match === null) {
- first_nonspace = ln.length;
- blank = true;
- } else {
- first_nonspace = match;
- blank = false;
- }
- indent = first_nonspace - offset;
-
- switch (container.t) {
- case 'BlockQuote':
- var matched = indent <= 3 && ln[first_nonspace] === '>';
- if (matched) {
- offset = first_nonspace + 1;
- if (ln[offset] === ' ') {
- offset++;
- }
+ // DOC PARSER
+
+ // These are methods of a DocParser object, defined below.
+
+ var makeBlock = function(tag, start_line, start_column) {
+ return { t: tag,
+ open: true,
+ last_line_blank: false,
+ start_line: start_line,
+ start_column: start_column,
+ end_line: start_line,
+ children: [],
+ parent: null,
+ // string_content is formed by concatenating strings, in finalize:
+ string_content: "",
+ strings: [],
+ inline_content: []
+ };
+ };
+
+ // Returns true if parent block can contain child block.
+ var canContain = function(parent_type, child_type) {
+ return ( parent_type == 'Document' ||
+ parent_type == 'BlockQuote' ||
+ parent_type == 'ListItem' ||
+ (parent_type == 'List' && child_type == 'ListItem') );
+ };
+
+ // Returns true if block type can accept lines of text.
+ var acceptsLines = function(block_type) {
+ return ( block_type == 'Paragraph' ||
+ block_type == 'IndentedCode' ||
+ block_type == 'FencedCode' );
+ };
+
+ // Returns true if block ends with a blank line, descending if needed
+ // into lists and sublists.
+ var endsWithBlankLine = function(block) {
+ if (block.last_line_blank) {
+ return true;
+ }
+ if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) {
+ return endsWithBlankLine(block.children[block.children.length - 1]);
} else {
- all_matched = false;
+ return false;
}
- break;
+ };
+
+ // Break out of all containing lists, resetting the tip of the
+ // document to the parent of the highest list, and finalizing
+ // all the lists. (This is used to implement the "two blank lines
+ // break of of all lists" feature.)
+ var breakOutOfLists = function(block, line_number) {
+ var b = block;
+ var last_list = null;
+ do {
+ if (b.t === 'List') {
+ last_list = b;
+ }
+ b = b.parent;
+ } while (b);
- case 'ListItem':
- if (indent >= container.list_data.marker_offset +
- container.list_data.padding) {
- offset += container.list_data.marker_offset +
- container.list_data.padding;
- } else if (blank) {
- offset = first_nonspace;
- } else {
- all_matched = false;
+ if (last_list) {
+ while (block != last_list) {
+ this.finalize(block, line_number);
+ block = block.parent;
+ }
+ this.finalize(last_list, line_number);
+ this.tip = last_list.parent;
+ }
+ };
+
+ // Add a line to the block at the tip. We assume the tip
+ // can accept lines -- that check should be done before calling this.
+ var addLine = function(ln, offset) {
+ var s = ln.slice(offset);
+ if (!(this.tip.open)) {
+ throw({ msg: "Attempted to add line (" + ln + ") to closed container." });
+ }
+ this.tip.strings.push(s);
+ };
+
+ // Add block of type tag as a child of the tip. If the tip can't
+ // accept children, close and finalize it and try its parent,
+ // and so on til we find a block that can accept children.
+ var addChild = function(tag, line_number, offset) {
+ while (!canContain(this.tip.t, tag)) {
+ this.finalize(this.tip, line_number);
}
- break;
- case 'IndentedCode':
- if (indent >= CODE_INDENT) {
- offset += CODE_INDENT;
- } else if (blank) {
- offset = first_nonspace;
+ var column_number = offset + 1; // offset 0 = column 1
+ var newBlock = makeBlock(tag, line_number, column_number);
+ this.tip.children.push(newBlock);
+ newBlock.parent = this.tip;
+ this.tip = newBlock;
+ return newBlock;
+ };
+
+ // Parse a list marker and return data on the marker (type,
+ // start, delimiter, bullet character, padding) or null.
+ var parseListMarker = function(ln, offset) {
+ var rest = ln.slice(offset);
+ var match;
+ var spaces_after_marker;
+ var data = {};
+ if (rest.match(reHrule)) {
+ return null;
+ }
+ if ((match = rest.match(/^[*+-]( +|$)/))) {
+ spaces_after_marker = match[1].length;
+ data.type = 'Bullet';
+ data.bullet_char = match[0][0];
+
+ } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) {
+ spaces_after_marker = match[3].length;
+ data.type = 'Ordered';
+ data.start = parseInt(match[1]);
+ data.delimiter = match[2];
} else {
- all_matched = false;
+ return null;
+ }
+ var blank_item = match[0].length === rest.length;
+ if (spaces_after_marker >= 5 ||
+ spaces_after_marker < 1 ||
+ blank_item) {
+ data.padding = match[0].length - spaces_after_marker + 1;
+ } else {
+ data.padding = match[0].length;
+ }
+ return data;
+ };
+
+ // Returns true if the two list items are of the same type,
+ // with the same delimiter and bullet character. This is used
+ // in agglomerating list items into lists.
+ var listsMatch = function(list_data, item_data) {
+ return (list_data.type === item_data.type &&
+ list_data.delimiter === item_data.delimiter &&
+ list_data.bullet_char === item_data.bullet_char);
+ };
+
+ // Analyze a line of text and update the document appropriately.
+ // We parse markdown text by calling this on each line of input,
+ // then finalizing the document.
+ var incorporateLine = function(ln, line_number) {
+
+ var all_matched = true;
+ var last_child;
+ var first_nonspace;
+ var offset = 0;
+ var match;
+ var data;
+ var blank;
+ var indent;
+ var last_matched_container;
+ var i;
+ var CODE_INDENT = 4;
+
+ var container = this.doc;
+ var oldtip = this.tip;
+
+ // Convert tabs to spaces:
+ ln = detabLine(ln);
+
+ // For each containing block, try to parse the associated line start.
+ // Bail out on failure: container will point to the last matching block.
+ // Set all_matched to false if not all containers match.
+ while (container.children.length > 0) {
+ last_child = container.children[container.children.length - 1];
+ if (!last_child.open) {
+ break;
+ }
+ container = last_child;
+
+ match = matchAt(/[^ ]/, ln, offset);
+ if (match === null) {
+ first_nonspace = ln.length;
+ blank = true;
+ } else {
+ first_nonspace = match;
+ blank = false;
+ }
+ indent = first_nonspace - offset;
+
+ switch (container.t) {
+ case 'BlockQuote':
+ var matched = indent <= 3 && ln[first_nonspace] === '>';
+ if (matched) {
+ offset = first_nonspace + 1;
+ if (ln[offset] === ' ') {
+ offset++;
+ }
+ } else {
+ all_matched = false;
+ }
+ break;
+
+ case 'ListItem':
+ if (indent >= container.list_data.marker_offset +
+ container.list_data.padding) {
+ offset += container.list_data.marker_offset +
+ container.list_data.padding;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+ break;
+
+ case 'IndentedCode':
+ if (indent >= CODE_INDENT) {
+ offset += CODE_INDENT;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+ break;
+
+ case 'ATXHeader':
+ case 'SetextHeader':
+ case 'HorizontalRule':
+ // a header can never container > 1 line, so fail to match:
+ all_matched = false;
+ break;
+
+ case 'FencedCode':
+ // skip optional spaces of fence offset
+ i = container.fence_offset;
+ while (i > 0 && ln[offset] === ' ') {
+ offset++;
+ i--;
+ }
+ break;
+
+ case 'HtmlBlock':
+ if (blank) {
+ all_matched = false;
+ }
+ break;
+
+ case 'Paragraph':
+ if (blank) {
+ container.last_line_blank = true;
+ all_matched = false;
+ }
+ break;
+
+ default:
+ }
+
+ if (!all_matched) {
+ container = container.parent; // back up to last matching block
+ break;
+ }
}
- break;
- case 'ATXHeader':
- case 'SetextHeader':
- case 'HorizontalRule':
- // a header can never container > 1 line, so fail to match:
- all_matched = false;
- break;
+ last_matched_container = container;
+
+ // This function is used to finalize and close any unmatched
+ // blocks. We aren't ready to do this now, because we might
+ // have a lazy paragraph continuation, in which case we don't
+ // want to close unmatched blocks. So we store this closure for
+ // use later, when we have more information.
+ var closeUnmatchedBlocks = function(mythis) {
+ // finalize any blocks not matched
+ while (!already_done && oldtip != last_matched_container) {
+ mythis.finalize(oldtip, line_number);
+ oldtip = oldtip.parent;
+ }
+ var already_done = true;
+ };
- case 'FencedCode':
- // skip optional spaces of fence offset
- i = container.fence_offset;
- while (i > 0 && ln[offset] === ' ') {
- offset++;
- i--;
+ // Check to see if we've hit 2nd blank line; if so break out of list:
+ if (blank && container.last_line_blank) {
+ this.breakOutOfLists(container, line_number);
}
- break;
- case 'HtmlBlock':
- if (blank) {
- all_matched = false;
+ // Unless last matched container is a code block, try new container starts,
+ // adding children to the last matched container:
+ while (container.t != 'FencedCode' &&
+ container.t != 'IndentedCode' &&
+ container.t != 'HtmlBlock' &&
+ // this is a little performance optimization:
+ matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
+
+ match = matchAt(/[^ ]/, ln, offset);
+ if (match === null) {
+ first_nonspace = ln.length;
+ blank = true;
+ } else {
+ first_nonspace = match;
+ blank = false;
+ }
+ indent = first_nonspace - offset;
+
+ if (indent >= CODE_INDENT) {
+ // indented code
+ if (this.tip.t != 'Paragraph' && !blank) {
+ offset += CODE_INDENT;
+ closeUnmatchedBlocks(this);
+ container = this.addChild('IndentedCode', line_number, offset);
+ } else { // indent > 4 in a lazy paragraph continuation
+ break;
+ }
+
+ } else if (ln[first_nonspace] === '>') {
+ // blockquote
+ offset = first_nonspace + 1;
+ // optional following space
+ if (ln[offset] === ' ') {
+ offset++;
+ }
+ closeUnmatchedBlocks(this);
+ container = this.addChild('BlockQuote', line_number, offset);
+
+ } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
+ // ATX header
+ offset = first_nonspace + match[0].length;
+ closeUnmatchedBlocks(this);
+ container = this.addChild('ATXHeader', line_number, first_nonspace);
+ container.level = match[0].trim().length; // number of #s
+ // remove trailing ###s:
+ container.strings =
+ [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')];
+ break;
+
+ } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
+ // fenced code block
+ var fence_length = match[0].length;
+ closeUnmatchedBlocks(this);
+ container = this.addChild('FencedCode', line_number, first_nonspace);
+ container.fence_length = fence_length;
+ container.fence_char = match[0][0];
+ container.fence_offset = first_nonspace - offset;
+ offset = first_nonspace + fence_length;
+ break;
+
+ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
+ // html block
+ closeUnmatchedBlocks(this);
+ container = this.addChild('HtmlBlock', line_number, first_nonspace);
+ // note, we don't adjust offset because the tag is part of the text
+ break;
+
+ } else if (container.t == 'Paragraph' &&
+ container.strings.length === 1 &&
+ ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
+ // setext header line
+ closeUnmatchedBlocks(this);
+ container.t = 'SetextHeader'; // convert Paragraph to SetextHeader
+ container.level = match[0][0] === '=' ? 1 : 2;
+ offset = ln.length;
+
+ } else if (matchAt(reHrule, ln, first_nonspace) !== null) {
+ // hrule
+ closeUnmatchedBlocks(this);
+ container = this.addChild('HorizontalRule', line_number, first_nonspace);
+ offset = ln.length - 1;
+ break;
+
+ } else if ((data = parseListMarker(ln, first_nonspace))) {
+ // list item
+ closeUnmatchedBlocks(this);
+ data.marker_offset = indent;
+ offset = first_nonspace + data.padding;
+
+ // add the list if needed
+ if (container.t !== 'List' ||
+ !(listsMatch(container.list_data, data))) {
+ container = this.addChild('List', line_number, first_nonspace);
+ container.list_data = data;
+ }
+
+ // add the list item
+ container = this.addChild('ListItem', line_number, first_nonspace);
+ container.list_data = data;
+
+ } else {
+ break;
+
+ }
+
+ if (acceptsLines(container.t)) {
+ // if it's a line container, it can't contain other containers
+ break;
+ }
}
- break;
- case 'Paragraph':
- if (blank) {
- container.last_line_blank = true;
- all_matched = false;
+ // What remains at the offset is a text line. Add the text to the
+ // appropriate container.
+
+ match = matchAt(/[^ ]/, ln, offset);
+ if (match === null) {
+ first_nonspace = ln.length;
+ blank = true;
+ } else {
+ first_nonspace = match;
+ blank = false;
}
- break;
+ indent = first_nonspace - offset;
+
+ // First check for a lazy paragraph continuation:
+ if (this.tip !== last_matched_container &&
+ !blank &&
+ this.tip.t == 'Paragraph' &&
+ this.tip.strings.length > 0) {
+ // lazy paragraph continuation
+
+ this.last_line_blank = false;
+ this.addLine(ln, offset);
+
+ } else { // not a lazy continuation
+
+ // finalize any blocks not matched
+ closeUnmatchedBlocks(this);
+
+ // Block quote lines are never blank as they start with >
+ // and we don't count blanks in fenced code for purposes of tight/loose
+ // lists or breaking out of lists. We also don't set last_line_blank
+ // on an empty list item.
+ container.last_line_blank = blank &&
+ !(container.t == 'BlockQuote' ||
+ container.t == 'FencedCode' ||
+ (container.t == 'ListItem' &&
+ container.children.length === 0 &&
+ container.start_line == line_number));
+
+ var cont = container;
+ while (cont.parent) {
+ cont.parent.last_line_blank = false;
+ cont = cont.parent;
+ }
- default:
- }
+ switch (container.t) {
+ case 'IndentedCode':
+ case 'HtmlBlock':
+ this.addLine(ln, offset);
+ break;
+
+ case 'FencedCode':
+ // check for closing code fence:
+ match = (indent <= 3 &&
+ ln[first_nonspace] == container.fence_char &&
+ ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
+ if (match && match[0].length >= container.fence_length) {
+ // don't add closing fence to container; instead, close it:
+ this.finalize(container, line_number);
+ } else {
+ this.addLine(ln, offset);
+ }
+ break;
+
+ case 'ATXHeader':
+ case 'SetextHeader':
+ case 'HorizontalRule':
+ // nothing to do; we already added the contents.
+ break;
+
+ default:
+ if (acceptsLines(container.t)) {
+ this.addLine(ln, first_nonspace);
+ } else if (blank) {
+ // do nothing
+ } else if (container.t != 'HorizontalRule' &&
+ container.t != 'SetextHeader') {
+ // create paragraph container for line
+ container = this.addChild('Paragraph', line_number, first_nonspace);
+ this.addLine(ln, first_nonspace);
+ } else {
+ console.log("Line " + line_number.toString() +
+ " with container type " + container.t +
+ " did not match any condition.");
+
+ }
+ }
+ }
+ };
+
+ // Finalize a block. Close it and do any necessary postprocessing,
+ // e.g. creating string_content from strings, setting the 'tight'
+ // or 'loose' status of a list, and parsing the beginnings
+ // of paragraphs for reference definitions. Reset the tip to the
+ // parent of the closed block.
+ var finalize = function(block, line_number) {
+ var pos;
+ // don't do anything if the block is already closed
+ if (!block.open) {
+ return 0;
+ }
+ block.open = false;
+ if (line_number > block.start_line) {
+ block.end_line = line_number - 1;
+ } else {
+ block.end_line = line_number;
+ }
- if (!all_matched) {
- container = container.parent; // back up to last matching block
- break;
- }
- }
-
- last_matched_container = container;
-
- // This function is used to finalize and close any unmatched
- // blocks. We aren't ready to do this now, because we might
- // have a lazy paragraph continuation, in which case we don't
- // want to close unmatched blocks. So we store this closure for
- // use later, when we have more information.
- var closeUnmatchedBlocks = function(mythis) {
- // finalize any blocks not matched
- while (!already_done && oldtip != last_matched_container) {
- mythis.finalize(oldtip, line_number);
- oldtip = oldtip.parent;
- }
- var already_done = true;
- };
-
- // Check to see if we've hit 2nd blank line; if so break out of list:
- if (blank && container.last_line_blank) {
- this.breakOutOfLists(container, line_number);
- }
-
- // Unless last matched container is a code block, try new container starts,
- // adding children to the last matched container:
- while (container.t != 'FencedCode' &&
- container.t != 'IndentedCode' &&
- container.t != 'HtmlBlock' &&
- // this is a little performance optimization:
- matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
-
- match = matchAt(/[^ ]/, ln, offset);
- if (match === null) {
- first_nonspace = ln.length;
- blank = true;
- } else {
- first_nonspace = match;
- blank = false;
- }
- indent = first_nonspace - offset;
-
- if (indent >= CODE_INDENT) {
- // indented code
- if (this.tip.t != 'Paragraph' && !blank) {
- offset += CODE_INDENT;
- closeUnmatchedBlocks(this);
- container = this.addChild('IndentedCode', line_number, offset);
- } else { // indent > 4 in a lazy paragraph continuation
- break;
- }
-
- } else if (ln[first_nonspace] === '>') {
- // blockquote
- offset = first_nonspace + 1;
- // optional following space
- if (ln[offset] === ' ') {
- offset++;
- }
- closeUnmatchedBlocks(this);
- container = this.addChild('BlockQuote', line_number, offset);
-
- } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
- // ATX header
- offset = first_nonspace + match[0].length;
- closeUnmatchedBlocks(this);
- container = this.addChild('ATXHeader', line_number, first_nonspace);
- container.level = match[0].trim().length; // number of #s
- // remove trailing ###s:
- container.strings =
- [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')];
- break;
-
- } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
- // fenced code block
- var fence_length = match[0].length;
- closeUnmatchedBlocks(this);
- container = this.addChild('FencedCode', line_number, first_nonspace);
- container.fence_length = fence_length;
- container.fence_char = match[0][0];
- container.fence_offset = first_nonspace - offset;
- offset = first_nonspace + fence_length;
- break;
-
- } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
- // html block
- closeUnmatchedBlocks(this);
- container = this.addChild('HtmlBlock', line_number, first_nonspace);
- // note, we don't adjust offset because the tag is part of the text
- break;
-
- } else if (container.t == 'Paragraph' &&
- container.strings.length === 1 &&
- ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
- // setext header line
- closeUnmatchedBlocks(this);
- container.t = 'SetextHeader'; // convert Paragraph to SetextHeader
- container.level = match[0][0] === '=' ? 1 : 2;
- offset = ln.length;
-
- } else if (matchAt(reHrule, ln, first_nonspace) !== null) {
- // hrule
- closeUnmatchedBlocks(this);
- container = this.addChild('HorizontalRule', line_number, first_nonspace);
- offset = ln.length - 1;
- break;
-
- } else if ((data = parseListMarker(ln, first_nonspace))) {
- // list item
- closeUnmatchedBlocks(this);
- data.marker_offset = indent;
- offset = first_nonspace + data.padding;
-
- // add the list if needed
- if (container.t !== 'List' ||
- !(listsMatch(container.list_data, data))) {
- container = this.addChild('List', line_number, first_nonspace);
- container.list_data = data;
- }
-
- // add the list item
- container = this.addChild('ListItem', line_number, first_nonspace);
- container.list_data = data;
-
- } else {
- break;
+ switch (block.t) {
+ case 'Paragraph':
+ block.string_content = block.strings.join('\n').replace(/^ */m,'');
+
+ // try parsing the beginning as link reference definitions:
+ while (block.string_content[0] === '[' &&
+ (pos = this.inlineParser.parseReference(block.string_content,
+ this.refmap))) {
+ block.string_content = block.string_content.slice(pos);
+ if (isBlank(block.string_content)) {
+ block.t = 'ReferenceDef';
+ break;
+ }
+ }
+ break;
- }
+ case 'ATXHeader':
+ case 'SetextHeader':
+ case 'HtmlBlock':
+ block.string_content = block.strings.join('\n');
+ break;
- if (acceptsLines(container.t)) {
- // if it's a line container, it can't contain other containers
- break;
- }
- }
-
- // What remains at the offset is a text line. Add the text to the
- // appropriate container.
-
- match = matchAt(/[^ ]/, ln, offset);
- if (match === null) {
- first_nonspace = ln.length;
- blank = true;
- } else {
- first_nonspace = match;
- blank = false;
- }
- indent = first_nonspace - offset;
-
- // First check for a lazy paragraph continuation:
- if (this.tip !== last_matched_container &&
- !blank &&
- this.tip.t == 'Paragraph' &&
- this.tip.strings.length > 0) {
- // lazy paragraph continuation
-
- this.last_line_blank = false;
- this.addLine(ln, offset);
-
- } else { // not a lazy continuation
-
- // finalize any blocks not matched
- closeUnmatchedBlocks(this);
-
- // Block quote lines are never blank as they start with >
- // and we don't count blanks in fenced code for purposes of tight/loose
- // lists or breaking out of lists. We also don't set last_line_blank
- // on an empty list item.
- container.last_line_blank = blank &&
- !(container.t == 'BlockQuote' ||
- container.t == 'FencedCode' ||
- (container.t == 'ListItem' &&
- container.children.length === 0 &&
- container.start_line == line_number));
-
- var cont = container;
- while (cont.parent) {
- cont.parent.last_line_blank = false;
- cont = cont.parent;
- }
+ case 'IndentedCode':
+ block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n');
+ break;
- switch (container.t) {
- case 'IndentedCode':
- case 'HtmlBlock':
- this.addLine(ln, offset);
- break;
-
- case 'FencedCode':
- // check for closing code fence:
- match = (indent <= 3 &&
- ln[first_nonspace] == container.fence_char &&
- ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
- if (match && match[0].length >= container.fence_length) {
- // don't add closing fence to container; instead, close it:
- this.finalize(container, line_number);
- } else {
- this.addLine(ln, offset);
- }
- break;
-
- case 'ATXHeader':
- case 'SetextHeader':
- case 'HorizontalRule':
- // nothing to do; we already added the contents.
- break;
-
- default:
- if (acceptsLines(container.t)) {
- this.addLine(ln, first_nonspace);
- } else if (blank) {
- // do nothing
- } else if (container.t != 'HorizontalRule' &&
- container.t != 'SetextHeader') {
- // create paragraph container for line
- container = this.addChild('Paragraph', line_number, first_nonspace);
- this.addLine(ln, first_nonspace);
- } else {
- console.log("Line " + line_number.toString() +
- " with container type " + container.t +
- " did not match any condition.");
-
- }
- }
- }
-};
-
-// Finalize a block. Close it and do any necessary postprocessing,
-// e.g. creating string_content from strings, setting the 'tight'
-// or 'loose' status of a list, and parsing the beginnings
-// of paragraphs for reference definitions. Reset the tip to the
-// parent of the closed block.
-var finalize = function(block, line_number) {
- var pos;
- // don't do anything if the block is already closed
- if (!block.open) {
- return 0;
- }
- block.open = false;
- if (line_number > block.start_line) {
- block.end_line = line_number - 1;
- } else {
- block.end_line = line_number;
- }
-
- switch (block.t) {
- case 'Paragraph':
- block.string_content = block.strings.join('\n').replace(/^ */m,'');
-
- // try parsing the beginning as link reference definitions:
- while (block.string_content[0] === '[' &&
- (pos = this.inlineParser.parseReference(block.string_content,
- this.refmap))) {
- block.string_content = block.string_content.slice(pos);
- if (isBlank(block.string_content)) {
- block.t = 'ReferenceDef';
- break;
- }
- }
- break;
-
- case 'ATXHeader':
- case 'SetextHeader':
- case 'HtmlBlock':
- block.string_content = block.strings.join('\n');
- break;
-
- case 'IndentedCode':
- block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n');
- break;
-
- case 'FencedCode':
- // first line becomes info string
- block.info = unescape(block.strings[0].trim());
- if (block.strings.length == 1) {
- block.string_content = '';
- } else {
- block.string_content = block.strings.slice(1).join('\n') + '\n';
- }
- break;
-
- case 'List':
- block.tight = true; // tight by default
-
- var numitems = block.children.length;
- var i = 0;
- while (i < numitems) {
- var item = block.children[i];
- // check for non-final list item ending with blank line:
- var last_item = i == numitems - 1;
- if (endsWithBlankLine(item) && !last_item) {
- block.tight = false;
- break;
- }
- // recurse into children of list item, to see if there are
- // spaces between any of them:
- var numsubitems = item.children.length;
- var j = 0;
- while (j < numsubitems) {
- var subitem = item.children[j];
- var last_subitem = j == numsubitems - 1;
- if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) {
- block.tight = false;
- break;
- }
- j++;
- }
- i++;
- }
- break;
-
- default:
- break;
- }
-
- this.tip = block.parent || this.top;
-};
-
-// Walk through a block & children recursively, parsing string content
-// into inline content where appropriate.
-var processInlines = function(block) {
- switch(block.t) {
- case 'Paragraph':
- case 'SetextHeader':
- case 'ATXHeader':
- block.inline_content =
- this.inlineParser.parse(block.string_content.trim(), this.refmap);
- block.string_content = "";
- break;
- default:
- break;
- }
-
- if (block.children) {
- for (var i = 0; i < block.children.length; i++) {
- this.processInlines(block.children[i]);
- }
- }
-
-};
-
-// The main parsing function. Returns a parsed document AST.
-var parse = function(input) {
- this.doc = makeBlock('Document', 1, 1);
- this.tip = this.doc;
- this.refmap = {};
- var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/);
- var len = lines.length;
- for (var i = 0; i < len; i++) {
- this.incorporateLine(lines[i], i+1);
- }
- while (this.tip) {
- this.finalize(this.tip, len - 1);
- }
- this.processInlines(this.doc);
- return this.doc;
-};
-
-
-// The DocParser object.
-function DocParser(){
- return {
- doc: makeBlock('Document', 1, 1),
- tip: this.doc,
- refmap: {},
- inlineParser: new InlineParser(),
- breakOutOfLists: breakOutOfLists,
- addLine: addLine,
- addChild: addChild,
- incorporateLine: incorporateLine,
- finalize: finalize,
- processInlines: processInlines,
- parse: parse
- };
-}
-
-// HTML RENDERER
-
-// Helper function to produce content in a pair of HTML tags.
-var inTags = function(tag, attribs, contents, selfclosing) {
- var result = '<' + tag;
- if (attribs) {
- var i = 0;
- var attrib;
- while ((attrib = attribs[i]) !== undefined) {
- result = result.concat(' ', attrib[0], '="', attrib[1], '"');
- i++;
+ case 'FencedCode':
+ // first line becomes info string
+ block.info = unescape(block.strings[0].trim());
+ if (block.strings.length == 1) {
+ block.string_content = '';
+ } else {
+ block.string_content = block.strings.slice(1).join('\n') + '\n';
+ }
+ break;
+
+ case 'List':
+ block.tight = true; // tight by default
+
+ var numitems = block.children.length;
+ var i = 0;
+ while (i < numitems) {
+ var item = block.children[i];
+ // check for non-final list item ending with blank line:
+ var last_item = i == numitems - 1;
+ if (endsWithBlankLine(item) && !last_item) {
+ block.tight = false;
+ break;
+ }
+ // recurse into children of list item, to see if there are
+ // spaces between any of them:
+ var numsubitems = item.children.length;
+ var j = 0;
+ while (j < numsubitems) {
+ var subitem = item.children[j];
+ var last_subitem = j == numsubitems - 1;
+ if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) {
+ block.tight = false;
+ break;
+ }
+ j++;
+ }
+ i++;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ this.tip = block.parent || this.top;
+ };
+
+ // Walk through a block & children recursively, parsing string content
+ // into inline content where appropriate.
+ var processInlines = function(block) {
+ switch(block.t) {
+ case 'Paragraph':
+ case 'SetextHeader':
+ case 'ATXHeader':
+ block.inline_content =
+ this.inlineParser.parse(block.string_content.trim(), this.refmap);
+ block.string_content = "";
+ break;
+ default:
+ break;
+ }
+
+ if (block.children) {
+ for (var i = 0; i < block.children.length; i++) {
+ this.processInlines(block.children[i]);
+ }
+ }
+
+ };
+
+ // The main parsing function. Returns a parsed document AST.
+ var parse = function(input) {
+ this.doc = makeBlock('Document', 1, 1);
+ this.tip = this.doc;
+ this.refmap = {};
+ var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/);
+ var len = lines.length;
+ for (var i = 0; i < len; i++) {
+ this.incorporateLine(lines[i], i+1);
+ }
+ while (this.tip) {
+ this.finalize(this.tip, len - 1);
+ }
+ this.processInlines(this.doc);
+ return this.doc;
+ };
+
+
+ // The DocParser object.
+ function DocParser(){
+ return {
+ doc: makeBlock('Document', 1, 1),
+ tip: this.doc,
+ refmap: {},
+ inlineParser: new InlineParser(),
+ breakOutOfLists: breakOutOfLists,
+ addLine: addLine,
+ addChild: addChild,
+ incorporateLine: incorporateLine,
+ finalize: finalize,
+ processInlines: processInlines,
+ parse: parse
+ };
}
- }
- if (contents) {
- result = result.concat('>', contents, '</', tag, '>');
- } else if (selfclosing) {
- result = result + ' />';
- } else {
- result = result.concat('></', tag, '>');
- }
- return result;
-};
-
-// Render an inline element as HTML.
-var renderInline = function(inline) {
- var attrs;
- switch (inline.t) {
- case 'Str':
- return this.escape(inline.c);
- case 'Softbreak':
- return this.softbreak;
- case 'Hardbreak':
- return inTags('br',[],"",true) + '\n';
- case 'Emph':
- return inTags('em', [], this.renderInlines(inline.c));
- case 'Strong':
- return inTags('strong', [], this.renderInlines(inline.c));
- case 'Html':
- return inline.c;
- case 'Entity':
- return inline.c;
- case 'Link':
- attrs = [['href', this.escape(inline.destination, true)]];
- if (inline.title) {
- attrs.push(['title', this.escape(inline.title, true)]);
- }
- return inTags('a', attrs, this.renderInlines(inline.label));
- case 'Image':
- attrs = [['src', this.escape(inline.destination, true)],
- ['alt', this.escape(this.renderInlines(inline.label))]];
- if (inline.title) {
- attrs.push(['title', this.escape(inline.title, true)]);
- }
- return inTags('img', attrs, "", true);
- case 'Code':
- return inTags('code', [], this.escape(inline.c));
- default:
- console.log("Uknown inline type " + inline.t);
- return "";
- }
-};
-
-// Render a list of inlines.
-var renderInlines = function(inlines) {
- var result = '';
- for (var i=0; i < inlines.length; i++) {
- result = result + this.renderInline(inlines[i]);
- }
- return result;
-};
-
-// Render a single block element.
-var renderBlock = function(block, in_tight_list) {
- var tag;
- var attr;
- var info_words;
- switch (block.t) {
- case 'Document':
- var whole_doc = this.renderBlocks(block.children);
- return (whole_doc === '' ? '' : whole_doc + '\n');
- case 'Paragraph':
- if (in_tight_list) {
- return this.renderInlines(block.inline_content);
- } else {
- return inTags('p', [], this.renderInlines(block.inline_content));
- }
- break;
- case 'BlockQuote':
- var filling = this.renderBlocks(block.children);
- return inTags('blockquote', [], filling === '' ? this.innersep :
- this.innersep + this.renderBlocks(block.children) + this.innersep);
- case 'ListItem':
- return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim());
- case 'List':
- tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
- attr = (!block.list_data.start || block.list_data.start == 1) ?
- [] : [['start', block.list_data.start.toString()]];
- return inTags(tag, attr, this.innersep +
- this.renderBlocks(block.children, block.tight) +
- this.innersep);
- case 'ATXHeader':
- case 'SetextHeader':
- tag = 'h' + block.level;
- return inTags(tag, [], this.renderInlines(block.inline_content));
- case 'IndentedCode':
- return inTags('pre', [],
- inTags('code', [], this.escape(block.string_content)));
- case 'FencedCode':
- info_words = block.info.split(/ +/);
- attr = info_words.length === 0 || info_words[0].length === 0 ?
- [] : [['class','language-' +
- this.escape(info_words[0],true)]];
- return inTags('pre', [],
- inTags('code', attr, this.escape(block.string_content)));
- case 'HtmlBlock':
- return block.string_content;
- case 'ReferenceDef':
- return "";
- case 'HorizontalRule':
- return inTags('hr',[],"",true);
- default:
- console.log("Uknown block type " + block.t);
- return "";
- }
-};
-
-// Render a list of block elements, separated by this.blocksep.
-var renderBlocks = function(blocks, in_tight_list) {
- var result = [];
- for (var i=0; i < blocks.length; i++) {
- if (blocks[i].t !== 'ReferenceDef') {
- result.push(this.renderBlock(blocks[i], in_tight_list));
+
+ // HTML RENDERER
+
+ // Helper function to produce content in a pair of HTML tags.
+ var inTags = function(tag, attribs, contents, selfclosing) {
+ var result = '<' + tag;
+ if (attribs) {
+ var i = 0;
+ var attrib;
+ while ((attrib = attribs[i]) !== undefined) {
+ result = result.concat(' ', attrib[0], '="', attrib[1], '"');
+ i++;
+ }
+ }
+ if (contents) {
+ result = result.concat('>', contents, '</', tag, '>');
+ } else if (selfclosing) {
+ result = result + ' />';
+ } else {
+ result = result.concat('></', tag, '>');
+ }
+ return result;
+ };
+
+ // Render an inline element as HTML.
+ var renderInline = function(inline) {
+ var attrs;
+ switch (inline.t) {
+ case 'Str':
+ return this.escape(inline.c);
+ case 'Softbreak':
+ return this.softbreak;
+ case 'Hardbreak':
+ return inTags('br',[],"",true) + '\n';
+ case 'Emph':
+ return inTags('em', [], this.renderInlines(inline.c));
+ case 'Strong':
+ return inTags('strong', [], this.renderInlines(inline.c));
+ case 'Html':
+ return inline.c;
+ case 'Entity':
+ return inline.c;
+ case 'Link':
+ attrs = [['href', this.escape(inline.destination, true)]];
+ if (inline.title) {
+ attrs.push(['title', this.escape(inline.title, true)]);
+ }
+ return inTags('a', attrs, this.renderInlines(inline.label));
+ case 'Image':
+ attrs = [['src', this.escape(inline.destination, true)],
+ ['alt', this.escape(this.renderInlines(inline.label))]];
+ if (inline.title) {
+ attrs.push(['title', this.escape(inline.title, true)]);
+ }
+ return inTags('img', attrs, "", true);
+ case 'Code':
+ return inTags('code', [], this.escape(inline.c));
+ default:
+ console.log("Uknown inline type " + inline.t);
+ return "";
+ }
+ };
+
+ // Render a list of inlines.
+ var renderInlines = function(inlines) {
+ var result = '';
+ for (var i=0; i < inlines.length; i++) {
+ result = result + this.renderInline(inlines[i]);
+ }
+ return result;
+ };
+
+ // Render a single block element.
+ var renderBlock = function(block, in_tight_list) {
+ var tag;
+ var attr;
+ var info_words;
+ switch (block.t) {
+ case 'Document':
+ var whole_doc = this.renderBlocks(block.children);
+ return (whole_doc === '' ? '' : whole_doc + '\n');
+ case 'Paragraph':
+ if (in_tight_list) {
+ return this.renderInlines(block.inline_content);
+ } else {
+ return inTags('p', [], this.renderInlines(block.inline_content));
+ }
+ break;
+ case 'BlockQuote':
+ var filling = this.renderBlocks(block.children);
+ return inTags('blockquote', [], filling === '' ? this.innersep :
+ this.innersep + this.renderBlocks(block.children) + this.innersep);
+ case 'ListItem':
+ return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim());
+ case 'List':
+ tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
+ attr = (!block.list_data.start || block.list_data.start == 1) ?
+ [] : [['start', block.list_data.start.toString()]];
+ return inTags(tag, attr, this.innersep +
+ this.renderBlocks(block.children, block.tight) +
+ this.innersep);
+ case 'ATXHeader':
+ case 'SetextHeader':
+ tag = 'h' + block.level;
+ return inTags(tag, [], this.renderInlines(block.inline_content));
+ case 'IndentedCode':
+ return inTags('pre', [],
+ inTags('code', [], this.escape(block.string_content)));
+ case 'FencedCode':
+ info_words = block.info.split(/ +/);
+ attr = info_words.length === 0 || info_words[0].length === 0 ?
+ [] : [['class','language-' +
+ this.escape(info_words[0],true)]];
+ return inTags('pre', [],
+ inTags('code', attr, this.escape(block.string_content)));
+ case 'HtmlBlock':
+ return block.string_content;
+ case 'ReferenceDef':
+ return "";
+ case 'HorizontalRule':
+ return inTags('hr',[],"",true);
+ default:
+ console.log("Uknown block type " + block.t);
+ return "";
+ }
+ };
+
+ // Render a list of block elements, separated by this.blocksep.
+ var renderBlocks = function(blocks, in_tight_list) {
+ var result = [];
+ for (var i=0; i < blocks.length; i++) {
+ if (blocks[i].t !== 'ReferenceDef') {
+ result.push(this.renderBlock(blocks[i], in_tight_list));
+ }
+ }
+ return result.join(this.blocksep);
+ };
+
+ // The HtmlRenderer object.
+ function HtmlRenderer(){
+ return {
+ // default options:
+ blocksep: '\n', // space between blocks
+ innersep: '\n', // space between block container tag and contents
+ softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML
+ // set to "<br />" to make them hard breaks
+ // set to " " if you want to ignore line wrapping in source
+ escape: function(s, preserve_entities) {
+ if (preserve_entities) {
+ return s.replace(/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi,'&amp;')
+ .replace(/[<]/g,'&lt;')
+ .replace(/[>]/g,'&gt;')
+ .replace(/["]/g,'&quot;');
+ } else {
+ return s.replace(/[&]/g,'&amp;')
+ .replace(/[<]/g,'&lt;')
+ .replace(/[>]/g,'&gt;')
+ .replace(/["]/g,'&quot;');
+ }
+ },
+ renderInline: renderInline,
+ renderInlines: renderInlines,
+ renderBlock: renderBlock,
+ renderBlocks: renderBlocks,
+ render: renderBlock
+ };
}
- }
- return result.join(this.blocksep);
-};
-
-// The HtmlRenderer object.
-function HtmlRenderer(){
- return {
- // default options:
- blocksep: '\n', // space between blocks
- innersep: '\n', // space between block container tag and contents
- softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML
- // set to "<br />" to make them hard breaks
- // set to " " if you want to ignore line wrapping in source
- escape: function(s, preserve_entities) {
- if (preserve_entities) {
- return s.replace(/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi,'&amp;')
- .replace(/[<]/g,'&lt;')
- .replace(/[>]/g,'&gt;')
- .replace(/["]/g,'&quot;');
- } else {
- return s.replace(/[&]/g,'&amp;')
- .replace(/[<]/g,'&lt;')
- .replace(/[>]/g,'&gt;')
- .replace(/["]/g,'&quot;');
- }
- },
- renderInline: renderInline,
- renderInlines: renderInlines,
- renderBlock: renderBlock,
- renderBlocks: renderBlocks,
- render: renderBlock
- };
-}
-
-exports.DocParser = DocParser;
-exports.HtmlRenderer = HtmlRenderer;
+
+ exports.DocParser = DocParser;
+ exports.HtmlRenderer = HtmlRenderer;
})(typeof exports === 'undefined' ? this.stmd = {} : exports);
--
cgit v1.2.3
From 0efcb9ff947ee9fcda77f317f2bec811160dca4a Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 11 Sep 2014 09:34:29 -0700
Subject: jshint improvements.
---
js/stmd.js | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 6cf65d4..f7a1e4c 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -265,7 +265,7 @@
var startpos = this.pos;
var c ;
var first_close = 0;
- var c = this.peek();
+ c = this.peek();
if (!(c === '*' || c === '_')) {
return null;
}
@@ -324,7 +324,7 @@
this.last_emphasis_closer = null;
return inlines[0];
}
- } else if (next_inline = this.parseInline()) {
+ } else if ((next_inline = this.parseInline())) {
inlines.push(next_inline);
} else {
break;
@@ -396,7 +396,9 @@
this.parseBackticks();
break;
case '<':
- this.parseAutolink() || this.parseHtmlTag() || this.parseString();
+ if (!(this.parseAutolink())) {
+ this.parseHtmlTag();
+ }
break;
case '[': // nested []
nest_level++;
@@ -515,7 +517,7 @@
// a special meaning in markdown, as a plain string, adding to inlines.
var parseString = function() {
var m;
- if (m = this.match(reMain)) {
+ if ((m = this.match(reMain))) {
return { t: 'Str', c: m };
} else {
return null;
@@ -676,7 +678,7 @@
this.last_emphasis_closer = null;
var inlines = [];
var next_inline;
- while (next_inline = this.parseInline()) {
+ while ((next_inline = this.parseInline())) {
inlines.push(next_inline);
}
return inlines;
--
cgit v1.2.3
From 026fd723dc8bc327b86096c489df5b8f8e9035ba Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 11 Sep 2014 10:39:51 -0700
Subject: Fixed typo.
starting
---
js/stmd.js | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index f7a1e4c..4b80581 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -1401,7 +1401,7 @@
case 'Code':
return inTags('code', [], this.escape(inline.c));
default:
- console.log("Uknown inline type " + inline.t);
+ console.log("Unknown inline type " + inline.t);
return "";
}
};
@@ -1465,7 +1465,7 @@
case 'HorizontalRule':
return inTags('hr',[],"",true);
default:
- console.log("Uknown block type " + block.t);
+ console.log("Unknown block type " + block.t);
return "";
}
};
--
cgit v1.2.3
From 25f65e91293f1bfd74f81a78e2dac2cdbaa55e98 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 11 Sep 2014 10:53:40 -0700
Subject: Fixed performance regression from eccc23dc8d.
---
js/stmd.js | 60 +++++++++++++++++++++++++++++-------------------------------
1 file changed, 29 insertions(+), 31 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 4b80581..187d058 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -297,38 +297,36 @@
// We need not look for closers if we have already recorded that
// there are no closers past this point.
- if (this.last_emphasis_closer === null ||
- this.last_emphasis_closer >= this.pos) {
- while (true) {
- res = this.scanDelims(c);
- numclosedelims = res.numdelims;
- if (res.can_close) {
- if (last_emphasis_closer === null ||
- last_emphasis_closer < this.pos) {
- last_emphasis_closer = this.pos;
- }
- if (numclosedelims === 3 && delims_to_match === 3) {
- this.pos += 3;
- this.last_emphasis_closer = null;
- return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
- } else if (numclosedelims >= 2 && delims_to_match >= 2) {
- delims_to_match -= 2;
- this.pos += 2;
- inlines = [{t: 'Strong', c: inlines}];
- } else if (numclosedelims >= 1 && delims_to_match >= 1) {
- delims_to_match -= 1;
- this.pos += 1;
- inlines = [{t: 'Emph', c: inlines}];
- }
- if (delims_to_match === 0) {
- this.last_emphasis_closer = null;
- return inlines[0];
- }
- } else if ((next_inline = this.parseInline())) {
- inlines.push(next_inline);
- } else {
- break;
+ while (this.last_emphasis_closer === null ||
+ this.last_emphasis_closer >= this.pos) {
+ res = this.scanDelims(c);
+ numclosedelims = res.numdelims;
+ if (res.can_close) {
+ if (last_emphasis_closer === null ||
+ last_emphasis_closer < this.pos) {
+ last_emphasis_closer = this.pos;
}
+ if (numclosedelims === 3 && delims_to_match === 3) {
+ this.pos += 3;
+ this.last_emphasis_closer = null;
+ return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
+ } else if (numclosedelims >= 2 && delims_to_match >= 2) {
+ delims_to_match -= 2;
+ this.pos += 2;
+ inlines = [{t: 'Strong', c: inlines}];
+ } else if (numclosedelims >= 1 && delims_to_match >= 1) {
+ delims_to_match -= 1;
+ this.pos += 1;
+ inlines = [{t: 'Emph', c: inlines}];
+ }
+ if (delims_to_match === 0) {
+ this.last_emphasis_closer = null;
+ return inlines[0];
+ }
+ } else if ((next_inline = this.parseInline())) {
+ inlines.push(next_inline);
+ } else {
+ break;
}
}
--
cgit v1.2.3
From 118e3d3c39242225baa876319cdbfbb1adadc77b Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Mon, 15 Sep 2014 15:28:49 +0200
Subject: Cleanup external APIs
---
src/blocks.c | 11 ++--
src/html/html.c | 163 ++++++++++++++++++++++++++++---------------------------
src/inlines.c | 1 +
src/main.c | 8 +--
src/print.c | 114 +++++++++++++++++++-------------------
src/references.c | 1 +
src/stmd.h | 26 ++-------
7 files changed, 159 insertions(+), 165 deletions(-)
diff --git a/src/blocks.c b/src/blocks.c
index 30a8284..2ac7032 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -6,8 +6,9 @@
#include "stmd.h"
#include "utf8.h"
-#include "html/houdini.h"
#include "scanners.h"
+#include "inlines.h"
+#include "html/houdini.h"
#define peek_at(i, n) (i)->data[n]
@@ -224,7 +225,7 @@ static void finalize(node_block* b, int line_number)
}
// Add a node_block as child of another. Return pointer to child.
-extern node_block* add_child(node_block* parent,
+static node_block* add_child(node_block* parent,
int block_type, int start_line, int start_column)
{
assert(parent);
@@ -252,7 +253,7 @@ extern node_block* add_child(node_block* parent,
}
// Free a node_block list and any children.
-extern void free_blocks(node_block* e)
+void stmd_free_nodes(node_block *e)
{
node_block * next;
while (e != NULL) {
@@ -264,7 +265,7 @@ extern void free_blocks(node_block* e)
} else if (e->tag == BLOCK_DOCUMENT) {
reference_map_free(e->as.document.refmap);
}
- free_blocks(e->children);
+ stmd_free_nodes(e->children);
free(e);
e = next;
}
@@ -279,8 +280,6 @@ void process_inlines(node_block* cur, reference_map *refmap)
case BLOCK_ATX_HEADER:
case BLOCK_SETEXT_HEADER:
cur->inline_content = parse_inlines(&cur->string_content, refmap);
- // MEM
- // strbuf_free(&cur->string_content);
break;
default:
diff --git a/src/html/html.c b/src/html/html.c
index b48b10b..6f3bc76 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -32,8 +32,89 @@ static inline void cr(strbuf *html)
strbuf_putc(html, '\n');
}
+// Convert an inline list to HTML. Returns 0 on success, and sets result.
+static void inlines_to_html(strbuf *html, node_inl* ils)
+{
+ strbuf scrap = GH_BUF_INIT;
+
+ while(ils != NULL) {
+ switch(ils->tag) {
+ case INL_STRING:
+ escape_html(html, ils->content.literal.data, ils->content.literal.len);
+ break;
+
+ case INL_LINEBREAK:
+ strbuf_puts(html, "<br />\n");
+ break;
+
+ case INL_SOFTBREAK:
+ strbuf_putc(html, '\n');
+ break;
+
+ case INL_CODE:
+ strbuf_puts(html, "<code>");
+ escape_html(html, ils->content.literal.data, ils->content.literal.len);
+ strbuf_puts(html, "</code>");
+ break;
+
+ case INL_RAW_HTML:
+ strbuf_put(html,
+ ils->content.literal.data,
+ ils->content.literal.len);
+ break;
+
+ case INL_LINK:
+ strbuf_puts(html, "<a href=\"");
+ if (ils->content.linkable.url)
+ escape_href(html, ils->content.linkable.url, -1);
+
+ if (ils->content.linkable.title) {
+ strbuf_puts(html, "\" title=\"");
+ escape_html(html, ils->content.linkable.title, -1);
+ }
+
+ strbuf_puts(html, "\">");
+ inlines_to_html(html, ils->content.inlines);
+ strbuf_puts(html, "</a>");
+ break;
+
+ case INL_IMAGE:
+ strbuf_puts(html, "<img src=\"");
+ if (ils->content.linkable.url)
+ escape_href(html, ils->content.linkable.url, -1);
+
+ inlines_to_html(&scrap, ils->content.inlines);
+ strbuf_puts(html, "\" alt=\"");
+ if (scrap.size)
+ escape_html(html, scrap.ptr, scrap.size);
+ strbuf_clear(&scrap);
+
+ if (ils->content.linkable.title) {
+ strbuf_puts(html, "\" title=\"");
+ escape_html(html, ils->content.linkable.title, -1);
+ }
+
+ strbuf_puts(html, "\"/>");
+ break;
+
+ case INL_STRONG:
+ strbuf_puts(html, "<strong>");
+ inlines_to_html(html, ils->content.inlines);
+ strbuf_puts(html, "</strong>");
+ break;
+
+ case INL_EMPH:
+ strbuf_puts(html, "<em>");
+ inlines_to_html(html, ils->content.inlines);
+ strbuf_puts(html, "</em>");
+ break;
+ }
+ ils = ils->next;
+ }
+}
+
// Convert a node_block list to HTML. Returns 0 on success, and sets result.
-void blocks_to_html(strbuf *html, node_block *b, bool tight)
+static void blocks_to_html(strbuf *html, node_block *b, bool tight)
{
struct ListData *data;
@@ -139,83 +220,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight)
}
}
-// Convert an inline list to HTML. Returns 0 on success, and sets result.
-void inlines_to_html(strbuf *html, node_inl* ils)
+void stmd_render_html(strbuf *html, node_block *root)
{
- strbuf scrap = GH_BUF_INIT;
-
- while(ils != NULL) {
- switch(ils->tag) {
- case INL_STRING:
- escape_html(html, ils->content.literal.data, ils->content.literal.len);
- break;
-
- case INL_LINEBREAK:
- strbuf_puts(html, "
\n");
- break;
-
- case INL_SOFTBREAK:
- strbuf_putc(html, '\n');
- break;
-
- case INL_CODE:
- strbuf_puts(html, "");
- escape_html(html, ils->content.literal.data, ils->content.literal.len);
- strbuf_puts(html, "");
- break;
-
- case INL_RAW_HTML:
- strbuf_put(html,
- ils->content.literal.data,
- ils->content.literal.len);
- break;
-
- case INL_LINK:
- strbuf_puts(html, "content.linkable.url)
- escape_href(html, ils->content.linkable.url, -1);
-
- if (ils->content.linkable.title) {
- strbuf_puts(html, "\" title=\"");
- escape_html(html, ils->content.linkable.title, -1);
- }
-
- strbuf_puts(html, "\">");
- inlines_to_html(html, ils->content.inlines);
- strbuf_puts(html, "");
- break;
-
- case INL_IMAGE:
- strbuf_puts(html, "
content.linkable.url)
- escape_href(html, ils->content.linkable.url, -1);
-
- inlines_to_html(&scrap, ils->content.inlines);
- strbuf_puts(html, "\" alt=\"");
- if (scrap.size)
- escape_html(html, scrap.ptr, scrap.size);
- strbuf_clear(&scrap);
-
- if (ils->content.linkable.title) {
- strbuf_puts(html, "\" title=\"");
- escape_html(html, ils->content.linkable.title, -1);
- }
-
- strbuf_puts(html, "\"/>");
- break;
-
- case INL_STRONG:
- strbuf_puts(html, "");
- inlines_to_html(html, ils->content.inlines);
- strbuf_puts(html, "");
- break;
-
- case INL_EMPH:
- strbuf_puts(html, "");
- inlines_to_html(html, ils->content.inlines);
- strbuf_puts(html, "");
- break;
- }
- ils = ils->next;
- }
+ blocks_to_html(html, root, false);
}
diff --git a/src/inlines.c b/src/inlines.c
index cd2d124..145825c 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -8,6 +8,7 @@
#include "html/houdini.h"
#include "utf8.h"
#include "scanners.h"
+#include "inlines.h"
typedef struct Subject {
chunk input;
diff --git a/src/main.c b/src/main.c
index 90bb16d..76a0e12 100644
--- a/src/main.c
+++ b/src/main.c
@@ -17,9 +17,9 @@ static void print_document(node_block *document, bool ast)
strbuf html = GH_BUF_INIT;
if (ast) {
- print_blocks(document, 0);
+ stmd_debug_print(document);
} else {
- blocks_to_html(&html, document, false);
+ stmd_render_html(&html, document);
printf("%s", html.ptr);
strbuf_free(&html);
}
@@ -54,7 +54,7 @@ int main(int argc, char *argv[])
if (numfps == 0) {
document = stmd_parse_file(stdin);
print_document(document, ast);
- free_blocks(document);
+ stmd_free_nodes(document);
} else {
for (i = 0; i < numfps; i++) {
FILE *fp = fopen(argv[files[i]], "r");
@@ -67,7 +67,7 @@ int main(int argc, char *argv[])
document = stmd_parse_file(fp);
print_document(document, ast);
- free_blocks(document);
+ stmd_free_nodes(document);
fclose(fp);
}
}
diff --git a/src/print.c b/src/print.c
index 36140a8..83f8daa 100644
--- a/src/print.c
+++ b/src/print.c
@@ -32,14 +32,69 @@ static void print_str(const unsigned char *s, int len)
putchar('"');
}
+// Prettyprint an inline list, for debugging.
+static void print_inlines(node_inl* ils, int indent)
+{
+ while(ils != NULL) {
+ for (int i=0; i < indent; i++) {
+ putchar(' ');
+ }
+ switch(ils->tag) {
+ case INL_STRING:
+ printf("str ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case INL_LINEBREAK:
+ printf("linebreak\n");
+ break;
+ case INL_SOFTBREAK:
+ printf("softbreak\n");
+ break;
+ case INL_CODE:
+ printf("code ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case INL_RAW_HTML:
+ printf("html ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case INL_LINK:
+ case INL_IMAGE:
+ printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
+
+ if (ils->content.linkable.url)
+ print_str(ils->content.linkable.url, -1);
+
+ if (ils->content.linkable.title) {
+ printf(" title=");
+ print_str(ils->content.linkable.title, -1);
+ }
+ putchar('\n');
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ case INL_STRONG:
+ printf("strong\n");
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ case INL_EMPH:
+ printf("emph\n");
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ }
+ ils = ils->next;
+ }
+}
+
// Functions to pretty-print inline and node_block lists, for debugging.
// Prettyprint an inline list, for debugging.
-extern void print_blocks(node_block* b, int indent)
+static void print_blocks(node_block* b, int indent)
{
struct ListData *data;
while(b != NULL) {
- // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
for (int i=0; i < indent; i++) {
putchar(' ');
}
@@ -115,58 +170,7 @@ extern void print_blocks(node_block* b, int indent)
}
}
-// Prettyprint an inline list, for debugging.
-extern void print_inlines(node_inl* ils, int indent)
+void stmd_debug_print(node_block *root)
{
- while(ils != NULL) {
- for (int i=0; i < indent; i++) {
- putchar(' ');
- }
- switch(ils->tag) {
- case INL_STRING:
- printf("str ");
- print_str(ils->content.literal.data, ils->content.literal.len);
- putchar('\n');
- break;
- case INL_LINEBREAK:
- printf("linebreak\n");
- break;
- case INL_SOFTBREAK:
- printf("softbreak\n");
- break;
- case INL_CODE:
- printf("code ");
- print_str(ils->content.literal.data, ils->content.literal.len);
- putchar('\n');
- break;
- case INL_RAW_HTML:
- printf("html ");
- print_str(ils->content.literal.data, ils->content.literal.len);
- putchar('\n');
- break;
- case INL_LINK:
- case INL_IMAGE:
- printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
-
- if (ils->content.linkable.url)
- print_str(ils->content.linkable.url, -1);
-
- if (ils->content.linkable.title) {
- printf(" title=");
- print_str(ils->content.linkable.title, -1);
- }
- putchar('\n');
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case INL_STRONG:
- printf("strong\n");
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case INL_EMPH:
- printf("emph\n");
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- }
- ils = ils->next;
- }
+ print_blocks(root, 0);
}
diff --git a/src/references.c b/src/references.c
index 300bbcc..3e54b48 100644
--- a/src/references.c
+++ b/src/references.c
@@ -1,6 +1,7 @@
#include "stmd.h"
#include "utf8.h"
#include "references.h"
+#include "inlines.h"
static unsigned int
refhash(const unsigned char *link_ref)
diff --git a/src/stmd.h b/src/stmd.h
index 4e21e6c..c6473a6 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -104,28 +104,12 @@ struct node_block {
typedef struct node_block node_block;
-node_inl* parse_inlines(strbuf *input, reference_map *refmap);
-void free_inlines(node_inl* e);
+node_block *stmd_parse_document(const unsigned char *buffer, size_t len);
+node_block *stmd_parse_file(FILE *f);
-int parse_reference_inline(strbuf *input, reference_map *refmap);
-void unescape_buffer(strbuf *buf);
+void stmd_free_nodes(node_block *e);
-extern node_block* make_document();
-extern node_block* add_child(node_block* parent,
- int block_type, int start_line, int start_column);
-void free_blocks(node_block* e);
-
-extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len);
-extern node_block *stmd_parse_file(FILE *f);
-
-void print_inlines(node_inl* ils, int indent);
-void print_blocks(node_block* blk, int indent);
-
-void blocks_to_html(strbuf *html, node_block *b, bool tight);
-void inlines_to_html(strbuf *html, node_inl *b);
-
-unsigned char *clean_url(chunk *url);
-unsigned char *clean_autolink(chunk *url, int is_email);
-unsigned char *clean_title(chunk *title);
+void stmd_debug_print(node_block *root);
+void stmd_render_html(strbuf *html, node_block *root);
#endif
--
cgit v1.2.3
From 9c08b31793f269e4b5902908282034618ee66eef Mon Sep 17 00:00:00 2001
From: Alex Kocharin
Date: Tue, 16 Sep 2014 00:44:52 +0400
Subject: typo fix
---
spec.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/spec.txt b/spec.txt
index 4a9e9fd..40d04f2 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4030,7 +4030,7 @@ for efficient parsing strategies that do not backtrack:
(a) it is not part of a sequence of four or more unescaped `_`s,
(b) it is not followed by whitespace,
- (c) is is not preceded by an ASCII alphanumeric character, and
+ (c) it is not preceded by an ASCII alphanumeric character, and
(d) either it is not followed by a `_` character or it is
followed immediately by strong emphasis.
--
cgit v1.2.3
From e9f5a586938b926da932a9e957f801281dde4730 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 16 Sep 2014 11:42:11 -0700
Subject: New parseEmphasis algorithm.
- State machine for emphasis parsing.
- This would require some adjustments to the spec and spec
examples.
- It currently blows the stack on `tricky'.
- Memoization code has been commented out.
- Inline parsers return arrays.
---
js/stmd.js | 293 +++++++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 228 insertions(+), 65 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 187d058..9c84268 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -166,15 +166,15 @@
var match;
while (!foundCode && (match = this.match(/`+/m))) {
if (match == ticks) {
- return { t: 'Code', c: this.subject.slice(afterOpenTicks,
+ return [{ t: 'Code', c: this.subject.slice(afterOpenTicks,
this.pos - ticks.length)
.replace(/[ \n]+/g,' ')
- .trim() };
+ .trim() }];
}
}
// If we got here, we didn't match a closing backtick sequence.
this.pos = afterOpenTicks;
- return { t: 'Str', c: ticks };
+ return [{ t: 'Str', c: ticks }];
};
// Parse a backslash-escaped special character, adding either the escaped
@@ -186,13 +186,13 @@
if (subj[pos] === '\\') {
if (subj[pos + 1] === '\n') {
this.pos = this.pos + 2;
- return { t: 'Hardbreak' };
+ return [{ t: 'Hardbreak' }];
} else if (reEscapable.test(subj[pos + 1])) {
this.pos = this.pos + 2;
- return { t: 'Str', c: subj[pos + 1] };
+ return [{ t: 'Str', c: subj[pos + 1] }];
} else {
this.pos++;
- return {t: 'Str', c: '\\'};
+ return [{t: 'Str', c: '\\'}];
}
} else {
return null;
@@ -205,14 +205,14 @@
var dest;
if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
dest = m.slice(1,-1);
- return {t: 'Link',
- label: [{ t: 'Str', c: dest }],
- destination: 'mailto:' + dest };
+ return [{t: 'Link',
+ label: [{ t: 'Str', c: dest }],
+ destination: 'mailto:' + dest }];
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
- return { t: 'Link',
- label: [{ t: 'Str', c: dest }],
- destination: dest };
+ return [{ t: 'Link',
+ label: [{ t: 'Str', c: dest }],
+ destination: dest }];
} else {
return null;
}
@@ -222,7 +222,7 @@
var parseHtmlTag = function() {
var m = this.match(reHtmlTag);
if (m) {
- return { t: 'Html', c: m };
+ return [{ t: 'Html', c: m }];
} else {
return null;
}
@@ -285,60 +285,219 @@
if (numdelims >= 4 || !res.can_open) {
this.pos += numdelims;
- return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
+ return [{t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}];
}
this.pos += numdelims;
var next_inline;
- var last_emphasis_closer = null;
+ var first = [];
+ var second = [];
+ var current = first;
+ var state = 0;
- var delims_to_match = numdelims;
+ if (numdelims === 3) {
+ state = 1;
+ } else if (numdelims === 2) {
+ state = 2;
+ } else if (numdelims === 1) {
+ state = 3;
+ }
- // We need not look for closers if we have already recorded that
- // there are no closers past this point.
- while (this.last_emphasis_closer === null ||
- this.last_emphasis_closer >= this.pos) {
+ while (true) {
res = this.scanDelims(c);
- numclosedelims = res.numdelims;
- if (res.can_close) {
- if (last_emphasis_closer === null ||
- last_emphasis_closer < this.pos) {
- last_emphasis_closer = this.pos;
+
+ switch (state) {
+ case 1: // ***a
+ if (res.numdelims === 3 && res.can_close) {
+ this.pos += 3;
+ return [{t: 'Strong', c: [{t: 'Emph', c: first}]}];
+ } else if (res.numdelims === 2 && res.can_close) {
+ this.pos += 2;
+ current = second;
+ state = res.can_open ? 4 : 6;
+ continue;
+ } else if (res.numdelims === 1 && res.can_close) {
+ this.pos += 1;
+ current = second;
+ state = res.can_open ? 5 : 7;
+ continue;
+ }
+ break;
+ case 2: // **a
+ if (res.numdelims === 2 && res.can_close) {
+ this.pos += 2;
+ return [{t: 'Strong', c: first}];
+ } else if (res.numdelims === 1 && res.can_open) {
+ this.pos += 1;
+ current = second;
+ state = 8;
+ continue;
}
- if (numclosedelims === 3 && delims_to_match === 3) {
+ break;
+ case 3: // *a
+ if (res.numdelims === 1 && res.can_close) {
+ this.pos += 1;
+ return [{t: 'Emph', c: first}];
+ } else if (res.numdelims === 2 && res.can_open) {
+ this.pos += 2;
+ current = second;
+ state = 9;
+ continue;
+ }
+ break;
+ case 4: // ***a**b
+ if (res.numdelims === 3 && res.can_close) {
this.pos += 3;
- this.last_emphasis_closer = null;
- return {t: 'Strong', c: [{t: 'Emph', c: inlines}]};
- } else if (numclosedelims >= 2 && delims_to_match >= 2) {
- delims_to_match -= 2;
+ return [{t: 'Strong',
+ c: [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c+c}],
+ second)}]}];
+ } else if (res.numdelims === 2 && res.can_close) {
this.pos += 2;
- inlines = [{t: 'Strong', c: inlines}];
- } else if (numclosedelims >= 1 && delims_to_match >= 1) {
- delims_to_match -= 1;
+ return [{t: 'Strong',
+ c: [{t: 'Str', c: c+c+c}].concat(
+ first,
+ [{t: 'Strong', c: second}])}];
+ } else if (res.numdelims === 1 && res.can_close) {
this.pos += 1;
- inlines = [{t: 'Emph', c: inlines}];
+ return [{t: 'Emph',
+ c: [{t: 'Strong', c: first}].concat(second)}];
}
- if (delims_to_match === 0) {
- this.last_emphasis_closer = null;
- return inlines[0];
+ break;
+ case 5: // ***a*b
+ if (res.numdelims === 3 && res.can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c}],
+ second)}]}];
+ } else if (res.numdelims === 2 && res.can_close) {
+ this.pos += 2;
+ return [{t: 'Strong',
+ c: [{t: 'Emph', c: first}].concat(second)}];
+ } else if (res.numdelims === 1 && res.can_close) {
+ this.pos += 1;
+ return [{t: 'Strong',
+ c: [{t: 'Str', c: c+c+c}].concat(
+ first,
+ [{t: 'Emph', c: second}])}];
}
- } else if ((next_inline = this.parseInline())) {
- inlines.push(next_inline);
+ break;
+ case 6: // ***a** b
+ if (res.numdelims === 3 && res.can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c+c}],
+ second)}]}];
+ } else if (res.numdelims === 1 && res.can_close) {
+ this.pos += 1;
+ return [{t: 'Emph',
+ c: [{t: 'Strong', c: first}].concat(second)}];
+ }
+ break;
+ case 7: // ***a* b
+ if (res.numdelims === 3 && res.can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c}],
+ second)}]}];
+ } else if (res.numdelims === 2 && res.can_close) {
+ this.pos += 2;
+ return [{t: 'Strong',
+ c: [{t: 'Emph', c: first}].concat(second)}];
+ }
+ break;
+ case 8: // **a *b
+ if (res.numdelims === 3 && res.can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: first.concat([{t: 'Emph',
+ c: second}])}];
+ } else if (res.numdelims === 2 && res.can_close) {
+ this.pos += 2;
+ return [{t: 'Strong',
+ c: first.concat(
+ [{t: 'Str', c: c}],
+ second)}];
+ } else if (res.numdelims === 1 && res.can_close) {
+ this.pos += 1;
+ return [{t: 'Str', c: c+c}].concat(
+ first,
+ [{t: 'Emph', c: second}]);
+ }
+ break;
+ case 9: // *a **b
+ if (res.numdelims === 3 && res.can_close) {
+ this.pos += 3;
+ return [{t: 'Emph',
+ c: first.concat([{t: 'Strong',
+ c: second}])}];
+ } else if (res.numdelims === 2 && res.can_close) {
+ this.pos += 2;
+ return [{t: 'Str', c: c}].concat(
+ first,
+ [{t: 'Strong', c: second}]);
+ } else if (res.numdelims === 1 && res.can_close) {
+ this.pos += 1;
+ return [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c+c}],
+ second)}];
+ }
+ break;
+ default:
+ break;
+ }
+
+ if ((next_inline = this.parseInline())) {
+ Array.prototype.push.apply(current, next_inline);
} else {
break;
}
+
}
- // didn't find closing delimiter
- this.pos = startpos + numdelims;
- if (last_emphasis_closer === null) {
- // we know there are no closers after startpos, so:
- this.last_emphasis_closer = startpos;
- } else {
- this.last_emphasis_closer = last_emphasis_closer;
+ switch (state) {
+ case 1: // ***a
+ return [{t: 'Str', c: c+c+c}].concat(first);
+ case 2: // **a
+ return [{t: 'Str', c: c+c}].concat(first);
+ case 3: // *a
+ return [{t: 'Str', c: c}].concat(first);
+ case 4: // ***a**b
+ case 6: // ***a** b
+ return [{t: 'Str', c: c+c+c}]
+ .concat(first,
+ [{t: 'Str', c: c+c}],
+ second);
+ case 5: // ***a*b
+ case 7: // ***a* b
+ return [{t: 'Str', c: c+c+c}]
+ .concat(first,
+ [{t: 'Str', c: c}],
+ second);
+ case 8: // **a *b
+ return [{t: 'Str', c: c+c}]
+ .concat(first,
+ [{t: 'Str', c: c}],
+ second);
+ case 9: // *a **b
+ return [{t: 'Str', c: c}]
+ .concat(first,
+ [{t: 'Str', c: c+c}],
+ second);
+ default:
+ console.log("Unknown state, parseEmphasis");
+ // shouldn't happen
}
- return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)};
+
};
// Attempt to parse link title (sans quotes), returning the string
@@ -461,10 +620,10 @@
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
- return { t: 'Link',
- destination: dest,
- title: title,
- label: parseRawLabel(rawlabel) };
+ return [{ t: 'Link',
+ destination: dest,
+ title: title,
+ label: parseRawLabel(rawlabel) }];
} else {
this.pos = startpos;
return null;
@@ -488,10 +647,10 @@
// lookup rawlabel in refmap
var link = this.refmap[normalizeReference(reflabel)];
if (link) {
- return {t: 'Link',
- destination: link.destination,
- title: link.title,
- label: parseRawLabel(rawlabel) };
+ return [{t: 'Link',
+ destination: link.destination,
+ title: link.title,
+ label: parseRawLabel(rawlabel) }];
} else {
this.pos = startpos;
return null;
@@ -505,7 +664,7 @@
var parseEntity = function() {
var m;
if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) {
- return { t: 'Entity', c: m };
+ return [{ t: 'Entity', c: m }];
} else {
return null;
}
@@ -516,7 +675,7 @@
var parseString = function() {
var m;
if ((m = this.match(reMain))) {
- return { t: 'Str', c: m };
+ return [{ t: 'Str', c: m }];
} else {
return null;
}
@@ -528,9 +687,9 @@
var m = this.match(/^ *\n/);
if (m) {
if (m.length > 2) {
- return { t: 'Hardbreak' };
+ return [{ t: 'Hardbreak' }];
} else if (m.length > 0) {
- return { t: 'Softbreak' };
+ return [{ t: 'Softbreak' }];
}
}
return null;
@@ -542,10 +701,10 @@
if (this.match(/^!/)) {
var link = this.parseLink();
if (link) {
- link.t = 'Image';
+ link[0].t = 'Image';
return link;
} else {
- return { t: 'Str', c: '!' };
+ return [{ t: 'Str', c: '!' }];
}
} else {
return null;
@@ -615,11 +774,13 @@
// and returning the inline parsed.
var parseInline = function() {
var startpos = this.pos;
+ /*
var memoized = this.memo[startpos];
if (memoized) {
this.pos = memoized.endpos;
return memoized.inline;
}
+ */
var c = this.peek();
if (!c) {
return null;
@@ -658,12 +819,14 @@
}
if (res === null) {
this.pos += 1;
- res = {t: 'Str', c: c};
+ res = [{t: 'Str', c: c}];
}
+ /*
if (res) {
this.memo[startpos] = { inline: res,
endpos: this.pos };
}
+ */
return res;
};
@@ -672,12 +835,12 @@
this.subject = s;
this.pos = 0;
this.refmap = refmap || {};
- this.memo = {};
+ // this.memo = {};
this.last_emphasis_closer = null;
var inlines = [];
var next_inline;
while ((next_inline = this.parseInline())) {
- inlines.push(next_inline);
+ Array.prototype.push.apply(inlines, next_inline);
}
return inlines;
};
@@ -690,7 +853,7 @@
last_emphasis_closer: null, // used by parseEmphasis method
pos: 0,
refmap: {},
- memo: {},
+ // memo: {},
match: match,
peek: peek,
spnl: spnl,
--
cgit v1.2.3
From a2a6b7dd829bd7097aa52f5af7fbd66dd7e2c667 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 16 Sep 2014 14:15:55 -0700
Subject: Fixed bug in parsing `* **a** b*` etc.
---
js/stmd.js | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 9c84268..157fe5f 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -428,9 +428,10 @@
second)}];
} else if (res.numdelims === 1 && res.can_close) {
this.pos += 1;
- return [{t: 'Str', c: c+c}].concat(
- first,
- [{t: 'Emph', c: second}]);
+ first = first.concat([{t: 'Emph', c: second}]);
+ current = first;
+ state = 2;
+ continue;
}
break;
case 9: // *a **b
@@ -441,9 +442,10 @@
c: second}])}];
} else if (res.numdelims === 2 && res.can_close) {
this.pos += 2;
- return [{t: 'Str', c: c}].concat(
- first,
- [{t: 'Strong', c: second}]);
+ first = first.concat([{t: 'Strong', c: second}]);
+ current = first;
+ state = 3;
+ continue;
} else if (res.numdelims === 1 && res.can_close) {
this.pos += 1;
return [{t: 'Emph',
--
cgit v1.2.3
From 1ffcc1d908a4b3f8c6e0c0ca0af7cc6cc4c28331 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 16 Sep 2014 14:29:22 -0700
Subject: Small performance tweaks.
---
js/stmd.js | 303 +++++++++++++++++++++++++++++++------------------------------
1 file changed, 155 insertions(+), 148 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 157fe5f..c5268d8 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -295,6 +295,8 @@
var second = [];
var current = first;
var state = 0;
+ var can_close = false;
+ var can_open = false;
if (numdelims === 3) {
state = 1;
@@ -307,155 +309,160 @@
while (true) {
res = this.scanDelims(c);
- switch (state) {
- case 1: // ***a
- if (res.numdelims === 3 && res.can_close) {
- this.pos += 3;
- return [{t: 'Strong', c: [{t: 'Emph', c: first}]}];
- } else if (res.numdelims === 2 && res.can_close) {
- this.pos += 2;
- current = second;
- state = res.can_open ? 4 : 6;
- continue;
- } else if (res.numdelims === 1 && res.can_close) {
- this.pos += 1;
- current = second;
- state = res.can_open ? 5 : 7;
- continue;
- }
- break;
- case 2: // **a
- if (res.numdelims === 2 && res.can_close) {
- this.pos += 2;
- return [{t: 'Strong', c: first}];
- } else if (res.numdelims === 1 && res.can_open) {
- this.pos += 1;
- current = second;
- state = 8;
- continue;
- }
- break;
- case 3: // *a
- if (res.numdelims === 1 && res.can_close) {
- this.pos += 1;
- return [{t: 'Emph', c: first}];
- } else if (res.numdelims === 2 && res.can_open) {
- this.pos += 2;
- current = second;
- state = 9;
- continue;
- }
- break;
- case 4: // ***a**b
- if (res.numdelims === 3 && res.can_close) {
- this.pos += 3;
- return [{t: 'Strong',
- c: [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c+c}],
- second)}]}];
- } else if (res.numdelims === 2 && res.can_close) {
- this.pos += 2;
- return [{t: 'Strong',
- c: [{t: 'Str', c: c+c+c}].concat(
- first,
- [{t: 'Strong', c: second}])}];
- } else if (res.numdelims === 1 && res.can_close) {
- this.pos += 1;
- return [{t: 'Emph',
- c: [{t: 'Strong', c: first}].concat(second)}];
- }
- break;
- case 5: // ***a*b
- if (res.numdelims === 3 && res.can_close) {
- this.pos += 3;
- return [{t: 'Strong',
- c: [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c}],
- second)}]}];
- } else if (res.numdelims === 2 && res.can_close) {
- this.pos += 2;
- return [{t: 'Strong',
- c: [{t: 'Emph', c: first}].concat(second)}];
- } else if (res.numdelims === 1 && res.can_close) {
- this.pos += 1;
- return [{t: 'Strong',
- c: [{t: 'Str', c: c+c+c}].concat(
- first,
- [{t: 'Emph', c: second}])}];
- }
- break;
- case 6: // ***a** b
- if (res.numdelims === 3 && res.can_close) {
- this.pos += 3;
- return [{t: 'Strong',
- c: [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c+c}],
- second)}]}];
- } else if (res.numdelims === 1 && res.can_close) {
- this.pos += 1;
- return [{t: 'Emph',
- c: [{t: 'Strong', c: first}].concat(second)}];
- }
- break;
- case 7: // ***a* b
- if (res.numdelims === 3 && res.can_close) {
- this.pos += 3;
- return [{t: 'Strong',
- c: [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c}],
- second)}]}];
- } else if (res.numdelims === 2 && res.can_close) {
- this.pos += 2;
- return [{t: 'Strong',
- c: [{t: 'Emph', c: first}].concat(second)}];
- }
- break;
- case 8: // **a *b
- if (res.numdelims === 3 && res.can_close) {
- this.pos += 3;
- return [{t: 'Strong',
- c: first.concat([{t: 'Emph',
- c: second}])}];
- } else if (res.numdelims === 2 && res.can_close) {
- this.pos += 2;
- return [{t: 'Strong',
- c: first.concat(
- [{t: 'Str', c: c}],
- second)}];
- } else if (res.numdelims === 1 && res.can_close) {
- this.pos += 1;
- first = first.concat([{t: 'Emph', c: second}]);
- current = first;
- state = 2;
- continue;
- }
- break;
- case 9: // *a **b
- if (res.numdelims === 3 && res.can_close) {
- this.pos += 3;
- return [{t: 'Emph',
- c: first.concat([{t: 'Strong',
- c: second}])}];
- } else if (res.numdelims === 2 && res.can_close) {
- this.pos += 2;
- first = first.concat([{t: 'Strong', c: second}]);
- current = first;
- state = 3;
- continue;
- } else if (res.numdelims === 1 && res.can_close) {
- this.pos += 1;
- return [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c+c}],
- second)}];
+ if (res) {
+ numdelims = res.numdelims;
+ can_close = res.can_close;
+ can_open = res.can_open;
+ switch (state) {
+ case 1: // ***a
+ if (numdelims === 3 && can_close) {
+ this.pos += 3;
+ return [{t: 'Strong', c: [{t: 'Emph', c: first}]}];
+ } else if (numdelims === 2 && can_close) {
+ this.pos += 2;
+ current = second;
+ state = can_open ? 4 : 6;
+ continue;
+ } else if (numdelims === 1 && can_close) {
+ this.pos += 1;
+ current = second;
+ state = can_open ? 5 : 7;
+ continue;
+ }
+ break;
+ case 2: // **a
+ if (numdelims === 2 && can_close) {
+ this.pos += 2;
+ return [{t: 'Strong', c: first}];
+ } else if (numdelims === 1 && can_open) {
+ this.pos += 1;
+ current = second;
+ state = 8;
+ continue;
+ }
+ break;
+ case 3: // *a
+ if (numdelims === 1 && can_close) {
+ this.pos += 1;
+ return [{t: 'Emph', c: first}];
+ } else if (numdelims === 2 && can_open) {
+ this.pos += 2;
+ current = second;
+ state = 9;
+ continue;
+ }
+ break;
+ case 4: // ***a**b
+ if (numdelims === 3 && can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c+c}],
+ second)}]}];
+ } else if (numdelims === 2 && can_close) {
+ this.pos += 2;
+ return [{t: 'Strong',
+ c: [{t: 'Str', c: c+c+c}].concat(
+ first,
+ [{t: 'Strong', c: second}])}];
+ } else if (numdelims === 1 && can_close) {
+ this.pos += 1;
+ return [{t: 'Emph',
+ c: [{t: 'Strong', c: first}].concat(second)}];
+ }
+ break;
+ case 5: // ***a*b
+ if (numdelims === 3 && can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c}],
+ second)}]}];
+ } else if (numdelims === 2 && can_close) {
+ this.pos += 2;
+ return [{t: 'Strong',
+ c: [{t: 'Emph', c: first}].concat(second)}];
+ } else if (numdelims === 1 && can_close) {
+ this.pos += 1;
+ return [{t: 'Strong',
+ c: [{t: 'Str', c: c+c+c}].concat(
+ first,
+ [{t: 'Emph', c: second}])}];
+ }
+ break;
+ case 6: // ***a** b
+ if (numdelims === 3 && can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c+c}],
+ second)}]}];
+ } else if (numdelims === 1 && can_close) {
+ this.pos += 1;
+ return [{t: 'Emph',
+ c: [{t: 'Strong', c: first}].concat(second)}];
+ }
+ break;
+ case 7: // ***a* b
+ if (numdelims === 3 && can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c}],
+ second)}]}];
+ } else if (numdelims === 2 && can_close) {
+ this.pos += 2;
+ return [{t: 'Strong',
+ c: [{t: 'Emph', c: first}].concat(second)}];
+ }
+ break;
+ case 8: // **a *b
+ if (numdelims === 3 && can_close) {
+ this.pos += 3;
+ return [{t: 'Strong',
+ c: first.concat([{t: 'Emph',
+ c: second}])}];
+ } else if (numdelims === 2 && can_close) {
+ this.pos += 2;
+ return [{t: 'Strong',
+ c: first.concat(
+ [{t: 'Str', c: c}],
+ second)}];
+ } else if (numdelims === 1 && can_close) {
+ this.pos += 1;
+ first = first.concat([{t: 'Emph', c: second}]);
+ current = first;
+ state = 2;
+ continue;
+ }
+ break;
+ case 9: // *a **b
+ if (numdelims === 3 && can_close) {
+ this.pos += 3;
+ return [{t: 'Emph',
+ c: first.concat([{t: 'Strong',
+ c: second}])}];
+ } else if (numdelims === 2 && can_close) {
+ this.pos += 2;
+ first = first.concat([{t: 'Strong', c: second}]);
+ current = first;
+ state = 3;
+ continue;
+ } else if (numdelims === 1 && can_close) {
+ this.pos += 1;
+ return [{t: 'Emph',
+ c: first.concat(
+ [{t: 'Str', c: c+c}],
+ second)}];
+ }
+ break;
+ default:
+ break;
}
- break;
- default:
- break;
}
if ((next_inline = this.parseInline())) {
--
cgit v1.2.3
From ac8529c9f55da7fdc1186e3f34313cf411de6e71 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 16 Sep 2014 22:04:54 -0700
Subject: Re-added backtracking and memoization.
Gives better results for things like
**foo*
---
js/stmd.js | 30 +++++++++++-------------------
1 file changed, 11 insertions(+), 19 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index c5268d8..ea72b9e 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -289,6 +289,7 @@
}
this.pos += numdelims;
+ var delimpos = this.pos;
var next_inline;
var first = [];
@@ -472,36 +473,31 @@
}
}
+ this.pos = startpos;
+ return null;
switch (state) {
case 1: // ***a
- return [{t: 'Str', c: c+c+c}].concat(first);
+ return [{t: 'Emph', c: [{t: 'Str', c: c}]}].concat(first);
case 2: // **a
return [{t: 'Str', c: c+c}].concat(first);
case 3: // *a
return [{t: 'Str', c: c}].concat(first);
case 4: // ***a**b
case 6: // ***a** b
- return [{t: 'Str', c: c+c+c}]
- .concat(first,
- [{t: 'Str', c: c+c}],
- second);
+ return [{t: 'Strong', c:
+ [{t: 'Str', c: c}].concat(first)}].concat(second);
case 5: // ***a*b
case 7: // ***a* b
- return [{t: 'Str', c: c+c+c}]
- .concat(first,
- [{t: 'Str', c: c}],
- second);
+ return [{t: 'Emph', c:
+ [{t: 'Str', c: c+c}].concat(first)}].concat(second);
case 8: // **a *b
return [{t: 'Str', c: c+c}]
.concat(first,
[{t: 'Str', c: c}],
second);
case 9: // *a **b
- return [{t: 'Str', c: c}]
- .concat(first,
- [{t: 'Str', c: c+c}],
- second);
+ return [{t: 'Emph', c: first.concat([{t: 'Str', c: c}])}].concat(second);
default:
console.log("Unknown state, parseEmphasis");
// shouldn't happen
@@ -783,13 +779,11 @@
// and returning the inline parsed.
var parseInline = function() {
var startpos = this.pos;
- /*
var memoized = this.memo[startpos];
if (memoized) {
this.pos = memoized.endpos;
return memoized.inline;
}
- */
var c = this.peek();
if (!c) {
return null;
@@ -830,12 +824,10 @@
this.pos += 1;
res = [{t: 'Str', c: c}];
}
- /*
if (res) {
this.memo[startpos] = { inline: res,
endpos: this.pos };
}
- */
return res;
};
@@ -844,7 +836,7 @@
this.subject = s;
this.pos = 0;
this.refmap = refmap || {};
- // this.memo = {};
+ this.memo = {};
this.last_emphasis_closer = null;
var inlines = [];
var next_inline;
@@ -862,7 +854,7 @@
last_emphasis_closer: null, // used by parseEmphasis method
pos: 0,
refmap: {},
- // memo: {},
+ memo: {},
match: match,
peek: peek,
spnl: spnl,
--
cgit v1.2.3
From 459f08896d2adf09fa3e0a8ce1d2267921b2be5b Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 16 Sep 2014 22:39:17 -0700
Subject: Revert "Re-added backtracking and memoization."
This reverts commit ac8529c9f55da7fdc1186e3f34313cf411de6e71.
---
js/stmd.js | 30 +++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index ea72b9e..c5268d8 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -289,7 +289,6 @@
}
this.pos += numdelims;
- var delimpos = this.pos;
var next_inline;
var first = [];
@@ -473,31 +472,36 @@
}
}
- this.pos = startpos;
- return null;
switch (state) {
case 1: // ***a
- return [{t: 'Emph', c: [{t: 'Str', c: c}]}].concat(first);
+ return [{t: 'Str', c: c+c+c}].concat(first);
case 2: // **a
return [{t: 'Str', c: c+c}].concat(first);
case 3: // *a
return [{t: 'Str', c: c}].concat(first);
case 4: // ***a**b
case 6: // ***a** b
- return [{t: 'Strong', c:
- [{t: 'Str', c: c}].concat(first)}].concat(second);
+ return [{t: 'Str', c: c+c+c}]
+ .concat(first,
+ [{t: 'Str', c: c+c}],
+ second);
case 5: // ***a*b
case 7: // ***a* b
- return [{t: 'Emph', c:
- [{t: 'Str', c: c+c}].concat(first)}].concat(second);
+ return [{t: 'Str', c: c+c+c}]
+ .concat(first,
+ [{t: 'Str', c: c}],
+ second);
case 8: // **a *b
return [{t: 'Str', c: c+c}]
.concat(first,
[{t: 'Str', c: c}],
second);
case 9: // *a **b
- return [{t: 'Emph', c: first.concat([{t: 'Str', c: c}])}].concat(second);
+ return [{t: 'Str', c: c}]
+ .concat(first,
+ [{t: 'Str', c: c+c}],
+ second);
default:
console.log("Unknown state, parseEmphasis");
// shouldn't happen
@@ -779,11 +783,13 @@
// and returning the inline parsed.
var parseInline = function() {
var startpos = this.pos;
+ /*
var memoized = this.memo[startpos];
if (memoized) {
this.pos = memoized.endpos;
return memoized.inline;
}
+ */
var c = this.peek();
if (!c) {
return null;
@@ -824,10 +830,12 @@
this.pos += 1;
res = [{t: 'Str', c: c}];
}
+ /*
if (res) {
this.memo[startpos] = { inline: res,
endpos: this.pos };
}
+ */
return res;
};
@@ -836,7 +844,7 @@
this.subject = s;
this.pos = 0;
this.refmap = refmap || {};
- this.memo = {};
+ // this.memo = {};
this.last_emphasis_closer = null;
var inlines = [];
var next_inline;
@@ -854,7 +862,7 @@
last_emphasis_closer: null, // used by parseEmphasis method
pos: 0,
refmap: {},
- memo: {},
+ // memo: {},
match: match,
peek: peek,
spnl: spnl,
--
cgit v1.2.3
From 3aa56049d4b52b55a2313e51698090ee81e10036 Mon Sep 17 00:00:00 2001
From: Jordan Milne
Date: Fri, 12 Sep 2014 04:42:30 -0300
Subject: Better handle trailing backslashes in ATX-style headers
Previously something like '# `\' would hang the parser while it
waited for an extra character that wasn't there.
---
src/blocks.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/blocks.c b/src/blocks.c
index 2ac7032..5b38116 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -420,17 +420,17 @@ extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len)
static void chop_trailing_hashtags(chunk *ch)
{
- int n;
+ int n, orig_n;
chunk_rtrim(ch);
- n = ch->len - 1;
+ orig_n = n = ch->len - 1;
// if string ends in #s, remove these:
while (n >= 0 && peek_at(ch, n) == '#')
n--;
// the last # was escaped, so we include it.
- if (n >= 0 && peek_at(ch, n) == '\\')
+ if (n != orig_n && n >= 0 && peek_at(ch, n) == '\\')
n++;
ch->len = n + 1;
--
cgit v1.2.3
From 3307a5ac1d2819ecbde0763aef3102828e13ae44 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 17 Sep 2014 13:52:08 -0700
Subject: Use helper functions to simplify code.
---
js/stmd.js | 122 ++++++++++++++++++++++++-------------------------------------
1 file changed, 47 insertions(+), 75 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index c5268d8..72e0306 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -260,6 +260,18 @@
can_close: can_close };
};
+ var Emph = function(ils) {
+ return {t: 'Emph', c: ils};
+ }
+
+ var Strong = function(ils) {
+ return {t: 'Strong', c: ils};
+ }
+
+ var Str = function(s) {
+ return {t: 'Str', c: s};
+ }
+
// Attempt to parse emphasis or strong emphasis.
var parseEmphasis = function() {
var startpos = this.pos;
@@ -285,7 +297,7 @@
if (numdelims >= 4 || !res.can_open) {
this.pos += numdelims;
- return [{t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}];
+ return [Str(this.subject.slice(startpos, startpos + numdelims))];
}
this.pos += numdelims;
@@ -317,7 +329,7 @@
case 1: // ***a
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [{t: 'Strong', c: [{t: 'Emph', c: first}]}];
+ return [Strong([Emph(first)])];
} else if (numdelims === 2 && can_close) {
this.pos += 2;
current = second;
@@ -333,7 +345,7 @@
case 2: // **a
if (numdelims === 2 && can_close) {
this.pos += 2;
- return [{t: 'Strong', c: first}];
+ return [Strong(first)];
} else if (numdelims === 1 && can_open) {
this.pos += 1;
current = second;
@@ -344,7 +356,7 @@
case 3: // *a
if (numdelims === 1 && can_close) {
this.pos += 1;
- return [{t: 'Emph', c: first}];
+ return [Emph(first)];
} else if (numdelims === 2 && can_open) {
this.pos += 2;
current = second;
@@ -355,86 +367,59 @@
case 4: // ***a**b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [{t: 'Strong',
- c: [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c+c}],
- second)}]}];
+ return [Strong([Emph(first.concat([Str(c+c)], second))])];
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- return [{t: 'Strong',
- c: [{t: 'Str', c: c+c+c}].concat(
- first,
- [{t: 'Strong', c: second}])}];
+ return [Strong([Str(c+c+c)].concat(
+ first,
+ [Strong(second)]))];
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- return [{t: 'Emph',
- c: [{t: 'Strong', c: first}].concat(second)}];
+ return [Emph([Strong(first)].concat(second))];
}
break;
case 5: // ***a*b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [{t: 'Strong',
- c: [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c}],
- second)}]}];
+ return [Strong([Emph(first.concat([Str(c)], second))])];
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- return [{t: 'Strong',
- c: [{t: 'Emph', c: first}].concat(second)}];
+ return [Strong([Emph(first)].concat(second))];
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- return [{t: 'Strong',
- c: [{t: 'Str', c: c+c+c}].concat(
- first,
- [{t: 'Emph', c: second}])}];
+ return [Strong([Str(c+c+c)].concat(
+ first,
+ [Emph(second)]))];
}
break;
case 6: // ***a** b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [{t: 'Strong',
- c: [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c+c}],
- second)}]}];
+ return [Strong([Emph(first.concat([Str(c+c)], second))])];
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- return [{t: 'Emph',
- c: [{t: 'Strong', c: first}].concat(second)}];
+ return [Emph([Strong(first)].concat(second))];
}
break;
case 7: // ***a* b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [{t: 'Strong',
- c: [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c}],
- second)}]}];
+ return [Strong([Emph(first.concat([Str(c)], second))])];
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- return [{t: 'Strong',
- c: [{t: 'Emph', c: first}].concat(second)}];
+ return [Strong([Emph(first)].concat(second))];
}
break;
case 8: // **a *b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [{t: 'Strong',
- c: first.concat([{t: 'Emph',
- c: second}])}];
+ return [Strong(first.concat([Emph(second)]))];
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- return [{t: 'Strong',
- c: first.concat(
- [{t: 'Str', c: c}],
- second)}];
+ return [Strong(first.concat([Str(c)], second))];
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- first = first.concat([{t: 'Emph', c: second}]);
+ first.push(Emph(second));
current = first;
state = 2;
continue;
@@ -443,21 +428,16 @@
case 9: // *a **b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [{t: 'Emph',
- c: first.concat([{t: 'Strong',
- c: second}])}];
+ return [(Emph(first.concat([Strong(second)])))];
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- first = first.concat([{t: 'Strong', c: second}]);
+ first.push(Strong(second));
current = first;
state = 3;
continue;
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- return [{t: 'Emph',
- c: first.concat(
- [{t: 'Str', c: c+c}],
- second)}];
+ return [Emph(first.concat([Str(c+c)], second))];
}
break;
default:
@@ -475,33 +455,25 @@
switch (state) {
case 1: // ***a
- return [{t: 'Str', c: c+c+c}].concat(first);
+ return [Str(c+c+c)].concat(first);
case 2: // **a
- return [{t: 'Str', c: c+c}].concat(first);
+ return [Str(c+c)].concat(first);
case 3: // *a
- return [{t: 'Str', c: c}].concat(first);
+ return [Str(c)].concat(first);
case 4: // ***a**b
case 6: // ***a** b
- return [{t: 'Str', c: c+c+c}]
- .concat(first,
- [{t: 'Str', c: c+c}],
- second);
+ return [Str(c+c+c)]
+ .concat(first, [Str(c+c)], second);
case 5: // ***a*b
case 7: // ***a* b
- return [{t: 'Str', c: c+c+c}]
- .concat(first,
- [{t: 'Str', c: c}],
- second);
+ return [Str(c+c+c)]
+ .concat(first, [Str(c)], second);
case 8: // **a *b
- return [{t: 'Str', c: c+c}]
- .concat(first,
- [{t: 'Str', c: c}],
- second);
+ return [Str(c+c)]
+ .concat(first, [Str(c)], second);
case 9: // *a **b
- return [{t: 'Str', c: c}]
- .concat(first,
- [{t: 'Str', c: c+c}],
- second);
+ return [Str(c)]
+ .concat(first, [Str(c+c)], second);
default:
console.log("Unknown state, parseEmphasis");
// shouldn't happen
--
cgit v1.2.3
From f22e1f2536cc70e1f989e457079f1bad252c887a Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 18 Sep 2014 00:23:23 +0200
Subject: Add missing header
---
src/inlines.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
create mode 100644 src/inlines.h
diff --git a/src/inlines.h b/src/inlines.h
new file mode 100644
index 0000000..8c6e2cb
--- /dev/null
+++ b/src/inlines.h
@@ -0,0 +1,13 @@
+#ifndef _INLINES_H_
+#define _INLINES_H_
+
+unsigned char *clean_url(chunk *url);
+unsigned char *clean_autolink(chunk *url, int is_email);
+unsigned char *clean_title(chunk *title);
+
+node_inl* parse_inlines(strbuf *input, reference_map *refmap);
+void free_inlines(node_inl* e);
+
+int parse_reference_inline(strbuf *input, reference_map *refmap);
+
+#endif
--
cgit v1.2.3
From 2943b3850c5cb9e4561c3d109b4513a123bf4db7 Mon Sep 17 00:00:00 2001
From: Jordan Milne
Date: Thu, 18 Sep 2014 17:21:12 -0300
Subject: Use a lookup table for subject_find_special_char
---
src/inlines.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/src/inlines.c b/src/inlines.c
index 145825c..71d75e9 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -768,13 +768,29 @@ node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
static int subject_find_special_char(subject *subj)
{
- static const char CHARS[] = "\n\\`&_*[]pos + 1;
while (n < subj->input.len) {
- if (memchr(CHARS, subj->input.data[n], CHARS_SIZE))
+ if (SPECIAL_CHARS[subj->input.data[n]])
return n;
n++;
}
--
cgit v1.2.3
From 507d8d3a09f6704e8c1f21e5a5df2e4e014e6779 Mon Sep 17 00:00:00 2001
From: Jordan Milne
Date: Thu, 18 Sep 2014 09:26:05 -0300
Subject: Fix memory leak when rendering images as HTML
---
src/html/html.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/html/html.c b/src/html/html.c
index 6f3bc76..ab6fc35 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -111,6 +111,8 @@ static void inlines_to_html(strbuf *html, node_inl* ils)
}
ils = ils->next;
}
+
+ strbuf_free(&scrap);
}
// Convert a node_block list to HTML. Returns 0 on success, and sets result.
--
cgit v1.2.3
From c4b76cf93c8c54b6a33bab82056dc542c6630d92 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 19 Sep 2014 18:11:33 -0700
Subject: spec: Fixed date, version.
Closes #133.
---
spec.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/spec.txt b/spec.txt
index 040c060..fce8792 100644
--- a/spec.txt
+++ b/spec.txt
@@ -2,8 +2,8 @@
title: CommonMark Spec
author:
- John MacFarlane
-version: 1
-date: 2014-09-06
+version: 2
+date: 2014-09-19
...
# Introduction
--
cgit v1.2.3
From 518eaeca38dfc6f840907f6bcc1ce28826801888 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 24 Sep 2014 22:22:51 -0700
Subject: Makefile: Use ?= so variables can be set on command line.
---
Makefile | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/Makefile b/Makefile
index 55b6645..6abaa97 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
-CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -O3 -Wall -Werror
-SRCDIR=src
-DATADIR=data
+CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS?=-g -O3 -Wall -Werror
+SRCDIR?=src
+DATADIR?=data
-PROG=./stmd
+PROG?=./stmd
.PHONY: all oldtests test spec benchjs testjs
all: $(SRCDIR)/case_fold_switch.c $(PROG)
--
cgit v1.2.3
From 7f4b2f7f3949f807d5dafe2219280a0f1419b0e2 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 24 Sep 2014 22:23:09 -0700
Subject: Fixed bug that causes hang on bare `<` inside link label.
---
js/stmd.js | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 72e0306..552fe16 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -534,9 +534,8 @@
this.parseBackticks();
break;
case '<':
- if (!(this.parseAutolink())) {
- this.parseHtmlTag();
- }
+ this.parseAutolink() || this.parseHtmlTag() ||
+ this.pos++;
break;
case '[': // nested []
nest_level++;
--
cgit v1.2.3
From e752b9776d434f63768c50e4c73c533a43529052 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 24 Sep 2014 22:22:51 -0700
Subject: Makefile: Use ?= so variables can be set on command line.
---
Makefile | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/Makefile b/Makefile
index 11e2141..f5f408e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
-CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -O3 -Wall -Werror
-SRCDIR=src
-DATADIR=data
+CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS?=-g -O3 -Wall -Werror
+SRCDIR?=src
+DATADIR?=data
-PROG=./stmd
+PROG?=./stmd
.PHONY: all oldtests test spec benchjs testjs
all: $(SRCDIR)/case_fold_switch.inc $(PROG)
--
cgit v1.2.3
From c006aececef112f61dd44cad43f0596221f29700 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Wed, 24 Sep 2014 22:47:47 -0700
Subject: Suppress 'missing field initializer' warnings
from gperf generated header.
---
Makefile | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/Makefile b/Makefile
index f5f408e..671d30d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers $(OPTFLAGS)
LDFLAGS?=-g -O3 -Wall -Werror
SRCDIR?=src
DATADIR?=data
@@ -42,6 +42,7 @@ benchjs:
node js/bench.js ${BENCHINP}
HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
+
STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c
$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
--
cgit v1.2.3
From de2a35a4dcb3b051df328ec2c204f08c77a5ad3d Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 26 Sep 2014 10:45:51 -0700
Subject: Simple fallback if we don't match emphasis.
The other approach led to wrong results on:
*hi _there*
---
js/stmd.js | 31 ++++++-------------------------
1 file changed, 6 insertions(+), 25 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 552fe16..589ac03 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -302,6 +302,9 @@
this.pos += numdelims;
+ var fallbackpos = this.pos;
+ var fallback = Str(this.subject.slice(startpos, fallbackpos));
+
var next_inline;
var first = [];
var second = [];
@@ -453,31 +456,9 @@
}
- switch (state) {
- case 1: // ***a
- return [Str(c+c+c)].concat(first);
- case 2: // **a
- return [Str(c+c)].concat(first);
- case 3: // *a
- return [Str(c)].concat(first);
- case 4: // ***a**b
- case 6: // ***a** b
- return [Str(c+c+c)]
- .concat(first, [Str(c+c)], second);
- case 5: // ***a*b
- case 7: // ***a* b
- return [Str(c+c+c)]
- .concat(first, [Str(c)], second);
- case 8: // **a *b
- return [Str(c+c)]
- .concat(first, [Str(c)], second);
- case 9: // *a **b
- return [Str(c)]
- .concat(first, [Str(c+c)], second);
- default:
- console.log("Unknown state, parseEmphasis");
- // shouldn't happen
- }
+ // we didn't match emphasis: fallback
+ this.pos = fallbackpos;
+ return [fallback];
};
--
cgit v1.2.3
From 50d87813fc96ea8d5c2610f3fad134f8d4f8e286 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 26 Sep 2014 10:47:46 -0700
Subject: Removed memoization code.
---
js/stmd.js | 15 ---------------
1 file changed, 15 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 589ac03..5a09875 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -735,13 +735,6 @@
// and returning the inline parsed.
var parseInline = function() {
var startpos = this.pos;
- /*
- var memoized = this.memo[startpos];
- if (memoized) {
- this.pos = memoized.endpos;
- return memoized.inline;
- }
- */
var c = this.peek();
if (!c) {
return null;
@@ -782,12 +775,6 @@
this.pos += 1;
res = [{t: 'Str', c: c}];
}
- /*
- if (res) {
- this.memo[startpos] = { inline: res,
- endpos: this.pos };
- }
- */
return res;
};
@@ -796,7 +783,6 @@
this.subject = s;
this.pos = 0;
this.refmap = refmap || {};
- // this.memo = {};
this.last_emphasis_closer = null;
var inlines = [];
var next_inline;
@@ -814,7 +800,6 @@
last_emphasis_closer: null, // used by parseEmphasis method
pos: 0,
refmap: {},
- // memo: {},
match: match,
peek: peek,
spnl: spnl,
--
cgit v1.2.3
From 151cb9e51b25bfd644e1920c078ca894fc9e7e9d Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 26 Sep 2014 11:01:20 -0700
Subject: Used last_emphasis_closer to avoid unneeded scans for closer.
This doesn't seem to help much.
---
js/stmd.js | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/js/stmd.js b/js/stmd.js
index 5a09875..287a0c9 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -312,6 +312,7 @@
var state = 0;
var can_close = false;
var can_open = false;
+ var last_emphasis_closer = null;
if (numdelims === 3) {
state = 1;
@@ -322,11 +323,17 @@
}
while (true) {
+ if (this.last_emphasis_closer[c] < this.pos) {
+ break;
+ }
res = this.scanDelims(c);
if (res) {
numdelims = res.numdelims;
can_close = res.can_close;
+ if (can_close) {
+ last_emphasis_closer = this.pos;
+ }
can_open = res.can_open;
switch (state) {
case 1: // ***a
@@ -458,6 +465,9 @@
// we didn't match emphasis: fallback
this.pos = fallbackpos;
+ if (last_emphasis_closer) {
+ this.last_emphasis_closer[c] = last_emphasis_closer;
+ }
return [fallback];
};
@@ -783,7 +793,7 @@
this.subject = s;
this.pos = 0;
this.refmap = refmap || {};
- this.last_emphasis_closer = null;
+ this.last_emphasis_closer = { '*': s.length, '_': s.length };
var inlines = [];
var next_inline;
while ((next_inline = this.parseInline())) {
--
cgit v1.2.3
From 78ad57d6919c20831c8f6d3455a72d431afd1715 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 26 Sep 2014 11:05:10 -0700
Subject: Restored memoization code.
---
js/stmd.js | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/js/stmd.js b/js/stmd.js
index 287a0c9..3da719f 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -745,6 +745,13 @@
// and returning the inline parsed.
var parseInline = function() {
var startpos = this.pos;
+
+ var memoized = this.memo[startpos];
+ if (memoized) {
+ this.pos = memoized.endpos;
+ return memoized.inline;
+ }
+
var c = this.peek();
if (!c) {
return null;
@@ -785,6 +792,12 @@
this.pos += 1;
res = [{t: 'Str', c: c}];
}
+
+ if (res) {
+ this.memo[startpos] = { inline: res,
+ endpos: this.pos };
+ }
+
return res;
};
@@ -793,6 +806,7 @@
this.subject = s;
this.pos = 0;
this.refmap = refmap || {};
+ this.memo = {};
this.last_emphasis_closer = { '*': s.length, '_': s.length };
var inlines = [];
var next_inline;
@@ -810,6 +824,7 @@
last_emphasis_closer: null, // used by parseEmphasis method
pos: 0,
refmap: {},
+ memo: {},
match: match,
peek: peek,
spnl: spnl,
--
cgit v1.2.3
From 2d43050a1c62a3e6a7ef5e0d286828adc72e4bb4 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 26 Sep 2014 11:11:01 -0700
Subject: Only memoize during inline parsing.
This cuts the performance hit.
With memoization, we get roughly constant behavior in the fuzztest.
Without it, not.
---
js/stmd.js | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 3da719f..221dbef 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -455,7 +455,7 @@
}
}
- if ((next_inline = this.parseInline())) {
+ if ((next_inline = this.parseInline(true))) {
Array.prototype.push.apply(current, next_inline);
} else {
break;
@@ -743,10 +743,10 @@
// Parse the next inline element in subject, advancing subject position
// and returning the inline parsed.
- var parseInline = function() {
+ var parseInline = function(memoize) {
var startpos = this.pos;
- var memoized = this.memo[startpos];
+ var memoized = memoize && this.memo[startpos];
if (memoized) {
this.pos = memoized.endpos;
return memoized.inline;
@@ -793,7 +793,7 @@
res = [{t: 'Str', c: c}];
}
- if (res) {
+ if (res && memoize) {
this.memo[startpos] = { inline: res,
endpos: this.pos };
}
--
cgit v1.2.3
From efc3e5d7a234587c79ac847213437f936de2499b Mon Sep 17 00:00:00 2001
From: Andrew January
Date: Mon, 29 Sep 2014 13:12:29 +0100
Subject: Changes append to prepend
When adding something to the beginning it is "prepending", not "appending"
---
spec.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/spec.txt b/spec.txt
index fce8792..b89105f 100644
--- a/spec.txt
+++ b/spec.txt
@@ -2010,7 +2010,7 @@ The following rules define [block quotes](#block-quote):
1. **Basic case.** If a string of lines *Ls* constitute a sequence
- of blocks *Bs*, then the result of appending a [block quote
+ of blocks *Bs*, then the result of prepending a [block quote
marker](#block-quote-marker) to the beginning of each line in *Ls*
is a [block quote](#block-quote) containing *Bs*.
--
cgit v1.2.3
From 749b3000e8cc3202c52e30f2cd5e585175e9e17d Mon Sep 17 00:00:00 2001
From: Andrew January
Date: Mon, 29 Sep 2014 13:24:54 +0100
Subject: Changes urls to use example.com
As per RFC 2606 it is recommended to use example.com for sample urls in specifications.
One example is left using "foo+special@Bar.baz-bar0.com" because it is designed to demonstrate the complexity of email addresses that should be permitted.
---
spec.txt | 18 ++++++++----------
1 file changed, 8 insertions(+), 10 deletions(-)
diff --git a/spec.txt b/spec.txt
index fce8792..9a7e675 100644
--- a/spec.txt
+++ b/spec.txt
@@ -3686,9 +3686,9 @@ raw HTML:
.
.
-
+
.
-
+
.
.
@@ -5504,9 +5504,9 @@ spec](http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#e-m
Examples of email autolinks:
.
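-<foo@bar.baz.com>
+<foo@bar.example.com>
.
-<p><a href="mailto:foo@bar.baz.com">foo@bar.baz.com</a></p>
+<p><a href="mailto:foo@bar.example.com">foo@bar.example.com</a></p>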
.
.
@@ -5548,15 +5548,15 @@ These are not autolinks:
.
.
-http://google.com
+http://example.com
.
-<p>http://google.com</p>
+<p>http://example.com</p>
.
.
-foo@bar.baz.com
+foo@bar.example.com
.
-foo@bar.baz.com
+foo@bar.example.com
.
## Raw HTML
@@ -6146,5 +6146,3 @@ an `emph`.
The document can be rendered as HTML, or in any other format, given
an appropriate renderer.
-
-
--
cgit v1.2.3
From 205b4aafe8c4aeb03700b450d2805f6f5b9fdc3f Mon Sep 17 00:00:00 2001
From: Andrew January
Date: Mon, 29 Sep 2014 13:27:12 +0100
Subject: Adds missing newlines
---
spec.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/spec.txt b/spec.txt
index 9a7e675..c9d207a 100644
--- a/spec.txt
+++ b/spec.txt
@@ -6146,3 +6146,5 @@ an `emph`.
The document can be rendered as HTML, or in any other format, given
an appropriate renderer.
+
+
--
cgit v1.2.3
From de1e28217f0da80b928bca0ca09541c0401314ee Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 29 Sep 2014 22:58:22 -0700
Subject: Use charAt for browser compatibility.
---
js/stmd.js | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 221dbef..b9ce5ee 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -140,7 +140,7 @@
// Returns the character at the current subject position, or null if
// there are no more characters.
var peek = function() {
- return this.subject[this.pos] || null;
+ return this.subject.charAt(this.pos) || null;
};
// Parse zero or more space characters, including at most one newline
@@ -183,13 +183,13 @@
var parseBackslash = function() {
var subj = this.subject,
pos = this.pos;
- if (subj[pos] === '\\') {
- if (subj[pos + 1] === '\n') {
+ if (subj.charAt(pos) === '\\') {
+ if (subj.charAt(pos + 1) === '\n') {
this.pos = this.pos + 2;
return [{ t: 'Hardbreak' }];
- } else if (reEscapable.test(subj[pos + 1])) {
+ } else if (reEscapable.test(subj.charAt(pos + 1))) {
this.pos = this.pos + 2;
- return [{ t: 'Str', c: subj[pos + 1] }];
+ return [{ t: 'Str', c: subj.charAt(pos + 1) }];
} else {
this.pos++;
return [{t: 'Str', c: '\\'}];
@@ -239,7 +239,7 @@
var startpos = this.pos;
char_before = this.pos === 0 ? '\n' :
- this.subject[this.pos - 1];
+ this.subject.charAt(this.pos - 1);
while (this.peek() === c) {
numdelims++;
@@ -587,7 +587,7 @@
((dest = this.parseLinkDestination()) !== null) &&
this.spnl() &&
// make sure there's a space before the title:
- (/^\s/.test(this.subject[this.pos - 1]) &&
+ (/^\s/.test(this.subject.charAt(this.pos - 1)) &&
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
@@ -1034,10 +1034,10 @@
switch (container.t) {
case 'BlockQuote':
- var matched = indent <= 3 && ln[first_nonspace] === '>';
+ var matched = indent <= 3 && ln.charAt(first_nonspace) === '>';
if (matched) {
offset = first_nonspace + 1;
- if (ln[offset] === ' ') {
+ if (ln.charAt(offset) === ' ') {
offset++;
}
} else {
@@ -1077,7 +1077,7 @@
case 'FencedCode':
// skip optional spaces of fence offset
i = container.fence_offset;
- while (i > 0 && ln[offset] === ' ') {
+ while (i > 0 && ln.charAt(offset) === ' ') {
offset++;
i--;
}
@@ -1154,11 +1154,11 @@
break;
}
- } else if (ln[first_nonspace] === '>') {
+ } else if (ln.charAt(first_nonspace) === '>') {
// blockquote
offset = first_nonspace + 1;
// optional following space
- if (ln[offset] === ' ') {
+ if (ln.charAt(offset) === ' ') {
offset++;
}
closeUnmatchedBlocks(this);
@@ -1291,7 +1291,7 @@
case 'FencedCode':
// check for closing code fence:
match = (indent <= 3 &&
- ln[first_nonspace] == container.fence_char &&
+ ln.charAt(first_nonspace) == container.fence_char &&
ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/));
if (match && match[0].length >= container.fence_length) {
// don't add closing fence to container; instead, close it:
@@ -1350,7 +1350,7 @@
block.string_content = block.strings.join('\n').replace(/^ */m,'');
// try parsing the beginning as link reference definitions:
- while (block.string_content[0] === '[' &&
+ while (block.string_content.charAt(0) === '[' &&
(pos = this.inlineParser.parseReference(block.string_content,
this.refmap))) {
block.string_content = block.string_content.slice(pos);
--
cgit v1.2.3
From 5e6a28c965d6b036b413500a070059585ddfdbe9 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 29 Sep 2014 22:46:52 -0700
Subject: Escape URIs.
---
js/stmd.js | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index b9ce5ee..30eceb2 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -207,12 +207,12 @@
dest = m.slice(1,-1);
return [{t: 'Link',
label: [{ t: 'Str', c: dest }],
- destination: 'mailto:' + dest }];
+ destination: 'mailto:' + encodeURI(dest) }];
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
return [{ t: 'Link',
label: [{ t: 'Str', c: dest }],
- destination: dest }];
+ destination: encodeURI(dest) }];
} else {
return null;
}
@@ -489,11 +489,11 @@
var parseLinkDestination = function() {
var res = this.match(reLinkDestinationBraces);
if (res) { // chop off surrounding <..>:
- return unescape(res.substr(1, res.length - 2));
+ return encodeURI(unescape(res.substr(1, res.length - 2)));
} else {
res = this.match(reLinkDestination);
if (res !== null) {
- return unescape(res);
+ return encodeURI(unescape(res));
} else {
return null;
}
--
cgit v1.2.3
From 8cabf96510bb17f80d0b849f7e97ebe54c779eb7 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Mon, 29 Sep 2014 23:05:02 -0700
Subject: Rename unescape -> unescapeBS to avoid confusion with built-in.
---
js/stmd.js | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 30eceb2..97120ed 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -77,7 +77,7 @@
// UTILITY FUNCTIONS
// Replace backslash escapes with literal characters.
- var unescape = function(s) {
+ var unescapeBS = function(s) {
return s.replace(reAllEscapedChar, '$1');
};
@@ -478,7 +478,7 @@
var title = this.match(reLinkTitle);
if (title) {
// chop off quotes from title and unescape:
- return unescape(title.substr(1, title.length - 2));
+ return unescapeBS(title.substr(1, title.length - 2));
} else {
return null;
}
@@ -489,11 +489,11 @@
var parseLinkDestination = function() {
var res = this.match(reLinkDestinationBraces);
if (res) { // chop off surrounding <..>:
- return encodeURI(unescape(res.substr(1, res.length - 2)));
+ return encodeURI(unescapeBS(res.substr(1, res.length - 2)));
} else {
res = this.match(reLinkDestination);
if (res !== null) {
- return encodeURI(unescape(res));
+ return encodeURI(unescapeBS(res));
} else {
return null;
}
@@ -1373,7 +1373,7 @@
case 'FencedCode':
// first line becomes info string
- block.info = unescape(block.strings[0].trim());
+ block.info = unescapeBS(block.strings[0].trim());
if (block.strings.length == 1) {
block.string_content = '';
} else {
--
cgit v1.2.3
From 840a6a326f5885137922517c80bce0a1005d5c71 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 30 Sep 2014 21:34:47 -0700
Subject: Added entity decoding.
AST now contains parsed entities as Str objects with unicode
characters, not as 'Entity'. (Like the new C parser.)
---
js/stmd.js | 2144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 2141 insertions(+), 3 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 97120ed..2a63d23 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -11,6 +11,2132 @@
(function(exports) {
+ var entities = { AAacute: 'Á',
+ aacute: 'á',
+ Abreve: 'Ă',
+ abreve: 'ă',
+ ac: '∾',
+ acd: '∿',
+ acE: '∾',
+ Acirc: 'Â',
+ acirc: 'â',
+ acute: '´',
+ Acy: 'А',
+ acy: 'а',
+ AElig: 'Æ',
+ aelig: 'æ',
+ af: '',
+ Afr: '𝔄',
+ afr: '𝔞',
+ Agrave: 'À',
+ agrave: 'à',
+ alefsym: 'ℵ',
+ aleph: 'ℵ',
+ Alpha: 'Α',
+ alpha: 'α',
+ Amacr: 'Ā',
+ amacr: 'ā',
+ amalg: '⨿',
+ amp: '&',
+ AMP: '&',
+ andand: '⩕',
+ And: '⩓',
+ and: '∧',
+ andd: '⩜',
+ andslope: '⩘',
+ andv: '⩚',
+ ang: '∠',
+ ange: '⦤',
+ angle: '∠',
+ angmsdaa: '⦨',
+ angmsdab: '⦩',
+ angmsdac: '⦪',
+ angmsdad: '⦫',
+ angmsdae: '⦬',
+ angmsdaf: '⦭',
+ angmsdag: '⦮',
+ angmsdah: '⦯',
+ angmsd: '∡',
+ angrt: '∟',
+ angrtvb: '⊾',
+ angrtvbd: '⦝',
+ angsph: '∢',
+ angst: 'Å',
+ angzarr: '⍼',
+ Aogon: 'Ą',
+ aogon: 'ą',
+ Aopf: '𝔸',
+ aopf: '𝕒',
+ apacir: '⩯',
+ ap: '≈',
+ apE: '⩰',
+ ape: '≊',
+ apid: '≋',
+ apos: '\'',
+ ApplyFunction: '',
+ approx: '≈',
+ approxeq: '≊',
+ Aring: 'Å',
+ aring: 'å',
+ Ascr: '𝒜',
+ ascr: '𝒶',
+ Assign: '≔',
+ ast: '*',
+ asymp: '≈',
+ asympeq: '≍',
+ Atilde: 'Ã',
+ atilde: 'ã',
+ Auml: 'Ä',
+ auml: 'ä',
+ awconint: '∳',
+ awint: '⨑',
+ backcong: '≌',
+ backepsilon: '϶',
+ backprime: '‵',
+ backsim: '∽',
+ backsimeq: '⋍',
+ Backslash: '∖',
+ Barv: '⫧',
+ barvee: '⊽',
+ barwed: '⌅',
+ Barwed: '⌆',
+ barwedge: '⌅',
+ bbrk: '⎵',
+ bbrktbrk: '⎶',
+ bcong: '≌',
+ Bcy: 'Б',
+ bcy: 'б',
+ bdquo: '„',
+ becaus: '∵',
+ because: '∵',
+ Because: '∵',
+ bemptyv: '⦰',
+ bepsi: '϶',
+ bernou: 'ℬ',
+ Bernoullis: 'ℬ',
+ Beta: 'Β',
+ beta: 'β',
+ beth: 'ℶ',
+ between: '≬',
+ Bfr: '𝔅',
+ bfr: '𝔟',
+ bigcap: '⋂',
+ bigcirc: '◯',
+ bigcup: '⋃',
+ bigodot: '⨀',
+ bigoplus: '⨁',
+ bigotimes: '⨂',
+ bigsqcup: '⨆',
+ bigstar: '★',
+ bigtriangledown: '▽',
+ bigtriangleup: '△',
+ biguplus: '⨄',
+ bigvee: '⋁',
+ bigwedge: '⋀',
+ bkarow: '⤍',
+ blacklozenge: '⧫',
+ blacksquare: '▪',
+ blacktriangle: '▴',
+ blacktriangledown: '▾',
+ blacktriangleleft: '◂',
+ blacktriangleright: '▸',
+ blank: '␣',
+ blk12: '▒',
+ blk14: '░',
+ blk34: '▓',
+ block: '█',
+ bne: '=',
+ bnequiv: '≡',
+ bNot: '⫭',
+ bnot: '⌐',
+ Bopf: '𝔹',
+ bopf: '𝕓',
+ bot: '⊥',
+ bottom: '⊥',
+ bowtie: '⋈',
+ boxbox: '⧉',
+ boxdl: '┐',
+ boxdL: '╕',
+ boxDl: '╖',
+ boxDL: '╗',
+ boxdr: '┌',
+ boxdR: '╒',
+ boxDr: '╓',
+ boxDR: '╔',
+ boxh: '─',
+ boxH: '═',
+ boxhd: '┬',
+ boxHd: '╤',
+ boxhD: '╥',
+ boxHD: '╦',
+ boxhu: '┴',
+ boxHu: '╧',
+ boxhU: '╨',
+ boxHU: '╩',
+ boxminus: '⊟',
+ boxplus: '⊞',
+ boxtimes: '⊠',
+ boxul: '┘',
+ boxuL: '╛',
+ boxUl: '╜',
+ boxUL: '╝',
+ boxur: '└',
+ boxuR: '╘',
+ boxUr: '╙',
+ boxUR: '╚',
+ boxv: '│',
+ boxV: '║',
+ boxvh: '┼',
+ boxvH: '╪',
+ boxVh: '╫',
+ boxVH: '╬',
+ boxvl: '┤',
+ boxvL: '╡',
+ boxVl: '╢',
+ boxVL: '╣',
+ boxvr: '├',
+ boxvR: '╞',
+ boxVr: '╟',
+ boxVR: '╠',
+ bprime: '‵',
+ breve: '˘',
+ Breve: '˘',
+ brvbar: '¦',
+ bscr: '𝒷',
+ Bscr: 'ℬ',
+ bsemi: '⁏',
+ bsim: '∽',
+ bsime: '⋍',
+ bsolb: '⧅',
+ bsol: '\\',
+ bsolhsub: '⟈',
+ bull: '•',
+ bullet: '•',
+ bump: '≎',
+ bumpE: '⪮',
+ bumpe: '≏',
+ Bumpeq: '≎',
+ bumpeq: '≏',
+ Cacute: 'Ć',
+ cacute: 'ć',
+ capand: '⩄',
+ capbrcup: '⩉',
+ capcap: '⩋',
+ cap: '∩',
+ Cap: '⋒',
+ capcup: '⩇',
+ capdot: '⩀',
+ CapitalDifferentialD: 'ⅅ',
+ caps: '∩',
+ caret: '⁁',
+ caron: 'ˇ',
+ Cayleys: 'ℭ',
+ ccaps: '⩍',
+ Ccaron: 'Č',
+ ccaron: 'č',
+ Ccedil: 'Ç',
+ ccedil: 'ç',
+ Ccirc: 'Ĉ',
+ ccirc: 'ĉ',
+ Cconint: '∰',
+ ccups: '⩌',
+ ccupssm: '⩐',
+ Cdot: 'Ċ',
+ cdot: 'ċ',
+ cedil: '¸',
+ Cedilla: '¸',
+ cemptyv: '⦲',
+ cent: '¢',
+ centerdot: '·',
+ CenterDot: '·',
+ cfr: '𝔠',
+ Cfr: 'ℭ',
+ CHcy: 'Ч',
+ chcy: 'ч',
+ check: '✓',
+ checkmark: '✓',
+ Chi: 'Χ',
+ chi: 'χ',
+ circ: 'ˆ',
+ circeq: '≗',
+ circlearrowleft: '↺',
+ circlearrowright: '↻',
+ circledast: '⊛',
+ circledcirc: '⊚',
+ circleddash: '⊝',
+ CircleDot: '⊙',
+ circledR: '®',
+ circledS: 'Ⓢ',
+ CircleMinus: '⊖',
+ CirclePlus: '⊕',
+ CircleTimes: '⊗',
+ cir: '○',
+ cirE: '⧃',
+ cire: '≗',
+ cirfnint: '⨐',
+ cirmid: '⫯',
+ cirscir: '⧂',
+ ClockwiseContourIntegral: '∲',
+ CloseCurlyDoubleQuote: '”',
+ CloseCurlyQuote: '’',
+ clubs: '♣',
+ clubsuit: '♣',
+ colon: ':',
+ Colon: '∷',
+ Colone: '⩴',
+ colone: '≔',
+ coloneq: '≔',
+ comma: ',',
+ commat: '@',
+ comp: '∁',
+ compfn: '∘',
+ complement: '∁',
+ complexes: 'ℂ',
+ cong: '≅',
+ congdot: '⩭',
+ Congruent: '≡',
+ conint: '∮',
+ Conint: '∯',
+ ContourIntegral: '∮',
+ copf: '𝕔',
+ Copf: 'ℂ',
+ coprod: '∐',
+ Coproduct: '∐',
+ copy: '©',
+ COPY: '©',
+ copysr: '℗',
+ CounterClockwiseContourIntegral: '∳',
+ crarr: '↵',
+ cross: '✗',
+ Cross: '⨯',
+ Cscr: '𝒞',
+ cscr: '𝒸',
+ csub: '⫏',
+ csube: '⫑',
+ csup: '⫐',
+ csupe: '⫒',
+ ctdot: '⋯',
+ cudarrl: '⤸',
+ cudarrr: '⤵',
+ cuepr: '⋞',
+ cuesc: '⋟',
+ cularr: '↶',
+ cularrp: '⤽',
+ cupbrcap: '⩈',
+ cupcap: '⩆',
+ CupCap: '≍',
+ cup: '∪',
+ Cup: '⋓',
+ cupcup: '⩊',
+ cupdot: '⊍',
+ cupor: '⩅',
+ cups: '∪',
+ curarr: '↷',
+ curarrm: '⤼',
+ curlyeqprec: '⋞',
+ curlyeqsucc: '⋟',
+ curlyvee: '⋎',
+ curlywedge: '⋏',
+ curren: '¤',
+ curvearrowleft: '↶',
+ curvearrowright: '↷',
+ cuvee: '⋎',
+ cuwed: '⋏',
+ cwconint: '∲',
+ cwint: '∱',
+ cylcty: '⌭',
+ dagger: '†',
+ Dagger: '‡',
+ daleth: 'ℸ',
+ darr: '↓',
+ Darr: '↡',
+ dArr: '⇓',
+ dash: '‐',
+ Dashv: '⫤',
+ dashv: '⊣',
+ dbkarow: '⤏',
+ dblac: '˝',
+ Dcaron: 'Ď',
+ dcaron: 'ď',
+ Dcy: 'Д',
+ dcy: 'д',
+ ddagger: '‡',
+ ddarr: '⇊',
+ DD: 'ⅅ',
+ dd: 'ⅆ',
+ DDotrahd: '⤑',
+ ddotseq: '⩷',
+ deg: '°',
+ Del: '∇',
+ Delta: 'Δ',
+ delta: 'δ',
+ demptyv: '⦱',
+ dfisht: '⥿',
+ Dfr: '𝔇',
+ dfr: '𝔡',
+ dHar: '⥥',
+ dharl: '⇃',
+ dharr: '⇂',
+ DiacriticalAcute: '´',
+ DiacriticalDot: '˙',
+ DiacriticalDoubleAcute: '˝',
+ DiacriticalGrave: '`',
+ DiacriticalTilde: '˜',
+ diam: '⋄',
+ diamond: '⋄',
+ Diamond: '⋄',
+ diamondsuit: '♦',
+ diams: '♦',
+ die: '¨',
+ DifferentialD: 'ⅆ',
+ digamma: 'ϝ',
+ disin: '⋲',
+ div: '÷',
+ divide: '÷',
+ divideontimes: '⋇',
+ divonx: '⋇',
+ DJcy: 'Ђ',
+ djcy: 'ђ',
+ dlcorn: '⌞',
+ dlcrop: '⌍',
+ dollar: '$',
+ Dopf: '𝔻',
+ dopf: '𝕕',
+ Dot: '¨',
+ dot: '˙',
+ DotDot: '⃜',
+ doteq: '≐',
+ doteqdot: '≑',
+ DotEqual: '≐',
+ dotminus: '∸',
+ dotplus: '∔',
+ dotsquare: '⊡',
+ doublebarwedge: '⌆',
+ DoubleContourIntegral: '∯',
+ DoubleDot: '¨',
+ DoubleDownArrow: '⇓',
+ DoubleLeftArrow: '⇐',
+ DoubleLeftRightArrow: '⇔',
+ DoubleLeftTee: '⫤',
+ DoubleLongLeftArrow: '⟸',
+ DoubleLongLeftRightArrow: '⟺',
+ DoubleLongRightArrow: '⟹',
+ DoubleRightArrow: '⇒',
+ DoubleRightTee: '⊨',
+ DoubleUpArrow: '⇑',
+ DoubleUpDownArrow: '⇕',
+ DoubleVerticalBar: '∥',
+ DownArrowBar: '⤓',
+ downarrow: '↓',
+ DownArrow: '↓',
+ Downarrow: '⇓',
+ DownArrowUpArrow: '⇵',
+ DownBreve: '̑',
+ downdownarrows: '⇊',
+ downharpoonleft: '⇃',
+ downharpoonright: '⇂',
+ DownLeftRightVector: '⥐',
+ DownLeftTeeVector: '⥞',
+ DownLeftVectorBar: '⥖',
+ DownLeftVector: '↽',
+ DownRightTeeVector: '⥟',
+ DownRightVectorBar: '⥗',
+ DownRightVector: '⇁',
+ DownTeeArrow: '↧',
+ DownTee: '⊤',
+ drbkarow: '⤐',
+ drcorn: '⌟',
+ drcrop: '⌌',
+ Dscr: '𝒟',
+ dscr: '𝒹',
+ DScy: 'Ѕ',
+ dscy: 'ѕ',
+ dsol: '⧶',
+ Dstrok: 'Đ',
+ dstrok: 'đ',
+ dtdot: '⋱',
+ dtri: '▿',
+ dtrif: '▾',
+ duarr: '⇵',
+ duhar: '⥯',
+ dwangle: '⦦',
+ DZcy: 'Џ',
+ dzcy: 'џ',
+ dzigrarr: '⟿',
+ Eacute: 'É',
+ eacute: 'é',
+ easter: '⩮',
+ Ecaron: 'Ě',
+ ecaron: 'ě',
+ Ecirc: 'Ê',
+ ecirc: 'ê',
+ ecir: '≖',
+ ecolon: '≕',
+ Ecy: 'Э',
+ ecy: 'э',
+ eDDot: '⩷',
+ Edot: 'Ė',
+ edot: 'ė',
+ eDot: '≑',
+ ee: 'ⅇ',
+ efDot: '≒',
+ Efr: '𝔈',
+ efr: '𝔢',
+ eg: '⪚',
+ Egrave: 'È',
+ egrave: 'è',
+ egs: '⪖',
+ egsdot: '⪘',
+ el: '⪙',
+ Element: '∈',
+ elinters: '⏧',
+ ell: 'ℓ',
+ els: '⪕',
+ elsdot: '⪗',
+ Emacr: 'Ē',
+ emacr: 'ē',
+ empty: '∅',
+ emptyset: '∅',
+ EmptySmallSquare: '◻',
+ emptyv: '∅',
+ EmptyVerySmallSquare: '▫',
+ emsp13: ' ',
+ emsp14: ' ',
+ emsp: ' ',
+ ENG: 'Ŋ',
+ eng: 'ŋ',
+ ensp: ' ',
+ Eogon: 'Ę',
+ eogon: 'ę',
+ Eopf: '𝔼',
+ eopf: '𝕖',
+ epar: '⋕',
+ eparsl: '⧣',
+ eplus: '⩱',
+ epsi: 'ε',
+ Epsilon: 'Ε',
+ epsilon: 'ε',
+ epsiv: 'ϵ',
+ eqcirc: '≖',
+ eqcolon: '≕',
+ eqsim: '≂',
+ eqslantgtr: '⪖',
+ eqslantless: '⪕',
+ Equal: '⩵',
+ equals: '=',
+ EqualTilde: '≂',
+ equest: '≟',
+ Equilibrium: '⇌',
+ equiv: '≡',
+ equivDD: '⩸',
+ eqvparsl: '⧥',
+ erarr: '⥱',
+ erDot: '≓',
+ escr: 'ℯ',
+ Escr: 'ℰ',
+ esdot: '≐',
+ Esim: '⩳',
+ esim: '≂',
+ Eta: 'Η',
+ eta: 'η',
+ ETH: 'Ð',
+ eth: 'ð',
+ Euml: 'Ë',
+ euml: 'ë',
+ euro: '€',
+ excl: '!',
+ exist: '∃',
+ Exists: '∃',
+ expectation: 'ℰ',
+ exponentiale: 'ⅇ',
+ ExponentialE: 'ⅇ',
+ fallingdotseq: '≒',
+ Fcy: 'Ф',
+ fcy: 'ф',
+ female: '♀',
+ ffilig: 'ffi',
+ fflig: 'ff',
+ ffllig: 'ffl',
+ Ffr: '𝔉',
+ ffr: '𝔣',
+ filig: 'fi',
+ FilledSmallSquare: '◼',
+ FilledVerySmallSquare: '▪',
+ fjlig: 'f',
+ flat: '♭',
+ fllig: 'fl',
+ fltns: '▱',
+ fnof: 'ƒ',
+ Fopf: '𝔽',
+ fopf: '𝕗',
+ forall: '∀',
+ ForAll: '∀',
+ fork: '⋔',
+ forkv: '⫙',
+ Fouriertrf: 'ℱ',
+ fpartint: '⨍',
+ frac12: '½',
+ frac13: '⅓',
+ frac14: '¼',
+ frac15: '⅕',
+ frac16: '⅙',
+ frac18: '⅛',
+ frac23: '⅔',
+ frac25: '⅖',
+ frac34: '¾',
+ frac35: '⅗',
+ frac38: '⅜',
+ frac45: '⅘',
+ frac56: '⅚',
+ frac58: '⅝',
+ frac78: '⅞',
+ frasl: '⁄',
+ frown: '⌢',
+ fscr: '𝒻',
+ Fscr: 'ℱ',
+ gacute: 'ǵ',
+ Gamma: 'Γ',
+ gamma: 'γ',
+ Gammad: 'Ϝ',
+ gammad: 'ϝ',
+ gap: '⪆',
+ Gbreve: 'Ğ',
+ gbreve: 'ğ',
+ Gcedil: 'Ģ',
+ Gcirc: 'Ĝ',
+ gcirc: 'ĝ',
+ Gcy: 'Г',
+ gcy: 'г',
+ Gdot: 'Ġ',
+ gdot: 'ġ',
+ ge: '≥',
+ gE: '≧',
+ gEl: '⪌',
+ gel: '⋛',
+ geq: '≥',
+ geqq: '≧',
+ geqslant: '⩾',
+ gescc: '⪩',
+ ges: '⩾',
+ gesdot: '⪀',
+ gesdoto: '⪂',
+ gesdotol: '⪄',
+ gesl: '⋛',
+ gesles: '⪔',
+ Gfr: '𝔊',
+ gfr: '𝔤',
+ gg: '≫',
+ Gg: '⋙',
+ ggg: '⋙',
+ gimel: 'ℷ',
+ GJcy: 'Ѓ',
+ gjcy: 'ѓ',
+ gla: '⪥',
+ gl: '≷',
+ glE: '⪒',
+ glj: '⪤',
+ gnap: '⪊',
+ gnapprox: '⪊',
+ gne: '⪈',
+ gnE: '≩',
+ gneq: '⪈',
+ gneqq: '≩',
+ gnsim: '⋧',
+ Gopf: '𝔾',
+ gopf: '𝕘',
+ grave: '`',
+ GreaterEqual: '≥',
+ GreaterEqualLess: '⋛',
+ GreaterFullEqual: '≧',
+ GreaterGreater: '⪢',
+ GreaterLess: '≷',
+ GreaterSlantEqual: '⩾',
+ GreaterTilde: '≳',
+ Gscr: '𝒢',
+ gscr: 'ℊ',
+ gsim: '≳',
+ gsime: '⪎',
+ gsiml: '⪐',
+ gtcc: '⪧',
+ gtcir: '⩺',
+ gt: '>',
+ GT: '>',
+ Gt: '≫',
+ gtdot: '⋗',
+ gtlPar: '⦕',
+ gtquest: '⩼',
+ gtrapprox: '⪆',
+ gtrarr: '⥸',
+ gtrdot: '⋗',
+ gtreqless: '⋛',
+ gtreqqless: '⪌',
+ gtrless: '≷',
+ gtrsim: '≳',
+ gvertneqq: '≩',
+ gvnE: '≩',
+ Hacek: 'ˇ',
+ hairsp: ' ',
+ half: '½',
+ hamilt: 'ℋ',
+ HARDcy: 'Ъ',
+ hardcy: 'ъ',
+ harrcir: '⥈',
+ harr: '↔',
+ hArr: '⇔',
+ harrw: '↭',
+ Hat: '^',
+ hbar: 'ℏ',
+ Hcirc: 'Ĥ',
+ hcirc: 'ĥ',
+ hearts: '♥',
+ heartsuit: '♥',
+ hellip: '…',
+ hercon: '⊹',
+ hfr: '𝔥',
+ Hfr: 'ℌ',
+ HilbertSpace: 'ℋ',
+ hksearow: '⤥',
+ hkswarow: '⤦',
+ hoarr: '⇿',
+ homtht: '∻',
+ hookleftarrow: '↩',
+ hookrightarrow: '↪',
+ hopf: '𝕙',
+ Hopf: 'ℍ',
+ horbar: '―',
+ HorizontalLine: '─',
+ hscr: '𝒽',
+ Hscr: 'ℋ',
+ hslash: 'ℏ',
+ Hstrok: 'Ħ',
+ hstrok: 'ħ',
+ HumpDownHump: '≎',
+ HumpEqual: '≏',
+ hybull: '⁃',
+ hyphen: '‐',
+ Iacute: 'Í',
+ iacute: 'í',
+ ic: '',
+ Icirc: 'Î',
+ icirc: 'î',
+ Icy: 'И',
+ icy: 'и',
+ Idot: 'İ',
+ IEcy: 'Е',
+ iecy: 'е',
+ iexcl: '¡',
+ iff: '⇔',
+ ifr: '𝔦',
+ Ifr: 'ℑ',
+ Igrave: 'Ì',
+ igrave: 'ì',
+ ii: 'ⅈ',
+ iiiint: '⨌',
+ iiint: '∭',
+ iinfin: '⧜',
+ iiota: '℩',
+ IJlig: 'IJ',
+ ijlig: 'ij',
+ Imacr: 'Ī',
+ imacr: 'ī',
+ image: 'ℑ',
+ ImaginaryI: 'ⅈ',
+ imagline: 'ℐ',
+ imagpart: 'ℑ',
+ imath: 'ı',
+ Im: 'ℑ',
+ imof: '⊷',
+ imped: 'Ƶ',
+ Implies: '⇒',
+ incare: '℅',
+ in: '∈',
+ infin: '∞',
+ infintie: '⧝',
+ inodot: 'ı',
+ intcal: '⊺',
+ int: '∫',
+ Int: '∬',
+ integers: 'ℤ',
+ Integral: '∫',
+ intercal: '⊺',
+ Intersection: '⋂',
+ intlarhk: '⨗',
+ intprod: '⨼',
+ InvisibleComma: '',
+ InvisibleTimes: '',
+ IOcy: 'Ё',
+ iocy: 'ё',
+ Iogon: 'Į',
+ iogon: 'į',
+ Iopf: '𝕀',
+ iopf: '𝕚',
+ Iota: 'Ι',
+ iota: 'ι',
+ iprod: '⨼',
+ iquest: '¿',
+ iscr: '𝒾',
+ Iscr: 'ℐ',
+ isin: '∈',
+ isindot: '⋵',
+ isinE: '⋹',
+ isins: '⋴',
+ isinsv: '⋳',
+ isinv: '∈',
+ it: '',
+ Itilde: 'Ĩ',
+ itilde: 'ĩ',
+ Iukcy: 'І',
+ iukcy: 'і',
+ Iuml: 'Ï',
+ iuml: 'ï',
+ Jcirc: 'Ĵ',
+ jcirc: 'ĵ',
+ Jcy: 'Й',
+ jcy: 'й',
+ Jfr: '𝔍',
+ jfr: '𝔧',
+ jmath: 'ȷ',
+ Jopf: '𝕁',
+ jopf: '𝕛',
+ Jscr: '𝒥',
+ jscr: '𝒿',
+ Jsercy: 'Ј',
+ jsercy: 'ј',
+ Jukcy: 'Є',
+ jukcy: 'є',
+ Kappa: 'Κ',
+ kappa: 'κ',
+ kappav: 'ϰ',
+ Kcedil: 'Ķ',
+ kcedil: 'ķ',
+ Kcy: 'К',
+ kcy: 'к',
+ Kfr: '𝔎',
+ kfr: '𝔨',
+ kgreen: 'ĸ',
+ KHcy: 'Х',
+ khcy: 'х',
+ KJcy: 'Ќ',
+ kjcy: 'ќ',
+ Kopf: '𝕂',
+ kopf: '𝕜',
+ Kscr: '𝒦',
+ kscr: '𝓀',
+ lAarr: '⇚',
+ Lacute: 'Ĺ',
+ lacute: 'ĺ',
+ laemptyv: '⦴',
+ lagran: 'ℒ',
+ Lambda: 'Λ',
+ lambda: 'λ',
+ lang: '⟨',
+ Lang: '⟪',
+ langd: '⦑',
+ langle: '⟨',
+ lap: '⪅',
+ Laplacetrf: 'ℒ',
+ laquo: '«',
+ larrb: '⇤',
+ larrbfs: '⤟',
+ larr: '←',
+ Larr: '↞',
+ lArr: '⇐',
+ larrfs: '⤝',
+ larrhk: '↩',
+ larrlp: '↫',
+ larrpl: '⤹',
+ larrsim: '⥳',
+ larrtl: '↢',
+ latail: '⤙',
+ lAtail: '⤛',
+ lat: '⪫',
+ late: '⪭',
+ lates: '⪭',
+ lbarr: '⤌',
+ lBarr: '⤎',
+ lbbrk: '❲',
+ lbrace: '{',
+ lbrack: '[',
+ lbrke: '⦋',
+ lbrksld: '⦏',
+ lbrkslu: '⦍',
+ Lcaron: 'Ľ',
+ lcaron: 'ľ',
+ Lcedil: 'Ļ',
+ lcedil: 'ļ',
+ lceil: '⌈',
+ lcub: '{',
+ Lcy: 'Л',
+ lcy: 'л',
+ ldca: '⤶',
+ ldquo: '“',
+ ldquor: '„',
+ ldrdhar: '⥧',
+ ldrushar: '⥋',
+ ldsh: '↲',
+ le: '≤',
+ lE: '≦',
+ LeftAngleBracket: '⟨',
+ LeftArrowBar: '⇤',
+ leftarrow: '←',
+ LeftArrow: '←',
+ Leftarrow: '⇐',
+ LeftArrowRightArrow: '⇆',
+ leftarrowtail: '↢',
+ LeftCeiling: '⌈',
+ LeftDoubleBracket: '⟦',
+ LeftDownTeeVector: '⥡',
+ LeftDownVectorBar: '⥙',
+ LeftDownVector: '⇃',
+ LeftFloor: '⌊',
+ leftharpoondown: '↽',
+ leftharpoonup: '↼',
+ leftleftarrows: '⇇',
+ leftrightarrow: '↔',
+ LeftRightArrow: '↔',
+ Leftrightarrow: '⇔',
+ leftrightarrows: '⇆',
+ leftrightharpoons: '⇋',
+ leftrightsquigarrow: '↭',
+ LeftRightVector: '⥎',
+ LeftTeeArrow: '↤',
+ LeftTee: '⊣',
+ LeftTeeVector: '⥚',
+ leftthreetimes: '⋋',
+ LeftTriangleBar: '⧏',
+ LeftTriangle: '⊲',
+ LeftTriangleEqual: '⊴',
+ LeftUpDownVector: '⥑',
+ LeftUpTeeVector: '⥠',
+ LeftUpVectorBar: '⥘',
+ LeftUpVector: '↿',
+ LeftVectorBar: '⥒',
+ LeftVector: '↼',
+ lEg: '⪋',
+ leg: '⋚',
+ leq: '≤',
+ leqq: '≦',
+ leqslant: '⩽',
+ lescc: '⪨',
+ les: '⩽',
+ lesdot: '⩿',
+ lesdoto: '⪁',
+ lesdotor: '⪃',
+ lesg: '⋚',
+ lesges: '⪓',
+ lessapprox: '⪅',
+ lessdot: '⋖',
+ lesseqgtr: '⋚',
+ lesseqqgtr: '⪋',
+ LessEqualGreater: '⋚',
+ LessFullEqual: '≦',
+ LessGreater: '≶',
+ lessgtr: '≶',
+ LessLess: '⪡',
+ lesssim: '≲',
+ LessSlantEqual: '⩽',
+ LessTilde: '≲',
+ lfisht: '⥼',
+ lfloor: '⌊',
+ Lfr: '𝔏',
+ lfr: '𝔩',
+ lg: '≶',
+ lgE: '⪑',
+ lHar: '⥢',
+ lhard: '↽',
+ lharu: '↼',
+ lharul: '⥪',
+ lhblk: '▄',
+ LJcy: 'Љ',
+ ljcy: 'љ',
+ llarr: '⇇',
+ ll: '≪',
+ Ll: '⋘',
+ llcorner: '⌞',
+ Lleftarrow: '⇚',
+ llhard: '⥫',
+ lltri: '◺',
+ Lmidot: 'Ŀ',
+ lmidot: 'ŀ',
+ lmoustache: '⎰',
+ lmoust: '⎰',
+ lnap: '⪉',
+ lnapprox: '⪉',
+ lne: '⪇',
+ lnE: '≨',
+ lneq: '⪇',
+ lneqq: '≨',
+ lnsim: '⋦',
+ loang: '⟬',
+ loarr: '⇽',
+ lobrk: '⟦',
+ longleftarrow: '⟵',
+ LongLeftArrow: '⟵',
+ Longleftarrow: '⟸',
+ longleftrightarrow: '⟷',
+ LongLeftRightArrow: '⟷',
+ Longleftrightarrow: '⟺',
+ longmapsto: '⟼',
+ longrightarrow: '⟶',
+ LongRightArrow: '⟶',
+ Longrightarrow: '⟹',
+ looparrowleft: '↫',
+ looparrowright: '↬',
+ lopar: '⦅',
+ Lopf: '𝕃',
+ lopf: '𝕝',
+ loplus: '⨭',
+ lotimes: '⨴',
+ lowast: '∗',
+ lowbar: '_',
+ LowerLeftArrow: '↙',
+ LowerRightArrow: '↘',
+ loz: '◊',
+ lozenge: '◊',
+ lozf: '⧫',
+ lpar: '(',
+ lparlt: '⦓',
+ lrarr: '⇆',
+ lrcorner: '⌟',
+ lrhar: '⇋',
+ lrhard: '⥭',
+ lrm: '',
+ lrtri: '⊿',
+ lsaquo: '‹',
+ lscr: '𝓁',
+ Lscr: 'ℒ',
+ lsh: '↰',
+ Lsh: '↰',
+ lsim: '≲',
+ lsime: '⪍',
+ lsimg: '⪏',
+ lsqb: '[',
+ lsquo: '‘',
+ lsquor: '‚',
+ Lstrok: 'Ł',
+ lstrok: 'ł',
+ ltcc: '⪦',
+ ltcir: '⩹',
+ lt: '<',
+ LT: '<',
+ Lt: '≪',
+ ltdot: '⋖',
+ lthree: '⋋',
+ ltimes: '⋉',
+ ltlarr: '⥶',
+ ltquest: '⩻',
+ ltri: '◃',
+ ltrie: '⊴',
+ ltrif: '◂',
+ ltrPar: '⦖',
+ lurdshar: '⥊',
+ luruhar: '⥦',
+ lvertneqq: '≨',
+ lvnE: '≨',
+ macr: '¯',
+ male: '♂',
+ malt: '✠',
+ maltese: '✠',
+ Map: '⤅',
+ map: '↦',
+ mapsto: '↦',
+ mapstodown: '↧',
+ mapstoleft: '↤',
+ mapstoup: '↥',
+ marker: '▮',
+ mcomma: '⨩',
+ Mcy: 'М',
+ mcy: 'м',
+ mdash: '—',
+ mDDot: '∺',
+ measuredangle: '∡',
+ MediumSpace: ' ',
+ Mellintrf: 'ℳ',
+ Mfr: '𝔐',
+ mfr: '𝔪',
+ mho: '℧',
+ micro: 'µ',
+ midast: '*',
+ midcir: '⫰',
+ mid: '∣',
+ middot: '·',
+ minusb: '⊟',
+ minus: '−',
+ minusd: '∸',
+ minusdu: '⨪',
+ MinusPlus: '∓',
+ mlcp: '⫛',
+ mldr: '…',
+ mnplus: '∓',
+ models: '⊧',
+ Mopf: '𝕄',
+ mopf: '𝕞',
+ mp: '∓',
+ mscr: '𝓂',
+ Mscr: 'ℳ',
+ mstpos: '∾',
+ Mu: 'Μ',
+ mu: 'μ',
+ multimap: '⊸',
+ mumap: '⊸',
+ nabla: '∇',
+ Nacute: 'Ń',
+ nacute: 'ń',
+ nang: '∠',
+ nap: '≉',
+ napE: '⩰',
+ napid: '≋',
+ napos: 'ʼn',
+ napprox: '≉',
+ natural: '♮',
+ naturals: 'ℕ',
+ natur: '♮',
+ nbsp: ' ',
+ nbump: '≎',
+ nbumpe: '≏',
+ ncap: '⩃',
+ Ncaron: 'Ň',
+ ncaron: 'ň',
+ Ncedil: 'Ņ',
+ ncedil: 'ņ',
+ ncong: '≇',
+ ncongdot: '⩭',
+ ncup: '⩂',
+ Ncy: 'Н',
+ ncy: 'н',
+ ndash: '–',
+ nearhk: '⤤',
+ nearr: '↗',
+ neArr: '⇗',
+ nearrow: '↗',
+ ne: '≠',
+ nedot: '≐',
+ NegativeMediumSpace: '',
+ NegativeThickSpace: '',
+ NegativeThinSpace: '',
+ NegativeVeryThinSpace: '',
+ nequiv: '≢',
+ nesear: '⤨',
+ nesim: '≂',
+ NestedGreaterGreater: '≫',
+ NestedLessLess: '≪',
+ NewLine: '\n',
+ nexist: '∄',
+ nexists: '∄',
+ Nfr: '𝔑',
+ nfr: '𝔫',
+ ngE: '≧',
+ nge: '≱',
+ ngeq: '≱',
+ ngeqq: '≧',
+ ngeqslant: '⩾',
+ nges: '⩾',
+ nGg: '⋙',
+ ngsim: '≵',
+ nGt: '≫',
+ ngt: '≯',
+ ngtr: '≯',
+ nGtv: '≫',
+ nharr: '↮',
+ nhArr: '⇎',
+ nhpar: '⫲',
+ ni: '∋',
+ nis: '⋼',
+ nisd: '⋺',
+ niv: '∋',
+ NJcy: 'Њ',
+ njcy: 'њ',
+ nlarr: '↚',
+ nlArr: '⇍',
+ nldr: '‥',
+ nlE: '≦',
+ nle: '≰',
+ nleftarrow: '↚',
+ nLeftarrow: '⇍',
+ nleftrightarrow: '↮',
+ nLeftrightarrow: '⇎',
+ nleq: '≰',
+ nleqq: '≦',
+ nleqslant: '⩽',
+ nles: '⩽',
+ nless: '≮',
+ nLl: '⋘',
+ nlsim: '≴',
+ nLt: '≪',
+ nlt: '≮',
+ nltri: '⋪',
+ nltrie: '⋬',
+ nLtv: '≪',
+ nmid: '∤',
+ NoBreak: '',
+ NonBreakingSpace: ' ',
+ nopf: '𝕟',
+ Nopf: 'ℕ',
+ Not: '⫬',
+ not: '¬',
+ NotCongruent: '≢',
+ NotCupCap: '≭',
+ NotDoubleVerticalBar: '∦',
+ NotElement: '∉',
+ NotEqual: '≠',
+ NotEqualTilde: '≂',
+ NotExists: '∄',
+ NotGreater: '≯',
+ NotGreaterEqual: '≱',
+ NotGreaterFullEqual: '≧',
+ NotGreaterGreater: '≫',
+ NotGreaterLess: '≹',
+ NotGreaterSlantEqual: '⩾',
+ NotGreaterTilde: '≵',
+ NotHumpDownHump: '≎',
+ NotHumpEqual: '≏',
+ notin: '∉',
+ notindot: '⋵',
+ notinE: '⋹',
+ notinva: '∉',
+ notinvb: '⋷',
+ notinvc: '⋶',
+ NotLeftTriangleBar: '⧏',
+ NotLeftTriangle: '⋪',
+ NotLeftTriangleEqual: '⋬',
+ NotLess: '≮',
+ NotLessEqual: '≰',
+ NotLessGreater: '≸',
+ NotLessLess: '≪',
+ NotLessSlantEqual: '⩽',
+ NotLessTilde: '≴',
+ NotNestedGreaterGreater: '⪢',
+ NotNestedLessLess: '⪡',
+ notni: '∌',
+ notniva: '∌',
+ notnivb: '⋾',
+ notnivc: '⋽',
+ NotPrecedes: '⊀',
+ NotPrecedesEqual: '⪯',
+ NotPrecedesSlantEqual: '⋠',
+ NotReverseElement: '∌',
+ NotRightTriangleBar: '⧐',
+ NotRightTriangle: '⋫',
+ NotRightTriangleEqual: '⋭',
+ NotSquareSubset: '⊏',
+ NotSquareSubsetEqual: '⋢',
+ NotSquareSuperset: '⊐',
+ NotSquareSupersetEqual: '⋣',
+ NotSubset: '⊂',
+ NotSubsetEqual: '⊈',
+ NotSucceeds: '⊁',
+ NotSucceedsEqual: '⪰',
+ NotSucceedsSlantEqual: '⋡',
+ NotSucceedsTilde: '≿',
+ NotSuperset: '⊃',
+ NotSupersetEqual: '⊉',
+ NotTilde: '≁',
+ NotTildeEqual: '≄',
+ NotTildeFullEqual: '≇',
+ NotTildeTilde: '≉',
+ NotVerticalBar: '∤',
+ nparallel: '∦',
+ npar: '∦',
+ nparsl: '⫽',
+ npart: '∂',
+ npolint: '⨔',
+ npr: '⊀',
+ nprcue: '⋠',
+ nprec: '⊀',
+ npreceq: '⪯',
+ npre: '⪯',
+ nrarrc: '⤳',
+ nrarr: '↛',
+ nrArr: '⇏',
+ nrarrw: '↝',
+ nrightarrow: '↛',
+ nRightarrow: '⇏',
+ nrtri: '⋫',
+ nrtrie: '⋭',
+ nsc: '⊁',
+ nsccue: '⋡',
+ nsce: '⪰',
+ Nscr: '𝒩',
+ nscr: '𝓃',
+ nshortmid: '∤',
+ nshortparallel: '∦',
+ nsim: '≁',
+ nsime: '≄',
+ nsimeq: '≄',
+ nsmid: '∤',
+ nspar: '∦',
+ nsqsube: '⋢',
+ nsqsupe: '⋣',
+ nsub: '⊄',
+ nsubE: '⫅',
+ nsube: '⊈',
+ nsubset: '⊂',
+ nsubseteq: '⊈',
+ nsubseteqq: '⫅',
+ nsucc: '⊁',
+ nsucceq: '⪰',
+ nsup: '⊅',
+ nsupE: '⫆',
+ nsupe: '⊉',
+ nsupset: '⊃',
+ nsupseteq: '⊉',
+ nsupseteqq: '⫆',
+ ntgl: '≹',
+ Ntilde: 'Ñ',
+ ntilde: 'ñ',
+ ntlg: '≸',
+ ntriangleleft: '⋪',
+ ntrianglelefteq: '⋬',
+ ntriangleright: '⋫',
+ ntrianglerighteq: '⋭',
+ Nu: 'Ν',
+ nu: 'ν',
+ num: '#',
+ numero: '№',
+ numsp: ' ',
+ nvap: '≍',
+ nvdash: '⊬',
+ nvDash: '⊭',
+ nVdash: '⊮',
+ nVDash: '⊯',
+ nvge: '≥',
+ nvgt: '>',
+ nvHarr: '⤄',
+ nvinfin: '⧞',
+ nvlArr: '⤂',
+ nvle: '≤',
+ nvlt: '>',
+ nvltrie: '⊴',
+ nvrArr: '⤃',
+ nvrtrie: '⊵',
+ nvsim: '∼',
+ nwarhk: '⤣',
+ nwarr: '↖',
+ nwArr: '⇖',
+ nwarrow: '↖',
+ nwnear: '⤧',
+ Oacute: 'Ó',
+ oacute: 'ó',
+ oast: '⊛',
+ Ocirc: 'Ô',
+ ocirc: 'ô',
+ ocir: '⊚',
+ Ocy: 'О',
+ ocy: 'о',
+ odash: '⊝',
+ Odblac: 'Ő',
+ odblac: 'ő',
+ odiv: '⨸',
+ odot: '⊙',
+ odsold: '⦼',
+ OElig: 'Œ',
+ oelig: 'œ',
+ ofcir: '⦿',
+ Ofr: '𝔒',
+ ofr: '𝔬',
+ ogon: '˛',
+ Ograve: 'Ò',
+ ograve: 'ò',
+ ogt: '⧁',
+ ohbar: '⦵',
+ ohm: 'Ω',
+ oint: '∮',
+ olarr: '↺',
+ olcir: '⦾',
+ olcross: '⦻',
+ oline: '‾',
+ olt: '⧀',
+ Omacr: 'Ō',
+ omacr: 'ō',
+ Omega: 'Ω',
+ omega: 'ω',
+ Omicron: 'Ο',
+ omicron: 'ο',
+ omid: '⦶',
+ ominus: '⊖',
+ Oopf: '𝕆',
+ oopf: '𝕠',
+ opar: '⦷',
+ OpenCurlyDoubleQuote: '“',
+ OpenCurlyQuote: '‘',
+ operp: '⦹',
+ oplus: '⊕',
+ orarr: '↻',
+ Or: '⩔',
+ or: '∨',
+ ord: '⩝',
+ order: 'ℴ',
+ orderof: 'ℴ',
+ ordf: 'ª',
+ ordm: 'º',
+ origof: '⊶',
+ oror: '⩖',
+ orslope: '⩗',
+ orv: '⩛',
+ oS: 'Ⓢ',
+ Oscr: '𝒪',
+ oscr: 'ℴ',
+ Oslash: 'Ø',
+ oslash: 'ø',
+ osol: '⊘',
+ Otilde: 'Õ',
+ otilde: 'õ',
+ otimesas: '⨶',
+ Otimes: '⨷',
+ otimes: '⊗',
+ Ouml: 'Ö',
+ ouml: 'ö',
+ ovbar: '⌽',
+ OverBar: '‾',
+ OverBrace: '⏞',
+ OverBracket: '⎴',
+ OverParenthesis: '⏜',
+ para: '¶',
+ parallel: '∥',
+ par: '∥',
+ parsim: '⫳',
+ parsl: '⫽',
+ part: '∂',
+ PartialD: '∂',
+ Pcy: 'П',
+ pcy: 'п',
+ percnt: '%',
+ period: '.',
+ permil: '‰',
+ perp: '⊥',
+ pertenk: '‱',
+ Pfr: '𝔓',
+ pfr: '𝔭',
+ Phi: 'Φ',
+ phi: 'φ',
+ phiv: 'ϕ',
+ phmmat: 'ℳ',
+ phone: '☎',
+ Pi: 'Π',
+ pi: 'π',
+ pitchfork: '⋔',
+ piv: 'ϖ',
+ planck: 'ℏ',
+ planckh: 'ℎ',
+ plankv: 'ℏ',
+ plusacir: '⨣',
+ plusb: '⊞',
+ pluscir: '⨢',
+ plus: '+',
+ plusdo: '∔',
+ plusdu: '⨥',
+ pluse: '⩲',
+ PlusMinus: '±',
+ plusmn: '±',
+ plussim: '⨦',
+ plustwo: '⨧',
+ pm: '±',
+ Poincareplane: 'ℌ',
+ pointint: '⨕',
+ popf: '𝕡',
+ Popf: 'ℙ',
+ pound: '£',
+ prap: '⪷',
+ Pr: '⪻',
+ pr: '≺',
+ prcue: '≼',
+ precapprox: '⪷',
+ prec: '≺',
+ preccurlyeq: '≼',
+ Precedes: '≺',
+ PrecedesEqual: '⪯',
+ PrecedesSlantEqual: '≼',
+ PrecedesTilde: '≾',
+ preceq: '⪯',
+ precnapprox: '⪹',
+ precneqq: '⪵',
+ precnsim: '⋨',
+ pre: '⪯',
+ prE: '⪳',
+ precsim: '≾',
+ prime: '′',
+ Prime: '″',
+ primes: 'ℙ',
+ prnap: '⪹',
+ prnE: '⪵',
+ prnsim: '⋨',
+ prod: '∏',
+ Product: '∏',
+ profalar: '⌮',
+ profline: '⌒',
+ profsurf: '⌓',
+ prop: '∝',
+ Proportional: '∝',
+ Proportion: '∷',
+ propto: '∝',
+ prsim: '≾',
+ prurel: '⊰',
+ Pscr: '𝒫',
+ pscr: '𝓅',
+ Psi: 'Ψ',
+ psi: 'ψ',
+ puncsp: ' ',
+ Qfr: '𝔔',
+ qfr: '𝔮',
+ qint: '⨌',
+ qopf: '𝕢',
+ Qopf: 'ℚ',
+ qprime: '⁗',
+ Qscr: '𝒬',
+ qscr: '𝓆',
+ quaternions: 'ℍ',
+ quatint: '⨖',
+ quest: '?',
+ questeq: '≟',
+ quot: '"',
+ QUOT: '"',
+ rAarr: '⇛',
+ race: '∽',
+ Racute: 'Ŕ',
+ racute: 'ŕ',
+ radic: '√',
+ raemptyv: '⦳',
+ rang: '⟩',
+ Rang: '⟫',
+ rangd: '⦒',
+ range: '⦥',
+ rangle: '⟩',
+ raquo: '»',
+ rarrap: '⥵',
+ rarrb: '⇥',
+ rarrbfs: '⤠',
+ rarrc: '⤳',
+ rarr: '→',
+ Rarr: '↠',
+ rArr: '⇒',
+ rarrfs: '⤞',
+ rarrhk: '↪',
+ rarrlp: '↬',
+ rarrpl: '⥅',
+ rarrsim: '⥴',
+ Rarrtl: '⤖',
+ rarrtl: '↣',
+ rarrw: '↝',
+ ratail: '⤚',
+ rAtail: '⤜',
+ ratio: '∶',
+ rationals: 'ℚ',
+ rbarr: '⤍',
+ rBarr: '⤏',
+ RBarr: '⤐',
+ rbbrk: '❳',
+ rbrace: '}',
+ rbrack: ']',
+ rbrke: '⦌',
+ rbrksld: '⦎',
+ rbrkslu: '⦐',
+ Rcaron: 'Ř',
+ rcaron: 'ř',
+ Rcedil: 'Ŗ',
+ rcedil: 'ŗ',
+ rceil: '⌉',
+ rcub: '}',
+ Rcy: 'Р',
+ rcy: 'р',
+ rdca: '⤷',
+ rdldhar: '⥩',
+ rdquo: '”',
+ rdquor: '”',
+ rdsh: '↳',
+ real: 'ℜ',
+ realine: 'ℛ',
+ realpart: 'ℜ',
+ reals: 'ℝ',
+ Re: 'ℜ',
+ rect: '▭',
+ reg: '®',
+ REG: '®',
+ ReverseElement: '∋',
+ ReverseEquilibrium: '⇋',
+ ReverseUpEquilibrium: '⥯',
+ rfisht: '⥽',
+ rfloor: '⌋',
+ rfr: '𝔯',
+ Rfr: 'ℜ',
+ rHar: '⥤',
+ rhard: '⇁',
+ rharu: '⇀',
+ rharul: '⥬',
+ Rho: 'Ρ',
+ rho: 'ρ',
+ rhov: 'ϱ',
+ RightAngleBracket: '⟩',
+ RightArrowBar: '⇥',
+ rightarrow: '→',
+ RightArrow: '→',
+ Rightarrow: '⇒',
+ RightArrowLeftArrow: '⇄',
+ rightarrowtail: '↣',
+ RightCeiling: '⌉',
+ RightDoubleBracket: '⟧',
+ RightDownTeeVector: '⥝',
+ RightDownVectorBar: '⥕',
+ RightDownVector: '⇂',
+ RightFloor: '⌋',
+ rightharpoondown: '⇁',
+ rightharpoonup: '⇀',
+ rightleftarrows: '⇄',
+ rightleftharpoons: '⇌',
+ rightrightarrows: '⇉',
+ rightsquigarrow: '↝',
+ RightTeeArrow: '↦',
+ RightTee: '⊢',
+ RightTeeVector: '⥛',
+ rightthreetimes: '⋌',
+ RightTriangleBar: '⧐',
+ RightTriangle: '⊳',
+ RightTriangleEqual: '⊵',
+ RightUpDownVector: '⥏',
+ RightUpTeeVector: '⥜',
+ RightUpVectorBar: '⥔',
+ RightUpVector: '↾',
+ RightVectorBar: '⥓',
+ RightVector: '⇀',
+ ring: '˚',
+ risingdotseq: '≓',
+ rlarr: '⇄',
+ rlhar: '⇌',
+ rlm: '',
+ rmoustache: '⎱',
+ rmoust: '⎱',
+ rnmid: '⫮',
+ roang: '⟭',
+ roarr: '⇾',
+ robrk: '⟧',
+ ropar: '⦆',
+ ropf: '𝕣',
+ Ropf: 'ℝ',
+ roplus: '⨮',
+ rotimes: '⨵',
+ RoundImplies: '⥰',
+ rpar: ')',
+ rpargt: '⦔',
+ rppolint: '⨒',
+ rrarr: '⇉',
+ Rrightarrow: '⇛',
+ rsaquo: '›',
+ rscr: '𝓇',
+ Rscr: 'ℛ',
+ rsh: '↱',
+ Rsh: '↱',
+ rsqb: ']',
+ rsquo: '’',
+ rsquor: '’',
+ rthree: '⋌',
+ rtimes: '⋊',
+ rtri: '▹',
+ rtrie: '⊵',
+ rtrif: '▸',
+ rtriltri: '⧎',
+ RuleDelayed: '⧴',
+ ruluhar: '⥨',
+ rx: '℞',
+ Sacute: 'Ś',
+ sacute: 'ś',
+ sbquo: '‚',
+ scap: '⪸',
+ Scaron: 'Š',
+ scaron: 'š',
+ Sc: '⪼',
+ sc: '≻',
+ sccue: '≽',
+ sce: '⪰',
+ scE: '⪴',
+ Scedil: 'Ş',
+ scedil: 'ş',
+ Scirc: 'Ŝ',
+ scirc: 'ŝ',
+ scnap: '⪺',
+ scnE: '⪶',
+ scnsim: '⋩',
+ scpolint: '⨓',
+ scsim: '≿',
+ Scy: 'С',
+ scy: 'с',
+ sdotb: '⊡',
+ sdot: '⋅',
+ sdote: '⩦',
+ searhk: '⤥',
+ searr: '↘',
+ seArr: '⇘',
+ searrow: '↘',
+ sect: '§',
+ semi: ';',
+ seswar: '⤩',
+ setminus: '∖',
+ setmn: '∖',
+ sext: '✶',
+ Sfr: '𝔖',
+ sfr: '𝔰',
+ sfrown: '⌢',
+ sharp: '♯',
+ SHCHcy: 'Щ',
+ shchcy: 'щ',
+ SHcy: 'Ш',
+ shcy: 'ш',
+ ShortDownArrow: '↓',
+ ShortLeftArrow: '←',
+ shortmid: '∣',
+ shortparallel: '∥',
+ ShortRightArrow: '→',
+ ShortUpArrow: '↑',
+ shy: '',
+ Sigma: 'Σ',
+ sigma: 'σ',
+ sigmaf: 'ς',
+ sigmav: 'ς',
+ sim: '∼',
+ simdot: '⩪',
+ sime: '≃',
+ simeq: '≃',
+ simg: '⪞',
+ simgE: '⪠',
+ siml: '⪝',
+ simlE: '⪟',
+ simne: '≆',
+ simplus: '⨤',
+ simrarr: '⥲',
+ slarr: '←',
+ SmallCircle: '∘',
+ smallsetminus: '∖',
+ smashp: '⨳',
+ smeparsl: '⧤',
+ smid: '∣',
+ smile: '⌣',
+ smt: '⪪',
+ smte: '⪬',
+ smtes: '⪬',
+ SOFTcy: 'Ь',
+ softcy: 'ь',
+ solbar: '⌿',
+ solb: '⧄',
+ sol: '/',
+ Sopf: '𝕊',
+ sopf: '𝕤',
+ spades: '♠',
+ spadesuit: '♠',
+ spar: '∥',
+ sqcap: '⊓',
+ sqcaps: '⊓',
+ sqcup: '⊔',
+ sqcups: '⊔',
+ Sqrt: '√',
+ sqsub: '⊏',
+ sqsube: '⊑',
+ sqsubset: '⊏',
+ sqsubseteq: '⊑',
+ sqsup: '⊐',
+ sqsupe: '⊒',
+ sqsupset: '⊐',
+ sqsupseteq: '⊒',
+ square: '□',
+ Square: '□',
+ SquareIntersection: '⊓',
+ SquareSubset: '⊏',
+ SquareSubsetEqual: '⊑',
+ SquareSuperset: '⊐',
+ SquareSupersetEqual: '⊒',
+ SquareUnion: '⊔',
+ squarf: '▪',
+ squ: '□',
+ squf: '▪',
+ srarr: '→',
+ Sscr: '𝒮',
+ sscr: '𝓈',
+ ssetmn: '∖',
+ ssmile: '⌣',
+ sstarf: '⋆',
+ Star: '⋆',
+ star: '☆',
+ starf: '★',
+ straightepsilon: 'ϵ',
+ straightphi: 'ϕ',
+ strns: '¯',
+ sub: '⊂',
+ Sub: '⋐',
+ subdot: '⪽',
+ subE: '⫅',
+ sube: '⊆',
+ subedot: '⫃',
+ submult: '⫁',
+ subnE: '⫋',
+ subne: '⊊',
+ subplus: '⪿',
+ subrarr: '⥹',
+ subset: '⊂',
+ Subset: '⋐',
+ subseteq: '⊆',
+ subseteqq: '⫅',
+ SubsetEqual: '⊆',
+ subsetneq: '⊊',
+ subsetneqq: '⫋',
+ subsim: '⫇',
+ subsub: '⫕',
+ subsup: '⫓',
+ succapprox: '⪸',
+ succ: '≻',
+ succcurlyeq: '≽',
+ Succeeds: '≻',
+ SucceedsEqual: '⪰',
+ SucceedsSlantEqual: '≽',
+ SucceedsTilde: '≿',
+ succeq: '⪰',
+ succnapprox: '⪺',
+ succneqq: '⪶',
+ succnsim: '⋩',
+ succsim: '≿',
+ SuchThat: '∋',
+ sum: '∑',
+ Sum: '∑',
+ sung: '♪',
+ sup1: '¹',
+ sup2: '²',
+ sup3: '³',
+ sup: '⊃',
+ Sup: '⋑',
+ supdot: '⪾',
+ supdsub: '⫘',
+ supE: '⫆',
+ supe: '⊇',
+ supedot: '⫄',
+ Superset: '⊃',
+ SupersetEqual: '⊇',
+ suphsol: '⟉',
+ suphsub: '⫗',
+ suplarr: '⥻',
+ supmult: '⫂',
+ supnE: '⫌',
+ supne: '⊋',
+ supplus: '⫀',
+ supset: '⊃',
+ Supset: '⋑',
+ supseteq: '⊇',
+ supseteqq: '⫆',
+ supsetneq: '⊋',
+ supsetneqq: '⫌',
+ supsim: '⫈',
+ supsub: '⫔',
+ supsup: '⫖',
+ swarhk: '⤦',
+ swarr: '↙',
+ swArr: '⇙',
+ swarrow: '↙',
+ swnwar: '⤪',
+ szlig: 'ß',
+ Tab: ' ',
+ target: '⌖',
+ Tau: 'Τ',
+ tau: 'τ',
+ tbrk: '⎴',
+ Tcaron: 'Ť',
+ tcaron: 'ť',
+ Tcedil: 'Ţ',
+ tcedil: 'ţ',
+ Tcy: 'Т',
+ tcy: 'т',
+ tdot: '⃛',
+ telrec: '⌕',
+ Tfr: '𝔗',
+ tfr: '𝔱',
+ there4: '∴',
+ therefore: '∴',
+ Therefore: '∴',
+ Theta: 'Θ',
+ theta: 'θ',
+ thetasym: 'ϑ',
+ thetav: 'ϑ',
+ thickapprox: '≈',
+ thicksim: '∼',
+ ThickSpace: ' ',
+ ThinSpace: ' ',
+ thinsp: ' ',
+ thkap: '≈',
+ thksim: '∼',
+ THORN: 'Þ',
+ thorn: 'þ',
+ tilde: '˜',
+ Tilde: '∼',
+ TildeEqual: '≃',
+ TildeFullEqual: '≅',
+ TildeTilde: '≈',
+ timesbar: '⨱',
+ timesb: '⊠',
+ times: '×',
+ timesd: '⨰',
+ tint: '∭',
+ toea: '⤨',
+ topbot: '⌶',
+ topcir: '⫱',
+ top: '⊤',
+ Topf: '𝕋',
+ topf: '𝕥',
+ topfork: '⫚',
+ tosa: '⤩',
+ tprime: '‴',
+ trade: '™',
+ TRADE: '™',
+ triangle: '▵',
+ triangledown: '▿',
+ triangleleft: '◃',
+ trianglelefteq: '⊴',
+ triangleq: '≜',
+ triangleright: '▹',
+ trianglerighteq: '⊵',
+ tridot: '◬',
+ trie: '≜',
+ triminus: '⨺',
+ TripleDot: '⃛',
+ triplus: '⨹',
+ trisb: '⧍',
+ tritime: '⨻',
+ trpezium: '⏢',
+ Tscr: '𝒯',
+ tscr: '𝓉',
+ TScy: 'Ц',
+ tscy: 'ц',
+ TSHcy: 'Ћ',
+ tshcy: 'ћ',
+ Tstrok: 'Ŧ',
+ tstrok: 'ŧ',
+ twixt: '≬',
+ twoheadleftarrow: '↞',
+ twoheadrightarrow: '↠',
+ Uacute: 'Ú',
+ uacute: 'ú',
+ uarr: '↑',
+ Uarr: '↟',
+ uArr: '⇑',
+ Uarrocir: '⥉',
+ Ubrcy: 'Ў',
+ ubrcy: 'ў',
+ Ubreve: 'Ŭ',
+ ubreve: 'ŭ',
+ Ucirc: 'Û',
+ ucirc: 'û',
+ Ucy: 'У',
+ ucy: 'у',
+ udarr: '⇅',
+ Udblac: 'Ű',
+ udblac: 'ű',
+ udhar: '⥮',
+ ufisht: '⥾',
+ Ufr: '𝔘',
+ ufr: '𝔲',
+ Ugrave: 'Ù',
+ ugrave: 'ù',
+ uHar: '⥣',
+ uharl: '↿',
+ uharr: '↾',
+ uhblk: '▀',
+ ulcorn: '⌜',
+ ulcorner: '⌜',
+ ulcrop: '⌏',
+ ultri: '◸',
+ Umacr: 'Ū',
+ umacr: 'ū',
+ uml: '¨',
+ UnderBar: '_',
+ UnderBrace: '⏟',
+ UnderBracket: '⎵',
+ UnderParenthesis: '⏝',
+ Union: '⋃',
+ UnionPlus: '⊎',
+ Uogon: 'Ų',
+ uogon: 'ų',
+ Uopf: '𝕌',
+ uopf: '𝕦',
+ UpArrowBar: '⤒',
+ uparrow: '↑',
+ UpArrow: '↑',
+ Uparrow: '⇑',
+ UpArrowDownArrow: '⇅',
+ updownarrow: '↕',
+ UpDownArrow: '↕',
+ Updownarrow: '⇕',
+ UpEquilibrium: '⥮',
+ upharpoonleft: '↿',
+ upharpoonright: '↾',
+ uplus: '⊎',
+ UpperLeftArrow: '↖',
+ UpperRightArrow: '↗',
+ upsi: 'υ',
+ Upsi: 'ϒ',
+ upsih: 'ϒ',
+ Upsilon: 'Υ',
+ upsilon: 'υ',
+ UpTeeArrow: '↥',
+ UpTee: '⊥',
+ upuparrows: '⇈',
+ urcorn: '⌝',
+ urcorner: '⌝',
+ urcrop: '⌎',
+ Uring: 'Ů',
+ uring: 'ů',
+ urtri: '◹',
+ Uscr: '𝒰',
+ uscr: '𝓊',
+ utdot: '⋰',
+ Utilde: 'Ũ',
+ utilde: 'ũ',
+ utri: '▵',
+ utrif: '▴',
+ uuarr: '⇈',
+ Uuml: 'Ü',
+ uuml: 'ü',
+ uwangle: '⦧',
+ vangrt: '⦜',
+ varepsilon: 'ϵ',
+ varkappa: 'ϰ',
+ varnothing: '∅',
+ varphi: 'ϕ',
+ varpi: 'ϖ',
+ varpropto: '∝',
+ varr: '↕',
+ vArr: '⇕',
+ varrho: 'ϱ',
+ varsigma: 'ς',
+ varsubsetneq: '⊊',
+ varsubsetneqq: '⫋',
+ varsupsetneq: '⊋',
+ varsupsetneqq: '⫌',
+ vartheta: 'ϑ',
+ vartriangleleft: '⊲',
+ vartriangleright: '⊳',
+ vBar: '⫨',
+ Vbar: '⫫',
+ vBarv: '⫩',
+ Vcy: 'В',
+ vcy: 'в',
+ vdash: '⊢',
+ vDash: '⊨',
+ Vdash: '⊩',
+ VDash: '⊫',
+ Vdashl: '⫦',
+ veebar: '⊻',
+ vee: '∨',
+ Vee: '⋁',
+ veeeq: '≚',
+ vellip: '⋮',
+ verbar: '|',
+ Verbar: '‖',
+ vert: '|',
+ Vert: '‖',
+ VerticalBar: '∣',
+ VerticalLine: '|',
+ VerticalSeparator: '❘',
+ VerticalTilde: '≀',
+ VeryThinSpace: ' ',
+ Vfr: '𝔙',
+ vfr: '𝔳',
+ vltri: '⊲',
+ vnsub: '⊂',
+ vnsup: '⊃',
+ Vopf: '𝕍',
+ vopf: '𝕧',
+ vprop: '∝',
+ vrtri: '⊳',
+ Vscr: '𝒱',
+ vscr: '𝓋',
+ vsubnE: '⫋',
+ vsubne: '⊊',
+ vsupnE: '⫌',
+ vsupne: '⊋',
+ Vvdash: '⊪',
+ vzigzag: '⦚',
+ Wcirc: 'Ŵ',
+ wcirc: 'ŵ',
+ wedbar: '⩟',
+ wedge: '∧',
+ Wedge: '⋀',
+ wedgeq: '≙',
+ weierp: '℘',
+ Wfr: '𝔚',
+ wfr: '𝔴',
+ Wopf: '𝕎',
+ wopf: '𝕨',
+ wp: '℘',
+ wr: '≀',
+ wreath: '≀',
+ Wscr: '𝒲',
+ wscr: '𝓌',
+ xcap: '⋂',
+ xcirc: '◯',
+ xcup: '⋃',
+ xdtri: '▽',
+ Xfr: '𝔛',
+ xfr: '𝔵',
+ xharr: '⟷',
+ xhArr: '⟺',
+ Xi: 'Ξ',
+ xi: 'ξ',
+ xlarr: '⟵',
+ xlArr: '⟸',
+ xmap: '⟼',
+ xnis: '⋻',
+ xodot: '⨀',
+ Xopf: '𝕏',
+ xopf: '𝕩',
+ xoplus: '⨁',
+ xotime: '⨂',
+ xrarr: '⟶',
+ xrArr: '⟹',
+ Xscr: '𝒳',
+ xscr: '𝓍',
+ xsqcup: '⨆',
+ xuplus: '⨄',
+ xutri: '△',
+ xvee: '⋁',
+ xwedge: '⋀',
+ Yacute: 'Ý',
+ yacute: 'ý',
+ YAcy: 'Я',
+ yacy: 'я',
+ Ycirc: 'Ŷ',
+ ycirc: 'ŷ',
+ Ycy: 'Ы',
+ ycy: 'ы',
+ yen: '¥',
+ Yfr: '𝔜',
+ yfr: '𝔶',
+ YIcy: 'Ї',
+ yicy: 'ї',
+ Yopf: '𝕐',
+ yopf: '𝕪',
+ Yscr: '𝒴',
+ yscr: '𝓎',
+ YUcy: 'Ю',
+ yucy: 'ю',
+ yuml: 'ÿ',
+ Yuml: 'Ÿ',
+ Zacute: 'Ź',
+ zacute: 'ź',
+ Zcaron: 'Ž',
+ zcaron: 'ž',
+ Zcy: 'З',
+ zcy: 'з',
+ Zdot: 'Ż',
+ zdot: 'ż',
+ zeetrf: 'ℨ',
+ ZeroWidthSpace: '',
+ Zeta: 'Ζ',
+ zeta: 'ζ',
+ zfr: '𝔷',
+ Zfr: 'ℨ',
+ ZHcy: 'Ж',
+ zhcy: 'ж',
+ zigrarr: '⇝',
+ zopf: '𝕫',
+ Zopf: 'ℤ',
+ Zscr: '𝒵',
+ zscr: '𝓏',
+ zwj: '',
+ zwnj: '' };
+
// Some regexps used in inline parser:
var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]';
@@ -635,7 +2761,21 @@
var parseEntity = function() {
var m;
if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) {
- return [{ t: 'Entity', c: m }];
+ var isNumeric = /^&#/.test(m);
+ var isHex = /^&#[Xx]/.test(m);
+ var uchar;
+ if (isNumeric) {
+ var num;
+ if (isHex) {
+ num = parseInt(m.slice(3,-1), 16);
+ } else {
+ num = parseInt(m.slice(2,-1), 10);
+ }
+ uchar = String.fromCharCode(num);
+ } else {
+ uchar = entities[m.slice(1,-1)];
+ }
+ return [{ t: 'Str', c: uchar || m }];
} else {
return null;
}
@@ -1515,8 +3655,6 @@
return inTags('strong', [], this.renderInlines(inline.c));
case 'Html':
return inline.c;
- case 'Entity':
- return inline.c;
case 'Link':
attrs = [['href', this.escape(inline.destination, true)]];
if (inline.title) {
--
cgit v1.2.3
From 40f5a3d6f904b6b9558d51b0133f6a406eafc21a Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 30 Sep 2014 21:39:57 -0700
Subject: unescape URI before escaping.
If we already have %-encoded characters in the URI, we want
to preserve them.
---
js/stmd.js | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 2a63d23..e113794 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2333,12 +2333,12 @@
dest = m.slice(1,-1);
return [{t: 'Link',
label: [{ t: 'Str', c: dest }],
- destination: 'mailto:' + encodeURI(dest) }];
+ destination: 'mailto:' + encodeURI(unescape(dest)) }];
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
return [{ t: 'Link',
label: [{ t: 'Str', c: dest }],
- destination: encodeURI(dest) }];
+ destination: encodeURI(unescape(dest)) }];
} else {
return null;
}
@@ -2615,11 +2615,11 @@
var parseLinkDestination = function() {
var res = this.match(reLinkDestinationBraces);
if (res) { // chop off surrounding <..>:
- return encodeURI(unescapeBS(res.substr(1, res.length - 2)));
+ return encodeURI(unescape(unescapeBS(res.substr(1, res.length - 2))));
} else {
res = this.match(reLinkDestination);
if (res !== null) {
- return encodeURI(unescapeBS(res));
+ return encodeURI(unescape(unescapeBS(res)));
} else {
return null;
}
--
cgit v1.2.3
From 669ea14fdbf12c25693706502f8dae6b1cf4e033 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 30 Sep 2014 21:51:31 -0700
Subject: Unescape entities as well as backslashes in titles, URLs.
This way URLs with entities will be properly percent encoded
as in the C implementation.
---
js/stmd.js | 55 ++++++++++++++++++++++++++++++++-----------------------
1 file changed, 32 insertions(+), 23 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index e113794..04d7360 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2167,6 +2167,7 @@
PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")";
var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
"/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
+ var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
var reHtmlTag = new RegExp('^' + HTMLTAG, 'i');
@@ -2195,16 +2196,38 @@
var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/;
+ var reEntityHere = new RegExp('^' + ENTITY, 'i');
+
+ var reEntity = new RegExp(ENTITY, 'gi');
+
// Matches a character with a special meaning in markdown,
// or a string of non-special characters. Note: we match
// clumps of _ or * or `, because they need to be handled in groups.
var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
// UTILITY FUNCTIONS
+ var entityToChar = function(m) {
+ var isNumeric = /^&#/.test(m);
+ var isHex = /^&#[Xx]/.test(m);
+ var uchar;
+ if (isNumeric) {
+ var num;
+ if (isHex) {
+ num = parseInt(m.slice(3,-1), 16);
+ } else {
+ num = parseInt(m.slice(2,-1), 10);
+ }
+ uchar = String.fromCharCode(num);
+ } else {
+ uchar = entities[m.slice(1,-1)];
+ }
+ return (uchar || m);
+ }
- // Replace backslash escapes with literal characters.
- var unescapeBS = function(s) {
- return s.replace(reAllEscapedChar, '$1');
+ // Replace entities and backslash escapes with literal characters.
+ var unescapeEntBS = function(s) {
+ return s.replace(reAllEscapedChar, '$1')
+ .replace(reEntity, entityToChar);;
};
// Returns true if string contains only space characters.
@@ -2604,7 +2627,7 @@
var title = this.match(reLinkTitle);
if (title) {
// chop off quotes from title and unescape:
- return unescapeBS(title.substr(1, title.length - 2));
+ return unescapeEntBS(title.substr(1, title.length - 2));
} else {
return null;
}
@@ -2615,11 +2638,11 @@
var parseLinkDestination = function() {
var res = this.match(reLinkDestinationBraces);
if (res) { // chop off surrounding <..>:
- return encodeURI(unescape(unescapeBS(res.substr(1, res.length - 2))));
+ return encodeURI(unescape(unescapeEntBS(res.substr(1, res.length - 2))));
} else {
res = this.match(reLinkDestination);
if (res !== null) {
- return encodeURI(unescape(unescapeBS(res)));
+ return encodeURI(unescape(unescapeEntBS(res)));
} else {
return null;
}
@@ -2760,22 +2783,8 @@
// Attempt to parse an entity, return Entity object if successful.
var parseEntity = function() {
var m;
- if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) {
- var isNumeric = /^&#/.test(m);
- var isHex = /^&#[Xx]/.test(m);
- var uchar;
- if (isNumeric) {
- var num;
- if (isHex) {
- num = parseInt(m.slice(3,-1), 16);
- } else {
- num = parseInt(m.slice(2,-1), 10);
- }
- uchar = String.fromCharCode(num);
- } else {
- uchar = entities[m.slice(1,-1)];
- }
- return [{ t: 'Str', c: uchar || m }];
+ if ((m = this.match(reEntityHere))) {
+ return [{ t: 'Str', c: entityToChar(m) }];
} else {
return null;
}
@@ -3513,7 +3522,7 @@
case 'FencedCode':
// first line becomes info string
- block.info = unescapeBS(block.strings[0].trim());
+ block.info = unescapeEntBS(block.strings[0].trim());
if (block.strings.length == 1) {
block.string_content = '';
} else {
--
cgit v1.2.3
From fb0c0cc2741120e3706c7698b15a510c40fc71c0 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 10:33:49 -0700
Subject: Changed peek() to return char code.
Test char codes instead of strings.
Small optimization (about 1% speed boost).
---
js/stmd.js | 106 +++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 64 insertions(+), 42 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 04d7360..788809b 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2137,6 +2137,22 @@
zwj: '',
zwnj: '' };
+ // Constants for character codes:
+
+ var C_NEWLINE = 10;
+ var C_SPACE = 32;
+ var C_ASTERISK = 42;
+ var C_UNDERSCORE = 95;
+ var C_BACKTICK = 96;
+ var C_OPEN_BRACKET = 91;
+ var C_CLOSE_BRACKET = 93;
+ var C_LESSTHAN = 60;
+ var C_BANG = 33;
+ var C_BACKSLASH = 92;
+ var C_AMPERSAND = 38;
+ var C_OPEN_PAREN = 40;
+ var C_COLON = 58;
+
// Some regexps used in inline parser:
var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]';
@@ -2286,10 +2302,14 @@
}
};
- // Returns the character at the current subject position, or null if
+ // Returns the code for the character at the current subject position, or -1
// there are no more characters.
var peek = function() {
- return this.subject.charAt(this.pos) || null;
+ if (this.pos < this.subject.length) {
+ return this.subject.charCodeAt(this.pos);
+ } else {
+ return -1;
+ }
};
// Parse zero or more space characters, including at most one newline
@@ -2377,29 +2397,34 @@
}
};
- // Scan a sequence of characters == c, and return information about
+ // Scan a sequence of characters with code cc, and return information about
// the number of delimiters and whether they are positioned such that
// they can open and/or close emphasis or strong emphasis. A utility
// function for strong/emph parsing.
- var scanDelims = function(c) {
+ var scanDelims = function(cc) {
var numdelims = 0;
var first_close_delims = 0;
- var char_before, char_after;
+ var char_before, char_after, cc_after;
var startpos = this.pos;
char_before = this.pos === 0 ? '\n' :
this.subject.charAt(this.pos - 1);
- while (this.peek() === c) {
+ while (this.peek() === cc) {
numdelims++;
this.pos++;
}
- char_after = this.peek() || '\n';
+ cc_after = this.peek();
+ if (cc_after === -1) {
+ char_after = '\n';
+ } else {
+ char_after = String.fromCharCode(cc_after);
+ }
var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after));
var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before));
- if (c === '_') {
+ if (cc === C_UNDERSCORE) {
can_open = can_open && !((/[a-z0-9]/i).test(char_before));
can_close = can_close && !((/[a-z0-9]/i).test(char_after));
}
@@ -2422,21 +2447,18 @@
}
// Attempt to parse emphasis or strong emphasis.
- var parseEmphasis = function() {
+ var parseEmphasis = function(cc) {
var startpos = this.pos;
var c ;
var first_close = 0;
- c = this.peek();
- if (!(c === '*' || c === '_')) {
- return null;
- }
+ c = String.fromCharCode(cc);
var numdelims;
var delimpos;
var inlines = [];
// Get opening delimiters.
- res = this.scanDelims(c);
+ res = this.scanDelims(cc);
numdelims = res.numdelims;
if (numdelims === 0) {
@@ -2472,10 +2494,10 @@
}
while (true) {
- if (this.last_emphasis_closer[c] < this.pos) {
+ if (this.last_emphasis_closer[cc] < this.pos) {
break;
}
- res = this.scanDelims(c);
+ res = this.scanDelims(cc);
if (res) {
numdelims = res.numdelims;
@@ -2615,7 +2637,7 @@
// we didn't match emphasis: fallback
this.pos = fallbackpos;
if (last_emphasis_closer) {
- this.last_emphasis_closer[c] = last_emphasis_closer;
+ this.last_emphasis_closer[cc] = last_emphasis_closer;
}
return [fallback];
@@ -2651,7 +2673,7 @@
// Attempt to parse a link label, returning number of characters parsed.
var parseLinkLabel = function() {
- if (this.peek() != '[') {
+ if (this.peek() != C_OPEN_BRACKET) {
return 0;
}
var startpos = this.pos;
@@ -2668,36 +2690,36 @@
}
this.pos++; // advance past [
var c;
- while ((c = this.peek()) && (c != ']' || nest_level > 0)) {
+ while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) {
switch (c) {
- case '`':
+ case C_BACKTICK:
this.parseBackticks();
break;
- case '<':
+ case C_LESSTHAN:
this.parseAutolink() || this.parseHtmlTag() ||
this.pos++;
break;
- case '[': // nested []
+ case C_OPEN_BRACKET: // nested []
nest_level++;
this.pos++;
break;
- case ']': // nested []
+ case C_CLOSE_BRACKET: // nested []
nest_level--;
this.pos++;
break;
- case '\\':
+ case C_BACKSLASH:
this.parseBackslash();
break;
default:
this.parseString();
}
}
- if (c === ']') {
+ if (c === C_CLOSE_BRACKET) {
this.label_nest_level = 0;
this.pos++; // advance past ]
return this.pos - startpos;
} else {
- if (!c) {
+ if (c === -1) {
this.label_nest_level = nest_level;
}
this.pos = startpos;
@@ -2730,7 +2752,7 @@
// if we got this far, we've parsed a label.
// Try to parse an explicit link: [label](url "title")
- if (this.peek() == '(') {
+ if (this.peek() == C_OPEN_PAREN) {
this.pos++;
if (this.spnl() &&
((dest = this.parseLinkDestination()) !== null) &&
@@ -2851,7 +2873,7 @@
}
// colon:
- if (this.peek() === ':') {
+ if (this.peek() === C_COLON) {
this.pos++;
} else {
this.pos = startpos;
@@ -2902,35 +2924,35 @@
}
var c = this.peek();
- if (!c) {
+ if (c === -1) {
return null;
}
var res;
switch(c) {
- case '\n':
- case ' ':
+ case C_NEWLINE:
+ case C_SPACE:
res = this.parseNewline();
break;
- case '\\':
+ case C_BACKSLASH:
res = this.parseBackslash();
break;
- case '`':
+ case C_BACKTICK:
res = this.parseBackticks();
break;
- case '*':
- case '_':
- res = this.parseEmphasis();
+ case C_ASTERISK:
+ case C_UNDERSCORE:
+ res = this.parseEmphasis(c);
break;
- case '[':
+ case C_OPEN_BRACKET:
res = this.parseLink();
break;
- case '!':
+ case C_BANG:
res = this.parseImage();
break;
- case '<':
+ case C_LESSTHAN:
res = this.parseAutolink() || this.parseHtmlTag();
break;
- case '&':
+ case C_AMPERSAND:
res = this.parseEntity();
break;
default:
@@ -2939,7 +2961,7 @@
}
if (res === null) {
this.pos += 1;
- res = [{t: 'Str', c: c}];
+ res = [{t: 'Str', c: String.fromCharCode(c)}];
}
if (res && memoize) {
@@ -2956,7 +2978,7 @@
this.pos = 0;
this.refmap = refmap || {};
this.memo = {};
- this.last_emphasis_closer = { '*': s.length, '_': s.length };
+ this.last_emphasis_closer = { C_ASTERISK: s.length, C_UNDERSCORE: s.length };
var inlines = [];
var next_inline;
while ((next_inline = this.parseInline())) {
--
cgit v1.2.3
From 189685f5a0527e90f4ff31623d219415e2735fac Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 11:23:51 -0700
Subject: Eliminated unnecessary variable.
---
js/stmd.js | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 788809b..f4ccdf4 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -3205,8 +3205,7 @@
switch (container.t) {
case 'BlockQuote':
- var matched = indent <= 3 && ln.charAt(first_nonspace) === '>';
- if (matched) {
+ if (indent <= 3 && ln.charAt(first_nonspace) === '>') {
offset = first_nonspace + 1;
if (ln.charAt(offset) === ' ') {
offset++;
--
cgit v1.2.3
From 67e76295cbc15e258c6ac579b082e410b4aaca6a Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 11:28:18 -0700
Subject: Char code optimizations in block parsers.
---
js/stmd.js | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index f4ccdf4..fc8d4a7 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2147,6 +2147,7 @@
var C_OPEN_BRACKET = 91;
var C_CLOSE_BRACKET = 93;
var C_LESSTHAN = 60;
+ var C_GREATERTHAN = 62;
var C_BANG = 33;
var C_BACKSLASH = 92;
var C_AMPERSAND = 38;
@@ -2352,7 +2353,7 @@
var parseBackslash = function() {
var subj = this.subject,
pos = this.pos;
- if (subj.charAt(pos) === '\\') {
+ if (subj.charCodeAt(pos) === C_BACKSLASH) {
if (subj.charAt(pos + 1) === '\n') {
this.pos = this.pos + 2;
return [{ t: 'Hardbreak' }];
@@ -3205,9 +3206,9 @@
switch (container.t) {
case 'BlockQuote':
- if (indent <= 3 && ln.charAt(first_nonspace) === '>') {
+ if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
offset = first_nonspace + 1;
- if (ln.charAt(offset) === ' ') {
+ if (ln.charCodeAt(offset) === C_SPACE) {
offset++;
}
} else {
@@ -3247,7 +3248,7 @@
case 'FencedCode':
// skip optional spaces of fence offset
i = container.fence_offset;
- while (i > 0 && ln.charAt(offset) === ' ') {
+ while (i > 0 && ln.charCodeAt(offset) === C_SPACE) {
offset++;
i--;
}
@@ -3324,11 +3325,11 @@
break;
}
- } else if (ln.charAt(first_nonspace) === '>') {
+ } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
// blockquote
offset = first_nonspace + 1;
// optional following space
- if (ln.charAt(offset) === ' ') {
+ if (ln.charCodeAt(offset) === C_SPACE) {
offset++;
}
closeUnmatchedBlocks(this);
@@ -3520,7 +3521,7 @@
block.string_content = block.strings.join('\n').replace(/^ */m,'');
// try parsing the beginning as link reference definitions:
- while (block.string_content.charAt(0) === '[' &&
+ while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET &&
(pos = this.inlineParser.parseReference(block.string_content,
this.refmap))) {
block.string_content = block.string_content.slice(pos);
--
cgit v1.2.3
From 9c0b2f51a2e560a3932bb060ecfbfb50879548de Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 12:59:13 -0700
Subject: Fixed rendering bug for blockquotes.
---
js/stmd.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/js/stmd.js b/js/stmd.js
index fc8d4a7..4ca38cc 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -3735,7 +3735,7 @@
case 'BlockQuote':
var filling = this.renderBlocks(block.children);
return inTags('blockquote', [], filling === '' ? this.innersep :
- this.innersep + this.renderBlocks(block.children) + this.innersep);
+ this.innersep + filling + this.innersep);
case 'ListItem':
return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim());
case 'List':
--
cgit v1.2.3
From 3c9ce6fa7434d3ffc1ea8d988e7f77d98d4cc3a2 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 13:45:37 -0700
Subject: Changed inline parsers to be monomorphic and modify inlines param.
They all return true or false now, instead of the inlines parsed.
Performance optimization.
---
js/stmd.js | 233 ++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 131 insertions(+), 102 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 4ca38cc..efccad8 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2325,7 +2325,7 @@
// Attempt to parse backticks, returning either a backtick code span or a
// literal sequence of backticks.
- var parseBackticks = function() {
+ var parseBackticks = function(inlines) {
var startpos = this.pos;
var ticks = this.match(/^`+/);
if (!ticks) {
@@ -2336,65 +2336,73 @@
var match;
while (!foundCode && (match = this.match(/`+/m))) {
if (match == ticks) {
- return [{ t: 'Code', c: this.subject.slice(afterOpenTicks,
+ inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks,
this.pos - ticks.length)
.replace(/[ \n]+/g,' ')
- .trim() }];
+ .trim() });
+ return true;
}
}
// If we got here, we didn't match a closing backtick sequence.
this.pos = afterOpenTicks;
- return [{ t: 'Str', c: ticks }];
+ inlines.push({ t: 'Str', c: ticks });
+ return true;
};
// Parse a backslash-escaped special character, adding either the escaped
// character, a hard line break (if the backslash is followed by a newline),
// or a literal backslash to the 'inlines' list.
- var parseBackslash = function() {
+ var parseBackslash = function(inlines) {
var subj = this.subject,
pos = this.pos;
if (subj.charCodeAt(pos) === C_BACKSLASH) {
if (subj.charAt(pos + 1) === '\n') {
this.pos = this.pos + 2;
- return [{ t: 'Hardbreak' }];
+ inlines.push({ t: 'Hardbreak' });
} else if (reEscapable.test(subj.charAt(pos + 1))) {
this.pos = this.pos + 2;
- return [{ t: 'Str', c: subj.charAt(pos + 1) }];
+ inlines.push({ t: 'Str', c: subj.charAt(pos + 1) });
} else {
this.pos++;
- return [{t: 'Str', c: '\\'}];
+ inlines.push({t: 'Str', c: '\\'});
}
+ return true;
} else {
- return null;
+ return false;
}
};
// Attempt to parse an autolink (URL or email in pointy brackets).
- var parseAutolink = function() {
+ var parseAutolink = function(inlines) {
var m;
var dest;
if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
dest = m.slice(1,-1);
- return [{t: 'Link',
+ inlines.push(
+ {t: 'Link',
label: [{ t: 'Str', c: dest }],
- destination: 'mailto:' + encodeURI(unescape(dest)) }];
+ destination: 'mailto:' + encodeURI(unescape(dest)) });
+ return true;
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
- return [{ t: 'Link',
+ inlines.push({
+ t: 'Link',
label: [{ t: 'Str', c: dest }],
- destination: encodeURI(unescape(dest)) }];
+ destination: encodeURI(unescape(dest)) });
+ return true;
} else {
- return null;
+ return false;
}
};
// Attempt to parse a raw HTML tag.
- var parseHtmlTag = function() {
+ var parseHtmlTag = function(inlines) {
var m = this.match(reHtmlTag);
if (m) {
- return [{ t: 'Html', c: m }];
+ inlines.push({ t: 'Html', c: m });
+ return true;
} else {
- return null;
+ return false;
}
};
@@ -2448,7 +2456,7 @@
}
// Attempt to parse emphasis or strong emphasis.
- var parseEmphasis = function(cc) {
+ var parseEmphasis = function(cc,inlines) {
var startpos = this.pos;
var c ;
var first_close = 0;
@@ -2456,7 +2464,6 @@
var numdelims;
var delimpos;
- var inlines = [];
// Get opening delimiters.
res = this.scanDelims(cc);
@@ -2464,18 +2471,18 @@
if (numdelims === 0) {
this.pos = startpos;
- return null;
+ return false;
}
if (numdelims >= 4 || !res.can_open) {
this.pos += numdelims;
- return [Str(this.subject.slice(startpos, startpos + numdelims))];
+ inlines.push(Str(this.subject.slice(startpos, startpos + numdelims)));
+ return true;
}
this.pos += numdelims;
var fallbackpos = this.pos;
- var fallback = Str(this.subject.slice(startpos, fallbackpos));
var next_inline;
var first = [];
@@ -2495,7 +2502,7 @@
}
while (true) {
- if (this.last_emphasis_closer[cc] < this.pos) {
+ if (this.last_emphasis_closer[c] < this.pos) {
break;
}
res = this.scanDelims(cc);
@@ -2511,7 +2518,8 @@
case 1: // ***a
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [Strong([Emph(first)])];
+ inlines.push(Strong([Emph(first)]));
+ return true;
} else if (numdelims === 2 && can_close) {
this.pos += 2;
current = second;
@@ -2527,7 +2535,8 @@
case 2: // **a
if (numdelims === 2 && can_close) {
this.pos += 2;
- return [Strong(first)];
+ inlines.push(Strong(first));
+ return true;
} else if (numdelims === 1 && can_open) {
this.pos += 1;
current = second;
@@ -2538,7 +2547,8 @@
case 3: // *a
if (numdelims === 1 && can_close) {
this.pos += 1;
- return [Emph(first)];
+ inlines.push(Emph(first));
+ return true;
} else if (numdelims === 2 && can_open) {
this.pos += 2;
current = second;
@@ -2549,56 +2559,68 @@
case 4: // ***a**b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [Strong([Emph(first.concat([Str(c+c)], second))])];
+ inlines.push(Strong([Emph(first.concat([Str(c+c)], second))]));
+ return true;
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- return [Strong([Str(c+c+c)].concat(
+ inlines.push(Strong([Str(c+c+c)].concat(
first,
- [Strong(second)]))];
+ [Strong(second)])));
+ return true;
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- return [Emph([Strong(first)].concat(second))];
+ inlines.push(Emph([Strong(first)].concat(second)));
+ return true;
}
break;
case 5: // ***a*b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [Strong([Emph(first.concat([Str(c)], second))])];
+ inlines.push(Strong([Emph(first.concat([Str(c)], second))]));
+ return true;
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- return [Strong([Emph(first)].concat(second))];
+ inlines.push(Strong([Emph(first)].concat(second)));
+ return true;
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- return [Strong([Str(c+c+c)].concat(
+ inlines.push(Strong([Str(c+c+c)].concat(
first,
- [Emph(second)]))];
+ [Emph(second)])));
+ return true;
}
break;
case 6: // ***a** b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [Strong([Emph(first.concat([Str(c+c)], second))])];
+ inlines.push(Strong([Emph(first.concat([Str(c+c)], second))]));
+ return true;
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- return [Emph([Strong(first)].concat(second))];
+ inlines.push(Emph([Strong(first)].concat(second)));
+ return true;
}
break;
case 7: // ***a* b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [Strong([Emph(first.concat([Str(c)], second))])];
+ inlines.push(Strong([Emph(first.concat([Str(c)], second))]));
+ return true;
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- return [Strong([Emph(first)].concat(second))];
+ inlines.push(Strong([Emph(first)].concat(second)));
+ return true;
}
break;
case 8: // **a *b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [Strong(first.concat([Emph(second)]))];
+ inlines.push(Strong(first.concat([Emph(second)])));
+ return true;
} else if (numdelims === 2 && can_close) {
this.pos += 2;
- return [Strong(first.concat([Str(c)], second))];
+ inlines.push(Strong(first.concat([Str(c)], second)));
+ return true;
} else if (numdelims === 1 && can_close) {
this.pos += 1;
first.push(Emph(second));
@@ -2610,7 +2632,8 @@
case 9: // *a **b
if (numdelims === 3 && can_close) {
this.pos += 3;
- return [(Emph(first.concat([Strong(second)])))];
+ inlines.push(Emph(first.concat([Strong(second)])));
+ return true;
} else if (numdelims === 2 && can_close) {
this.pos += 2;
first.push(Strong(second));
@@ -2619,7 +2642,8 @@
continue;
} else if (numdelims === 1 && can_close) {
this.pos += 1;
- return [Emph(first.concat([Str(c+c)], second))];
+ inlines.push(Emph(first.concat([Str(c+c)], second)));
+ return true;
}
break;
default:
@@ -2627,9 +2651,7 @@
}
}
- if ((next_inline = this.parseInline(true))) {
- Array.prototype.push.apply(current, next_inline);
- } else {
+ if (!(this.parseInline(current,true))) {
break;
}
@@ -2638,9 +2660,10 @@
// we didn't match emphasis: fallback
this.pos = fallbackpos;
if (last_emphasis_closer) {
- this.last_emphasis_closer[cc] = last_emphasis_closer;
+ this.last_emphasis_closer[c] = last_emphasis_closer;
}
- return [fallback];
+ inlines.push(Str(this.subject.slice(startpos, fallbackpos)));
+ return true;
};
@@ -2694,10 +2717,10 @@
while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) {
switch (c) {
case C_BACKTICK:
- this.parseBackticks();
+ this.parseBackticks([]);
break;
case C_LESSTHAN:
- this.parseAutolink() || this.parseHtmlTag() ||
+ this.parseAutolink([]) || this.parseHtmlTag([]) ||
this.pos++;
break;
case C_OPEN_BRACKET: // nested []
@@ -2709,10 +2732,10 @@
this.pos++;
break;
case C_BACKSLASH:
- this.parseBackslash();
+ this.parseBackslash([]);
break;
default:
- this.parseString();
+ this.parseString([]);
}
}
if (c === C_CLOSE_BRACKET) {
@@ -2737,7 +2760,7 @@
};
// Attempt to parse a link. If successful, return the link.
- var parseLink = function() {
+ var parseLink = function(inlines) {
var startpos = this.pos;
var reflabel;
var n;
@@ -2746,7 +2769,7 @@
n = this.parseLinkLabel();
if (n === 0) {
- return null;
+ return false;
}
var afterlabel = this.pos;
var rawlabel = this.subject.substr(startpos, n);
@@ -2763,13 +2786,14 @@
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
- return [{ t: 'Link',
+ inlines.push({ t: 'Link',
destination: dest,
title: title,
- label: parseRawLabel(rawlabel) }];
+ label: parseRawLabel(rawlabel) });
+ return true;
} else {
this.pos = startpos;
- return null;
+ return false;
}
}
// If we're here, it wasn't an explicit link. Try to parse a reference link.
@@ -2790,67 +2814,72 @@
// lookup rawlabel in refmap
var link = this.refmap[normalizeReference(reflabel)];
if (link) {
- return [{t: 'Link',
+ inlines.push({t: 'Link',
destination: link.destination,
title: link.title,
- label: parseRawLabel(rawlabel) }];
+ label: parseRawLabel(rawlabel) });
+ return true;
} else {
this.pos = startpos;
- return null;
+ return false;
}
// Nothing worked, rewind:
this.pos = startpos;
- return null;
+ return false;
};
// Attempt to parse an entity, return Entity object if successful.
- var parseEntity = function() {
+ var parseEntity = function(inlines) {
var m;
if ((m = this.match(reEntityHere))) {
- return [{ t: 'Str', c: entityToChar(m) }];
+ inlines.push({ t: 'Str', c: entityToChar(m) });
+ return true;
} else {
- return null;
+ return false;
}
};
// Parse a run of ordinary characters, or a single character with
// a special meaning in markdown, as a plain string, adding to inlines.
- var parseString = function() {
+ var parseString = function(inlines) {
var m;
if ((m = this.match(reMain))) {
- return [{ t: 'Str', c: m }];
+ inlines.push({ t: 'Str', c: m });
+ return true;
} else {
- return null;
+ return false;
}
};
// Parse a newline. If it was preceded by two spaces, return a hard
// line break; otherwise a soft line break.
- var parseNewline = function() {
+ var parseNewline = function(inlines) {
var m = this.match(/^ *\n/);
if (m) {
if (m.length > 2) {
- return [{ t: 'Hardbreak' }];
+ inlines.push({ t: 'Hardbreak' });
} else if (m.length > 0) {
- return [{ t: 'Softbreak' }];
+ inlines.push({ t: 'Softbreak' });
}
+ return true;
}
- return null;
+ return false;
};
// Attempt to parse an image. If the opening '!' is not followed
// by a link, return a literal '!'.
- var parseImage = function() {
+ var parseImage = function(inlines) {
if (this.match(/^!/)) {
- var link = this.parseLink();
+ var link = this.parseLink(inlines);
if (link) {
- link[0].t = 'Image';
- return link;
+ inlines[inlines.length - 1].t = 'Image';
+ return true;
} else {
- return [{ t: 'Str', c: '!' }];
+ inlines.push({ t: 'Str', c: '!' });
+ return true;
}
} else {
- return null;
+ return false;
}
};
@@ -2913,64 +2942,66 @@
return this.pos - startpos;
};
- // Parse the next inline element in subject, advancing subject position
- // and returning the inline parsed.
- var parseInline = function(memoize) {
+ // Parse the next inline element in subject, advancing subject position.
+ // If memoize is set, memoize the result.
+ // On success, add the result to the inlines list, and return true.
+ // On failure, return false.
+ var parseInline = function(inlines, memoize) {
var startpos = this.pos;
-
+ var origlen = inlines.length;
var memoized = memoize && this.memo[startpos];
if (memoized) {
this.pos = memoized.endpos;
- return memoized.inline;
+ Array.prototype.push.apply(inlines, memoized.inline);
+ return true;
}
var c = this.peek();
if (c === -1) {
- return null;
+ return false;
}
var res;
switch(c) {
case C_NEWLINE:
case C_SPACE:
- res = this.parseNewline();
+ res = this.parseNewline(inlines);
break;
case C_BACKSLASH:
- res = this.parseBackslash();
+ res = this.parseBackslash(inlines);
break;
case C_BACKTICK:
- res = this.parseBackticks();
+ res = this.parseBackticks(inlines);
break;
case C_ASTERISK:
case C_UNDERSCORE:
- res = this.parseEmphasis(c);
+ res = this.parseEmphasis(c, inlines);
break;
case C_OPEN_BRACKET:
- res = this.parseLink();
+ res = this.parseLink(inlines);
break;
case C_BANG:
- res = this.parseImage();
+ res = this.parseImage(inlines);
break;
case C_LESSTHAN:
- res = this.parseAutolink() || this.parseHtmlTag();
+ res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines);
break;
case C_AMPERSAND:
- res = this.parseEntity();
+ res = this.parseEntity(inlines);
break;
default:
- res = this.parseString();
+ res = this.parseString(inlines);
break;
}
- if (res === null) {
+ if (!res) {
this.pos += 1;
- res = [{t: 'Str', c: String.fromCharCode(c)}];
+ inlines.push({t: 'Str', c: String.fromCharCode(c)});
}
- if (res && memoize) {
- this.memo[startpos] = { inline: res,
+ if (memoize) {
+ this.memo[startpos] = { inline: inlines.slice(origlen),
endpos: this.pos };
}
-
- return res;
+ return true;
};
// Parse s as a list of inlines, using refmap to resolve references.
@@ -2979,11 +3010,9 @@
this.pos = 0;
this.refmap = refmap || {};
this.memo = {};
- this.last_emphasis_closer = { C_ASTERISK: s.length, C_UNDERSCORE: s.length };
+ this.last_emphasis_closer = { '*': s.length, '_': s.length };
var inlines = [];
- var next_inline;
- while ((next_inline = this.parseInline())) {
- Array.prototype.push.apply(inlines, next_inline);
+ while (this.parseInline(inlines, false)) {
}
return inlines;
};
--
cgit v1.2.3
From ac611d51c9de9aa719b42b9463e6f28d6e7d74a4 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 16:00:13 -0700
Subject: Use integers instead of strings for tags.
Use === whenever possible to compare them.
---
js/stmd.js | 238 ++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 131 insertions(+), 107 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index efccad8..23caf31 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2137,6 +2137,30 @@
zwj: '',
zwnj: '' };
+ // Constants for inline and block types:
+
+ var I_STR = 1;
+ var I_SOFT_BREAK = 2;
+ var I_HARD_BREAK = 3;
+ var I_EMPH = 4;
+ var I_STRONG = 5;
+ var I_HTML = 6;
+ var I_LINK = 7;
+ var I_IMAGE = 8;
+ var I_CODE = 9;
+ var B_DOCUMENT = 10;
+ var B_PARAGRAPH = 11;
+ var B_BLOCK_QUOTE = 12;
+ var B_LIST_ITEM = 13;
+ var B_LIST = 14;
+ var B_ATX_HEADER = 15;
+ var B_SETEXT_HEADER = 16;
+ var B_INDENTED_CODE = 17;
+ var B_FENCED_CODE = 18;
+ var B_HTML_BLOCK = 19;
+ var B_REFERENCE_DEF = 20;
+ var B_HORIZONTAL_RULE = 21;
+
// Constants for character codes:
var C_NEWLINE = 10;
@@ -2273,7 +2297,7 @@
// Convert tabs to spaces on each line using a 4-space tab stop.
var detabLine = function(text) {
- if (text.indexOf('\t') == -1) {
+ if (text.indexOf('\t') === -1) {
return text;
} else {
var lastStop = 0;
@@ -2335,8 +2359,8 @@
var foundCode = false;
var match;
while (!foundCode && (match = this.match(/`+/m))) {
- if (match == ticks) {
- inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks,
+ if (match === ticks) {
+ inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks,
this.pos - ticks.length)
.replace(/[ \n]+/g,' ')
.trim() });
@@ -2345,7 +2369,7 @@
}
// If we got here, we didn't match a closing backtick sequence.
this.pos = afterOpenTicks;
- inlines.push({ t: 'Str', c: ticks });
+ inlines.push({ t: I_STR, c: ticks });
return true;
};
@@ -2358,13 +2382,13 @@
if (subj.charCodeAt(pos) === C_BACKSLASH) {
if (subj.charAt(pos + 1) === '\n') {
this.pos = this.pos + 2;
- inlines.push({ t: 'Hardbreak' });
+ inlines.push({ t: I_HARD_BREAK });
} else if (reEscapable.test(subj.charAt(pos + 1))) {
this.pos = this.pos + 2;
- inlines.push({ t: 'Str', c: subj.charAt(pos + 1) });
+ inlines.push({ t: I_STR, c: subj.charAt(pos + 1) });
} else {
this.pos++;
- inlines.push({t: 'Str', c: '\\'});
+ inlines.push({t: I_STR, c: '\\'});
}
return true;
} else {
@@ -2379,15 +2403,15 @@
if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
dest = m.slice(1,-1);
inlines.push(
- {t: 'Link',
- label: [{ t: 'Str', c: dest }],
+ {t: I_LINK,
+ label: [{ t: I_STR, c: dest }],
destination: 'mailto:' + encodeURI(unescape(dest)) });
return true;
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
inlines.push({
- t: 'Link',
- label: [{ t: 'Str', c: dest }],
+ t: I_LINK,
+ label: [{ t: I_STR, c: dest }],
destination: encodeURI(unescape(dest)) });
return true;
} else {
@@ -2399,7 +2423,7 @@
var parseHtmlTag = function(inlines) {
var m = this.match(reHtmlTag);
if (m) {
- inlines.push({ t: 'Html', c: m });
+ inlines.push({ t: I_HTML, c: m });
return true;
} else {
return false;
@@ -2444,15 +2468,15 @@
};
var Emph = function(ils) {
- return {t: 'Emph', c: ils};
+ return {t: I_EMPH, c: ils};
}
var Strong = function(ils) {
- return {t: 'Strong', c: ils};
+ return {t: I_STRONG, c: ils};
}
var Str = function(s) {
- return {t: 'Str', c: s};
+ return {t: I_STR, c: s};
}
// Attempt to parse emphasis or strong emphasis.
@@ -2776,7 +2800,7 @@
// if we got this far, we've parsed a label.
// Try to parse an explicit link: [label](url "title")
- if (this.peek() == C_OPEN_PAREN) {
+ if (this.peek() === C_OPEN_PAREN) {
this.pos++;
if (this.spnl() &&
((dest = this.parseLinkDestination()) !== null) &&
@@ -2786,7 +2810,7 @@
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
- inlines.push({ t: 'Link',
+ inlines.push({ t: I_LINK,
destination: dest,
title: title,
label: parseRawLabel(rawlabel) });
@@ -2802,7 +2826,7 @@
this.spnl();
var beforelabel = this.pos;
n = this.parseLinkLabel();
- if (n == 2) {
+ if (n === 2) {
// empty second label
reflabel = rawlabel;
} else if (n > 0) {
@@ -2814,7 +2838,7 @@
// lookup reflabel in refmap
var link = this.refmap[normalizeReference(reflabel)];
if (link) {
- inlines.push({t: 'Link',
+ inlines.push({t: I_LINK,
destination: link.destination,
title: link.title,
label: parseRawLabel(rawlabel) });
@@ -2832,7 +2856,7 @@
var parseEntity = function(inlines) {
var m;
if ((m = this.match(reEntityHere))) {
- inlines.push({ t: 'Str', c: entityToChar(m) });
+ inlines.push({ t: I_STR, c: entityToChar(m) });
return true;
} else {
return false;
@@ -2844,7 +2868,7 @@
var parseString = function(inlines) {
var m;
if ((m = this.match(reMain))) {
- inlines.push({ t: 'Str', c: m });
+ inlines.push({ t: I_STR, c: m });
return true;
} else {
return false;
@@ -2857,9 +2881,9 @@
var m = this.match(/^ *\n/);
if (m) {
if (m.length > 2) {
- inlines.push({ t: 'Hardbreak' });
+ inlines.push({ t: I_HARD_BREAK });
} else if (m.length > 0) {
- inlines.push({ t: 'Softbreak' });
+ inlines.push({ t: I_SOFT_BREAK });
}
return true;
}
@@ -2872,10 +2896,10 @@
if (this.match(/^!/)) {
var link = this.parseLink(inlines);
if (link) {
- inlines[inlines.length - 1].t = 'Image';
+ inlines[inlines.length - 1].t = I_IMAGE;
return true;
} else {
- inlines.push({ t: 'Str', c: '!' });
+ inlines.push({ t: I_STR, c: '!' });
return true;
}
} else {
@@ -2994,7 +3018,7 @@
}
if (!res) {
this.pos += 1;
- inlines.push({t: 'Str', c: String.fromCharCode(c)});
+ inlines.push({t: I_STR, c: String.fromCharCode(c)});
}
if (memoize) {
@@ -3071,17 +3095,17 @@
// Returns true if parent block can contain child block.
var canContain = function(parent_type, child_type) {
- return ( parent_type == 'Document' ||
- parent_type == 'BlockQuote' ||
- parent_type == 'ListItem' ||
- (parent_type == 'List' && child_type == 'ListItem') );
+ return ( parent_type === B_DOCUMENT ||
+ parent_type === B_BLOCK_QUOTE ||
+ parent_type === B_LIST_ITEM ||
+ (parent_type === B_LIST && child_type === B_LIST_ITEM) );
};
// Returns true if block type can accept lines of text.
var acceptsLines = function(block_type) {
- return ( block_type == 'Paragraph' ||
- block_type == 'IndentedCode' ||
- block_type == 'FencedCode' );
+ return ( block_type === B_PARAGRAPH ||
+ block_type === B_INDENTED_CODE ||
+ block_type === B_FENCED_CODE );
};
// Returns true if block ends with a blank line, descending if needed
@@ -3090,7 +3114,7 @@
if (block.last_line_blank) {
return true;
}
- if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) {
+ if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) {
return endsWithBlankLine(block.children[block.children.length - 1]);
} else {
return false;
@@ -3105,7 +3129,7 @@
var b = block;
var last_list = null;
do {
- if (b.t === 'List') {
+ if (b.t === B_LIST) {
last_list = b;
}
b = b.parent;
@@ -3234,7 +3258,7 @@
indent = first_nonspace - offset;
switch (container.t) {
- case 'BlockQuote':
+ case B_BLOCK_QUOTE:
if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
offset = first_nonspace + 1;
if (ln.charCodeAt(offset) === C_SPACE) {
@@ -3245,7 +3269,7 @@
}
break;
- case 'ListItem':
+ case B_LIST_ITEM:
if (indent >= container.list_data.marker_offset +
container.list_data.padding) {
offset += container.list_data.marker_offset +
@@ -3257,7 +3281,7 @@
}
break;
- case 'IndentedCode':
+ case B_INDENTED_CODE:
if (indent >= CODE_INDENT) {
offset += CODE_INDENT;
} else if (blank) {
@@ -3267,14 +3291,14 @@
}
break;
- case 'ATXHeader':
- case 'SetextHeader':
- case 'HorizontalRule':
+ case B_ATX_HEADER:
+ case B_SETEXT_HEADER:
+ case B_HORIZONTAL_RULE:
// a header can never contain > 1 line, so fail to match:
all_matched = false;
break;
- case 'FencedCode':
+ case B_FENCED_CODE:
// skip optional spaces of fence offset
i = container.fence_offset;
while (i > 0 && ln.charCodeAt(offset) === C_SPACE) {
@@ -3283,13 +3307,13 @@
}
break;
- case 'HtmlBlock':
+ case B_HTML_BLOCK:
if (blank) {
all_matched = false;
}
break;
- case 'Paragraph':
+ case B_PARAGRAPH:
if (blank) {
container.last_line_blank = true;
all_matched = false;
@@ -3328,9 +3352,9 @@
// Unless last matched container is a code block, try new container starts,
// adding children to the last matched container:
- while (container.t != 'FencedCode' &&
- container.t != 'IndentedCode' &&
- container.t != 'HtmlBlock' &&
+ while (container.t != B_FENCED_CODE &&
+ container.t != B_INDENTED_CODE &&
+ container.t != B_HTML_BLOCK &&
// this is a little performance optimization:
matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
@@ -3346,10 +3370,10 @@
if (indent >= CODE_INDENT) {
// indented code
- if (this.tip.t != 'Paragraph' && !blank) {
+ if (this.tip.t != B_PARAGRAPH && !blank) {
offset += CODE_INDENT;
closeUnmatchedBlocks(this);
- container = this.addChild('IndentedCode', line_number, offset);
+ container = this.addChild(B_INDENTED_CODE, line_number, offset);
} else { // indent > 4 in a lazy paragraph continuation
break;
}
@@ -3362,13 +3386,13 @@
offset++;
}
closeUnmatchedBlocks(this);
- container = this.addChild('BlockQuote', line_number, offset);
+ container = this.addChild(B_BLOCK_QUOTE, line_number, offset);
} else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
// ATX header
offset = first_nonspace + match[0].length;
closeUnmatchedBlocks(this);
- container = this.addChild('ATXHeader', line_number, first_nonspace);
+ container = this.addChild(B_ATX_HEADER, line_number, first_nonspace);
container.level = match[0].trim().length; // number of #s
// remove trailing ###s:
container.strings =
@@ -3379,7 +3403,7 @@
// fenced code block
var fence_length = match[0].length;
closeUnmatchedBlocks(this);
- container = this.addChild('FencedCode', line_number, first_nonspace);
+ container = this.addChild(B_FENCED_CODE, line_number, first_nonspace);
container.fence_length = fence_length;
container.fence_char = match[0][0];
container.fence_offset = first_nonspace - offset;
@@ -3389,23 +3413,23 @@
} else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
// html block
closeUnmatchedBlocks(this);
- container = this.addChild('HtmlBlock', line_number, first_nonspace);
+ container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace);
// note, we don't adjust offset because the tag is part of the text
break;
- } else if (container.t == 'Paragraph' &&
+ } else if (container.t == B_PARAGRAPH &&
container.strings.length === 1 &&
((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
// setext header line
closeUnmatchedBlocks(this);
- container.t = 'SetextHeader'; // convert Paragraph to SetextHeader
+ container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader
container.level = match[0][0] === '=' ? 1 : 2;
offset = ln.length;
} else if (matchAt(reHrule, ln, first_nonspace) !== null) {
// hrule
closeUnmatchedBlocks(this);
- container = this.addChild('HorizontalRule', line_number, first_nonspace);
+ container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace);
offset = ln.length - 1;
break;
@@ -3416,14 +3440,14 @@
offset = first_nonspace + data.padding;
// add the list if needed
- if (container.t !== 'List' ||
+ if (container.t !== B_LIST ||
!(listsMatch(container.list_data, data))) {
- container = this.addChild('List', line_number, first_nonspace);
+ container = this.addChild(B_LIST, line_number, first_nonspace);
container.list_data = data;
}
// add the list item
- container = this.addChild('ListItem', line_number, first_nonspace);
+ container = this.addChild(B_LIST_ITEM, line_number, first_nonspace);
container.list_data = data;
} else {
@@ -3453,7 +3477,7 @@
// First check for a lazy paragraph continuation:
if (this.tip !== last_matched_container &&
!blank &&
- this.tip.t == 'Paragraph' &&
+ this.tip.t == B_PARAGRAPH &&
this.tip.strings.length > 0) {
// lazy paragraph continuation
@@ -3470,9 +3494,9 @@
// lists or breaking out of lists. We also don't set last_line_blank
// on an empty list item.
container.last_line_blank = blank &&
- !(container.t == 'BlockQuote' ||
- container.t == 'FencedCode' ||
- (container.t == 'ListItem' &&
+ !(container.t == B_BLOCK_QUOTE ||
+ container.t == B_FENCED_CODE ||
+ (container.t == B_LIST_ITEM &&
container.children.length === 0 &&
container.start_line == line_number));
@@ -3483,12 +3507,12 @@
}
switch (container.t) {
- case 'IndentedCode':
- case 'HtmlBlock':
+ case B_INDENTED_CODE:
+ case B_HTML_BLOCK:
this.addLine(ln, offset);
break;
- case 'FencedCode':
+ case B_FENCED_CODE:
// check for closing code fence:
match = (indent <= 3 &&
ln.charAt(first_nonspace) == container.fence_char &&
@@ -3501,9 +3525,9 @@
}
break;
- case 'ATXHeader':
- case 'SetextHeader':
- case 'HorizontalRule':
+ case B_ATX_HEADER:
+ case B_SETEXT_HEADER:
+ case B_HORIZONTAL_RULE:
// nothing to do; we already added the contents.
break;
@@ -3512,10 +3536,10 @@
this.addLine(ln, first_nonspace);
} else if (blank) {
// do nothing
- } else if (container.t != 'HorizontalRule' &&
- container.t != 'SetextHeader') {
+ } else if (container.t != B_HORIZONTAL_RULE &&
+ container.t != B_SETEXT_HEADER) {
// create paragraph container for line
- container = this.addChild('Paragraph', line_number, first_nonspace);
+ container = this.addChild(B_PARAGRAPH, line_number, first_nonspace);
this.addLine(ln, first_nonspace);
} else {
console.log("Line " + line_number.toString() +
@@ -3546,7 +3570,7 @@
}
switch (block.t) {
- case 'Paragraph':
+ case B_PARAGRAPH:
block.string_content = block.strings.join('\n').replace(/^ */m,'');
// try parsing the beginning as link reference definitions:
@@ -3555,23 +3579,23 @@
this.refmap))) {
block.string_content = block.string_content.slice(pos);
if (isBlank(block.string_content)) {
- block.t = 'ReferenceDef';
+ block.t = B_REFERENCE_DEF;
break;
}
}
break;
- case 'ATXHeader':
- case 'SetextHeader':
- case 'HtmlBlock':
+ case B_ATX_HEADER:
+ case B_SETEXT_HEADER:
+ case B_HTML_BLOCK:
block.string_content = block.strings.join('\n');
break;
- case 'IndentedCode':
+ case B_INDENTED_CODE:
block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n');
break;
- case 'FencedCode':
+ case B_FENCED_CODE:
// first line becomes info string
block.info = unescapeEntBS(block.strings[0].trim());
if (block.strings.length == 1) {
@@ -3581,7 +3605,7 @@
}
break;
- case 'List':
+ case B_LIST:
block.tight = true; // tight by default
var numitems = block.children.length;
@@ -3622,9 +3646,9 @@
// into inline content where appropriate.
var processInlines = function(block) {
switch(block.t) {
- case 'Paragraph':
- case 'SetextHeader':
- case 'ATXHeader':
+ case B_PARAGRAPH:
+ case B_SETEXT_HEADER:
+ case B_ATX_HEADER:
block.inline_content =
this.inlineParser.parse(block.string_content.trim(), this.refmap);
block.string_content = "";
@@ -3643,7 +3667,7 @@
// The main parsing function. Returns a parsed document AST.
var parse = function(input) {
- this.doc = makeBlock('Document', 1, 1);
+ this.doc = makeBlock(B_DOCUMENT, 1, 1);
this.tip = this.doc;
this.refmap = {};
var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/);
@@ -3662,7 +3686,7 @@
// The DocParser object.
function DocParser(){
return {
- doc: makeBlock('Document', 1, 1),
+ doc: makeBlock(B_DOCUMENT, 1, 1),
tip: this.doc,
refmap: {},
inlineParser: new InlineParser(),
@@ -3703,32 +3727,32 @@
var renderInline = function(inline) {
var attrs;
switch (inline.t) {
- case 'Str':
+ case I_STR:
return this.escape(inline.c);
- case 'Softbreak':
+ case I_SOFT_BREAK:
return this.softbreak;
- case 'Hardbreak':
+ case I_HARD_BREAK:
return inTags('br',[],"",true) + '\n';
- case 'Emph':
+ case I_EMPH:
return inTags('em', [], this.renderInlines(inline.c));
- case 'Strong':
+ case I_STRONG:
return inTags('strong', [], this.renderInlines(inline.c));
- case 'Html':
+ case I_HTML:
return inline.c;
- case 'Link':
+ case I_LINK:
attrs = [['href', this.escape(inline.destination, true)]];
if (inline.title) {
attrs.push(['title', this.escape(inline.title, true)]);
}
return inTags('a', attrs, this.renderInlines(inline.label));
- case 'Image':
+ case I_IMAGE:
attrs = [['src', this.escape(inline.destination, true)],
['alt', this.escape(this.renderInlines(inline.label))]];
if (inline.title) {
attrs.push(['title', this.escape(inline.title, true)]);
}
return inTags('img', attrs, "", true);
- case 'Code':
+ case I_CODE:
return inTags('code', [], this.escape(inline.c));
default:
console.log("Unknown inline type " + inline.t);
@@ -3751,48 +3775,48 @@
var attr;
var info_words;
switch (block.t) {
- case 'Document':
+ case B_DOCUMENT:
var whole_doc = this.renderBlocks(block.children);
return (whole_doc === '' ? '' : whole_doc + '\n');
- case 'Paragraph':
+ case B_PARAGRAPH:
if (in_tight_list) {
return this.renderInlines(block.inline_content);
} else {
return inTags('p', [], this.renderInlines(block.inline_content));
}
break;
- case 'BlockQuote':
+ case B_BLOCK_QUOTE:
var filling = this.renderBlocks(block.children);
return inTags('blockquote', [], filling === '' ? this.innersep :
this.innersep + filling + this.innersep);
- case 'ListItem':
+ case B_LIST_ITEM:
return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim());
- case 'List':
+ case B_LIST:
tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
attr = (!block.list_data.start || block.list_data.start == 1) ?
[] : [['start', block.list_data.start.toString()]];
return inTags(tag, attr, this.innersep +
this.renderBlocks(block.children, block.tight) +
this.innersep);
- case 'ATXHeader':
- case 'SetextHeader':
+ case B_ATX_HEADER:
+ case B_SETEXT_HEADER:
tag = 'h' + block.level;
return inTags(tag, [], this.renderInlines(block.inline_content));
- case 'IndentedCode':
+ case B_INDENTED_CODE:
return inTags('pre', [],
inTags('code', [], this.escape(block.string_content)));
- case 'FencedCode':
+ case B_FENCED_CODE:
info_words = block.info.split(/ +/);
attr = info_words.length === 0 || info_words[0].length === 0 ?
[] : [['class','language-' +
this.escape(info_words[0],true)]];
return inTags('pre', [],
inTags('code', attr, this.escape(block.string_content)));
- case 'HtmlBlock':
+ case B_HTML_BLOCK:
return block.string_content;
- case 'ReferenceDef':
+ case B_REFERENCE_DEF:
return "";
- case 'HorizontalRule':
+ case B_HORIZONTAL_RULE:
return inTags('hr',[],"",true);
default:
console.log("Unknown block type " + block.t);
@@ -3804,7 +3828,7 @@
var renderBlocks = function(blocks, in_tight_list) {
var result = [];
for (var i=0; i < blocks.length; i++) {
- if (blocks[i].t !== 'ReferenceDef') {
+ if (blocks[i].t !== B_REFERENCE_DEF) {
result.push(this.renderBlock(blocks[i], in_tight_list));
}
}
--
cgit v1.2.3
From db25de09f5dc931c0e2b31ce0ccdb58052f3105f Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 17:53:53 -0700
Subject: Use numerical constants.
Performance optimization, but at cost of code clarity.
---
js/stmd.js | 270 ++++++++++++++++++++++++++++++-------------------------------
1 file changed, 135 insertions(+), 135 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 23caf31..3c4eab0 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2137,8 +2137,7 @@
zwj: '',
zwnj: '' };
- // Constants for inline and block types:
-
+ /* Constants for inline and block types
var I_STR = 1;
var I_SOFT_BREAK = 2;
var I_HARD_BREAK = 3;
@@ -2160,9 +2159,9 @@
var B_HTML_BLOCK = 19;
var B_REFERENCE_DEF = 20;
var B_HORIZONTAL_RULE = 21;
+ */
- // Constants for character codes:
-
+ /* Constants for character codes:
var C_NEWLINE = 10;
var C_SPACE = 32;
var C_ASTERISK = 42;
@@ -2177,6 +2176,7 @@
var C_AMPERSAND = 38;
var C_OPEN_PAREN = 40;
var C_COLON = 58;
+ */
// Some regexps used in inline parser:
@@ -2360,7 +2360,7 @@
var match;
while (!foundCode && (match = this.match(/`+/m))) {
if (match === ticks) {
- inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks,
+ inlines.push({ t: 9, c: this.subject.slice(afterOpenTicks,
this.pos - ticks.length)
.replace(/[ \n]+/g,' ')
.trim() });
@@ -2369,7 +2369,7 @@
}
// If we got here, we didn't match a closing backtick sequence.
this.pos = afterOpenTicks;
- inlines.push({ t: I_STR, c: ticks });
+ inlines.push({ t: 1, c: ticks });
return true;
};
@@ -2379,16 +2379,16 @@
var parseBackslash = function(inlines) {
var subj = this.subject,
pos = this.pos;
- if (subj.charCodeAt(pos) === C_BACKSLASH) {
+ if (subj.charCodeAt(pos) === 92) {
if (subj.charAt(pos + 1) === '\n') {
this.pos = this.pos + 2;
- inlines.push({ t: I_HARD_BREAK });
+ inlines.push({ t: 3 });
} else if (reEscapable.test(subj.charAt(pos + 1))) {
this.pos = this.pos + 2;
- inlines.push({ t: I_STR, c: subj.charAt(pos + 1) });
+ inlines.push({ t: 1, c: subj.charAt(pos + 1) });
} else {
this.pos++;
- inlines.push({t: I_STR, c: '\\'});
+ inlines.push({t: 1, c: '\\'});
}
return true;
} else {
@@ -2403,15 +2403,15 @@
if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
dest = m.slice(1,-1);
inlines.push(
- {t: I_LINK,
- label: [{ t: I_STR, c: dest }],
+ {t: 7,
+ label: [{ t: 1, c: dest }],
destination: 'mailto:' + encodeURI(unescape(dest)) });
return true;
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
inlines.push({
- t: I_LINK,
- label: [{ t: I_STR, c: dest }],
+ t: 7,
+ label: [{ t: 1, c: dest }],
destination: encodeURI(unescape(dest)) });
return true;
} else {
@@ -2423,7 +2423,7 @@
var parseHtmlTag = function(inlines) {
var m = this.match(reHtmlTag);
if (m) {
- inlines.push({ t: I_HTML, c: m });
+ inlines.push({ t: 6, c: m });
return true;
} else {
return false;
@@ -2457,7 +2457,7 @@
var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after));
var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before));
- if (cc === C_UNDERSCORE) {
+ if (cc === 95) {
can_open = can_open && !((/[a-z0-9]/i).test(char_before));
can_close = can_close && !((/[a-z0-9]/i).test(char_after));
}
@@ -2468,15 +2468,15 @@
};
var Emph = function(ils) {
- return {t: I_EMPH, c: ils};
+ return {t: 4, c: ils};
}
var Strong = function(ils) {
- return {t: I_STRONG, c: ils};
+ return {t: 5, c: ils};
}
var Str = function(s) {
- return {t: I_STR, c: s};
+ return {t: 1, c: s};
}
// Attempt to parse emphasis or strong emphasis.
@@ -2721,7 +2721,7 @@
// Attempt to parse a link label, returning number of characters parsed.
var parseLinkLabel = function() {
- if (this.peek() != C_OPEN_BRACKET) {
+ if (this.peek() != 91) {
return 0;
}
var startpos = this.pos;
@@ -2738,31 +2738,31 @@
}
this.pos++; // advance past [
var c;
- while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) {
+ while ((c = this.peek()) && c != -1 && (c != 93 || nest_level > 0)) {
switch (c) {
- case C_BACKTICK:
+ case 96:
this.parseBackticks([]);
break;
- case C_LESSTHAN:
+ case 60:
this.parseAutolink([]) || this.parseHtmlTag([]) ||
this.pos++;
break;
- case C_OPEN_BRACKET: // nested []
+ case 91: // nested []
nest_level++;
this.pos++;
break;
- case C_CLOSE_BRACKET: // nested []
+ case 93: // nested []
nest_level--;
this.pos++;
break;
- case C_BACKSLASH:
+ case 92:
this.parseBackslash([]);
break;
default:
this.parseString([]);
}
}
- if (c === C_CLOSE_BRACKET) {
+ if (c === 93) {
this.label_nest_level = 0;
this.pos++; // advance past ]
return this.pos - startpos;
@@ -2800,7 +2800,7 @@
// if we got this far, we've parsed a label.
// Try to parse an explicit link: [label](url "title")
- if (this.peek() === C_OPEN_PAREN) {
+ if (this.peek() === 40) {
this.pos++;
if (this.spnl() &&
((dest = this.parseLinkDestination()) !== null) &&
@@ -2810,7 +2810,7 @@
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
- inlines.push({ t: I_LINK,
+ inlines.push({ t: 7,
destination: dest,
title: title,
label: parseRawLabel(rawlabel) });
@@ -2838,7 +2838,7 @@
// lookup reflabel in refmap
var link = this.refmap[normalizeReference(reflabel)];
if (link) {
- inlines.push({t: I_LINK,
+ inlines.push({t: 7,
destination: link.destination,
title: link.title,
label: parseRawLabel(rawlabel) });
@@ -2856,7 +2856,7 @@
var parseEntity = function(inlines) {
var m;
if ((m = this.match(reEntityHere))) {
- inlines.push({ t: I_STR, c: entityToChar(m) });
+ inlines.push({ t: 1, c: entityToChar(m) });
return true;
} else {
return false;
@@ -2868,7 +2868,7 @@
var parseString = function(inlines) {
var m;
if ((m = this.match(reMain))) {
- inlines.push({ t: I_STR, c: m });
+ inlines.push({ t: 1, c: m });
return true;
} else {
return false;
@@ -2881,9 +2881,9 @@
var m = this.match(/^ *\n/);
if (m) {
if (m.length > 2) {
- inlines.push({ t: I_HARD_BREAK });
+ inlines.push({ t: 3 });
} else if (m.length > 0) {
- inlines.push({ t: I_SOFT_BREAK });
+ inlines.push({ t: 2 });
}
return true;
}
@@ -2896,10 +2896,10 @@
if (this.match(/^!/)) {
var link = this.parseLink(inlines);
if (link) {
- inlines[inlines.length - 1].t = I_IMAGE;
+ inlines[inlines.length - 1].t = 8;
return true;
} else {
- inlines.push({ t: I_STR, c: '!' });
+ inlines.push({ t: 1, c: '!' });
return true;
}
} else {
@@ -2927,7 +2927,7 @@
}
// colon:
- if (this.peek() === C_COLON) {
+ if (this.peek() === 58) {
this.pos++;
} else {
this.pos = startpos;
@@ -2986,30 +2986,30 @@
}
var res;
switch(c) {
- case C_NEWLINE:
- case C_SPACE:
+ case 10:
+ case 32:
res = this.parseNewline(inlines);
break;
- case C_BACKSLASH:
+ case 92:
res = this.parseBackslash(inlines);
break;
- case C_BACKTICK:
+ case 96:
res = this.parseBackticks(inlines);
break;
- case C_ASTERISK:
- case C_UNDERSCORE:
+ case 42:
+ case 95:
res = this.parseEmphasis(c, inlines);
break;
- case C_OPEN_BRACKET:
+ case 91:
res = this.parseLink(inlines);
break;
- case C_BANG:
+ case 33:
res = this.parseImage(inlines);
break;
- case C_LESSTHAN:
+ case 60:
res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines);
break;
- case C_AMPERSAND:
+ case 38:
res = this.parseEntity(inlines);
break;
default:
@@ -3018,7 +3018,7 @@
}
if (!res) {
this.pos += 1;
- inlines.push({t: I_STR, c: String.fromCharCode(c)});
+ inlines.push({t: 1, c: String.fromCharCode(c)});
}
if (memoize) {
@@ -3095,17 +3095,17 @@
// Returns true if parent block can contain child block.
var canContain = function(parent_type, child_type) {
- return ( parent_type === B_DOCUMENT ||
- parent_type === B_BLOCK_QUOTE ||
- parent_type === B_LIST_ITEM ||
- (parent_type === B_LIST && child_type === B_LIST_ITEM) );
+ return ( parent_type === 10 ||
+ parent_type === 12 ||
+ parent_type === 13 ||
+ (parent_type === 14 && child_type === 13) );
};
// Returns true if block type can accept lines of text.
var acceptsLines = function(block_type) {
- return ( block_type === B_PARAGRAPH ||
- block_type === B_INDENTED_CODE ||
- block_type === B_FENCED_CODE );
+ return ( block_type === 11 ||
+ block_type === 17 ||
+ block_type === 18 );
};
// Returns true if block ends with a blank line, descending if needed
@@ -3114,7 +3114,7 @@
if (block.last_line_blank) {
return true;
}
- if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) {
+ if ((block.t === 14 || block.t === 13) && block.children.length > 0) {
return endsWithBlankLine(block.children[block.children.length - 1]);
} else {
return false;
@@ -3129,7 +3129,7 @@
var b = block;
var last_list = null;
do {
- if (b.t === B_LIST) {
+ if (b.t === 14) {
last_list = b;
}
b = b.parent;
@@ -3258,10 +3258,10 @@
indent = first_nonspace - offset;
switch (container.t) {
- case B_BLOCK_QUOTE:
- if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
+ case 12:
+ if (indent <= 3 && ln.charCodeAt(first_nonspace) === 62) {
offset = first_nonspace + 1;
- if (ln.charCodeAt(offset) === C_SPACE) {
+ if (ln.charCodeAt(offset) === 32) {
offset++;
}
} else {
@@ -3269,7 +3269,7 @@
}
break;
- case B_LIST_ITEM:
+ case 13:
if (indent >= container.list_data.marker_offset +
container.list_data.padding) {
offset += container.list_data.marker_offset +
@@ -3281,7 +3281,7 @@
}
break;
- case B_INDENTED_CODE:
+ case 17:
if (indent >= CODE_INDENT) {
offset += CODE_INDENT;
} else if (blank) {
@@ -3291,29 +3291,29 @@
}
break;
- case B_ATX_HEADER:
- case B_SETEXT_HEADER:
- case B_HORIZONTAL_RULE:
+ case 15:
+ case 16:
+ case 21:
// a header can never contain > 1 line, so fail to match:
all_matched = false;
break;
- case B_FENCED_CODE:
+ case 18:
// skip optional spaces of fence offset
i = container.fence_offset;
- while (i > 0 && ln.charCodeAt(offset) === C_SPACE) {
+ while (i > 0 && ln.charCodeAt(offset) === 32) {
offset++;
i--;
}
break;
- case B_HTML_BLOCK:
+ case 19:
if (blank) {
all_matched = false;
}
break;
- case B_PARAGRAPH:
+ case 11:
if (blank) {
container.last_line_blank = true;
all_matched = false;
@@ -3352,9 +3352,9 @@
// Unless last matched container is a code block, try new container starts,
// adding children to the last matched container:
- while (container.t != B_FENCED_CODE &&
- container.t != B_INDENTED_CODE &&
- container.t != B_HTML_BLOCK &&
+ while (container.t != 18 &&
+ container.t != 17 &&
+ container.t != 19 &&
// this is a little performance optimization:
matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
@@ -3370,29 +3370,29 @@
if (indent >= CODE_INDENT) {
// indented code
- if (this.tip.t != B_PARAGRAPH && !blank) {
+ if (this.tip.t != 11 && !blank) {
offset += CODE_INDENT;
closeUnmatchedBlocks(this);
- container = this.addChild(B_INDENTED_CODE, line_number, offset);
+ container = this.addChild(17, line_number, offset);
} else { // indent > 4 in a lazy paragraph continuation
break;
}
- } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
+ } else if (ln.charCodeAt(first_nonspace) === 62) {
// blockquote
offset = first_nonspace + 1;
// optional following space
- if (ln.charCodeAt(offset) === C_SPACE) {
+ if (ln.charCodeAt(offset) === 32) {
offset++;
}
closeUnmatchedBlocks(this);
- container = this.addChild(B_BLOCK_QUOTE, line_number, offset);
+ container = this.addChild(12, line_number, offset);
} else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
// ATX header
offset = first_nonspace + match[0].length;
closeUnmatchedBlocks(this);
- container = this.addChild(B_ATX_HEADER, line_number, first_nonspace);
+ container = this.addChild(15, line_number, first_nonspace);
container.level = match[0].trim().length; // number of #s
// remove trailing ###s:
container.strings =
@@ -3403,7 +3403,7 @@
// fenced code block
var fence_length = match[0].length;
closeUnmatchedBlocks(this);
- container = this.addChild(B_FENCED_CODE, line_number, first_nonspace);
+ container = this.addChild(18, line_number, first_nonspace);
container.fence_length = fence_length;
container.fence_char = match[0][0];
container.fence_offset = first_nonspace - offset;
@@ -3413,23 +3413,23 @@
} else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
// html block
closeUnmatchedBlocks(this);
- container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace);
+ container = this.addChild(19, line_number, first_nonspace);
// note, we don't adjust offset because the tag is part of the text
break;
- } else if (container.t == B_PARAGRAPH &&
+ } else if (container.t == 11 &&
container.strings.length === 1 &&
((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
// setext header line
closeUnmatchedBlocks(this);
- container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader
+ container.t = 16; // convert Paragraph to SetextHeader
container.level = match[0][0] === '=' ? 1 : 2;
offset = ln.length;
} else if (matchAt(reHrule, ln, first_nonspace) !== null) {
// hrule
closeUnmatchedBlocks(this);
- container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace);
+ container = this.addChild(21, line_number, first_nonspace);
offset = ln.length - 1;
break;
@@ -3440,14 +3440,14 @@
offset = first_nonspace + data.padding;
// add the list if needed
- if (container.t !== B_LIST ||
+ if (container.t !== 14 ||
!(listsMatch(container.list_data, data))) {
- container = this.addChild(B_LIST, line_number, first_nonspace);
+ container = this.addChild(14, line_number, first_nonspace);
container.list_data = data;
}
// add the list item
- container = this.addChild(B_LIST_ITEM, line_number, first_nonspace);
+ container = this.addChild(13, line_number, first_nonspace);
container.list_data = data;
} else {
@@ -3477,7 +3477,7 @@
// First check for a lazy paragraph continuation:
if (this.tip !== last_matched_container &&
!blank &&
- this.tip.t == B_PARAGRAPH &&
+ this.tip.t == 11 &&
this.tip.strings.length > 0) {
// lazy paragraph continuation
@@ -3494,9 +3494,9 @@
// lists or breaking out of lists. We also don't set last_line_blank
// on an empty list item.
container.last_line_blank = blank &&
- !(container.t == B_BLOCK_QUOTE ||
- container.t == B_FENCED_CODE ||
- (container.t == B_LIST_ITEM &&
+ !(container.t == 12 ||
+ container.t == 18 ||
+ (container.t == 13 &&
container.children.length === 0 &&
container.start_line == line_number));
@@ -3507,12 +3507,12 @@
}
switch (container.t) {
- case B_INDENTED_CODE:
- case B_HTML_BLOCK:
+ case 17:
+ case 19:
this.addLine(ln, offset);
break;
- case B_FENCED_CODE:
+ case 18:
// check for closing code fence:
match = (indent <= 3 &&
ln.charAt(first_nonspace) == container.fence_char &&
@@ -3525,9 +3525,9 @@
}
break;
- case B_ATX_HEADER:
- case B_SETEXT_HEADER:
- case B_HORIZONTAL_RULE:
+ case 15:
+ case 16:
+ case 21:
// nothing to do; we already added the contents.
break;
@@ -3536,10 +3536,10 @@
this.addLine(ln, first_nonspace);
} else if (blank) {
// do nothing
- } else if (container.t != B_HORIZONTAL_RULE &&
- container.t != B_SETEXT_HEADER) {
+ } else if (container.t != 21 &&
+ container.t != 16) {
// create paragraph container for line
- container = this.addChild(B_PARAGRAPH, line_number, first_nonspace);
+ container = this.addChild(11, line_number, first_nonspace);
this.addLine(ln, first_nonspace);
} else {
console.log("Line " + line_number.toString() +
@@ -3570,32 +3570,32 @@
}
switch (block.t) {
- case B_PARAGRAPH:
+ case 11:
block.string_content = block.strings.join('\n').replace(/^ */m,'');
// try parsing the beginning as link reference definitions:
- while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET &&
+ while (block.string_content.charCodeAt(0) === 91 &&
(pos = this.inlineParser.parseReference(block.string_content,
this.refmap))) {
block.string_content = block.string_content.slice(pos);
if (isBlank(block.string_content)) {
- block.t = B_REFERENCE_DEF;
+ block.t = 20;
break;
}
}
break;
- case B_ATX_HEADER:
- case B_SETEXT_HEADER:
- case B_HTML_BLOCK:
+ case 15:
+ case 16:
+ case 19:
block.string_content = block.strings.join('\n');
break;
- case B_INDENTED_CODE:
+ case 17:
block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n');
break;
- case B_FENCED_CODE:
+ case 18:
// first line becomes info string
block.info = unescapeEntBS(block.strings[0].trim());
if (block.strings.length == 1) {
@@ -3605,7 +3605,7 @@
}
break;
- case B_LIST:
+ case 14:
block.tight = true; // tight by default
var numitems = block.children.length;
@@ -3646,9 +3646,9 @@
// into inline content where appropriate.
var processInlines = function(block) {
switch(block.t) {
- case B_PARAGRAPH:
- case B_SETEXT_HEADER:
- case B_ATX_HEADER:
+ case 11:
+ case 16:
+ case 15:
block.inline_content =
this.inlineParser.parse(block.string_content.trim(), this.refmap);
block.string_content = "";
@@ -3667,7 +3667,7 @@
// The main parsing function. Returns a parsed document AST.
var parse = function(input) {
- this.doc = makeBlock(B_DOCUMENT, 1, 1);
+ this.doc = makeBlock(10, 1, 1);
this.tip = this.doc;
this.refmap = {};
var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/);
@@ -3686,7 +3686,7 @@
// The DocParser object.
function DocParser(){
return {
- doc: makeBlock(B_DOCUMENT, 1, 1),
+ doc: makeBlock(10, 1, 1),
tip: this.doc,
refmap: {},
inlineParser: new InlineParser(),
@@ -3727,32 +3727,32 @@
var renderInline = function(inline) {
var attrs;
switch (inline.t) {
- case I_STR:
+ case 1:
return this.escape(inline.c);
- case I_SOFT_BREAK:
+ case 2:
return this.softbreak;
- case I_HARD_BREAK:
+ case 3:
return inTags('br',[],"",true) + '\n';
- case I_EMPH:
+ case 4:
return inTags('em', [], this.renderInlines(inline.c));
- case I_STRONG:
+ case 5:
return inTags('strong', [], this.renderInlines(inline.c));
- case I_HTML:
+ case 6:
return inline.c;
- case I_LINK:
+ case 7:
attrs = [['href', this.escape(inline.destination, true)]];
if (inline.title) {
attrs.push(['title', this.escape(inline.title, true)]);
}
return inTags('a', attrs, this.renderInlines(inline.label));
- case I_IMAGE:
+ case 8:
attrs = [['src', this.escape(inline.destination, true)],
['alt', this.escape(this.renderInlines(inline.label))]];
if (inline.title) {
attrs.push(['title', this.escape(inline.title, true)]);
}
return inTags('img', attrs, "", true);
- case I_CODE:
+ case 9:
return inTags('code', [], this.escape(inline.c));
default:
console.log("Unknown inline type " + inline.t);
@@ -3775,48 +3775,48 @@
var attr;
var info_words;
switch (block.t) {
- case B_DOCUMENT:
+ case 10:
var whole_doc = this.renderBlocks(block.children);
return (whole_doc === '' ? '' : whole_doc + '\n');
- case B_PARAGRAPH:
+ case 11:
if (in_tight_list) {
return this.renderInlines(block.inline_content);
} else {
return inTags('p', [], this.renderInlines(block.inline_content));
}
break;
- case B_BLOCK_QUOTE:
+ case 12:
var filling = this.renderBlocks(block.children);
return inTags('blockquote', [], filling === '' ? this.innersep :
this.innersep + filling + this.innersep);
- case B_LIST_ITEM:
+ case 13:
return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim());
- case B_LIST:
+ case 14:
tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
attr = (!block.list_data.start || block.list_data.start == 1) ?
[] : [['start', block.list_data.start.toString()]];
return inTags(tag, attr, this.innersep +
this.renderBlocks(block.children, block.tight) +
this.innersep);
- case B_ATX_HEADER:
- case B_SETEXT_HEADER:
+ case 15:
+ case 16:
tag = 'h' + block.level;
return inTags(tag, [], this.renderInlines(block.inline_content));
- case B_INDENTED_CODE:
+ case 17:
return inTags('pre', [],
inTags('code', [], this.escape(block.string_content)));
- case B_FENCED_CODE:
+ case 18:
info_words = block.info.split(/ +/);
attr = info_words.length === 0 || info_words[0].length === 0 ?
[] : [['class','language-' +
this.escape(info_words[0],true)]];
return inTags('pre', [],
inTags('code', attr, this.escape(block.string_content)));
- case B_HTML_BLOCK:
+ case 19:
return block.string_content;
- case B_REFERENCE_DEF:
+ case 20:
return "";
- case B_HORIZONTAL_RULE:
+ case 21:
return inTags('hr',[],"",true);
default:
console.log("Unknown block type " + block.t);
@@ -3828,7 +3828,7 @@
var renderBlocks = function(blocks, in_tight_list) {
var result = [];
for (var i=0; i < blocks.length; i++) {
- if (blocks[i].t !== B_REFERENCE_DEF) {
+ if (blocks[i].t !== 20) {
result.push(this.renderBlock(blocks[i], in_tight_list));
}
}
--
cgit v1.2.3
From 6dfc19a529c64d17ec673196d2d549acc809bd54 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 17:54:14 -0700
Subject: Revert "Use numerical constants."
This reverts commit db25de09f5dc931c0e2b31ce0ccdb58052f3105f.
---
js/stmd.js | 270 ++++++++++++++++++++++++++++++-------------------------------
1 file changed, 135 insertions(+), 135 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 3c4eab0..23caf31 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2137,7 +2137,8 @@
zwj: '',
zwnj: '' };
- /* Constants for inline and block types
+ // Constants for inline and block types:
+
var I_STR = 1;
var I_SOFT_BREAK = 2;
var I_HARD_BREAK = 3;
@@ -2159,9 +2160,9 @@
var B_HTML_BLOCK = 19;
var B_REFERENCE_DEF = 20;
var B_HORIZONTAL_RULE = 21;
- */
- /* Constants for character codes:
+ // Constants for character codes:
+
var C_NEWLINE = 10;
var C_SPACE = 32;
var C_ASTERISK = 42;
@@ -2176,7 +2177,6 @@
var C_AMPERSAND = 38;
var C_OPEN_PAREN = 40;
var C_COLON = 58;
- */
// Some regexps used in inline parser:
@@ -2360,7 +2360,7 @@
var match;
while (!foundCode && (match = this.match(/`+/m))) {
if (match === ticks) {
- inlines.push({ t: 9, c: this.subject.slice(afterOpenTicks,
+ inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks,
this.pos - ticks.length)
.replace(/[ \n]+/g,' ')
.trim() });
@@ -2369,7 +2369,7 @@
}
// If we got here, we didn't match a closing backtick sequence.
this.pos = afterOpenTicks;
- inlines.push({ t: 1, c: ticks });
+ inlines.push({ t: I_STR, c: ticks });
return true;
};
@@ -2379,16 +2379,16 @@
var parseBackslash = function(inlines) {
var subj = this.subject,
pos = this.pos;
- if (subj.charCodeAt(pos) === 92) {
+ if (subj.charCodeAt(pos) === C_BACKSLASH) {
if (subj.charAt(pos + 1) === '\n') {
this.pos = this.pos + 2;
- inlines.push({ t: 3 });
+ inlines.push({ t: I_HARD_BREAK });
} else if (reEscapable.test(subj.charAt(pos + 1))) {
this.pos = this.pos + 2;
- inlines.push({ t: 1, c: subj.charAt(pos + 1) });
+ inlines.push({ t: I_STR, c: subj.charAt(pos + 1) });
} else {
this.pos++;
- inlines.push({t: 1, c: '\\'});
+ inlines.push({t: I_STR, c: '\\'});
}
return true;
} else {
@@ -2403,15 +2403,15 @@
if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
dest = m.slice(1,-1);
inlines.push(
- {t: 7,
- label: [{ t: 1, c: dest }],
+ {t: I_LINK,
+ label: [{ t: I_STR, c: dest }],
destination: 'mailto:' + encodeURI(unescape(dest)) });
return true;
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
inlines.push({
- t: 7,
- label: [{ t: 1, c: dest }],
+ t: I_LINK,
+ label: [{ t: I_STR, c: dest }],
destination: encodeURI(unescape(dest)) });
return true;
} else {
@@ -2423,7 +2423,7 @@
var parseHtmlTag = function(inlines) {
var m = this.match(reHtmlTag);
if (m) {
- inlines.push({ t: 6, c: m });
+ inlines.push({ t: I_HTML, c: m });
return true;
} else {
return false;
@@ -2457,7 +2457,7 @@
var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after));
var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before));
- if (cc === 95) {
+ if (cc === C_UNDERSCORE) {
can_open = can_open && !((/[a-z0-9]/i).test(char_before));
can_close = can_close && !((/[a-z0-9]/i).test(char_after));
}
@@ -2468,15 +2468,15 @@
};
var Emph = function(ils) {
- return {t: 4, c: ils};
+ return {t: I_EMPH, c: ils};
}
var Strong = function(ils) {
- return {t: 5, c: ils};
+ return {t: I_STRONG, c: ils};
}
var Str = function(s) {
- return {t: 1, c: s};
+ return {t: I_STR, c: s};
}
// Attempt to parse emphasis or strong emphasis.
@@ -2721,7 +2721,7 @@
// Attempt to parse a link label, returning number of characters parsed.
var parseLinkLabel = function() {
- if (this.peek() != 91) {
+ if (this.peek() != C_OPEN_BRACKET) {
return 0;
}
var startpos = this.pos;
@@ -2738,31 +2738,31 @@
}
this.pos++; // advance past [
var c;
- while ((c = this.peek()) && c != -1 && (c != 93 || nest_level > 0)) {
+ while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) {
switch (c) {
- case 96:
+ case C_BACKTICK:
this.parseBackticks([]);
break;
- case 60:
+ case C_LESSTHAN:
this.parseAutolink([]) || this.parseHtmlTag([]) ||
this.pos++;
break;
- case 91: // nested []
+ case C_OPEN_BRACKET: // nested []
nest_level++;
this.pos++;
break;
- case 93: // nested []
+ case C_CLOSE_BRACKET: // nested []
nest_level--;
this.pos++;
break;
- case 92:
+ case C_BACKSLASH:
this.parseBackslash([]);
break;
default:
this.parseString([]);
}
}
- if (c === 93) {
+ if (c === C_CLOSE_BRACKET) {
this.label_nest_level = 0;
this.pos++; // advance past ]
return this.pos - startpos;
@@ -2800,7 +2800,7 @@
// if we got this far, we've parsed a label.
// Try to parse an explicit link: [label](url "title")
- if (this.peek() === 40) {
+ if (this.peek() === C_OPEN_PAREN) {
this.pos++;
if (this.spnl() &&
((dest = this.parseLinkDestination()) !== null) &&
@@ -2810,7 +2810,7 @@
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
- inlines.push({ t: 7,
+ inlines.push({ t: I_LINK,
destination: dest,
title: title,
label: parseRawLabel(rawlabel) });
@@ -2838,7 +2838,7 @@
// lookup rawlabel in refmap
var link = this.refmap[normalizeReference(reflabel)];
if (link) {
- inlines.push({t: 7,
+ inlines.push({t: I_LINK,
destination: link.destination,
title: link.title,
label: parseRawLabel(rawlabel) });
@@ -2856,7 +2856,7 @@
var parseEntity = function(inlines) {
var m;
if ((m = this.match(reEntityHere))) {
- inlines.push({ t: 1, c: entityToChar(m) });
+ inlines.push({ t: I_STR, c: entityToChar(m) });
return true;
} else {
return false;
@@ -2868,7 +2868,7 @@
var parseString = function(inlines) {
var m;
if ((m = this.match(reMain))) {
- inlines.push({ t: 1, c: m });
+ inlines.push({ t: I_STR, c: m });
return true;
} else {
return false;
@@ -2881,9 +2881,9 @@
var m = this.match(/^ *\n/);
if (m) {
if (m.length > 2) {
- inlines.push({ t: 3 });
+ inlines.push({ t: I_HARD_BREAK });
} else if (m.length > 0) {
- inlines.push({ t: 2 });
+ inlines.push({ t: I_SOFT_BREAK });
}
return true;
}
@@ -2896,10 +2896,10 @@
if (this.match(/^!/)) {
var link = this.parseLink(inlines);
if (link) {
- inlines[inlines.length - 1].t = 8;
+ inlines[inlines.length - 1].t = I_IMAGE;
return true;
} else {
- inlines.push({ t: 1, c: '!' });
+ inlines.push({ t: I_STR, c: '!' });
return true;
}
} else {
@@ -2927,7 +2927,7 @@
}
// colon:
- if (this.peek() === 58) {
+ if (this.peek() === C_COLON) {
this.pos++;
} else {
this.pos = startpos;
@@ -2986,30 +2986,30 @@
}
var res;
switch(c) {
- case 10:
- case 32:
+ case C_NEWLINE:
+ case C_SPACE:
res = this.parseNewline(inlines);
break;
- case 92:
+ case C_BACKSLASH:
res = this.parseBackslash(inlines);
break;
- case 96:
+ case C_BACKTICK:
res = this.parseBackticks(inlines);
break;
- case 42:
- case 95:
+ case C_ASTERISK:
+ case C_UNDERSCORE:
res = this.parseEmphasis(c, inlines);
break;
- case 91:
+ case C_OPEN_BRACKET:
res = this.parseLink(inlines);
break;
- case 33:
+ case C_BANG:
res = this.parseImage(inlines);
break;
- case 60:
+ case C_LESSTHAN:
res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines);
break;
- case 38:
+ case C_AMPERSAND:
res = this.parseEntity(inlines);
break;
default:
@@ -3018,7 +3018,7 @@
}
if (!res) {
this.pos += 1;
- inlines.push({t: 1, c: String.fromCharCode(c)});
+ inlines.push({t: I_STR, c: String.fromCharCode(c)});
}
if (memoize) {
@@ -3095,17 +3095,17 @@
// Returns true if parent block can contain child block.
var canContain = function(parent_type, child_type) {
- return ( parent_type === 10 ||
- parent_type === 12 ||
- parent_type === 13 ||
- (parent_type === 14 && child_type === 13) );
+ return ( parent_type === B_DOCUMENT ||
+ parent_type === B_BLOCK_QUOTE ||
+ parent_type === B_LIST_ITEM ||
+ (parent_type === B_LIST && child_type === B_LIST_ITEM) );
};
// Returns true if block type can accept lines of text.
var acceptsLines = function(block_type) {
- return ( block_type === 11 ||
- block_type === 17 ||
- block_type === 18 );
+ return ( block_type === B_PARAGRAPH ||
+ block_type === B_INDENTED_CODE ||
+ block_type === B_FENCED_CODE );
};
// Returns true if block ends with a blank line, descending if needed
@@ -3114,7 +3114,7 @@
if (block.last_line_blank) {
return true;
}
- if ((block.t === 14 || block.t === 13) && block.children.length > 0) {
+ if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) {
return endsWithBlankLine(block.children[block.children.length - 1]);
} else {
return false;
@@ -3129,7 +3129,7 @@
var b = block;
var last_list = null;
do {
- if (b.t === 14) {
+ if (b.t === B_LIST) {
last_list = b;
}
b = b.parent;
@@ -3258,10 +3258,10 @@
indent = first_nonspace - offset;
switch (container.t) {
- case 12:
- if (indent <= 3 && ln.charCodeAt(first_nonspace) === 62) {
+ case B_BLOCK_QUOTE:
+ if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
offset = first_nonspace + 1;
- if (ln.charCodeAt(offset) === 32) {
+ if (ln.charCodeAt(offset) === C_SPACE) {
offset++;
}
} else {
@@ -3269,7 +3269,7 @@
}
break;
- case 13:
+ case B_LIST_ITEM:
if (indent >= container.list_data.marker_offset +
container.list_data.padding) {
offset += container.list_data.marker_offset +
@@ -3281,7 +3281,7 @@
}
break;
- case 17:
+ case B_INDENTED_CODE:
if (indent >= CODE_INDENT) {
offset += CODE_INDENT;
} else if (blank) {
@@ -3291,29 +3291,29 @@
}
break;
- case 15:
- case 16:
- case 21:
+ case B_ATX_HEADER:
+ case B_SETEXT_HEADER:
+ case B_HORIZONTAL_RULE:
// a header can never container > 1 line, so fail to match:
all_matched = false;
break;
- case 18:
+ case B_FENCED_CODE:
// skip optional spaces of fence offset
i = container.fence_offset;
- while (i > 0 && ln.charCodeAt(offset) === 32) {
+ while (i > 0 && ln.charCodeAt(offset) === C_SPACE) {
offset++;
i--;
}
break;
- case 19:
+ case B_HTML_BLOCK:
if (blank) {
all_matched = false;
}
break;
- case 11:
+ case B_PARAGRAPH:
if (blank) {
container.last_line_blank = true;
all_matched = false;
@@ -3352,9 +3352,9 @@
// Unless last matched container is a code block, try new container starts,
// adding children to the last matched container:
- while (container.t != 18 &&
- container.t != 17 &&
- container.t != 19 &&
+ while (container.t != B_FENCED_CODE &&
+ container.t != B_INDENTED_CODE &&
+ container.t != B_HTML_BLOCK &&
// this is a little performance optimization:
matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
@@ -3370,29 +3370,29 @@
if (indent >= CODE_INDENT) {
// indented code
- if (this.tip.t != 11 && !blank) {
+ if (this.tip.t != B_PARAGRAPH && !blank) {
offset += CODE_INDENT;
closeUnmatchedBlocks(this);
- container = this.addChild(17, line_number, offset);
+ container = this.addChild(B_INDENTED_CODE, line_number, offset);
} else { // indent > 4 in a lazy paragraph continuation
break;
}
- } else if (ln.charCodeAt(first_nonspace) === 62) {
+ } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
// blockquote
offset = first_nonspace + 1;
// optional following space
- if (ln.charCodeAt(offset) === 32) {
+ if (ln.charCodeAt(offset) === C_SPACE) {
offset++;
}
closeUnmatchedBlocks(this);
- container = this.addChild(12, line_number, offset);
+ container = this.addChild(B_BLOCK_QUOTE, line_number, offset);
} else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
// ATX header
offset = first_nonspace + match[0].length;
closeUnmatchedBlocks(this);
- container = this.addChild(15, line_number, first_nonspace);
+ container = this.addChild(B_ATX_HEADER, line_number, first_nonspace);
container.level = match[0].trim().length; // number of #s
// remove trailing ###s:
container.strings =
@@ -3403,7 +3403,7 @@
// fenced code block
var fence_length = match[0].length;
closeUnmatchedBlocks(this);
- container = this.addChild(18, line_number, first_nonspace);
+ container = this.addChild(B_FENCED_CODE, line_number, first_nonspace);
container.fence_length = fence_length;
container.fence_char = match[0][0];
container.fence_offset = first_nonspace - offset;
@@ -3413,23 +3413,23 @@
} else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
// html block
closeUnmatchedBlocks(this);
- container = this.addChild(19, line_number, first_nonspace);
+ container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace);
// note, we don't adjust offset because the tag is part of the text
break;
- } else if (container.t == 11 &&
+ } else if (container.t == B_PARAGRAPH &&
container.strings.length === 1 &&
((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
// setext header line
closeUnmatchedBlocks(this);
- container.t = 16; // convert Paragraph to SetextHeader
+ container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader
container.level = match[0][0] === '=' ? 1 : 2;
offset = ln.length;
} else if (matchAt(reHrule, ln, first_nonspace) !== null) {
// hrule
closeUnmatchedBlocks(this);
- container = this.addChild(21, line_number, first_nonspace);
+ container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace);
offset = ln.length - 1;
break;
@@ -3440,14 +3440,14 @@
offset = first_nonspace + data.padding;
// add the list if needed
- if (container.t !== 14 ||
+ if (container.t !== B_LIST ||
!(listsMatch(container.list_data, data))) {
- container = this.addChild(14, line_number, first_nonspace);
+ container = this.addChild(B_LIST, line_number, first_nonspace);
container.list_data = data;
}
// add the list item
- container = this.addChild(13, line_number, first_nonspace);
+ container = this.addChild(B_LIST_ITEM, line_number, first_nonspace);
container.list_data = data;
} else {
@@ -3477,7 +3477,7 @@
// First check for a lazy paragraph continuation:
if (this.tip !== last_matched_container &&
!blank &&
- this.tip.t == 11 &&
+ this.tip.t == B_PARAGRAPH &&
this.tip.strings.length > 0) {
// lazy paragraph continuation
@@ -3494,9 +3494,9 @@
// lists or breaking out of lists. We also don't set last_line_blank
// on an empty list item.
container.last_line_blank = blank &&
- !(container.t == 12 ||
- container.t == 18 ||
- (container.t == 13 &&
+ !(container.t == B_BLOCK_QUOTE ||
+ container.t == B_FENCED_CODE ||
+ (container.t == B_LIST_ITEM &&
container.children.length === 0 &&
container.start_line == line_number));
@@ -3507,12 +3507,12 @@
}
switch (container.t) {
- case 17:
- case 19:
+ case B_INDENTED_CODE:
+ case B_HTML_BLOCK:
this.addLine(ln, offset);
break;
- case 18:
+ case B_FENCED_CODE:
// check for closing code fence:
match = (indent <= 3 &&
ln.charAt(first_nonspace) == container.fence_char &&
@@ -3525,9 +3525,9 @@
}
break;
- case 15:
- case 16:
- case 21:
+ case B_ATX_HEADER:
+ case B_SETEXT_HEADER:
+ case B_HORIZONTAL_RULE:
// nothing to do; we already added the contents.
break;
@@ -3536,10 +3536,10 @@
this.addLine(ln, first_nonspace);
} else if (blank) {
// do nothing
- } else if (container.t != 21 &&
- container.t != 16) {
+ } else if (container.t != B_HORIZONTAL_RULE &&
+ container.t != B_SETEXT_HEADER) {
// create paragraph container for line
- container = this.addChild(11, line_number, first_nonspace);
+ container = this.addChild(B_PARAGRAPH, line_number, first_nonspace);
this.addLine(ln, first_nonspace);
} else {
console.log("Line " + line_number.toString() +
@@ -3570,32 +3570,32 @@
}
switch (block.t) {
- case 11:
+ case B_PARAGRAPH:
block.string_content = block.strings.join('\n').replace(/^ */m,'');
// try parsing the beginning as link reference definitions:
- while (block.string_content.charCodeAt(0) === 91 &&
+ while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET &&
(pos = this.inlineParser.parseReference(block.string_content,
this.refmap))) {
block.string_content = block.string_content.slice(pos);
if (isBlank(block.string_content)) {
- block.t = 20;
+ block.t = B_REFERENCE_DEF;
break;
}
}
break;
- case 15:
- case 16:
- case 19:
+ case B_ATX_HEADER:
+ case B_SETEXT_HEADER:
+ case B_HTML_BLOCK:
block.string_content = block.strings.join('\n');
break;
- case 17:
+ case B_INDENTED_CODE:
block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n');
break;
- case 18:
+ case B_FENCED_CODE:
// first line becomes info string
block.info = unescapeEntBS(block.strings[0].trim());
if (block.strings.length == 1) {
@@ -3605,7 +3605,7 @@
}
break;
- case 14:
+ case B_LIST:
block.tight = true; // tight by default
var numitems = block.children.length;
@@ -3646,9 +3646,9 @@
// into inline content where appropriate.
var processInlines = function(block) {
switch(block.t) {
- case 11:
- case 16:
- case 15:
+ case B_PARAGRAPH:
+ case B_SETEXT_HEADER:
+ case B_ATX_HEADER:
block.inline_content =
this.inlineParser.parse(block.string_content.trim(), this.refmap);
block.string_content = "";
@@ -3667,7 +3667,7 @@
// The main parsing function. Returns a parsed document AST.
var parse = function(input) {
- this.doc = makeBlock(10, 1, 1);
+ this.doc = makeBlock(B_DOCUMENT, 1, 1);
this.tip = this.doc;
this.refmap = {};
var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/);
@@ -3686,7 +3686,7 @@
// The DocParser object.
function DocParser(){
return {
- doc: makeBlock(10, 1, 1),
+ doc: makeBlock(B_DOCUMENT, 1, 1),
tip: this.doc,
refmap: {},
inlineParser: new InlineParser(),
@@ -3727,32 +3727,32 @@
var renderInline = function(inline) {
var attrs;
switch (inline.t) {
- case 1:
+ case I_STR:
return this.escape(inline.c);
- case 2:
+ case I_SOFT_BREAK:
return this.softbreak;
- case 3:
+ case I_HARD_BREAK:
return inTags('br',[],"",true) + '\n';
- case 4:
+ case I_EMPH:
return inTags('em', [], this.renderInlines(inline.c));
- case 5:
+ case I_STRONG:
return inTags('strong', [], this.renderInlines(inline.c));
- case 6:
+ case I_HTML:
return inline.c;
- case 7:
+ case I_LINK:
attrs = [['href', this.escape(inline.destination, true)]];
if (inline.title) {
attrs.push(['title', this.escape(inline.title, true)]);
}
return inTags('a', attrs, this.renderInlines(inline.label));
- case 8:
+ case I_IMAGE:
attrs = [['src', this.escape(inline.destination, true)],
['alt', this.escape(this.renderInlines(inline.label))]];
if (inline.title) {
attrs.push(['title', this.escape(inline.title, true)]);
}
return inTags('img', attrs, "", true);
- case 9:
+ case I_CODE:
return inTags('code', [], this.escape(inline.c));
default:
console.log("Unknown inline type " + inline.t);
@@ -3775,48 +3775,48 @@
var attr;
var info_words;
switch (block.t) {
- case 10:
+ case B_DOCUMENT:
var whole_doc = this.renderBlocks(block.children);
return (whole_doc === '' ? '' : whole_doc + '\n');
- case 11:
+ case B_PARAGRAPH:
if (in_tight_list) {
return this.renderInlines(block.inline_content);
} else {
return inTags('p', [], this.renderInlines(block.inline_content));
}
break;
- case 12:
+ case B_BLOCK_QUOTE:
var filling = this.renderBlocks(block.children);
return inTags('blockquote', [], filling === '' ? this.innersep :
this.innersep + filling + this.innersep);
- case 13:
+ case B_LIST_ITEM:
return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim());
- case 14:
+ case B_LIST:
tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
attr = (!block.list_data.start || block.list_data.start == 1) ?
[] : [['start', block.list_data.start.toString()]];
return inTags(tag, attr, this.innersep +
this.renderBlocks(block.children, block.tight) +
this.innersep);
- case 15:
- case 16:
+ case B_ATX_HEADER:
+ case B_SETEXT_HEADER:
tag = 'h' + block.level;
return inTags(tag, [], this.renderInlines(block.inline_content));
- case 17:
+ case B_INDENTED_CODE:
return inTags('pre', [],
inTags('code', [], this.escape(block.string_content)));
- case 18:
+ case B_FENCED_CODE:
info_words = block.info.split(/ +/);
attr = info_words.length === 0 || info_words[0].length === 0 ?
[] : [['class','language-' +
this.escape(info_words[0],true)]];
return inTags('pre', [],
inTags('code', attr, this.escape(block.string_content)));
- case 19:
+ case B_HTML_BLOCK:
return block.string_content;
- case 20:
+ case B_REFERENCE_DEF:
return "";
- case 21:
+ case B_HORIZONTAL_RULE:
return inTags('hr',[],"",true);
default:
console.log("Unknown block type " + block.t);
@@ -3828,7 +3828,7 @@
var renderBlocks = function(blocks, in_tight_list) {
var result = [];
for (var i=0; i < blocks.length; i++) {
- if (blocks[i].t !== 20) {
+ if (blocks[i].t !== B_REFERENCE_DEF) {
result.push(this.renderBlock(blocks[i], in_tight_list));
}
}
--
cgit v1.2.3
From c9ad75b4c69edf064106bc63fdf6a2637a7c5a8b Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Thu, 2 Oct 2014 17:54:20 -0700
Subject: Revert "Use integers instead of strings for tags."
This reverts commit ac611d51c9de9aa719b42b9463e6f28d6e7d74a4.
---
js/stmd.js | 238 +++++++++++++++++++++++++++----------------------------------
1 file changed, 107 insertions(+), 131 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 23caf31..efccad8 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2137,30 +2137,6 @@
zwj: '',
zwnj: '' };
- // Constants for inline and block types:
-
- var I_STR = 1;
- var I_SOFT_BREAK = 2;
- var I_HARD_BREAK = 3;
- var I_EMPH = 4;
- var I_STRONG = 5;
- var I_HTML = 6;
- var I_LINK = 7;
- var I_IMAGE = 8;
- var I_CODE = 9;
- var B_DOCUMENT = 10;
- var B_PARAGRAPH = 11;
- var B_BLOCK_QUOTE = 12;
- var B_LIST_ITEM = 13;
- var B_LIST = 14;
- var B_ATX_HEADER = 15;
- var B_SETEXT_HEADER = 16;
- var B_INDENTED_CODE = 17;
- var B_FENCED_CODE = 18;
- var B_HTML_BLOCK = 19;
- var B_REFERENCE_DEF = 20;
- var B_HORIZONTAL_RULE = 21;
-
// Constants for character codes:
var C_NEWLINE = 10;
@@ -2297,7 +2273,7 @@
// Convert tabs to spaces on each line using a 4-space tab stop.
var detabLine = function(text) {
- if (text.indexOf('\t') === -1) {
+ if (text.indexOf('\t') == -1) {
return text;
} else {
var lastStop = 0;
@@ -2359,8 +2335,8 @@
var foundCode = false;
var match;
while (!foundCode && (match = this.match(/`+/m))) {
- if (match === ticks) {
- inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks,
+ if (match == ticks) {
+ inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks,
this.pos - ticks.length)
.replace(/[ \n]+/g,' ')
.trim() });
@@ -2369,7 +2345,7 @@
}
// If we got here, we didn't match a closing backtick sequence.
this.pos = afterOpenTicks;
- inlines.push({ t: I_STR, c: ticks });
+ inlines.push({ t: 'Str', c: ticks });
return true;
};
@@ -2382,13 +2358,13 @@
if (subj.charCodeAt(pos) === C_BACKSLASH) {
if (subj.charAt(pos + 1) === '\n') {
this.pos = this.pos + 2;
- inlines.push({ t: I_HARD_BREAK });
+ inlines.push({ t: 'Hardbreak' });
} else if (reEscapable.test(subj.charAt(pos + 1))) {
this.pos = this.pos + 2;
- inlines.push({ t: I_STR, c: subj.charAt(pos + 1) });
+ inlines.push({ t: 'Str', c: subj.charAt(pos + 1) });
} else {
this.pos++;
- inlines.push({t: I_STR, c: '\\'});
+ inlines.push({t: 'Str', c: '\\'});
}
return true;
} else {
@@ -2403,15 +2379,15 @@
if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink
dest = m.slice(1,-1);
inlines.push(
- {t: I_LINK,
- label: [{ t: I_STR, c: dest }],
+ {t: 'Link',
+ label: [{ t: 'Str', c: dest }],
destination: 'mailto:' + encodeURI(unescape(dest)) });
return true;
} else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) {
dest = m.slice(1,-1);
inlines.push({
- t: I_LINK,
- label: [{ t: I_STR, c: dest }],
+ t: 'Link',
+ label: [{ t: 'Str', c: dest }],
destination: encodeURI(unescape(dest)) });
return true;
} else {
@@ -2423,7 +2399,7 @@
var parseHtmlTag = function(inlines) {
var m = this.match(reHtmlTag);
if (m) {
- inlines.push({ t: I_HTML, c: m });
+ inlines.push({ t: 'Html', c: m });
return true;
} else {
return false;
@@ -2468,15 +2444,15 @@
};
var Emph = function(ils) {
- return {t: I_EMPH, c: ils};
+ return {t: 'Emph', c: ils};
}
var Strong = function(ils) {
- return {t: I_STRONG, c: ils};
+ return {t: 'Strong', c: ils};
}
var Str = function(s) {
- return {t: I_STR, c: s};
+ return {t: 'Str', c: s};
}
// Attempt to parse emphasis or strong emphasis.
@@ -2800,7 +2776,7 @@
// if we got this far, we've parsed a label.
// Try to parse an explicit link: [label](url "title")
- if (this.peek() === C_OPEN_PAREN) {
+ if (this.peek() == C_OPEN_PAREN) {
this.pos++;
if (this.spnl() &&
((dest = this.parseLinkDestination()) !== null) &&
@@ -2810,7 +2786,7 @@
(title = this.parseLinkTitle() || '') || true) &&
this.spnl() &&
this.match(/^\)/)) {
- inlines.push({ t: I_LINK,
+ inlines.push({ t: 'Link',
destination: dest,
title: title,
label: parseRawLabel(rawlabel) });
@@ -2826,7 +2802,7 @@
this.spnl();
var beforelabel = this.pos;
n = this.parseLinkLabel();
- if (n === 2) {
+ if (n == 2) {
// empty second label
reflabel = rawlabel;
} else if (n > 0) {
@@ -2838,7 +2814,7 @@
// lookup rawlabel in refmap
var link = this.refmap[normalizeReference(reflabel)];
if (link) {
- inlines.push({t: I_LINK,
+ inlines.push({t: 'Link',
destination: link.destination,
title: link.title,
label: parseRawLabel(rawlabel) });
@@ -2856,7 +2832,7 @@
var parseEntity = function(inlines) {
var m;
if ((m = this.match(reEntityHere))) {
- inlines.push({ t: I_STR, c: entityToChar(m) });
+ inlines.push({ t: 'Str', c: entityToChar(m) });
return true;
} else {
return false;
@@ -2868,7 +2844,7 @@
var parseString = function(inlines) {
var m;
if ((m = this.match(reMain))) {
- inlines.push({ t: I_STR, c: m });
+ inlines.push({ t: 'Str', c: m });
return true;
} else {
return false;
@@ -2881,9 +2857,9 @@
var m = this.match(/^ *\n/);
if (m) {
if (m.length > 2) {
- inlines.push({ t: I_HARD_BREAK });
+ inlines.push({ t: 'Hardbreak' });
} else if (m.length > 0) {
- inlines.push({ t: I_SOFT_BREAK });
+ inlines.push({ t: 'Softbreak' });
}
return true;
}
@@ -2896,10 +2872,10 @@
if (this.match(/^!/)) {
var link = this.parseLink(inlines);
if (link) {
- inlines[inlines.length - 1].t = I_IMAGE;
+ inlines[inlines.length - 1].t = 'Image';
return true;
} else {
- inlines.push({ t: I_STR, c: '!' });
+ inlines.push({ t: 'Str', c: '!' });
return true;
}
} else {
@@ -3018,7 +2994,7 @@
}
if (!res) {
this.pos += 1;
- inlines.push({t: I_STR, c: String.fromCharCode(c)});
+ inlines.push({t: 'Str', c: String.fromCharCode(c)});
}
if (memoize) {
@@ -3095,17 +3071,17 @@
// Returns true if parent block can contain child block.
var canContain = function(parent_type, child_type) {
- return ( parent_type === B_DOCUMENT ||
- parent_type === B_BLOCK_QUOTE ||
- parent_type === B_LIST_ITEM ||
- (parent_type === B_LIST && child_type === B_LIST_ITEM) );
+ return ( parent_type == 'Document' ||
+ parent_type == 'BlockQuote' ||
+ parent_type == 'ListItem' ||
+ (parent_type == 'List' && child_type == 'ListItem') );
};
// Returns true if block type can accept lines of text.
var acceptsLines = function(block_type) {
- return ( block_type === B_PARAGRAPH ||
- block_type === B_INDENTED_CODE ||
- block_type === B_FENCED_CODE );
+ return ( block_type == 'Paragraph' ||
+ block_type == 'IndentedCode' ||
+ block_type == 'FencedCode' );
};
// Returns true if block ends with a blank line, descending if needed
@@ -3114,7 +3090,7 @@
if (block.last_line_blank) {
return true;
}
- if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) {
+ if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) {
return endsWithBlankLine(block.children[block.children.length - 1]);
} else {
return false;
@@ -3129,7 +3105,7 @@
var b = block;
var last_list = null;
do {
- if (b.t === B_LIST) {
+ if (b.t === 'List') {
last_list = b;
}
b = b.parent;
@@ -3258,7 +3234,7 @@
indent = first_nonspace - offset;
switch (container.t) {
- case B_BLOCK_QUOTE:
+ case 'BlockQuote':
if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) {
offset = first_nonspace + 1;
if (ln.charCodeAt(offset) === C_SPACE) {
@@ -3269,7 +3245,7 @@
}
break;
- case B_LIST_ITEM:
+ case 'ListItem':
if (indent >= container.list_data.marker_offset +
container.list_data.padding) {
offset += container.list_data.marker_offset +
@@ -3281,7 +3257,7 @@
}
break;
- case B_INDENTED_CODE:
+ case 'IndentedCode':
if (indent >= CODE_INDENT) {
offset += CODE_INDENT;
} else if (blank) {
@@ -3291,14 +3267,14 @@
}
break;
- case B_ATX_HEADER:
- case B_SETEXT_HEADER:
- case B_HORIZONTAL_RULE:
+ case 'ATXHeader':
+ case 'SetextHeader':
+ case 'HorizontalRule':
// a header can never contain > 1 line, so fail to match:
all_matched = false;
break;
- case B_FENCED_CODE:
+ case 'FencedCode':
// skip optional spaces of fence offset
i = container.fence_offset;
while (i > 0 && ln.charCodeAt(offset) === C_SPACE) {
@@ -3307,13 +3283,13 @@
}
break;
- case B_HTML_BLOCK:
+ case 'HtmlBlock':
if (blank) {
all_matched = false;
}
break;
- case B_PARAGRAPH:
+ case 'Paragraph':
if (blank) {
container.last_line_blank = true;
all_matched = false;
@@ -3352,9 +3328,9 @@
// Unless last matched container is a code block, try new container starts,
// adding children to the last matched container:
- while (container.t != B_FENCED_CODE &&
- container.t != B_INDENTED_CODE &&
- container.t != B_HTML_BLOCK &&
+ while (container.t != 'FencedCode' &&
+ container.t != 'IndentedCode' &&
+ container.t != 'HtmlBlock' &&
// this is a little performance optimization:
matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
@@ -3370,10 +3346,10 @@
if (indent >= CODE_INDENT) {
// indented code
- if (this.tip.t != B_PARAGRAPH && !blank) {
+ if (this.tip.t != 'Paragraph' && !blank) {
offset += CODE_INDENT;
closeUnmatchedBlocks(this);
- container = this.addChild(B_INDENTED_CODE, line_number, offset);
+ container = this.addChild('IndentedCode', line_number, offset);
} else { // indent > 4 in a lazy paragraph continuation
break;
}
@@ -3386,13 +3362,13 @@
offset++;
}
closeUnmatchedBlocks(this);
- container = this.addChild(B_BLOCK_QUOTE, line_number, offset);
+ container = this.addChild('BlockQuote', line_number, offset);
} else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) {
// ATX header
offset = first_nonspace + match[0].length;
closeUnmatchedBlocks(this);
- container = this.addChild(B_ATX_HEADER, line_number, first_nonspace);
+ container = this.addChild('ATXHeader', line_number, first_nonspace);
container.level = match[0].trim().length; // number of #s
// remove trailing ###s:
container.strings =
@@ -3403,7 +3379,7 @@
// fenced code block
var fence_length = match[0].length;
closeUnmatchedBlocks(this);
- container = this.addChild(B_FENCED_CODE, line_number, first_nonspace);
+ container = this.addChild('FencedCode', line_number, first_nonspace);
container.fence_length = fence_length;
container.fence_char = match[0][0];
container.fence_offset = first_nonspace - offset;
@@ -3413,23 +3389,23 @@
} else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
// html block
closeUnmatchedBlocks(this);
- container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace);
+ container = this.addChild('HtmlBlock', line_number, first_nonspace);
// note, we don't adjust offset because the tag is part of the text
break;
- } else if (container.t == B_PARAGRAPH &&
+ } else if (container.t == 'Paragraph' &&
container.strings.length === 1 &&
((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) {
// setext header line
closeUnmatchedBlocks(this);
- container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader
+ container.t = 'SetextHeader'; // convert Paragraph to SetextHeader
container.level = match[0][0] === '=' ? 1 : 2;
offset = ln.length;
} else if (matchAt(reHrule, ln, first_nonspace) !== null) {
// hrule
closeUnmatchedBlocks(this);
- container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace);
+ container = this.addChild('HorizontalRule', line_number, first_nonspace);
offset = ln.length - 1;
break;
@@ -3440,14 +3416,14 @@
offset = first_nonspace + data.padding;
// add the list if needed
- if (container.t !== B_LIST ||
+ if (container.t !== 'List' ||
!(listsMatch(container.list_data, data))) {
- container = this.addChild(B_LIST, line_number, first_nonspace);
+ container = this.addChild('List', line_number, first_nonspace);
container.list_data = data;
}
// add the list item
- container = this.addChild(B_LIST_ITEM, line_number, first_nonspace);
+ container = this.addChild('ListItem', line_number, first_nonspace);
container.list_data = data;
} else {
@@ -3477,7 +3453,7 @@
// First check for a lazy paragraph continuation:
if (this.tip !== last_matched_container &&
!blank &&
- this.tip.t == B_PARAGRAPH &&
+ this.tip.t == 'Paragraph' &&
this.tip.strings.length > 0) {
// lazy paragraph continuation
@@ -3494,9 +3470,9 @@
// lists or breaking out of lists. We also don't set last_line_blank
// on an empty list item.
container.last_line_blank = blank &&
- !(container.t == B_BLOCK_QUOTE ||
- container.t == B_FENCED_CODE ||
- (container.t == B_LIST_ITEM &&
+ !(container.t == 'BlockQuote' ||
+ container.t == 'FencedCode' ||
+ (container.t == 'ListItem' &&
container.children.length === 0 &&
container.start_line == line_number));
@@ -3507,12 +3483,12 @@
}
switch (container.t) {
- case B_INDENTED_CODE:
- case B_HTML_BLOCK:
+ case 'IndentedCode':
+ case 'HtmlBlock':
this.addLine(ln, offset);
break;
- case B_FENCED_CODE:
+ case 'FencedCode':
// check for closing code fence:
match = (indent <= 3 &&
ln.charAt(first_nonspace) == container.fence_char &&
@@ -3525,9 +3501,9 @@
}
break;
- case B_ATX_HEADER:
- case B_SETEXT_HEADER:
- case B_HORIZONTAL_RULE:
+ case 'ATXHeader':
+ case 'SetextHeader':
+ case 'HorizontalRule':
// nothing to do; we already added the contents.
break;
@@ -3536,10 +3512,10 @@
this.addLine(ln, first_nonspace);
} else if (blank) {
// do nothing
- } else if (container.t != B_HORIZONTAL_RULE &&
- container.t != B_SETEXT_HEADER) {
+ } else if (container.t != 'HorizontalRule' &&
+ container.t != 'SetextHeader') {
// create paragraph container for line
- container = this.addChild(B_PARAGRAPH, line_number, first_nonspace);
+ container = this.addChild('Paragraph', line_number, first_nonspace);
this.addLine(ln, first_nonspace);
} else {
console.log("Line " + line_number.toString() +
@@ -3570,7 +3546,7 @@
}
switch (block.t) {
- case B_PARAGRAPH:
+ case 'Paragraph':
block.string_content = block.strings.join('\n').replace(/^ */m,'');
// try parsing the beginning as link reference definitions:
@@ -3579,23 +3555,23 @@
this.refmap))) {
block.string_content = block.string_content.slice(pos);
if (isBlank(block.string_content)) {
- block.t = B_REFERENCE_DEF;
+ block.t = 'ReferenceDef';
break;
}
}
break;
- case B_ATX_HEADER:
- case B_SETEXT_HEADER:
- case B_HTML_BLOCK:
+ case 'ATXHeader':
+ case 'SetextHeader':
+ case 'HtmlBlock':
block.string_content = block.strings.join('\n');
break;
- case B_INDENTED_CODE:
+ case 'IndentedCode':
block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n');
break;
- case B_FENCED_CODE:
+ case 'FencedCode':
// first line becomes info string
block.info = unescapeEntBS(block.strings[0].trim());
if (block.strings.length == 1) {
@@ -3605,7 +3581,7 @@
}
break;
- case B_LIST:
+ case 'List':
block.tight = true; // tight by default
var numitems = block.children.length;
@@ -3646,9 +3622,9 @@
// into inline content where appropriate.
var processInlines = function(block) {
switch(block.t) {
- case B_PARAGRAPH:
- case B_SETEXT_HEADER:
- case B_ATX_HEADER:
+ case 'Paragraph':
+ case 'SetextHeader':
+ case 'ATXHeader':
block.inline_content =
this.inlineParser.parse(block.string_content.trim(), this.refmap);
block.string_content = "";
@@ -3667,7 +3643,7 @@
// The main parsing function. Returns a parsed document AST.
var parse = function(input) {
- this.doc = makeBlock(B_DOCUMENT, 1, 1);
+ this.doc = makeBlock('Document', 1, 1);
this.tip = this.doc;
this.refmap = {};
var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/);
@@ -3686,7 +3662,7 @@
// The DocParser object.
function DocParser(){
return {
- doc: makeBlock(B_DOCUMENT, 1, 1),
+ doc: makeBlock('Document', 1, 1),
tip: this.doc,
refmap: {},
inlineParser: new InlineParser(),
@@ -3727,32 +3703,32 @@
var renderInline = function(inline) {
var attrs;
switch (inline.t) {
- case I_STR:
+ case 'Str':
return this.escape(inline.c);
- case I_SOFT_BREAK:
+ case 'Softbreak':
return this.softbreak;
- case I_HARD_BREAK:
+ case 'Hardbreak':
return inTags('br',[],"",true) + '\n';
- case I_EMPH:
+ case 'Emph':
return inTags('em', [], this.renderInlines(inline.c));
- case I_STRONG:
+ case 'Strong':
return inTags('strong', [], this.renderInlines(inline.c));
- case I_HTML:
+ case 'Html':
return inline.c;
- case I_LINK:
+ case 'Link':
attrs = [['href', this.escape(inline.destination, true)]];
if (inline.title) {
attrs.push(['title', this.escape(inline.title, true)]);
}
return inTags('a', attrs, this.renderInlines(inline.label));
- case I_IMAGE:
+ case 'Image':
attrs = [['src', this.escape(inline.destination, true)],
['alt', this.escape(this.renderInlines(inline.label))]];
if (inline.title) {
attrs.push(['title', this.escape(inline.title, true)]);
}
return inTags('img', attrs, "", true);
- case I_CODE:
+ case 'Code':
return inTags('code', [], this.escape(inline.c));
default:
console.log("Unknown inline type " + inline.t);
@@ -3775,48 +3751,48 @@
var attr;
var info_words;
switch (block.t) {
- case B_DOCUMENT:
+ case 'Document':
var whole_doc = this.renderBlocks(block.children);
return (whole_doc === '' ? '' : whole_doc + '\n');
- case B_PARAGRAPH:
+ case 'Paragraph':
if (in_tight_list) {
return this.renderInlines(block.inline_content);
} else {
return inTags('p', [], this.renderInlines(block.inline_content));
}
break;
- case B_BLOCK_QUOTE:
+ case 'BlockQuote':
var filling = this.renderBlocks(block.children);
return inTags('blockquote', [], filling === '' ? this.innersep :
this.innersep + filling + this.innersep);
- case B_LIST_ITEM:
+ case 'ListItem':
return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim());
- case B_LIST:
+ case 'List':
tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol';
attr = (!block.list_data.start || block.list_data.start == 1) ?
[] : [['start', block.list_data.start.toString()]];
return inTags(tag, attr, this.innersep +
this.renderBlocks(block.children, block.tight) +
this.innersep);
- case B_ATX_HEADER:
- case B_SETEXT_HEADER:
+ case 'ATXHeader':
+ case 'SetextHeader':
tag = 'h' + block.level;
return inTags(tag, [], this.renderInlines(block.inline_content));
- case B_INDENTED_CODE:
+ case 'IndentedCode':
return inTags('pre', [],
inTags('code', [], this.escape(block.string_content)));
- case B_FENCED_CODE:
+ case 'FencedCode':
info_words = block.info.split(/ +/);
attr = info_words.length === 0 || info_words[0].length === 0 ?
[] : [['class','language-' +
this.escape(info_words[0],true)]];
return inTags('pre', [],
inTags('code', attr, this.escape(block.string_content)));
- case B_HTML_BLOCK:
+ case 'HtmlBlock':
return block.string_content;
- case B_REFERENCE_DEF:
+ case 'ReferenceDef':
return "";
- case B_HORIZONTAL_RULE:
+ case 'HorizontalRule':
return inTags('hr',[],"",true);
default:
console.log("Unknown block type " + block.t);
@@ -3828,7 +3804,7 @@
var renderBlocks = function(blocks, in_tight_list) {
var result = [];
for (var i=0; i < blocks.length; i++) {
- if (blocks[i].t !== B_REFERENCE_DEF) {
+ if (blocks[i].t !== 'ReferenceDef') {
result.push(this.renderBlock(blocks[i], in_tight_list));
}
}
--
cgit v1.2.3
From 52c69afc6f4ad2f962f55c6daa7adaab87f835ae Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sat, 4 Oct 2014 16:43:21 -0700
Subject: Use simpler algorithm.
This handles things like `**hi***there*` and gives symmetrical
treatment of `**hi*` and `*hi**`.
Also handles the case from #147.
---
js/stmd.js | 192 ++++++++++---------------------------------------------------
1 file changed, 30 insertions(+), 162 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index efccad8..24651fb 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2463,6 +2463,7 @@
c = String.fromCharCode(cc);
var numdelims;
+ var numclosedelims;
var delimpos;
// Get opening delimiters.
@@ -2482,187 +2483,54 @@
this.pos += numdelims;
- var fallbackpos = this.pos;
+ var delims_to_match = numdelims;
- var next_inline;
- var first = [];
- var second = [];
- var current = first;
+ var current = [];
var state = 0;
var can_close = false;
var can_open = false;
var last_emphasis_closer = null;
-
- if (numdelims === 3) {
- state = 1;
- } else if (numdelims === 2) {
- state = 2;
- } else if (numdelims === 1) {
- state = 3;
- }
-
- while (true) {
- if (this.last_emphasis_closer[c] < this.pos) {
- break;
- }
+ while (this.last_emphasis_closer[c] >= this.pos) {
res = this.scanDelims(cc);
+ numclosedelims = res.numdelims;
- if (res) {
- numdelims = res.numdelims;
- can_close = res.can_close;
- if (can_close) {
+ if (res.can_close) {
+ if (last_emphasis_closer === null ||
+ last_emphasis_closer < this.pos) {
last_emphasis_closer = this.pos;
}
- can_open = res.can_open;
- switch (state) {
- case 1: // ***a
- if (numdelims === 3 && can_close) {
- this.pos += 3;
- inlines.push(Strong([Emph(first)]));
- return true;
- } else if (numdelims === 2 && can_close) {
- this.pos += 2;
- current = second;
- state = can_open ? 4 : 6;
- continue;
- } else if (numdelims === 1 && can_close) {
- this.pos += 1;
- current = second;
- state = can_open ? 5 : 7;
- continue;
- }
- break;
- case 2: // **a
- if (numdelims === 2 && can_close) {
- this.pos += 2;
- inlines.push(Strong(first));
- return true;
- } else if (numdelims === 1 && can_open) {
- this.pos += 1;
- current = second;
- state = 8;
- continue;
- }
- break;
- case 3: // *a
- if (numdelims === 1 && can_close) {
- this.pos += 1;
- inlines.push(Emph(first));
- return true;
- } else if (numdelims === 2 && can_open) {
- this.pos += 2;
- current = second;
- state = 9;
- continue;
- }
- break;
- case 4: // ***a**b
- if (numdelims === 3 && can_close) {
- this.pos += 3;
- inlines.push(Strong([Emph(first.concat([Str(c+c)], second))]));
- return true;
- } else if (numdelims === 2 && can_close) {
- this.pos += 2;
- inlines.push(Strong([Str(c+c+c)].concat(
- first,
- [Strong(second)])));
- return true;
- } else if (numdelims === 1 && can_close) {
- this.pos += 1;
- inlines.push(Emph([Strong(first)].concat(second)));
- return true;
- }
- break;
- case 5: // ***a*b
- if (numdelims === 3 && can_close) {
- this.pos += 3;
- inlines.push(Strong([Emph(first.concat([Str(c)], second))]));
- return true;
- } else if (numdelims === 2 && can_close) {
- this.pos += 2;
- inlines.push(Strong([Emph(first)].concat(second)));
- return true;
- } else if (numdelims === 1 && can_close) {
- this.pos += 1;
- inlines.push(Strong([Str(c+c+c)].concat(
- first,
- [Emph(second)])));
- return true;
- }
- break;
- case 6: // ***a** b
- if (numdelims === 3 && can_close) {
- this.pos += 3;
- inlines.push(Strong([Emph(first.concat([Str(c+c)], second))]));
- return true;
- } else if (numdelims === 1 && can_close) {
- this.pos += 1;
- inlines.push(Emph([Strong(first)].concat(second)));
- return true;
- }
- break;
- case 7: // ***a* b
- if (numdelims === 3 && can_close) {
- this.pos += 3;
- inlines.push(Strong([Emph(first.concat([Str(c)], second))]));
- return true;
- } else if (numdelims === 2 && can_close) {
- this.pos += 2;
- inlines.push(Strong([Emph(first)].concat(second)));
- return true;
- }
- break;
- case 8: // **a *b
- if (numdelims === 3 && can_close) {
- this.pos += 3;
- inlines.push(Strong(first.concat([Emph(second)])));
- return true;
- } else if (numdelims === 2 && can_close) {
- this.pos += 2;
- inlines.push(Strong(first.concat([Str(c)], second)));
- return true;
- } else if (numdelims === 1 && can_close) {
- this.pos += 1;
- first.push(Emph(second));
- current = first;
- state = 2;
- continue;
- }
- break;
- case 9: // *a **b
- if (numdelims === 3 && can_close) {
- this.pos += 3;
- inlines.push(Emph(first.concat([Strong(second)])));
- return true;
- } else if (numdelims === 2 && can_close) {
- this.pos += 2;
- first.push(Strong(second));
- current = first;
- state = 3;
- continue;
- } else if (numdelims === 1 && can_close) {
- this.pos += 1;
- inlines.push(Emph(first.concat([Str(c+c)], second)));
- return true;
+ if (numclosedelims === 3 && delims_to_match === 3) {
+ delims_to_match -= 3;
+ this.pos += 3;
+ current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}];
+ } else if (numclosedelims >= 2 && delims_to_match >= 2) {
+ delims_to_match -= 2;
+ this.pos += 2;
+ current = [{t: 'Strong', c: current}];
+ } else if (numclosedelims >= 1 && delims_to_match >= 1) {
+ delims_to_match -= 1;
+ this.pos += 1;
+ current = [{t: 'Emph', c: current}];
+ } else {
+ if (!(this.parseInline(current,true))) {
+ break;
}
- break;
- default:
- break;
}
- }
-
- if (!(this.parseInline(current,true))) {
+ if (delims_to_match === 0) {
+ Array.prototype.push.apply(inlines, current);
+ return true;
+ }
+ } else if (!(this.parseInline(current,true))) {
break;
}
-
}
// we didn't match emphasis: fallback
- this.pos = fallbackpos;
+ this.pos = startpos + 1;
if (last_emphasis_closer) {
this.last_emphasis_closer[c] = last_emphasis_closer;
}
- inlines.push(Str(this.subject.slice(startpos, fallbackpos)));
+ inlines.push(Str(c));
return true;
};
--
cgit v1.2.3
From 9d590fa7cd1158da138e602af542d2ca59d8d76e Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sat, 4 Oct 2014 16:49:05 -0700
Subject: Some jshint fixes.
---
js/stmd.js | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 24651fb..9a3a8c7 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2184,7 +2184,7 @@
PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")";
var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" +
"/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])";
- var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
+ var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});";
var reHtmlTag = new RegExp('^' + HTMLTAG, 'i');
@@ -2239,12 +2239,12 @@
uchar = entities[m.slice(1,-1)];
}
return (uchar || m);
- }
+ };
// Replace entities and backslash escapes with literal characters.
var unescapeEntBS = function(s) {
return s.replace(reAllEscapedChar, '$1')
- .replace(reEntity, entityToChar);;
+ .replace(reEntity, entityToChar);
};
// Returns true if string contains only space characters.
@@ -2445,15 +2445,15 @@
var Emph = function(ils) {
return {t: 'Emph', c: ils};
- }
+ };
var Strong = function(ils) {
return {t: 'Strong', c: ils};
- }
+ };
var Str = function(s) {
return {t: 'Str', c: s};
- }
+ };
// Attempt to parse emphasis or strong emphasis.
var parseEmphasis = function(cc,inlines) {
@@ -2588,8 +2588,9 @@
this.parseBackticks([]);
break;
case C_LESSTHAN:
- this.parseAutolink([]) || this.parseHtmlTag([]) ||
+ if (!(this.parseAutolink([]) || this.parseHtmlTag([]))) {
this.pos++;
+ }
break;
case C_OPEN_BRACKET: // nested []
nest_level++;
--
cgit v1.2.3
From cd198620a44576afb0f325abd58d503eab65bf32 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sat, 4 Oct 2014 17:15:52 -0700
Subject: Further emph fallback optimizations.
---
js/stmd.js | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/js/stmd.js b/js/stmd.js
index 9a3a8c7..e227578 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2486,6 +2486,8 @@
var delims_to_match = numdelims;
var current = [];
+ var firstend;
+ var firstpos;
var state = 0;
var can_close = false;
var can_open = false;
@@ -2506,10 +2508,14 @@
} else if (numclosedelims >= 2 && delims_to_match >= 2) {
delims_to_match -= 2;
this.pos += 2;
+ firstend = current.length;
+ firstpos = this.pos;
current = [{t: 'Strong', c: current}];
} else if (numclosedelims >= 1 && delims_to_match >= 1) {
delims_to_match -= 1;
this.pos += 1;
+ firstend = current.length;
+ firstpos = this.pos;
current = [{t: 'Emph', c: current}];
} else {
if (!(this.parseInline(current,true))) {
@@ -2526,13 +2532,19 @@
}
// we didn't match emphasis: fallback
- this.pos = startpos + 1;
+ inlines.push(Str(this.subject.slice(startpos,
+ startpos + delims_to_match)));
+ if (delims_to_match < numdelims) {
+ Array.prototype.push.apply(inlines, current.slice(0,firstend));
+ this.pos = firstpos;
+ } else { // delims_to_match === numdelims
+ this.pos = startpos + delims_to_match;
+ }
+
if (last_emphasis_closer) {
this.last_emphasis_closer[c] = last_emphasis_closer;
}
- inlines.push(Str(c));
return true;
-
};
// Attempt to parse link title (sans quotes), returning the string
--
cgit v1.2.3
From 8a2b85da34e1de10abaf55b212b0660a7917b5d8 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 7 Oct 2014 09:05:27 -0700
Subject: Removed spurious 'and', reflowed.
---
spec.txt | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/spec.txt b/spec.txt
index bc2e381..c520272 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4817,9 +4817,10 @@ in Markdown:
.
-URL-escaping and should be left alone inside the destination, as all URL-escaped characters
-are also valid URL characters. HTML entities in the destination will be parsed into their UTF8
-codepoints, as usual, and optionally URL-escaped when written as HTML.
+URL-escaping should be left alone inside the destination, as all
+URL-escaped characters are also valid URL characters. HTML entities in
+the destination will be parsed into their Unicode code points, as usual, and
+optionally URL-escaped when written as HTML.
.
[link](foo%20bä)
--
cgit v1.2.3
From 4dc7bbb0c3fb1057c921dedc2f83786caaa6f0ad Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 7 Oct 2014 09:05:27 -0700
Subject: Removed spurious 'and', reflowed.
---
spec.txt | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/spec.txt b/spec.txt
index 0a62b80..990ae8c 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4816,9 +4816,10 @@ in Markdown:
.
-URL-escaping and should be left alone inside the destination, as all URL-escaped characters
-are also valid URL characters. HTML entities in the destination will be parsed into their UTF8
-codepoints, as usual, and optionally URL-escaped when written as HTML.
+URL-escaping should be left alone inside the destination, as all
+URL-escaped characters are also valid URL characters. HTML entities in
+the destination will be parsed into their Unicode code points, as usual, and
+optionally URL-escaped when written as HTML.
.
[link](foo%20bä)
--
cgit v1.2.3
From aabd412250999ecd9c1033966ddfe8a66e26972f Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 7 Oct 2014 21:31:35 -0700
Subject: Reset label_nest_level before parsing reference.
This fixes a bug with text like:
[[some unrelated text [link]
[link]: destination
See #146.
---
js/stmd.js | 1 +
1 file changed, 1 insertion(+)
diff --git a/js/stmd.js b/js/stmd.js
index e227578..bc6b2d1 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2768,6 +2768,7 @@
var parseReference = function(s, refmap) {
this.subject = s;
this.pos = 0;
+ this.label_nest_level = 0;
var rawlabel;
var dest;
var title;
--
cgit v1.2.3
From c0c33f83326927d515a973aa7afdd26bb194e0c8 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 7 Oct 2014 22:02:21 -0700
Subject: stmd.js: Fixed entityToChar, adding fromCodePoint polyfill.
Closes #151.
---
LICENSE | 25 +++++++++++++++++++++
js/stmd.js | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 94 insertions(+), 4 deletions(-)
diff --git a/LICENSE b/LICENSE
index bb8c36f..988c4b4 100644
--- a/LICENSE
+++ b/LICENSE
@@ -28,3 +28,28 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-----
+
+The polyfill for String.fromCodePoint included in stmd.js is
+Copyright Mathias Bynens
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/js/stmd.js b/js/stmd.js
index bc6b2d1..dd7876a 100755
--- a/js/stmd.js
+++ b/js/stmd.js
@@ -2223,6 +2223,71 @@
var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m;
// UTILITY FUNCTIONS
+ // polyfill for fromCodePoint:
+ // https://github.com/mathiasbynens/String.fromCodePoint
+ /*! http://mths.be/fromcodepoint v0.2.1 by @mathias */
+ if (!String.fromCodePoint) {
+ (function() {
+ var defineProperty = (function() {
+ // IE 8 only supports `Object.defineProperty` on DOM elements
+ try {
+ var object = {};
+ var $defineProperty = Object.defineProperty;
+ var result = $defineProperty(object, object, object) && $defineProperty;
+ } catch(error) {}
+ return result;
+ }());
+ var stringFromCharCode = String.fromCharCode;
+ var floor = Math.floor;
+ var fromCodePoint = function(_) {
+ var MAX_SIZE = 0x4000;
+ var codeUnits = [];
+ var highSurrogate;
+ var lowSurrogate;
+ var index = -1;
+ var length = arguments.length;
+ if (!length) {
+ return '';
+ }
+ var result = '';
+ while (++index < length) {
+ var codePoint = Number(arguments[index]);
+ if (
+ !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
+ codePoint < 0 || // not a valid Unicode code point
+ codePoint > 0x10FFFF || // not a valid Unicode code point
+ floor(codePoint) != codePoint // not an integer
+ ) {
+ return String.fromCharCode(0xFFFD);
+ }
+ if (codePoint <= 0xFFFF) { // BMP code point
+ codeUnits.push(codePoint);
+ } else { // Astral code point; split in surrogate halves
+ // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+ codePoint -= 0x10000;
+ highSurrogate = (codePoint >> 10) + 0xD800;
+ lowSurrogate = (codePoint % 0x400) + 0xDC00;
+ codeUnits.push(highSurrogate, lowSurrogate);
+ }
+ if (index + 1 == length || codeUnits.length > MAX_SIZE) {
+ result += stringFromCharCode.apply(null, codeUnits);
+ codeUnits.length = 0;
+ }
+ }
+ return result;
+ };
+ if (defineProperty) {
+ defineProperty(String, 'fromCodePoint', {
+ 'value': fromCodePoint,
+ 'configurable': true,
+ 'writable': true
+ });
+ } else {
+ String.fromCodePoint = fromCodePoint;
+ }
+ }());
+ }
+
var entityToChar = function(m) {
var isNumeric = /^/.test(m);
var isHex = /^[Xx]/.test(m);
@@ -2234,7 +2299,7 @@
} else {
num = parseInt(m.slice(2,-1), 10);
}
- uchar = String.fromCharCode(num);
+ uchar = String.fromCodePoint(num);
} else {
uchar = entities[m.slice(1,-1)];
}
@@ -2428,7 +2493,7 @@
if (cc_after === -1) {
char_after = '\n';
} else {
- char_after = String.fromCharCode(cc_after);
+ char_after = String.fromCodePoint(cc_after);
}
var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after));
@@ -2460,7 +2525,7 @@
var startpos = this.pos;
var c ;
var first_close = 0;
- c = String.fromCharCode(cc);
+ c = String.fromCodePoint(cc);
var numdelims;
var numclosedelims;
@@ -2876,7 +2941,7 @@
}
if (!res) {
this.pos += 1;
- inlines.push({t: 'Str', c: String.fromCharCode(c)});
+ inlines.push({t: 'Str', c: String.fromCodePoint(c)});
}
if (memoize) {
--
cgit v1.2.3
From 3d99baba064091f74b9da78eaed38fcf4875af46 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 7 Oct 2014 22:21:03 -0700
Subject: Adjusted tests for new js parser.
---
spec.txt | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/spec.txt b/spec.txt
index 990ae8c..db62f53 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4525,6 +4525,24 @@ __foo _bar_ baz__
foo bar baz
.
+But note:
+
+.
+*foo**bar**baz*
+.
+foobarbaz
+.
+
+.
+**foo*bar*baz**
+.
+foobarbaz**
+.
+
+The difference is that in the two preceding cases,
+the internal delimiters [can close emphasis](#can-close-emphasis),
+while in the cases with spaces, they cannot.
+
Note that you cannot nest emphasis directly inside emphasis
using the same delimeter, or strong emphasis directly inside
strong emphasis:
@@ -4606,7 +4624,7 @@ However, a string of four or more `****` can never close emphasis:
*foo****
.
-Note that there are some asymmetries here:
+We retain symmetry in these cases:
.
*foo**
@@ -4614,7 +4632,7 @@ Note that there are some asymmetries here:
**foo*
.
foo*
-**foo*
+*foo
.
.
@@ -4637,7 +4655,7 @@ More cases with mismatched delimiters:
.
***foo*
.
-***foo*
+**foo
.
.
@@ -4649,7 +4667,7 @@ More cases with mismatched delimiters:
.
***foo**
.
-***foo**
+*foo
.
.
--
cgit v1.2.3
From d3c3e749f4f7b95a9604f751cf993fd488a15b19 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 7 Oct 2014 22:24:53 -0700
Subject: Cleaned up entity section of spec.
We convert entities to unicode characters, not UTF-8 sequences.
(Though they might ultimately be output that way.)
---
spec.txt | 41 ++++++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 17 deletions(-)
diff --git a/spec.txt b/spec.txt
index db62f53..489b9c0 100644
--- a/spec.txt
+++ b/spec.txt
@@ -3727,21 +3727,25 @@ foo
## Entities
-With the goal of making this standard as HTML-agnostic as possible, all HTML valid HTML Entities in any
-context are recognized as such and converted into their actual values (i.e. the UTF8 characters representing
-the entity itself) before they are stored in the AST.
+With the goal of making this standard as HTML-agnostic as possible, all
+valid HTML entities in any context are recognized as such and
+converted into Unicode characters before they are stored in the AST.
-This allows implementations that target HTML output to trivially escape the entities when generating HTML,
-and simplifies the job of implementations targetting other languages, as these will only need to handle the
-UTF8 chars and need not be HTML-entity aware.
+This allows implementations that target HTML output to trivially escape
+the entities when generating HTML, and simplifies the job of
+implementations targeting other languages, as these will only need to
+handle the Unicode characters and need not be HTML-entity aware.
[Named entities](#name-entities) consist of `&`
-+ any of the valid HTML5 entity names + `;`. The [following document](http://www.whatwg.org/specs/web-apps/current-work/multipage/entities.json)
-is used as an authoritative source of the valid entity names and their corresponding codepoints.
++ any of the valid HTML5 entity names + `;`. The
+[following document](http://www.whatwg.org/specs/web-apps/current-work/multipage/entities.json)
+is used as an authoritative source of the valid entity names and their
+corresponding codepoints.
-Conforming implementations that target Markdown don't need to generate entities for all the valid
-named entities that exist, with the exception of `"` (`"`), `&` (`&`), `<` (`<`) and `>` (`>`),
-which always need to be written as entities for security reasons.
+Conforming implementations that target HTML don't need to generate
+entities for all the valid named entities that exist, with the exception
+of `"` (`&quot;`), `&` (`&amp;`), `<` (`&lt;`) and `>` (`&gt;`), which
+always need to be written as entities for security reasons.
.
& © Æ Ď ¾ ℋ ⅆ ∲
@@ -3750,9 +3754,10 @@ which always need to be written as entities for security reasons.
.
[Decimal entities](#decimal-entities)
-consist of `` + a string of 1--8 arabic digits + `;`. Again, these entities need to be recognised
-and tranformed into their corresponding UTF8 codepoints. Invalid Unicode codepoints will be written
-as the "unknown codepoint" character (`0xFFFD`)
+consist of `&#` + a string of 1--8 arabic digits + `;`. Again, these
+entities need to be recognised and transformed into their corresponding
+Unicode codepoints. Invalid Unicode codepoints will be written as the
+"unknown codepoint" character (`0xFFFD`).
.
# Ӓ Ϡ
@@ -3779,7 +3784,8 @@ Here are some nonentities:
.
Although HTML5 does accept some entities without a trailing semicolon
-(such as `©`), these are not recognized as entities here, because it makes the grammar too ambiguous:
+(such as `&copy`), these are not recognized as entities here, because it
+makes the grammar too ambiguous:
.
©
@@ -3787,7 +3793,8 @@ Although HTML5 does accept some entities without a trailing semicolon
©
.
-Strings that are not on the list of HTML5 named entities are not recognized as entities either:
+Strings that are not on the list of HTML5 named entities are not
+recognized as entities either:
.
&MadeUpEntity;
@@ -4836,7 +4843,7 @@ in Markdown:
URL-escaping should be left alone inside the destination, as all
URL-escaped characters are also valid URL characters. HTML entities in
-the destination will be parsed into their UTF8 codepoints, as usual, and
+the destination will be parsed into their Unicode codepoints, as usual, and
optionally URL-escaped when written as HTML.
.
--
cgit v1.2.3
From bc5b7c288d29215c585db254a203889e0dea54e2 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 7 Oct 2014 22:35:19 -0700
Subject: Removed oldtests.
---
Makefile | 8 +-
oldtests/Blockquotes/Indents.html | 12 -
oldtests/Blockquotes/Indents.markdown | 5 -
oldtests/Blockquotes/Nesting.html | 32 -
oldtests/Blockquotes/Nesting.markdown | 22 -
oldtests/Blockquotes/Separation.html | 39 -
oldtests/Blockquotes/Separation.markdown | 29 -
oldtests/Code/BlankLines.html | 33 -
oldtests/Code/BlankLines.markdown | 28 -
oldtests/Code/BlankLinesAtEnd.html | 14 -
oldtests/Code/BlankLinesAtEnd.markdown | 14 -
oldtests/Code/FenceMatching.html | 8 -
oldtests/Code/FenceMatching.markdown | 10 -
oldtests/Code/FencedCodeBlocks.html | 24 -
oldtests/Code/FencedCodeBlocks.markdown | 35 -
oldtests/Code/IndentedCodeBlocks.html | 22 -
oldtests/Code/IndentedCodeBlocks.markdown | 22 -
oldtests/Code/IndentedFences.html | 20 -
oldtests/Code/IndentedFences.markdown | 26 -
oldtests/Code/IndentedInLists.html | 22 -
oldtests/Code/IndentedInLists.markdown | 17 -
oldtests/Code/Inline.html | 13 -
oldtests/Code/Inline.markdown | 13 -
oldtests/Code/ListBreakAfter.html | 30 -
oldtests/Code/ListBreakAfter.markdown | 26 -
oldtests/Code/WhiteLines.html | 7 -
oldtests/Code/WhiteLines.markdown | 9 -
oldtests/Emphasis/Escapes.html | 1 -
oldtests/Emphasis/Escapes.markdown | 1 -
oldtests/Emphasis/NestedEmphAndStrong.html | 66 --
oldtests/Emphasis/NestedEmphAndStrong.markdown | 69 --
oldtests/Emphasis/Pathological.html | 24 -
oldtests/Emphasis/Pathological.markdown | 26 -
oldtests/Emphasis/Punctuation.html | 10 -
oldtests/Emphasis/Punctuation.markdown | 19 -
oldtests/HTML/Blocks.html | 18 -
oldtests/HTML/Blocks.markdown | 26 -
oldtests/HTML/Inline.html | 8 -
oldtests/HTML/Inline.markdown | 8 -
oldtests/HTML/UppercaseTags.html | 4 -
oldtests/HTML/UppercaseTags.markdown | 5 -
oldtests/Headers/ATX.html | 14 -
oldtests/Headers/ATX.markdown | 20 -
oldtests/Headers/Setext.html | 9 -
oldtests/Headers/Setext.markdown | 17 -
oldtests/Links/AngleBrackets.html | 3 -
oldtests/Links/AngleBrackets.markdown | 7 -
oldtests/Links/AutoLinks.html | 7 -
oldtests/Links/AutoLinks.markdown | 7 -
oldtests/Links/BackticksInLinks.html | 1 -
oldtests/Links/BackticksInLinks.markdown | 1 -
oldtests/Links/CaseInsensitiveReferences.html | 1 -
oldtests/Links/CaseInsensitiveReferences.markdown | 3 -
oldtests/Links/Entities.html | 2 -
oldtests/Links/Entities.markdown | 3 -
oldtests/Links/InlineLinks.html | 10 -
oldtests/Links/InlineLinks.markdown | 9 -
oldtests/Links/ParensInURLs.html | 6 -
oldtests/Links/ParensInURLs.markdown | 14 -
oldtests/Links/ReferenceLinks.html | 7 -
oldtests/Links/ReferenceLinks.markdown | 10 -
oldtests/Lists/CodeBlocksInLists.html | 14 -
oldtests/Lists/CodeBlocksInLists.markdown | 18 -
oldtests/Lists/ConsecutiveLists.html | 20 -
oldtests/Lists/ConsecutiveLists.markdown | 10 -
oldtests/Lists/EmptyListItem.html | 10 -
oldtests/Lists/EmptyListItem.markdown | 7 -
oldtests/Lists/InBlockquote.html | 22 -
oldtests/Lists/InBlockquote.markdown | 12 -
oldtests/Lists/Indents.html | 22 -
oldtests/Lists/Indents.markdown | 17 -
oldtests/Lists/ListsAndHRs.html | 7 -
oldtests/Lists/ListsAndHRs.markdown | 3 -
oldtests/Lists/ListsAndSetextHeaders.html | 6 -
oldtests/Lists/ListsAndSetextHeaders.markdown | 4 -
oldtests/Lists/MultipleBlankLines.html | 56 --
oldtests/Lists/MultipleBlankLines.markdown | 37 -
oldtests/Lists/Start.html | 11 -
oldtests/Lists/Start.markdown | 7 -
oldtests/Lists/Sublists.html | 49 --
oldtests/Lists/Sublists.markdown | 24 -
oldtests/Lists/TightAndLoose.html | 49 --
oldtests/Lists/TightAndLoose.markdown | 45 --
oldtests/Lists/TightLooseBlockquote.html | 32 -
oldtests/Lists/TightLooseBlockquote.markdown | 25 -
oldtests/Lists/TightLooseMore.html | 7 -
oldtests/Lists/TightLooseMore.markdown | 4 -
oldtests/Lists/TwoBlankLinesEndList.html | 21 -
oldtests/Lists/TwoBlankLinesEndList.markdown | 20 -
oldtests/Makefile | 55 --
oldtests/Misc/BackslashEscapes.html | 14 -
oldtests/Misc/BackslashEscapes.markdown | 19 -
oldtests/Misc/Laziness.html | 22 -
oldtests/Misc/Laziness.markdown | 14 -
oldtests/Misc/LineBreaks.html | 11 -
oldtests/Misc/LineBreaks.markdown | 18 -
oldtests/Misc/Transitions.html | 26 -
oldtests/Misc/Transitions.markdown | 20 -
oldtests/Original/Amps_and_angle_encoding.html | 9 -
oldtests/Original/Amps_and_angle_encoding.markdown | 21 -
oldtests/Original/Auto_links.html | 13 -
oldtests/Original/Auto_links.markdown | 13 -
oldtests/Original/Backslash_escapes.html | 75 --
oldtests/Original/Backslash_escapes.markdown | 120 ---
.../Original/Blockquotes_with_code_blocks.html | 12 -
.../Original/Blockquotes_with_code_blocks.markdown | 11 -
oldtests/Original/Code_Blocks.html | 12 -
oldtests/Original/Code_Blocks.markdown | 14 -
oldtests/Original/Code_Spans.html | 3 -
oldtests/Original/Code_Spans.markdown | 5 -
oldtests/Original/Horizontal_rules.html | 39 -
oldtests/Original/Horizontal_rules.markdown | 67 --
oldtests/Original/Images.html | 11 -
oldtests/Original/Images.markdown | 26 -
oldtests/Original/Inline_HTML_Advanced.html | 23 -
oldtests/Original/Inline_HTML_Advanced.markdown | 30 -
oldtests/Original/Inline_HTML_Simple.html | 45 --
oldtests/Original/Inline_HTML_Simple.markdown | 69 --
oldtests/Original/Inline_HTML_comments.html | 8 -
oldtests/Original/Inline_HTML_comments.markdown | 13 -
oldtests/Original/Links_inline_style.html | 12 -
oldtests/Original/Links_inline_style.markdown | 24 -
oldtests/Original/Links_reference_style.html | 28 -
oldtests/Original/Links_reference_style.markdown | 71 --
oldtests/Original/Links_shortcut_references.html | 6 -
.../Original/Links_shortcut_references.markdown | 20 -
oldtests/Original/Literal_quotes_in_titles.html | 2 -
.../Original/Literal_quotes_in_titles.markdown | 7 -
.../Original/Markdown_Documentation_Basics.html | 242 ------
.../Markdown_Documentation_Basics.markdown | 306 -------
.../Original/Markdown_Documentation_Syntax.html | 708 ----------------
.../Markdown_Documentation_Syntax.markdown | 888 ---------------------
oldtests/Original/Nested_blockquotes.html | 7 -
oldtests/Original/Nested_blockquotes.markdown | 5 -
oldtests/Original/Ordered_and_unordered_lists.html | 112 ---
.../Original/Ordered_and_unordered_lists.markdown | 131 ---
oldtests/Original/README | 15 -
oldtests/Original/Strong_and_em_together.html | 4 -
oldtests/Original/Strong_and_em_together.markdown | 7 -
oldtests/Original/Tabs.html | 19 -
oldtests/Original/Tabs.markdown | 21 -
oldtests/Original/Tidyness.html | 8 -
oldtests/Original/Tidyness.markdown | 5 -
oldtests/Tabs/TabConversionUnicode.html | 1 -
oldtests/Tabs/TabConversionUnicode.markdown | 1 -
145 files changed, 3 insertions(+), 5020 deletions(-)
delete mode 100644 oldtests/Blockquotes/Indents.html
delete mode 100644 oldtests/Blockquotes/Indents.markdown
delete mode 100644 oldtests/Blockquotes/Nesting.html
delete mode 100644 oldtests/Blockquotes/Nesting.markdown
delete mode 100644 oldtests/Blockquotes/Separation.html
delete mode 100644 oldtests/Blockquotes/Separation.markdown
delete mode 100644 oldtests/Code/BlankLines.html
delete mode 100644 oldtests/Code/BlankLines.markdown
delete mode 100644 oldtests/Code/BlankLinesAtEnd.html
delete mode 100644 oldtests/Code/BlankLinesAtEnd.markdown
delete mode 100644 oldtests/Code/FenceMatching.html
delete mode 100644 oldtests/Code/FenceMatching.markdown
delete mode 100644 oldtests/Code/FencedCodeBlocks.html
delete mode 100644 oldtests/Code/FencedCodeBlocks.markdown
delete mode 100644 oldtests/Code/IndentedCodeBlocks.html
delete mode 100644 oldtests/Code/IndentedCodeBlocks.markdown
delete mode 100644 oldtests/Code/IndentedFences.html
delete mode 100644 oldtests/Code/IndentedFences.markdown
delete mode 100644 oldtests/Code/IndentedInLists.html
delete mode 100644 oldtests/Code/IndentedInLists.markdown
delete mode 100644 oldtests/Code/Inline.html
delete mode 100644 oldtests/Code/Inline.markdown
delete mode 100644 oldtests/Code/ListBreakAfter.html
delete mode 100644 oldtests/Code/ListBreakAfter.markdown
delete mode 100644 oldtests/Code/WhiteLines.html
delete mode 100644 oldtests/Code/WhiteLines.markdown
delete mode 100644 oldtests/Emphasis/Escapes.html
delete mode 100644 oldtests/Emphasis/Escapes.markdown
delete mode 100644 oldtests/Emphasis/NestedEmphAndStrong.html
delete mode 100644 oldtests/Emphasis/NestedEmphAndStrong.markdown
delete mode 100644 oldtests/Emphasis/Pathological.html
delete mode 100644 oldtests/Emphasis/Pathological.markdown
delete mode 100644 oldtests/Emphasis/Punctuation.html
delete mode 100644 oldtests/Emphasis/Punctuation.markdown
delete mode 100644 oldtests/HTML/Blocks.html
delete mode 100644 oldtests/HTML/Blocks.markdown
delete mode 100644 oldtests/HTML/Inline.html
delete mode 100644 oldtests/HTML/Inline.markdown
delete mode 100644 oldtests/HTML/UppercaseTags.html
delete mode 100644 oldtests/HTML/UppercaseTags.markdown
delete mode 100644 oldtests/Headers/ATX.html
delete mode 100644 oldtests/Headers/ATX.markdown
delete mode 100644 oldtests/Headers/Setext.html
delete mode 100644 oldtests/Headers/Setext.markdown
delete mode 100644 oldtests/Links/AngleBrackets.html
delete mode 100644 oldtests/Links/AngleBrackets.markdown
delete mode 100644 oldtests/Links/AutoLinks.html
delete mode 100644 oldtests/Links/AutoLinks.markdown
delete mode 100644 oldtests/Links/BackticksInLinks.html
delete mode 100644 oldtests/Links/BackticksInLinks.markdown
delete mode 100644 oldtests/Links/CaseInsensitiveReferences.html
delete mode 100644 oldtests/Links/CaseInsensitiveReferences.markdown
delete mode 100644 oldtests/Links/Entities.html
delete mode 100644 oldtests/Links/Entities.markdown
delete mode 100644 oldtests/Links/InlineLinks.html
delete mode 100644 oldtests/Links/InlineLinks.markdown
delete mode 100644 oldtests/Links/ParensInURLs.html
delete mode 100644 oldtests/Links/ParensInURLs.markdown
delete mode 100644 oldtests/Links/ReferenceLinks.html
delete mode 100644 oldtests/Links/ReferenceLinks.markdown
delete mode 100644 oldtests/Lists/CodeBlocksInLists.html
delete mode 100644 oldtests/Lists/CodeBlocksInLists.markdown
delete mode 100644 oldtests/Lists/ConsecutiveLists.html
delete mode 100644 oldtests/Lists/ConsecutiveLists.markdown
delete mode 100644 oldtests/Lists/EmptyListItem.html
delete mode 100644 oldtests/Lists/EmptyListItem.markdown
delete mode 100644 oldtests/Lists/InBlockquote.html
delete mode 100644 oldtests/Lists/InBlockquote.markdown
delete mode 100644 oldtests/Lists/Indents.html
delete mode 100644 oldtests/Lists/Indents.markdown
delete mode 100644 oldtests/Lists/ListsAndHRs.html
delete mode 100644 oldtests/Lists/ListsAndHRs.markdown
delete mode 100644 oldtests/Lists/ListsAndSetextHeaders.html
delete mode 100644 oldtests/Lists/ListsAndSetextHeaders.markdown
delete mode 100644 oldtests/Lists/MultipleBlankLines.html
delete mode 100644 oldtests/Lists/MultipleBlankLines.markdown
delete mode 100644 oldtests/Lists/Start.html
delete mode 100644 oldtests/Lists/Start.markdown
delete mode 100644 oldtests/Lists/Sublists.html
delete mode 100644 oldtests/Lists/Sublists.markdown
delete mode 100644 oldtests/Lists/TightAndLoose.html
delete mode 100644 oldtests/Lists/TightAndLoose.markdown
delete mode 100644 oldtests/Lists/TightLooseBlockquote.html
delete mode 100644 oldtests/Lists/TightLooseBlockquote.markdown
delete mode 100644 oldtests/Lists/TightLooseMore.html
delete mode 100644 oldtests/Lists/TightLooseMore.markdown
delete mode 100644 oldtests/Lists/TwoBlankLinesEndList.html
delete mode 100644 oldtests/Lists/TwoBlankLinesEndList.markdown
delete mode 100644 oldtests/Makefile
delete mode 100644 oldtests/Misc/BackslashEscapes.html
delete mode 100644 oldtests/Misc/BackslashEscapes.markdown
delete mode 100644 oldtests/Misc/Laziness.html
delete mode 100644 oldtests/Misc/Laziness.markdown
delete mode 100644 oldtests/Misc/LineBreaks.html
delete mode 100644 oldtests/Misc/LineBreaks.markdown
delete mode 100644 oldtests/Misc/Transitions.html
delete mode 100644 oldtests/Misc/Transitions.markdown
delete mode 100644 oldtests/Original/Amps_and_angle_encoding.html
delete mode 100644 oldtests/Original/Amps_and_angle_encoding.markdown
delete mode 100644 oldtests/Original/Auto_links.html
delete mode 100644 oldtests/Original/Auto_links.markdown
delete mode 100644 oldtests/Original/Backslash_escapes.html
delete mode 100644 oldtests/Original/Backslash_escapes.markdown
delete mode 100644 oldtests/Original/Blockquotes_with_code_blocks.html
delete mode 100644 oldtests/Original/Blockquotes_with_code_blocks.markdown
delete mode 100644 oldtests/Original/Code_Blocks.html
delete mode 100644 oldtests/Original/Code_Blocks.markdown
delete mode 100644 oldtests/Original/Code_Spans.html
delete mode 100644 oldtests/Original/Code_Spans.markdown
delete mode 100644 oldtests/Original/Horizontal_rules.html
delete mode 100644 oldtests/Original/Horizontal_rules.markdown
delete mode 100644 oldtests/Original/Images.html
delete mode 100644 oldtests/Original/Images.markdown
delete mode 100644 oldtests/Original/Inline_HTML_Advanced.html
delete mode 100644 oldtests/Original/Inline_HTML_Advanced.markdown
delete mode 100644 oldtests/Original/Inline_HTML_Simple.html
delete mode 100644 oldtests/Original/Inline_HTML_Simple.markdown
delete mode 100644 oldtests/Original/Inline_HTML_comments.html
delete mode 100644 oldtests/Original/Inline_HTML_comments.markdown
delete mode 100644 oldtests/Original/Links_inline_style.html
delete mode 100644 oldtests/Original/Links_inline_style.markdown
delete mode 100644 oldtests/Original/Links_reference_style.html
delete mode 100644 oldtests/Original/Links_reference_style.markdown
delete mode 100644 oldtests/Original/Links_shortcut_references.html
delete mode 100644 oldtests/Original/Links_shortcut_references.markdown
delete mode 100644 oldtests/Original/Literal_quotes_in_titles.html
delete mode 100644 oldtests/Original/Literal_quotes_in_titles.markdown
delete mode 100644 oldtests/Original/Markdown_Documentation_Basics.html
delete mode 100644 oldtests/Original/Markdown_Documentation_Basics.markdown
delete mode 100644 oldtests/Original/Markdown_Documentation_Syntax.html
delete mode 100644 oldtests/Original/Markdown_Documentation_Syntax.markdown
delete mode 100644 oldtests/Original/Nested_blockquotes.html
delete mode 100644 oldtests/Original/Nested_blockquotes.markdown
delete mode 100644 oldtests/Original/Ordered_and_unordered_lists.html
delete mode 100644 oldtests/Original/Ordered_and_unordered_lists.markdown
delete mode 100644 oldtests/Original/README
delete mode 100644 oldtests/Original/Strong_and_em_together.html
delete mode 100644 oldtests/Original/Strong_and_em_together.markdown
delete mode 100644 oldtests/Original/Tabs.html
delete mode 100644 oldtests/Original/Tabs.markdown
delete mode 100644 oldtests/Original/Tidyness.html
delete mode 100644 oldtests/Original/Tidyness.markdown
delete mode 100644 oldtests/Tabs/TabConversionUnicode.html
delete mode 100644 oldtests/Tabs/TabConversionUnicode.markdown
diff --git a/Makefile b/Makefile
index 671d30d..8d35b9d 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ DATADIR?=data
PROG?=./stmd
-.PHONY: all oldtests test spec benchjs testjs
+.PHONY: all test spec benchjs testjs
all: $(SRCDIR)/case_fold_switch.inc $(PROG)
README.html: README.md template.html
@@ -28,9 +28,6 @@ spec.pdf: spec.md template.tex specfilter.hs
--number-sections -V documentclass=report -V tocdepth=2 \
-V classoption=twosides
-oldtests:
- make -C oldtests --quiet clean all
-
test: spec.txt
perl runtests.pl $< $(PROG)
@@ -63,7 +60,8 @@ dingus:
cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000
leakcheck: $(PROG)
- cat oldtests/*/*.markdown | valgrind --leak-check=full --dsymutil=yes $(PROG)
+ # TODO produce leaktest.md that tests everything
+ cat leaktest.md | valgrind --leak-check=full --dsymutil=yes $(PROG)
operf: $(PROG)
operf $(PROG) /dev/null
diff --git a/oldtests/Blockquotes/Indents.html b/oldtests/Blockquotes/Indents.html
deleted file mode 100644
index fd98ee8..0000000
--- a/oldtests/Blockquotes/Indents.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
-one
-blockquote
-
-
-
-
-triply nested
-triply nested
-
-
-
diff --git a/oldtests/Blockquotes/Indents.markdown b/oldtests/Blockquotes/Indents.markdown
deleted file mode 100644
index f9342ff..0000000
--- a/oldtests/Blockquotes/Indents.markdown
+++ /dev/null
@@ -1,5 +0,0 @@
-> one
- > blockquote
-
->>> triply nested
- > > > triply nested
diff --git a/oldtests/Blockquotes/Nesting.html b/oldtests/Blockquotes/Nesting.html
deleted file mode 100644
index f40e999..0000000
--- a/oldtests/Blockquotes/Nesting.html
+++ /dev/null
@@ -1,32 +0,0 @@
-These are all equivalent:
-
-
-nested
-blockquote
-
-
-
-
-nested
-blockquote
-
-
-
-
-nested
-blockquote
-
-
-
-
-nested
-blockquote
-
-
-This is not:
-
-nested
-
-blockquote
-
-
diff --git a/oldtests/Blockquotes/Nesting.markdown b/oldtests/Blockquotes/Nesting.markdown
deleted file mode 100644
index 3d67843..0000000
--- a/oldtests/Blockquotes/Nesting.markdown
+++ /dev/null
@@ -1,22 +0,0 @@
-These are all equivalent:
-
-> > nested
-> > blockquote
-
-
->> nested
->> blockquote
-
-
-> > nested
-blockquote
-
-
-> > nested
-> blockquote
-
-
-This is not:
-
-> nested
-> > blockquote
diff --git a/oldtests/Blockquotes/Separation.html b/oldtests/Blockquotes/Separation.html
deleted file mode 100644
index 910d545..0000000
--- a/oldtests/Blockquotes/Separation.html
+++ /dev/null
@@ -1,39 +0,0 @@
-One blockquote, two paragraphs:
-
-one
-two
-
-Two blockquotes:
-
-one
-
-
-two
-
-Nested blockquote, two paragraphs:
-
-
-one
-two
-
-
-Nested blockquote, two blockquotes:
-
-
-one
-
-
-two
-
-
-Two nested blockquotes:
-
-
-one
-
-
-
-
-two
-
-
diff --git a/oldtests/Blockquotes/Separation.markdown b/oldtests/Blockquotes/Separation.markdown
deleted file mode 100644
index 823d865..0000000
--- a/oldtests/Blockquotes/Separation.markdown
+++ /dev/null
@@ -1,29 +0,0 @@
-One blockquote, two paragraphs:
-
-> one
->
-> two
-
-Two blockquotes:
-
-> one
-
-> two
-
-Nested blockquote, two paragraphs:
-
-> > one
-> >
-> > two
-
-Nested blockquote, two blockquotes:
-
-> > one
->
-> > two
-
-Two nested blockquotes:
-
-> > one
-
-> > two
diff --git a/oldtests/Code/BlankLines.html b/oldtests/Code/BlankLines.html
deleted file mode 100644
index ae0abf7..0000000
--- a/oldtests/Code/BlankLines.html
+++ /dev/null
@@ -1,33 +0,0 @@
-foo
-
-
-
-bar
-
-
-foo
-
-
-
-bar
-
-
-foo
-
-
-
-bar
-
-
-One
-CodeA
-
-CodeB
-
-Two
-CodeA
-
-
-
-- One
-
diff --git a/oldtests/Code/BlankLines.markdown b/oldtests/Code/BlankLines.markdown
deleted file mode 100644
index b0d5a0c..0000000
--- a/oldtests/Code/BlankLines.markdown
+++ /dev/null
@@ -1,28 +0,0 @@
- foo
-
-
-
- bar
-> foo
->
->
->
-> bar
- foo
-
-
-
- bar
-
-1. One
-
- CodeA
-
- CodeB
-
-2. Two
-
- CodeA
-
-
-1. One
diff --git a/oldtests/Code/BlankLinesAtEnd.html b/oldtests/Code/BlankLinesAtEnd.html
deleted file mode 100644
index ac803d9..0000000
--- a/oldtests/Code/BlankLinesAtEnd.html
+++ /dev/null
@@ -1,14 +0,0 @@
-
-List
-code
-
-
-
-- one
-- two
-
-
-one
-not code
-two
-
diff --git a/oldtests/Code/BlankLinesAtEnd.markdown b/oldtests/Code/BlankLinesAtEnd.markdown
deleted file mode 100644
index 55879ae..0000000
--- a/oldtests/Code/BlankLinesAtEnd.markdown
+++ /dev/null
@@ -1,14 +0,0 @@
-* List
-
- code
-
-
- * one
- * two
-
-
-
-* one
- not code
-
-* two
diff --git a/oldtests/Code/FenceMatching.html b/oldtests/Code/FenceMatching.html
deleted file mode 100644
index 4c7468e..0000000
--- a/oldtests/Code/FenceMatching.html
+++ /dev/null
@@ -1,8 +0,0 @@
-```
-
-
-`````
-
-````
-
-
diff --git a/oldtests/Code/FenceMatching.markdown b/oldtests/Code/FenceMatching.markdown
deleted file mode 100644
index d86169a..0000000
--- a/oldtests/Code/FenceMatching.markdown
+++ /dev/null
@@ -1,10 +0,0 @@
-````abc
-```
-````
-``````blah
-
-`````
-
-````
-
-```````````
diff --git a/oldtests/Code/FencedCodeBlocks.html b/oldtests/Code/FencedCodeBlocks.html
deleted file mode 100644
index 4813d72..0000000
--- a/oldtests/Code/FencedCodeBlocks.html
+++ /dev/null
@@ -1,24 +0,0 @@
-This is a fenced code block:
-pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-Here is one with tildes:
-pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-More metadata:
-pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-More backticks:
-pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-backticks :: String
-backticks = "`````"
-
-Without an end:
-code with
-no end
-
-
diff --git a/oldtests/Code/FencedCodeBlocks.markdown b/oldtests/Code/FencedCodeBlocks.markdown
deleted file mode 100644
index 6ccc6be..0000000
--- a/oldtests/Code/FencedCodeBlocks.markdown
+++ /dev/null
@@ -1,35 +0,0 @@
-This is a fenced code block:
-```haskell
-pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-```
-Here is one with tildes:
-
-~~~ haskell
-pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-~~~
-
-More metadata:
-
-```haskell numberLines start=50
-pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-```
-
-More backticks:
-
-```````` haskell
-pairs :: [(Int,Char)]
-pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
-
-backticks :: String
-backticks = "`````"
-`````````````
-
-Without an end:
-
-```
-code with
-no end
-
diff --git a/oldtests/Code/IndentedCodeBlocks.html b/oldtests/Code/IndentedCodeBlocks.html
deleted file mode 100644
index 0b9b7e7..0000000
--- a/oldtests/Code/IndentedCodeBlocks.html
+++ /dev/null
@@ -1,22 +0,0 @@
-Indented code with two space indent in first and last line:
- two spaces *hello*
-{ more }
-
- and
-
-Indented code requires a leading/trailing blank line:
-quick-command --option "$*"
-Indented code does not require a trailing blank line:
-code
-
-and not code.
-Code in blockquote:
-
-code
-
-
-Code in list:
-
-code
-
-
diff --git a/oldtests/Code/IndentedCodeBlocks.markdown b/oldtests/Code/IndentedCodeBlocks.markdown
deleted file mode 100644
index 2a99db0..0000000
--- a/oldtests/Code/IndentedCodeBlocks.markdown
+++ /dev/null
@@ -1,22 +0,0 @@
-Indented code with two space indent in first and last line:
-
- two spaces *hello*
- { more }
-
- and
-
-Indented code requires a leading/trailing blank line:
- quick-command --option "$*"
-
-Indented code does not require a trailing blank line:
-
- code
-and not code.
-
-Code in blockquote:
-
-> code
-
-Code in list:
-
-1. code
diff --git a/oldtests/Code/IndentedFences.html b/oldtests/Code/IndentedFences.html
deleted file mode 100644
index 66e76da..0000000
--- a/oldtests/Code/IndentedFences.html
+++ /dev/null
@@ -1,20 +0,0 @@
-a
-
-z
-
-a
-a
-a
- a
-
-
-foo
- Hello
-
-World
-
-
-
-a
-
-
diff --git a/oldtests/Code/IndentedFences.markdown b/oldtests/Code/IndentedFences.markdown
deleted file mode 100644
index 098545f..0000000
--- a/oldtests/Code/IndentedFences.markdown
+++ /dev/null
@@ -1,26 +0,0 @@
- ```
- a
- ```
-
- ```
-z
-```
-
- ```
-a
- a
- a
- a
- ```
-
-* foo
-
- ```
- Hello
-
- World
- ```
-
-> ```
->a
->```
diff --git a/oldtests/Code/IndentedInLists.html b/oldtests/Code/IndentedInLists.html
deleted file mode 100644
index 76ed424..0000000
--- a/oldtests/Code/IndentedInLists.html
+++ /dev/null
@@ -1,22 +0,0 @@
-
-code starts here
-
-
-
-foo
-code starts here
-
-foo
-code starts here
-
-
-
-foo
-code starts here
-
-
-foo
-code starts here
-
-
-
diff --git a/oldtests/Code/IndentedInLists.markdown b/oldtests/Code/IndentedInLists.markdown
deleted file mode 100644
index 54e1af1..0000000
--- a/oldtests/Code/IndentedInLists.markdown
+++ /dev/null
@@ -1,17 +0,0 @@
-- code starts here
-
-1. foo
-
- code starts here
-
-2. foo
-
- code starts here
-
-- foo
-
- code starts here
-
- - foo
-
- code starts here
diff --git a/oldtests/Code/Inline.html b/oldtests/Code/Inline.html
deleted file mode 100644
index 9c52790..0000000
--- a/oldtests/Code/Inline.html
+++ /dev/null
@@ -1,13 +0,0 @@
-All of these are equivalent:
-
-*hi*
-*hi*
-*hi*
-*hi*
-*hi*
-
-Backticks in code spans:
-
-``code``
-``code``
-
diff --git a/oldtests/Code/Inline.markdown b/oldtests/Code/Inline.markdown
deleted file mode 100644
index 38e5b0c..0000000
--- a/oldtests/Code/Inline.markdown
+++ /dev/null
@@ -1,13 +0,0 @@
-All of these are equivalent:
-
-- `*hi*`
-- ` *hi* `
-- ``*hi* ``
-- ````*hi*````
-- `*hi*
- `
-
-Backticks in code spans:
-
-- ``` ``code`` ```
-- ` ``code`` `
diff --git a/oldtests/Code/ListBreakAfter.html b/oldtests/Code/ListBreakAfter.html
deleted file mode 100644
index 29d6d5e..0000000
--- a/oldtests/Code/ListBreakAfter.html
+++ /dev/null
@@ -1,30 +0,0 @@
-
-foo
-
-bar
-code1
-code2
-
-code?
-
-foo
-
-bar
-code1
-code2
-
-
-
-code?
-
-
-- foo
-
-bar
-code1
-code2
-
-
-
-code?
-
diff --git a/oldtests/Code/ListBreakAfter.markdown b/oldtests/Code/ListBreakAfter.markdown
deleted file mode 100644
index 4fa79f1..0000000
--- a/oldtests/Code/ListBreakAfter.markdown
+++ /dev/null
@@ -1,26 +0,0 @@
-* foo
- * bar
-
- code1
- code2
-
- code?
-
-* foo
- * bar
-
- code1
- code2
-
-
- code?
-
-* foo
- * bar
-
- code1
- code2
-
-
-
- code?
diff --git a/oldtests/Code/WhiteLines.html b/oldtests/Code/WhiteLines.html
deleted file mode 100644
index 7fa137f..0000000
--- a/oldtests/Code/WhiteLines.html
+++ /dev/null
@@ -1,7 +0,0 @@
-ABC
-
-
-
-DEF
-
-GHI
diff --git a/oldtests/Code/WhiteLines.markdown b/oldtests/Code/WhiteLines.markdown
deleted file mode 100644
index ea17af7..0000000
--- a/oldtests/Code/WhiteLines.markdown
+++ /dev/null
@@ -1,9 +0,0 @@
- ABC
-
-
-
- DEF
-
-
-
-GHI
diff --git a/oldtests/Emphasis/Escapes.html b/oldtests/Emphasis/Escapes.html
deleted file mode 100644
index 17c9e2d..0000000
--- a/oldtests/Emphasis/Escapes.html
+++ /dev/null
@@ -1 +0,0 @@
-hi* there
diff --git a/oldtests/Emphasis/Escapes.markdown b/oldtests/Emphasis/Escapes.markdown
deleted file mode 100644
index 4f14698..0000000
--- a/oldtests/Emphasis/Escapes.markdown
+++ /dev/null
@@ -1 +0,0 @@
-*hi\* there*
\ No newline at end of file
diff --git a/oldtests/Emphasis/NestedEmphAndStrong.html b/oldtests/Emphasis/NestedEmphAndStrong.html
deleted file mode 100644
index b41b527..0000000
--- a/oldtests/Emphasis/NestedEmphAndStrong.html
+++ /dev/null
@@ -1,66 +0,0 @@
-
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-- test test
-
-Incorrect nesting:
-
-- *test test* test
-- _test test_ test
-- **test test* test*
-- __test␣test_␣test_
-- test test test
-- test test test
-- test test test
-- test test test
-
-No emphasis:
-
-- test* test *test
-- test** test **test
-- test_ test _test
-- test__ test __test
-
-Middle-word emphasis (asterisks):
-
-- ab
-- ab
-- abc
-- ab
-- ab
-- abc
-
-Middle-word emphasis (underscore):
-
-- _a_b
-- a_b_
-- a_b_c
-- __a__b
-- a__b__
-- a__b__c
-- my_precious_file.txt
-
-Tricky Cases:
-
-- E**. Test TestTestTest
-- E**. Test Test Test Test
-
-Overlong emphasis:
-Name: ____________
-Organization: ____
-Region/Country: __
-_____Cut here_____
-____Cut here____
diff --git a/oldtests/Emphasis/NestedEmphAndStrong.markdown b/oldtests/Emphasis/NestedEmphAndStrong.markdown
deleted file mode 100644
index ec7da25..0000000
--- a/oldtests/Emphasis/NestedEmphAndStrong.markdown
+++ /dev/null
@@ -1,69 +0,0 @@
-1. ***test test***
-2. ___test test___
-3. *test **test***
-4. **test *test***
-5. ***test* test**
-6. ***test** test*
-7. ***test* test**
-8. **test *test***
-9. *test **test***
-10. _test __test___
-11. __test _test___
-12. ___test_ test__
-13. ___test__ test_
-14. ___test_ test__
-15. __test _test___
-16. _test __test___
-
-Incorrect nesting:
-
-1. *test **test* test**
-2. _test __test_ test__
-3. **test *test** test*
-4. __test _test__ test_
-5. *test *test* test*
-6. _test _test_ test_
-7. **test **test** test**
-8. __test __test__ test__
-
-No emphasis:
-
-1. test* test *test
-2. test** test **test
-3. test_ test _test
-4. test__ test __test
-
-Middle-word emphasis (asterisks):
-
-1. *a*b
-2. a*b*
-3. a*b*c
-4. **a**b
-5. a**b**
-6. a**b**c
-
-Middle-word emphasis (underscore):
-
-1. _a_b
-2. a_b_
-3. a_b_c
-4. __a__b
-5. a__b__
-6. a__b__c
-7. my_precious_file.txt
-
-Tricky Cases:
-
-1. E**. **Test** TestTestTest
-2. E**. **Test** Test Test Test
-
-Overlong emphasis:
-
-Name: ____________
-Organization: ____
-Region/Country: __
-
-_____Cut here_____
-
-____Cut here____
-
diff --git a/oldtests/Emphasis/Pathological.html b/oldtests/Emphasis/Pathological.html
deleted file mode 100644
index 37eb9fa..0000000
--- a/oldtests/Emphasis/Pathological.html
+++ /dev/null
@@ -1,24 +0,0 @@
-This input can take a long time to parse in some implementations.
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-aaaaa
-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa**
diff --git a/oldtests/Emphasis/Pathological.markdown b/oldtests/Emphasis/Pathological.markdown
deleted file mode 100644
index 5deb95e..0000000
--- a/oldtests/Emphasis/Pathological.markdown
+++ /dev/null
@@ -1,26 +0,0 @@
-This input can take a long time to parse in some implementations.
-
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-*a
-aaaaa
-
-*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**
diff --git a/oldtests/Emphasis/Punctuation.html b/oldtests/Emphasis/Punctuation.html
deleted file mode 100644
index 6061b81..0000000
--- a/oldtests/Emphasis/Punctuation.html
+++ /dev/null
@@ -1,10 +0,0 @@
-Here is a _ that is cool.
-Foo.
-Foo.
-Foo.
-Foo.
-Foo.
-Foo.
-Foo. Foo? Foo! Foo: Foo; (Foo)
-Foo. Foo? Foo! Foo: Foo; (Foo)
-Foo. Foo? Foo! Foo: Foo; (Foo)
diff --git a/oldtests/Emphasis/Punctuation.markdown b/oldtests/Emphasis/Punctuation.markdown
deleted file mode 100644
index e3f23b8..0000000
--- a/oldtests/Emphasis/Punctuation.markdown
+++ /dev/null
@@ -1,19 +0,0 @@
-Here is a _ that is _cool_.
-
-_Foo._
-
-__Foo.__
-
-___Foo.___
-
-_Foo_.
-
-__Foo__.
-
-___Foo___.
-
-_Foo_. _Foo_? _Foo_! _Foo_: _Foo_; (_Foo_)
-
-__Foo__. __Foo__? __Foo__! __Foo__: __Foo__; (__Foo__)
-
-___Foo___. ___Foo___? ___Foo___! ___Foo___: ___Foo___; (___Foo___)
diff --git a/oldtests/HTML/Blocks.html b/oldtests/HTML/Blocks.html
deleted file mode 100644
index dc80335..0000000
--- a/oldtests/HTML/Blocks.html
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
- *raw html*
-
-
-
-
-this is markdown
-
-
-
-
-
-* raw html with trailing space
-
diff --git a/oldtests/HTML/Blocks.markdown b/oldtests/HTML/Blocks.markdown
deleted file mode 100644
index a83fa66..0000000
--- a/oldtests/HTML/Blocks.markdown
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
- *raw html*
-
-
-
-
-
-
-*this is markdown*
-
-
-
-
-
-
-
-
-
-* raw html with trailing space
-
diff --git a/oldtests/HTML/Inline.html b/oldtests/HTML/Inline.html
deleted file mode 100644
index 94d40ac..0000000
--- a/oldtests/HTML/Inline.html
+++ /dev/null
@@ -1,8 +0,0 @@
-hi
-hi
-
diff --git a/oldtests/HTML/Inline.markdown b/oldtests/HTML/Inline.markdown
deleted file mode 100644
index 2259421..0000000
--- a/oldtests/HTML/Inline.markdown
+++ /dev/null
@@ -1,8 +0,0 @@
-hi
-`hi`
-