From e216094e2192c05ddbd0988458eb8c0012e7baf8 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 01:10:54 +0200 Subject: lol --- src/inlines.c | 1711 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 908 insertions(+), 803 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index f75c846..4ff45ad 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -2,133 +2,154 @@ #include #include #include -#include "bstrlib.h" +#include + #include "stmd.h" #include "uthash.h" #include "debug.h" #include "scanners.h" #include "utf8.h" +typedef struct Subject { + const gh_buf *buffer; + int pos; + reference** reference_map; + int label_nestlevel; +} subject; + +reference* lookup_reference(reference** refmap, chunk *label); +reference* make_reference(chunk *label, chunk *url, chunk *title); + +static unsigned char *clean_url(chunk *url); +static unsigned char *clean_title(chunk *title); + +inline static unsigned char *chunk_to_cstr(chunk *c); +inline static void chunk_free(chunk *c); +inline static void chunk_trim(chunk *c); + +inline static chunk chunk_literal(const char *data); +inline static chunk chunk_buf_detach(gh_buf *buf); +inline static chunk chunk_buf(const gh_buf *buf, int pos, int len); + +static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); +static int parse_inline(subject* subj, inl ** last); + extern void free_reference(reference *ref) { - bdestroy(ref->label); - bdestroy(ref->url); - bdestroy(ref->title); - free(ref); + free(ref->label); + free(ref->url); + free(ref->title); + free(ref); } extern void free_reference_map(reference **refmap) { - /* free the hash table contents */ - reference *s; - reference *tmp; - if (refmap != NULL) { - HASH_ITER(hh, *refmap, s, tmp) { - HASH_DEL(*refmap, s); - free_reference(s); - } - free(refmap); - } + /* free the hash table contents */ + reference *s; + reference *tmp; + if 
(refmap != NULL) { + HASH_ITER(hh, *refmap, s, tmp) { + HASH_DEL(*refmap, s); + free_reference(s); + } + free(refmap); + } } // normalize reference: collapse internal whitespace to single space, // remove leading/trailing whitespace, case fold -static bstring normalize_reference(bstring s) -{ - bstring normalized = case_fold(s); - int pos = 0; - int startpos; - char c; - while ((c = bchar(normalized, pos))) { - if (isspace(c)) { - startpos = pos; - // skip til next non-space - pos++; - while (isspace(bchar(s, pos))) { - pos++; - } - bdelete(normalized, startpos, pos - startpos); - binsertch(normalized, startpos, 1, ' '); - pos = startpos + 1; - } - pos++; - } - btrimws(normalized); - return normalized; +static unsigned char *normalize_reference(chunk *ref) +{ + gh_buf normalized = GH_BUF_INIT; + int r, w; + + utf8proc_case_fold(&normalized, ref->data, ref->len); + gh_buf_trim(&normalized); + + for (r = 0, w = 0; r < normalized.size; ++r) { + if (r && gh_buf_at(&normalized, r - 1) == ' ') { + while (gh_buf_at(&normalized, r) == ' ') + r++; + } + + normalized.ptr[w++] = normalized.ptr[r]; + } + + return gh_buf_detach(&normalized); } // Returns reference if refmap contains a reference with matching // label, otherwise NULL. 
-extern reference* lookup_reference(reference** refmap, bstring lab) +extern reference* lookup_reference(reference** refmap, chunk *label) { - reference * ref = NULL; - bstring label = normalize_reference(lab); - if (refmap != NULL) { - HASH_FIND_STR(*refmap, (char*) label->data, ref); - } - bdestroy(label); - return ref; + reference *ref = NULL; + unsigned char *norm = normalize_reference(label); + if (refmap != NULL) { + HASH_FIND_STR(*refmap, (char*)norm, ref); + } + free(label); + return ref; } -extern reference* make_reference(bstring label, bstring url, bstring title) +extern reference* make_reference(chunk *label, chunk *url, chunk *title) { - reference * ref; - ref = malloc(sizeof(reference)); - ref->label = normalize_reference(label); - ref->url = bstrcpy(url); - ref->title = bstrcpy(title); - return ref; + reference *ref; + ref = malloc(sizeof(reference)); + ref->label = normalize_reference(label); + ref->url = clean_url(url); + ref->title = clean_title(title); + return ref; } extern void add_reference(reference** refmap, reference* ref) { - reference * t = NULL; - HASH_FIND(hh, *refmap, (char*) ref->label->data, - (unsigned) blength(ref->label), t); - if (t == NULL) { - HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data, - (unsigned) blength(ref->label), ref); - } else { - free_reference(ref); // we free this now since it won't be in the refmap - } + reference * t = NULL; + HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t); + + if (t == NULL) { + HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref); + } else { + free_reference(ref); // we free this now since it won't be in the refmap + } } // Create an inline with a linkable string value. 
-inline static inl* make_linkable(int t, inl* label, bstring url, bstring title) +inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) { - inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; - e->content.linkable.label = label; - e->content.linkable.url = url; - e->content.linkable.title = title; - e->next = NULL; - return e; + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.linkable.label = label; + e->content.linkable.url = chunk_to_cstr(&url); + e->content.linkable.title = chunk_to_cstr(&title); + e->next = NULL; + return e; } inline static inl* make_inlines(int t, inl* contents) { - inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; - e->content.inlines = contents; - e->next = NULL; - return e; + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.inlines = contents; + e->next = NULL; + return e; } // Create an inline with a literal string value. -inline static inl* make_literal(int t, bstring s) +inline static inl* make_literal(int t, chunk s) { - inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; - e->content.literal = s; - e->next = NULL; - return e; + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->content.literal = s; + e->next = NULL; + return e; } // Create an inline with no value. inline static inl* make_simple(int t) { - inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; - e->next = NULL; - return e; + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = t; + e->next = NULL; + return e; } // Macros for creating various kinds of inlines. @@ -139,113 +160,157 @@ inline static inl* make_simple(int t) #define make_linebreak() make_simple(linebreak) #define make_softbreak() make_simple(softbreak) #define make_link(label, url, title) make_linkable(link, label, url, title) -#define make_image(alt, url, title) make_linkable(image, alt, url, title) #define make_emph(contents) make_inlines(emph, contents) #define make_strong(contents) make_inlines(strong, contents) // Free an inline list. 
extern void free_inlines(inl* e) { - inl * next; - while (e != NULL) { - switch (e->tag){ - case str: - case raw_html: - case code: - case entity: - bdestroy(e->content.literal); - break; - case linebreak: - case softbreak: - break; - case link: - case image: - bdestroy(e->content.linkable.url); - bdestroy(e->content.linkable.title); - free_inlines(e->content.linkable.label); - break; - case emph: - case strong: - free_inlines(e->content.inlines); - break; - default: - break; - } - next = e->next; - free(e); - e = next; - } + inl * next; + while (e != NULL) { + switch (e->tag){ + case str: + case raw_html: + case code: + case entity: + chunk_free(&e->content.literal); + break; + case linebreak: + case softbreak: + break; + case link: + case image: + free(e->content.linkable.url); + free(e->content.linkable.title); + free_inlines(e->content.linkable.label); + break; + case emph: + case strong: + free_inlines(e->content.inlines); + break; + default: + break; + } + next = e->next; + free(e); + e = next; + } } // Append inline list b to the end of inline list a. // Return pointer to head of new list. inline static inl* append_inlines(inl* a, inl* b) { - if (a == NULL) { // NULL acts like an empty list - return b; - } - inl* cur = a; - while (cur->next) { - cur = cur->next; - } - cur->next = b; - return a; + if (a == NULL) { // NULL acts like an empty list + return b; + } + inl* cur = a; + while (cur->next) { + cur = cur->next; + } + cur->next = b; + return a; } // Make a 'subject' from an input string. 
-static subject* make_subject(bstring s, reference** refmap) +static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap) { - subject* e = (subject*) malloc(sizeof(subject)); - // remove final whitespace - brtrimws(s); - e->buffer = s; - e->pos = 0; - e->label_nestlevel = 0; - e->reference_map = refmap; - return e; + e->buffer = buffer; + e->pos = input_pos; + e->label_nestlevel = 0; + e->reference_map = refmap; } inline static int isbacktick(int c) { - return (c == '`'); + return (c == '`'); +} + +inline static void chunk_free(chunk *c) +{ + if (c->alloc) + free((char *)c->data); + + c->data = NULL; + c->alloc = 0; + c->len = 0; +} + +inline static void chunk_trim(chunk *c) +{ + while (c->len && isspace(c->data[0])) { + c->data++; + c->len--; + } + + while (c->len > 0) { + if (!isspace(c->data[c->len - 1])) + break; + + c->len--; + } +} + +inline static unsigned char *chunk_to_cstr(chunk *c) +{ + unsigned char *str; + + str = malloc(c->len + 1); + memcpy(str, c->data, c->len); + str[c->len] = 0; + + return str; +} + +inline static chunk chunk_literal(const char *data) +{ + chunk c = {data, strlen(data), 0}; + return c; +} + +inline static chunk chunk_buf(const gh_buf *buf, int pos, int len) +{ + chunk c = {buf->ptr + pos, len, 0}; + return c; +} + +inline static chunk chunk_buf_detach(gh_buf *buf) +{ + chunk c; + + c.len = buf->size; + c.data = gh_buf_detach(buf); + c.alloc = 1; + + return c; } // Return the next character in the subject, without advancing. // Return 0 if at the end of the subject. -#define peek_char(subj) bchar(subj->buffer, subj->pos) +#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos) // Return true if there are more characters in the subject. inline static int is_eof(subject* subj) { - return (subj->pos >= blength(subj->buffer)); + return (subj->pos >= gh_buf_len(subj->buffer)); } // Advance the subject. Doesn't check for eof. 
-#define advance(subj) subj->pos += 1 +#define advance(subj) (subj)->pos += 1 // Take characters while a predicate holds, and return a string. -inline static bstring take_while(subject* subj, int (*f)(int)) +inline static chunk take_while(subject* subj, int (*f)(int)) { - unsigned char c; - int startpos = subj->pos; - int len = 0; - while ((c = peek_char(subj)) && (*f)(c)) { - advance(subj); - len++; - } - return bmidstr(subj->buffer, startpos, len); -} + unsigned char c; + int startpos = subj->pos; + int len = 0; -// Take one character and return a string, or NULL if eof. -inline static bstring take_one(subject* subj) -{ - int startpos = subj->pos; - if (is_eof(subj)){ - return NULL; - } else { - advance(subj); - return bmidstr(subj->buffer, startpos, 1); - } + while ((c = peek_char(subj)) && (*f)(c)) { + advance(subj); + len++; + } + + return chunk_buf(subj->buffer, startpos, len); } // Try to process a backtick code span that began with a @@ -255,381 +320,406 @@ inline static bstring take_one(subject* subj) // after the closing backticks. 
static int scan_to_closing_backticks(subject* subj, int openticklength) { - // read non backticks - char c; - while ((c = peek_char(subj)) && c != '`') { - advance(subj); - } - if (is_eof(subj)) { - return 0; // did not find closing ticks, return 0 - } - int numticks = 0; - while (peek_char(subj) == '`') { - advance(subj); - numticks++; - } - if (numticks != openticklength){ - return(scan_to_closing_backticks(subj, openticklength)); - } - return (subj->pos); -} - -// Destructively modify bstring, collapsing consecutive + // read non backticks + char c; + while ((c = peek_char(subj)) && c != '`') { + advance(subj); + } + if (is_eof(subj)) { + return 0; // did not find closing ticks, return 0 + } + int numticks = 0; + while (peek_char(subj) == '`') { + advance(subj); + numticks++; + } + if (numticks != openticklength){ + return(scan_to_closing_backticks(subj, openticklength)); + } + return (subj->pos); +} + +// Destructively modify string, collapsing consecutive // space and newline characters into a single space. 
-static int normalize_whitespace(bstring s) -{ - bool last_char_was_space = false; - int pos = 0; - char c; - while ((c = bchar(s, pos))) { - switch (c) { - case ' ': - if (last_char_was_space) { - bdelete(s, pos, 1); - } else { - pos++; - } - last_char_was_space = true; - break; - case '\n': - if (last_char_was_space) { - bdelete(s, pos, 1); - } else { - bdelete(s, pos, 1); - binsertch(s, pos, 1, ' '); - pos++; - } - last_char_was_space = true; - break; - default: - pos++; - last_char_was_space = false; - } - } - return 0; +static void normalize_whitespace(gh_buf *s) +{ + /* TODO */ +#if 0 + bool last_char_was_space = false; + int pos = 0; + char c; + while ((c = gh_buf_at(s, pos))) { + switch (c) { + case ' ': + if (last_char_was_space) { + bdelete(s, pos, 1); + } else { + pos++; + } + last_char_was_space = true; + break; + case '\n': + if (last_char_was_space) { + bdelete(s, pos, 1); + } else { + bdelete(s, pos, 1); + binsertch(s, pos, 1, ' '); + pos++; + } + last_char_was_space = true; + break; + default: + pos++; + last_char_was_space = false; + } + } +#endif } // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. 
static inl* handle_backticks(subject *subj) { - bstring openticks = take_while(subj, isbacktick); - bstring result; - int ticklength = blength(openticks); - int startpos = subj->pos; - int endpos = scan_to_closing_backticks(subj, ticklength); - if (endpos == 0) { // not found - subj->pos = startpos; // rewind - return make_str(openticks); - } else { - bdestroy(openticks); - result = bmidstr(subj->buffer, startpos, endpos - startpos - ticklength); - btrimws(result); - normalize_whitespace(result); - return make_code(result); - } + chunk openticks = take_while(subj, isbacktick); + int startpos = subj->pos; + int endpos = scan_to_closing_backticks(subj, openticks.len); + + if (endpos == 0) { // not found + subj->pos = startpos; // rewind + return make_str(openticks); + } else { + gh_buf buf = GH_BUF_INIT; + + gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len); + gh_buf_trim(&buf); + normalize_whitespace(&buf); + + return make_code(chunk_buf_detach(&buf)); + } } // Scan ***, **, or * and return number scanned, or 0. // Don't advance position. static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) { - int numdelims = 0; - char char_before, char_after; - int startpos = subj->pos; - - char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1); - while (peek_char(subj) == c) { - numdelims++; - advance(subj); - } - char_after = peek_char(subj); - *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after); - *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before); - if (c == '_') { - *can_open = *can_open && !isalnum(char_before); - *can_close = *can_close && !isalnum(char_after); - } - subj->pos = startpos; - return numdelims; + int numdelims = 0; + char char_before, char_after; + int startpos = subj->pos; + + char_before = subj->pos == 0 ? 
'\n' : gh_buf_at(subj->buffer, subj->pos - 1); + while (peek_char(subj) == c) { + numdelims++; + advance(subj); + } + char_after = peek_char(subj); + *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after); + *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before); + if (c == '_') { + *can_open = *can_open && !isalnum(char_before); + *can_close = *can_close && !isalnum(char_after); + } + subj->pos = startpos; + return numdelims; } // Parse strong/emph or a fallback. // Assumes the subject has '_' or '*' at the current position. static inl* handle_strong_emph(subject* subj, char c) { - bool can_open, can_close; - inl * result = NULL; - inl ** last = malloc(sizeof(inl *)); - inl * new; - inl * il; - inl * first_head = NULL; - inl * first_close = NULL; - int first_close_delims = 0; - int numdelims; - - *last = NULL; - - numdelims = scan_delims(subj, c, &can_open, &can_close); - subj->pos += numdelims; - - new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims)); - *last = new; - first_head = new; - result = new; - - if (!can_open || numdelims == 0) { - goto done; - } - - switch (numdelims) { - case 1: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (numdelims >= 1 && can_close) { - subj->pos += 1; - first_head->tag = emph; - bdestroy(first_head->content.literal); - first_head->content.inlines = first_head->next; - first_head->next = NULL; - goto done; - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - case 2: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (numdelims >= 2 && can_close) { - subj->pos += 2; - first_head->tag = strong; - bdestroy(first_head->content.literal); - first_head->content.inlines = first_head->next; - first_head->next = NULL; - goto done; - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - case 3: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if 
(can_close && numdelims >= 1 && numdelims <= 3 && - numdelims != first_close_delims) { - new = make_str(bmidstr(subj->buffer, subj->pos, numdelims)); - append_inlines(*last, new); - *last = new; - - if (first_close_delims == 1 && numdelims > 2) { - numdelims = 2; - } else if (first_close_delims == 2) { - numdelims = 1; - } else if (numdelims == 3) { - // If we opened with ***, we interpret it as ** followed by * - // giving us - numdelims = 1; - } - - subj->pos += numdelims; - if (first_close) { - first_head->tag = first_close_delims == 1 ? strong : emph; - bdestroy(first_head->content.literal); - first_head->content.inlines = - make_inlines(first_close_delims == 1 ? emph : strong, - first_head->next); - - il = first_head->next; - while (il->next && il->next != first_close) { - il = il->next; - } - il->next = NULL; - - first_head->content.inlines->next = first_close->next; - - il = first_head->content.inlines; - while (il->next && il->next != *last) { - il = il->next; - } - il->next = NULL; - free_inlines(*last); - - first_close->next = NULL; - free_inlines(first_close); - first_head->next = NULL; - goto done; - } else { - first_close = *last; - first_close_delims = numdelims; - } - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - default: - goto done; - } - - done: - free(last); - return result; + bool can_open, can_close; + inl * result = NULL; + inl ** last = malloc(sizeof(inl *)); + inl * new; + inl * il; + inl * first_head = NULL; + inl * first_close = NULL; + int first_close_delims = 0; + int numdelims; + + *last = NULL; + + numdelims = scan_delims(subj, c, &can_open, &can_close); + subj->pos += numdelims; + + new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims)); + *last = new; + first_head = new; + result = new; + + if (!can_open || numdelims == 0) { + goto done; + } + + switch (numdelims) { + case 1: + while (true) { + numdelims = scan_delims(subj, c, &can_open, &can_close); + if (numdelims >= 1 && 
can_close) { + subj->pos += 1; + first_head->tag = emph; + chunk_free(&first_head->content.literal); + first_head->content.inlines = first_head->next; + first_head->next = NULL; + goto done; + } else { + if (!parse_inline(subj, last)) { + goto done; + } + } + } + break; + case 2: + while (true) { + numdelims = scan_delims(subj, c, &can_open, &can_close); + if (numdelims >= 2 && can_close) { + subj->pos += 2; + first_head->tag = strong; + chunk_free(&first_head->content.literal); + first_head->content.inlines = first_head->next; + first_head->next = NULL; + goto done; + } else { + if (!parse_inline(subj, last)) { + goto done; + } + } + } + break; + case 3: + while (true) { + numdelims = scan_delims(subj, c, &can_open, &can_close); + if (can_close && numdelims >= 1 && numdelims <= 3 && + numdelims != first_close_delims) { + new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims)); + append_inlines(*last, new); + *last = new; + if (first_close_delims == 1 && numdelims > 2) { + numdelims = 2; + } else if (first_close_delims == 2) { + numdelims = 1; + } else if (numdelims == 3) { + // If we opened with ***, we interpret it as ** followed by * + // giving us + numdelims = 1; + } + subj->pos += numdelims; + if (first_close) { + first_head->tag = first_close_delims == 1 ? strong : emph; + chunk_free(&first_head->content.literal); + first_head->content.inlines = + make_inlines(first_close_delims == 1 ? 
emph : strong, + first_head->next); + + il = first_head->next; + while (il->next && il->next != first_close) { + il = il->next; + } + il->next = NULL; + + first_head->content.inlines->next = first_close->next; + + il = first_head->content.inlines; + while (il->next && il->next != *last) { + il = il->next; + } + il->next = NULL; + free_inlines(*last); + + first_close->next = NULL; + free_inlines(first_close); + first_head->next = NULL; + goto done; + } else { + first_close = *last; + first_close_delims = numdelims; + } + } else { + if (!parse_inline(subj, last)) { + goto done; + } + } + } + break; + default: + goto done; + } + +done: + free(last); + return result; } // Parse backslash-escape or just a backslash, returning an inline. static inl* handle_backslash(subject *subj) { - advance(subj); - unsigned char nextchar = peek_char(subj); - if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped - advance(subj); - return make_str(bformat("%c", nextchar)); - } else if (nextchar == '\n') { - advance(subj); - return make_linebreak(); - } else { - return make_str(bfromcstr("\\")); - } + advance(subj); + unsigned char nextchar = peek_char(subj); + if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped + advance(subj); + return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1)); + } else if (nextchar == '\n') { + advance(subj); + return make_linebreak(); + } else { + return make_str(chunk_literal("\\")); + } } // Parse an entity or a regular "&" string. // Assumes the subject has an '&' character at the current position. 
static inl* handle_entity(subject* subj) { - int match; - inl * result; - match = scan_entity(subj->buffer, subj->pos); - if (match) { - result = make_entity(bmidstr(subj->buffer, subj->pos, match)); - subj->pos += match; - } else { - advance(subj); - result = make_str(bfromcstr("&")); - } - return result; + int match; + inl *result; + match = scan_entity(subj->buffer, subj->pos); + if (match) { + result = make_entity(chunk_buf(subj->buffer, subj->pos, match)); + subj->pos += match; + } else { + advance(subj); + result = make_str(chunk_literal("&")); + } + return result; } // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. -static inl * make_str_with_entities(bstring s) -{ - inl * result = NULL; - inl * new; - int searchpos; - char c; - subject * subj = make_subject(s, NULL); - - while ((c = peek_char(subj))) { - switch (c) { - case '&': - new = handle_entity(subj); - break; - default: - searchpos = bstrchrp(subj->buffer, '&', subj->pos); - if (searchpos == BSTR_ERR) { - searchpos = blength(subj->buffer); - } - new = make_str(bmidstr(subj->buffer, subj->pos, searchpos - subj->pos)); - subj->pos = searchpos; - } - result = append_inlines(result, new); - } - free(subj); - return result; +static inl *make_str_with_entities(chunk *content) +{ + inl * result = NULL; + inl * new; + int searchpos; + char c; + subject subj; + gh_buf content_buf = GH_BUF_INIT; + + gh_buf_set(&content_buf, content->data, content->len); + init_subject(&subj, &content_buf, 0, NULL); + + while ((c = peek_char(&subj))) { + switch (c) { + case '&': + new = handle_entity(&subj); + break; + default: + searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos); + if (searchpos < 0) { + searchpos = gh_buf_len(subj.buffer); + } + + new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos)); + subj.pos = searchpos; + } + result = append_inlines(result, new); + } + + gh_buf_free(&content_buf); + return result; } // Destructively unescape a 
string: remove backslashes before punctuation chars. -extern int unescape(bstring url) +extern void unescape_buffer(gh_buf *buf) { - // remove backslashes before punctuation chars: - int searchpos = 0; - while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) { - if (ispunct(bchar(url, searchpos + 1))) { - bdelete(url, searchpos, 1); - } else { - searchpos++; - } - } - return 0; + int r, w; + + for (r = 0, w = 0; r < buf->size; ++r) { + if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1])) + continue; + + buf->ptr[w++] = buf->ptr[r]; + } + + gh_buf_truncate(buf, w); } // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -static int clean_url(bstring url) +static unsigned char *clean_url(chunk *url) { - // remove surrounding <> if any: - int urllength = blength(url); - btrimws(url); - if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') { - bdelete(url, 0, 1); - bdelete(url, urllength - 2, 1); - } - unescape(url); - return 0; + gh_buf buf = GH_BUF_INIT; + + chunk_trim(url); + + if (url->data[0] == '<' && url->data[url->len - 1] == '>') { + gh_buf_set(&buf, url->data + 1, url->len - 2); + } else { + gh_buf_set(&buf, url->data, url->len); + } + + unescape_buffer(&buf); + return gh_buf_detach(&buf); } // Clean a title: remove surrounding quotes and remove \ that escape punctuation. 
-static int clean_title(bstring title) +static unsigned char *clean_title(chunk *title) { - // remove surrounding quotes if any: - int titlelength = blength(title); - if ((bchar(title, 0) == '\'' && bchar(title, titlelength - 1) == '\'') || - (bchar(title, 0) == '(' && bchar(title, titlelength - 1) == ')') || - (bchar(title, 0) == '"' && bchar(title, titlelength - 1) == '"')) { - bdelete(title, 0, 1); - bdelete(title, titlelength - 2, 1); - } - unescape(title); - return 0; + gh_buf buf = GH_BUF_INIT; + unsigned char first = title->data[0]; + unsigned char last = title->data[title->len - 1]; + + // remove surrounding quotes if any: + if ((first == '\'' && last == '\'') || + (first == '(' && last == ')') || + (first == '"' && last == '"')) { + gh_buf_set(&buf, title->data + 1, title->len - 2); + } else { + gh_buf_set(&buf, title->data, title->len); + } + + unescape_buffer(&buf); + return gh_buf_detach(&buf); } // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. 
static inl* handle_pointy_brace(subject* subj) { - int matchlen = 0; - bstring contents; - inl* result; - - advance(subj); // advance past first < - // first try to match a URL autolink - matchlen = scan_autolink_uri(subj->buffer, subj->pos); - if (matchlen > 0) { - contents = bmidstr(subj->buffer, subj->pos, matchlen - 1); - subj->pos += matchlen; - result = make_link(make_str_with_entities(contents), - bstrcpy(contents), bfromcstr("")); - bdestroy(contents); - return result; - } - // next try to match an email autolink - matchlen = scan_autolink_email(subj->buffer, subj->pos); - if (matchlen > 0) { - contents = bmidstr(subj->buffer, subj->pos, matchlen - 1); - subj->pos += matchlen; - result = make_link(make_str_with_entities(contents), - bformat("mailto:%s", contents->data), - bfromcstr("")); - bdestroy(contents); - return result; - } - // finally, try to match an html tag - matchlen = scan_html_tag(subj->buffer, subj->pos); - if (matchlen > 0) { - contents = bmidstr(subj->buffer, subj->pos, matchlen); - binsertch(contents, 0, 1, '<'); - subj->pos += matchlen; - return make_raw_html(contents); - } else {// if nothing matches, just return the opening <: - return make_str(bfromcstr("<")); - } + int matchlen = 0; + chunk contents; + + advance(subj); // advance past first < + + // first try to match a URL autolink + matchlen = scan_autolink_uri(subj->buffer, subj->pos); + if (matchlen > 0) { + contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + subj->pos += matchlen; + + return make_link( + make_str_with_entities(&contents), + contents, + chunk_literal("") + ); + } + + // next try to match an email autolink + matchlen = scan_autolink_email(subj->buffer, subj->pos); + if (matchlen > 0) { + gh_buf mail_url = GH_BUF_INIT; + + contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + subj->pos += matchlen; + + gh_buf_puts(&mail_url, "mailto:"); + gh_buf_put(&mail_url, contents.data, contents.len); + + return make_link( + 
make_str_with_entities(&contents), + chunk_buf_detach(&mail_url), + chunk_literal("") + ); + } + + // finally, try to match an html tag + matchlen = scan_html_tag(subj->buffer, subj->pos); + if (matchlen > 0) { + contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1); + subj->pos += matchlen; + return make_raw_html(contents); + } + + // if nothing matches, just return the opening <: + return make_str(chunk_literal("<")); } // Parse a link label. Returns 1 if successful. @@ -641,366 +731,381 @@ static inl* handle_pointy_brace(subject* subj) // markers. So, 2 below contains a link while 1 does not: // 1. [a link `with a ](/url)` character // 2. [a link *with emphasized ](/url) text* -static int link_label(subject* subj, bstring* raw_label) -{ - int nestlevel = 0; - inl* tmp = NULL; - bstring raw; - int startpos = subj->pos; - if (subj->label_nestlevel) { - // if we've already checked to the end of the subject - // for a label, even with a different starting [, we - // know we won't find one here and we can just return. 
- // Note: nestlevel 1 would be: [foo [bar] - // nestlevel 2 would be: [foo [bar [baz] - subj->label_nestlevel--; - return 0; - } - advance(subj); // advance past [ - char c; - while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { - switch (c) { - case '`': - tmp = handle_backticks(subj); - free_inlines(tmp); - break; - case '<': - tmp = handle_pointy_brace(subj); - free_inlines(tmp); - break; - case '[': // nested [] - nestlevel++; - advance(subj); - break; - case ']': // nested [] - nestlevel--; - advance(subj); - break; - case '\\': - advance(subj); - if (ispunct(peek_char(subj))) { - advance(subj); - } - break; - default: - advance(subj); - } - } - if (c == ']') { - if (raw_label != NULL) { - raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1)); - *raw_label = raw; - } - subj->label_nestlevel = 0; - advance(subj); // advance past ] - return 1; - } else { - if (c == 0) { - subj->label_nestlevel = nestlevel; - } - subj->pos = startpos; // rewind - return 0; - } +static int link_label(subject* subj, chunk *raw_label) +{ + int nestlevel = 0; + inl* tmp = NULL; + int startpos = subj->pos; + + if (subj->label_nestlevel) { + // if we've already checked to the end of the subject + // for a label, even with a different starting [, we + // know we won't find one here and we can just return. 
+ // Note: nestlevel 1 would be: [foo [bar] + // nestlevel 2 would be: [foo [bar [baz] + subj->label_nestlevel--; + return 0; + } + + advance(subj); // advance past [ + char c; + while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { + switch (c) { + case '`': + tmp = handle_backticks(subj); + free_inlines(tmp); + break; + case '<': + tmp = handle_pointy_brace(subj); + free_inlines(tmp); + break; + case '[': // nested [] + nestlevel++; + advance(subj); + break; + case ']': // nested [] + nestlevel--; + advance(subj); + break; + case '\\': + advance(subj); + if (ispunct(peek_char(subj))) { + advance(subj); + } + break; + default: + advance(subj); + } + } + if (c == ']') { + *raw_label = chunk_buf( + subj->buffer, + startpos + 1, + subj->pos - (startpos + 1) + ); + + subj->label_nestlevel = 0; + advance(subj); // advance past ] + return 1; + } else { + if (c == 0) { + subj->label_nestlevel = nestlevel; + } + subj->pos = startpos; // rewind + return 0; + } } // Parse a link or the link portion of an image, or return a fallback. static inl* handle_left_bracket(subject* subj) { - inl* lab = NULL; - inl* result = NULL; - reference* ref; - int n; - int sps; - int found_label; - int endlabel, starturl, endurl, starttitle, endtitle, endall; - bstring url, title, rawlabel, reflabel; - bstring rawlabel2 = NULL; - found_label = link_label(subj, &rawlabel); - endlabel = subj->pos; - if (found_label) { - if (peek_char(subj) == '(' && - ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && - ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { - // try to parse an explicit link: - starturl = subj->pos + 1 + sps; // after ( - endurl = starturl + n; - starttitle = endurl + scan_spacechars(subj->buffer, endurl); - // ensure there are spaces btw url and title - endtitle = (starttitle == endurl) ? 
starttitle : - starttitle + scan_link_title(subj->buffer, starttitle); - endall = endtitle + scan_spacechars(subj->buffer, endtitle); - if (bchar(subj->buffer, endall) == ')') { - subj->pos = endall + 1; - url = bmidstr(subj->buffer, starturl, endurl - starturl); - clean_url(url); - title = bmidstr(subj->buffer, starttitle, endtitle - starttitle); - clean_title(title); - lab = parse_inlines(rawlabel, NULL); - bdestroy(rawlabel); - return make_link(lab, url, title); - } else { - // if we get here, we matched a label but didn't get further: - subj->pos = endlabel; - lab = parse_inlines(rawlabel, subj->reference_map); - bdestroy(rawlabel); - result = append_inlines(make_str(bfromcstr("[")), - append_inlines(lab, - make_str(bfromcstr("]")))); - return result; - } - } else { - // Check for reference link. - // First, see if there's another label: - subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); - reflabel = rawlabel; - // if followed by a nonempty link label, we change reflabel to it: - if (peek_char(subj) == '[' && - link_label(subj, &rawlabel2)) { - if (blength(rawlabel2) > 0) { - reflabel = rawlabel2; - } - } else { - subj->pos = endlabel; - } - // lookup rawlabel in subject->reference_map: - ref = lookup_reference(subj->reference_map, reflabel); - if (ref != NULL) { // found - lab = parse_inlines(rawlabel, NULL); - result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title)); - } else { - subj->pos = endlabel; - lab = parse_inlines(rawlabel, subj->reference_map); - result = append_inlines(make_str(bfromcstr("[")), - append_inlines(lab, make_str(bfromcstr("]")))); - } - bdestroy(rawlabel); - bdestroy(rawlabel2); - return result; - } - } - // If we fall through to here, it means we didn't match a link: - advance(subj); // advance past [ - return make_str(bfromcstr("[")); + inl *lab = NULL; + inl *result = NULL; + reference *ref; + int n; + int sps; + int found_label; + int endlabel, starturl, endurl, starttitle, endtitle, endall; + + chunk 
rawlabel; + chunk url, title; + + found_label = link_label(subj, &rawlabel); + endlabel = subj->pos; + + if (found_label) { + if (peek_char(subj) == '(' && + ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && + ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { + + // try to parse an explicit link: + starturl = subj->pos + 1 + sps; // after ( + endurl = starturl + n; + starttitle = endurl + scan_spacechars(subj->buffer, endurl); + + // ensure there are spaces btw url and title + endtitle = (starttitle == endurl) ? starttitle : + starttitle + scan_link_title(subj->buffer, starttitle); + + endall = endtitle + scan_spacechars(subj->buffer, endtitle); + + if (gh_buf_at(subj->buffer, endall) == ')') { + subj->pos = endall + 1; + + url = chunk_buf(subj->buffer, starturl, endurl - starturl); + title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle); + lab = parse_chunk_inlines(&rawlabel, NULL); + + return make_link(lab, url, title); + } else { + // if we get here, we matched a label but didn't get further: + subj->pos = endlabel; + lab = parse_chunk_inlines(&rawlabel, subj->reference_map); + result = append_inlines(make_str(chunk_literal("[")), + append_inlines(lab, + make_str(chunk_literal("]")))); + return result; + } + } else { + chunk rawlabel_tmp; + chunk reflabel; + + // Check for reference link. 
+ // First, see if there's another label: + subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); + reflabel = rawlabel; + + // if followed by a nonempty link label, we change reflabel to it: + if (peek_char(subj) == '[' && link_label(subj, &rawlabel_tmp)) { + if (rawlabel_tmp.len > 0) + reflabel = rawlabel_tmp; + } else { + subj->pos = endlabel; + } + + // lookup rawlabel in subject->reference_map: + ref = lookup_reference(subj->reference_map, &reflabel); + if (ref != NULL) { // found + lab = parse_chunk_inlines(&rawlabel, NULL); + result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title)); + } else { + subj->pos = endlabel; + lab = parse_chunk_inlines(&rawlabel, subj->reference_map); + result = append_inlines(make_str(chunk_literal("[")), + append_inlines(lab, make_str(chunk_literal("]")))); + } + return result; + } + } + // If we fall through to here, it means we didn't match a link: + advance(subj); // advance past [ + return make_str(chunk_literal("[")); } // Parse a hard or soft linebreak, returning an inline. // Assumes the subject has a newline at the current position. static inl* handle_newline(subject *subj) { - int nlpos = subj->pos; - // skip over newline - advance(subj); - // skip spaces at beginning of line - while (peek_char(subj) == ' ') { - advance(subj); - } - if (nlpos > 1 && - bchar(subj->buffer, nlpos - 1) == ' ' && - bchar(subj->buffer, nlpos - 2) == ' ') { - return make_linebreak(); - } else { - return make_softbreak(); - } + int nlpos = subj->pos; + // skip over newline + advance(subj); + // skip spaces at beginning of line + while (peek_char(subj) == ' ') { + advance(subj); + } + if (nlpos > 1 && + gh_buf_at(subj->buffer, nlpos - 1) == ' ' && + gh_buf_at(subj->buffer, nlpos - 2) == ' ') { + return make_linebreak(); + } else { + return make_softbreak(); + } } inline static int not_eof(subject* subj) { - return !is_eof(subj); + return !is_eof(subj); } // Parse inlines while a predicate is satisfied. Return inlines. 
extern inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { - inl* result = NULL; - inl** last = &result; - while ((*f)(subj) && parse_inline(subj, last)) { - } - return result; + inl* result = NULL; + inl** last = &result; + while ((*f)(subj) && parse_inline(subj, last)) { + } + return result; +} + +inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +{ + inl *result; + subject subj; + gh_buf full_chunk = GH_BUF_INIT; + + gh_buf_set(&full_chunk, chunk->data, chunk->len); + init_subject(&subj, &full_chunk, 0, refmap); + result = parse_inlines_while(&subj, not_eof); + + gh_buf_free(&full_chunk); + return result; +} + +static int find_special_char(subject *subj) +{ + int n = subj->pos + 1; + int size = (int)gh_buf_len(subj->buffer); + + while (n < size) { + if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n))) + return n; + } + + return -1; } // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. 
-extern int parse_inline(subject* subj, inl ** last) -{ - inl* new = NULL; - bstring contents; - bstring special_chars; - unsigned char c; - int endpos; - c = peek_char(subj); - if (c == 0) { - return 0; - } - switch(c){ - case '\n': - new = handle_newline(subj); - break; - case '`': - new = handle_backticks(subj); - break; - case '\\': - new = handle_backslash(subj); - break; - case '&': - new = handle_entity(subj); - break; - case '<': - new = handle_pointy_brace(subj); - break; - case '_': - if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) || - bchar(subj->buffer, subj->pos - 1) == '_')) { - new = make_str(take_one(subj)); - } else { - new = handle_strong_emph(subj, '_'); - } - break; - case '*': - new = handle_strong_emph(subj, '*'); - break; - case '[': - new = handle_left_bracket(subj); - break; - case '!': - advance(subj); - if (peek_char(subj) == '[') { - new = handle_left_bracket(subj); - if (new != NULL && new->tag == link) { - new->tag = image; - } else { - new = append_inlines(make_str(bfromcstr("!")), new); - } - } else { - new = make_str(bfromcstr("!")); - } - break; - default: - // we read until we hit a special character - special_chars = bfromcstr("\n\\`&_*[]<!"); - endpos = binchr(subj->buffer, subj->pos, special_chars); - bdestroy(special_chars); - if (endpos == subj->pos) { - // current char is special: read a 1-character str - contents = take_one(subj); - } else if (endpos == BSTR_ERR) { - // special char not found, take whole rest of buffer: - endpos = subj->buffer->slen; - contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos); - subj->pos = endpos; - } else { - // take buffer from subj->pos to endpos to str. - contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos); - subj->pos = endpos; - // if we're at a newline, strip trailing spaces. 
- if (peek_char(subj) == '\n') { - brtrimws(contents); - } - } - new = make_str(contents); - } - if (*last == NULL) { - *last = new; - } else { - append_inlines(*last, new); - } - return 1; -} - -extern inl* parse_inlines(bstring input, reference** refmap) -{ - subject * subj = make_subject(input, refmap); - inl * result = parse_inlines_while(subj, not_eof); - free(subj); - return result; +static int parse_inline(subject* subj, inl ** last) +{ + inl* new = NULL; + chunk contents; + unsigned char c; + int endpos; + c = peek_char(subj); + if (c == 0) { + return 0; + } + switch(c){ + case '\n': + new = handle_newline(subj); + break; + case '`': + new = handle_backticks(subj); + break; + case '\\': + new = handle_backslash(subj); + break; + case '&': + new = handle_entity(subj); + break; + case '<': + new = handle_pointy_brace(subj); + break; + case '_': + if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) || + gh_buf_at(subj->buffer, subj->pos - 1) == '_')) { + goto text_literal; + } + + new = handle_strong_emph(subj, '_'); + break; + case '*': + new = handle_strong_emph(subj, '*'); + break; + case '[': + new = handle_left_bracket(subj); + break; + case '!': + advance(subj); + if (peek_char(subj) == '[') { + new = handle_left_bracket(subj); + if (new != NULL && new->tag == link) { + new->tag = image; + } else { + new = append_inlines(make_str(chunk_literal("!")), new); + } + } else { + new = make_str(chunk_literal("!")); + } + break; + default: + text_literal: + endpos = find_special_char(subj); + if (endpos < 0) { + endpos = gh_buf_len(subj->buffer); + } + + contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos); + subj->pos = endpos; + + // if we're at a newline, strip trailing spaces. 
+ if (peek_char(subj) == '\n') { + chunk_trim(&contents); + } + + new = make_str(contents); + } + if (*last == NULL) { + *last = new; + } else { + append_inlines(*last, new); + } + return 1; +} + +extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap) +{ + subject subj; + init_subject(&subj, input, input_pos, refmap); + return parse_inlines_while(&subj, not_eof); } // Parse zero or more space characters, including at most one newline. void spnl(subject* subj) { - bool seen_newline = false; - while (peek_char(subj) == ' ' || - (!seen_newline && - (seen_newline = peek_char(subj) == '\n'))) { - advance(subj); - } + bool seen_newline = false; + while (peek_char(subj) == ' ' || + (!seen_newline && + (seen_newline = peek_char(subj) == '\n'))) { + advance(subj); + } } // Parse reference. Assumes string begins with '[' character. // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -extern int parse_reference(bstring input, reference** refmap) -{ - subject * subj = make_subject(input, NULL); - bstring lab = NULL; - bstring url = NULL; - bstring title = NULL; - int matchlen = 0; - int beforetitle; - reference * new = NULL; - int newpos; - - // parse label: - if (!link_label(subj, &lab)) { - free(subj); - return 0; - } - // colon: - if (peek_char(subj) == ':') { - advance(subj); - } else { - free(subj); - bdestroy(lab); - return 0; - } - // parse link url: - spnl(subj); - matchlen = scan_link_url(subj->buffer, subj->pos); - if (matchlen) { - url = bmidstr(subj->buffer, subj->pos, matchlen); - clean_url(url); - subj->pos += matchlen; - } else { - free(subj); - bdestroy(lab); - bdestroy(url); - return 0; - } - // parse optional link_title - beforetitle = subj->pos; - spnl(subj); - matchlen = scan_link_title(subj->buffer, subj->pos); - if (matchlen) { - title = bmidstr(subj->buffer, subj->pos, matchlen); - clean_title(title); - subj->pos += matchlen; - } else { - 
subj->pos = beforetitle; - title = bfromcstr(""); - } - // parse final spaces and newline: - while (peek_char(subj) == ' ') { - advance(subj); - } - if (peek_char(subj) == '\n') { - advance(subj); - } else if (peek_char(subj) != 0) { - free(subj); - bdestroy(lab); - bdestroy(url); - bdestroy(title); - return 0; - } - // insert reference into refmap - new = make_reference(lab, url, title); - add_reference(refmap, new); - - newpos = subj->pos; - free(subj); - bdestroy(lab); - bdestroy(url); - bdestroy(title); - return newpos; +extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) +{ + subject subj; + + chunk lab; + chunk url; + chunk title; + + int matchlen = 0; + int beforetitle; + reference * new = NULL; + + init_subject(&subj, input, input_pos, NULL); + + // parse label: + if (!link_label(&subj, &lab)) + return 0; + + // colon: + if (peek_char(&subj) == ':') { + advance(&subj); + } else { + return 0; + } + + // parse link url: + spnl(&subj); + matchlen = scan_link_url(subj.buffer, subj.pos); + if (matchlen) { + url = chunk_buf(subj.buffer, subj.pos, matchlen); + subj.pos += matchlen; + } else { + return 0; + } + + // parse optional link_title + beforetitle = subj.pos; + spnl(&subj); + matchlen = scan_link_title(subj.buffer, subj.pos); + if (matchlen) { + title = chunk_buf(subj.buffer, subj.pos, matchlen); + subj.pos += matchlen; + } else { + subj.pos = beforetitle; + title = chunk_literal(""); + } + // parse final spaces and newline: + while (peek_char(&subj) == ' ') { + advance(&subj); + } + if (peek_char(&subj) == '\n') { + advance(&subj); + } else if (peek_char(&subj) != 0) { + return 0; + } + // insert reference into refmap + new = make_reference(&lab, &url, &title); + add_reference(refmap, new); + + return subj.pos; } -- cgit v1.2.3 From 582674e662d1f8757350c51486a5e0a837195e15 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 13:18:04 +0200 Subject: ffffix --- src/inlines.c | 4 +--- 1 file changed, 1 insertion(+), 3 
deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 4ff45ad..82c7219 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -6,9 +6,7 @@ #include "stmd.h" #include "uthash.h" -#include "debug.h" #include "scanners.h" -#include "utf8.h" typedef struct Subject { const gh_buf *buffer; @@ -119,7 +117,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) e->tag = t; e->content.linkable.label = label; e->content.linkable.url = chunk_to_cstr(&url); - e->content.linkable.title = chunk_to_cstr(&title); + e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL; e->next = NULL; return e; } -- cgit v1.2.3 From 24248c0f1a6de6f229890c5c03aeff8738214fee Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 13:30:13 +0200 Subject: Rename inlines --- src/inlines.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 82c7219..b9ece0e 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -151,15 +151,15 @@ inline static inl* make_simple(int t) } // Macros for creating various kinds of inlines. 
-#define make_str(s) make_literal(str, s) -#define make_code(s) make_literal(code, s) -#define make_raw_html(s) make_literal(raw_html, s) -#define make_entity(s) make_literal(entity, s) -#define make_linebreak() make_simple(linebreak) -#define make_softbreak() make_simple(softbreak) -#define make_link(label, url, title) make_linkable(link, label, url, title) -#define make_emph(contents) make_inlines(emph, contents) -#define make_strong(contents) make_inlines(strong, contents) +#define make_str(s) make_literal(INL_STRING, s) +#define make_code(s) make_literal(INL_CODE, s) +#define make_raw_html(s) make_literal(INL_RAW_HTML, s) +#define make_entity(s) make_literal(INL_ENTITY, s) +#define make_linebreak() make_simple(INL_LINEBREAK) +#define make_softbreak() make_simple(INL_SOFTBREAK) +#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title) +#define make_emph(contents) make_inlines(INL_EMPH, contents) +#define make_strong(contents) make_inlines(INL_STRONG, contents) // Free an inline list. 
extern void free_inlines(inl* e) @@ -167,23 +167,23 @@ extern void free_inlines(inl* e) inl * next; while (e != NULL) { switch (e->tag){ - case str: - case raw_html: - case code: - case entity: + case INL_STRING: + case INL_RAW_HTML: + case INL_CODE: + case INL_ENTITY: chunk_free(&e->content.literal); break; - case linebreak: - case softbreak: + case INL_LINEBREAK: + case INL_SOFTBREAK: break; - case link: - case image: + case INL_LINK: + case INL_IMAGE: free(e->content.linkable.url); free(e->content.linkable.title); free_inlines(e->content.linkable.label); break; - case emph: - case strong: + case INL_EMPH: + case INL_STRONG: free_inlines(e->content.inlines); break; default: @@ -454,7 +454,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); if (numdelims >= 1 && can_close) { subj->pos += 1; - first_head->tag = emph; + first_head->tag = INL_EMPH; chunk_free(&first_head->content.literal); first_head->content.inlines = first_head->next; first_head->next = NULL; @@ -471,7 +471,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); if (numdelims >= 2 && can_close) { subj->pos += 2; - first_head->tag = strong; + first_head->tag = INL_STRONG; chunk_free(&first_head->content.literal); first_head->content.inlines = first_head->next; first_head->next = NULL; @@ -502,10 +502,10 @@ static inl* handle_strong_emph(subject* subj, char c) } subj->pos += numdelims; if (first_close) { - first_head->tag = first_close_delims == 1 ? strong : emph; + first_head->tag = first_close_delims == 1 ? INL_STRONG : INL_EMPH; chunk_free(&first_head->content.literal); first_head->content.inlines = - make_inlines(first_close_delims == 1 ? emph : strong, + make_inlines(first_close_delims == 1 ? 
INL_EMPH : INL_STRONG, first_head->next); il = first_head->next; @@ -989,8 +989,8 @@ static int parse_inline(subject* subj, inl ** last) advance(subj); if (peek_char(subj) == '[') { new = handle_left_bracket(subj); - if (new != NULL && new->tag == link) { - new->tag = image; + if (new != NULL && new->tag == INL_LINK) { + new->tag = INL_IMAGE; } else { new = append_inlines(make_str(chunk_literal("!")), new); } -- cgit v1.2.3 From 7e12fdba0c9a444a3cfc29c520e2f2caa57a8232 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 14:15:24 +0200 Subject: NO SEGFAULTS KTHX --- src/inlines.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index b9ece0e..7b48ad9 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -84,7 +84,7 @@ extern reference* lookup_reference(reference** refmap, chunk *label) if (refmap != NULL) { HASH_FIND_STR(*refmap, (char*)norm, ref); } - free(label); + free(norm); return ref; } @@ -262,7 +262,7 @@ inline static unsigned char *chunk_to_cstr(chunk *c) inline static chunk chunk_literal(const char *data) { - chunk c = {data, strlen(data), 0}; + chunk c = {data, data ? 
strlen(data) : 0, 0}; return c; } @@ -937,6 +937,7 @@ static int find_special_char(subject *subj) while (n < size) { if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n))) return n; + n++; } return -1; @@ -974,7 +975,9 @@ static int parse_inline(subject* subj, inl ** last) case '_': if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) || gh_buf_at(subj->buffer, subj->pos - 1) == '_')) { - goto text_literal; + new = make_str(chunk_literal("_")); + advance(subj); + break; } new = handle_strong_emph(subj, '_'); -- cgit v1.2.3 From a7314deae649646f1f7ce5ede972641b5b62538c Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 3 Sep 2014 03:40:23 +0200 Subject: 338/103 --- src/inlines.c | 235 ++++++++++++++++++++++------------------------------------ 1 file changed, 90 insertions(+), 145 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 7b48ad9..ef27a24 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -9,10 +9,10 @@ #include "scanners.h" typedef struct Subject { - const gh_buf *buffer; - int pos; - reference** reference_map; - int label_nestlevel; + chunk input; + int pos; + int label_nestlevel; + reference** reference_map; } subject; reference* lookup_reference(reference** refmap, chunk *label); @@ -27,12 +27,16 @@ inline static void chunk_trim(chunk *c); inline static chunk chunk_literal(const char *data); inline static chunk chunk_buf_detach(gh_buf *buf); -inline static chunk chunk_buf(const gh_buf *buf, int pos, int len); +inline static chunk chunk_dup(const chunk *ch, int pos, int len); static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); static int parse_inline(subject* subj, inl ** last); +static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); +static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap); +static int subject_find_special_char(subject *subj); + extern void free_reference(reference *ref) { 
free(ref->label); free(ref->url); @@ -101,10 +105,12 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title) extern void add_reference(reference** refmap, reference* ref) { reference * t = NULL; - HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t); + const char *label = (const char *)ref->label; + + HASH_FIND(hh, *refmap, label, strlen(label), t); if (t == NULL) { - HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref); + HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref); } else { free_reference(ref); // we free this now since it won't be in the refmap } @@ -210,87 +216,49 @@ inline static inl* append_inlines(inl* a, inl* b) return a; } -// Make a 'subject' from an input string. -static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap) +static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap) { - e->buffer = buffer; - e->pos = input_pos; + e->input.data = buffer->ptr; + e->input.len = buffer->size; + e->input.alloc = 0; + e->pos = 0; e->label_nestlevel = 0; e->reference_map = refmap; -} - -inline static int isbacktick(int c) -{ - return (c == '`'); -} - -inline static void chunk_free(chunk *c) -{ - if (c->alloc) - free((char *)c->data); - - c->data = NULL; - c->alloc = 0; - c->len = 0; -} - -inline static void chunk_trim(chunk *c) -{ - while (c->len && isspace(c->data[0])) { - c->data++; - c->len--; - } - - while (c->len > 0) { - if (!isspace(c->data[c->len - 1])) - break; - c->len--; - } + chunk_rtrim(&e->input); } -inline static unsigned char *chunk_to_cstr(chunk *c) +static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap) { - unsigned char *str; - - str = malloc(c->len + 1); - memcpy(str, c->data, c->len); - str[c->len] = 0; + e->input.data = chunk->data; + e->input.len = chunk->len; + e->input.alloc = 0; + e->pos = 0; + e->label_nestlevel = 0; + e->reference_map = refmap; - return str; + chunk_rtrim(&e->input); } 
-inline static chunk chunk_literal(const char *data) +inline static int isbacktick(int c) { - chunk c = {data, data ? strlen(data) : 0, 0}; - return c; + return (c == '`'); } -inline static chunk chunk_buf(const gh_buf *buf, int pos, int len) +static inline unsigned char peek_char(subject *subj) { - chunk c = {buf->ptr + pos, len, 0}; - return c; + return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; } -inline static chunk chunk_buf_detach(gh_buf *buf) +static inline unsigned char peek_at(subject *subj, int pos) { - chunk c; - - c.len = buf->size; - c.data = gh_buf_detach(buf); - c.alloc = 1; - - return c; + return subj->input.data[pos]; } -// Return the next character in the subject, without advancing. -// Return 0 if at the end of the subject. -#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos) - // Return true if there are more characters in the subject. inline static int is_eof(subject* subj) { - return (subj->pos >= gh_buf_len(subj->buffer)); + return (subj->pos >= subj->input.len); } // Advance the subject. Doesn't check for eof. @@ -308,7 +276,7 @@ inline static chunk take_while(subject* subj, int (*f)(int)) len++; } - return chunk_buf(subj->buffer, startpos, len); + return chunk_dup(&subj->input, startpos, len); } // Try to process a backtick code span that began with a @@ -388,7 +356,7 @@ static inl* handle_backticks(subject *subj) } else { gh_buf buf = GH_BUF_INIT; - gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len); + gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); gh_buf_trim(&buf); normalize_whitespace(&buf); @@ -404,7 +372,7 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) char char_before, char_after; int startpos = subj->pos; - char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1); + char_before = subj->pos == 0 ? 
'\n' : peek_at(subj, subj->pos - 1); while (peek_char(subj) == c) { numdelims++; advance(subj); @@ -439,7 +407,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); subj->pos += numdelims; - new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims)); + new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); *last = new; first_head = new; result = new; @@ -488,7 +456,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); if (can_close && numdelims >= 1 && numdelims <= 3 && numdelims != first_close_delims) { - new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims)); + new = make_str(chunk_dup(&subj->input, subj->pos, numdelims)); append_inlines(*last, new); *last = new; if (first_close_delims == 1 && numdelims > 2) { @@ -554,7 +522,7 @@ static inl* handle_backslash(subject *subj) unsigned char nextchar = peek_char(subj); if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped advance(subj); - return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1)); + return make_str(chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (nextchar == '\n') { advance(subj); return make_linebreak(); @@ -569,9 +537,9 @@ static inl* handle_entity(subject* subj) { int match; inl *result; - match = scan_entity(subj->buffer, subj->pos); + match = scan_entity(&subj->input, subj->pos); if (match) { - result = make_entity(chunk_buf(subj->buffer, subj->pos, match)); + result = make_entity(chunk_dup(&subj->input, subj->pos, match)); subj->pos += match; } else { advance(subj); @@ -584,15 +552,13 @@ static inl* handle_entity(subject* subj) // Returns an inline sequence consisting of str and entity elements. 
static inl *make_str_with_entities(chunk *content) { - inl * result = NULL; - inl * new; + inl *result = NULL; + inl *new; int searchpos; char c; subject subj; - gh_buf content_buf = GH_BUF_INIT; - gh_buf_set(&content_buf, content->data, content->len); - init_subject(&subj, &content_buf, 0, NULL); + subject_from_chunk(&subj, content, NULL); while ((c = peek_char(&subj))) { switch (c) { @@ -600,18 +566,13 @@ static inl *make_str_with_entities(chunk *content) new = handle_entity(&subj); break; default: - searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos); - if (searchpos < 0) { - searchpos = gh_buf_len(subj.buffer); - } - - new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos)); + searchpos = chunk_strchr(&subj.input, '&', subj.pos); + new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos)); subj.pos = searchpos; } result = append_inlines(result, new); } - gh_buf_free(&content_buf); return result; } @@ -678,9 +639,9 @@ static inl* handle_pointy_brace(subject* subj) advance(subj); // advance past first < // first try to match a URL autolink - matchlen = scan_autolink_uri(subj->buffer, subj->pos); + matchlen = scan_autolink_uri(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; return make_link( @@ -691,11 +652,11 @@ static inl* handle_pointy_brace(subject* subj) } // next try to match an email autolink - matchlen = scan_autolink_email(subj->buffer, subj->pos); + matchlen = scan_autolink_email(&subj->input, subj->pos); if (matchlen > 0) { gh_buf mail_url = GH_BUF_INIT; - contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; gh_buf_puts(&mail_url, "mailto:"); @@ -709,9 +670,9 @@ static inl* handle_pointy_brace(subject* subj) } // finally, try to match an html tag - matchlen = scan_html_tag(subj->buffer, 
subj->pos); + matchlen = scan_html_tag(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1); + contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; return make_raw_html(contents); } @@ -776,12 +737,7 @@ static int link_label(subject* subj, chunk *raw_label) } } if (c == ']') { - *raw_label = chunk_buf( - subj->buffer, - startpos + 1, - subj->pos - (startpos + 1) - ); - + *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); subj->label_nestlevel = 0; advance(subj); // advance past ] return 1; @@ -813,25 +769,25 @@ static inl* handle_left_bracket(subject* subj) if (found_label) { if (peek_char(subj) == '(' && - ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && - ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { + ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && + ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { // try to parse an explicit link: starturl = subj->pos + 1 + sps; // after ( endurl = starturl + n; - starttitle = endurl + scan_spacechars(subj->buffer, endurl); + starttitle = endurl + scan_spacechars(&subj->input, endurl); // ensure there are spaces btw url and title endtitle = (starttitle == endurl) ? 
starttitle : - starttitle + scan_link_title(subj->buffer, starttitle); + starttitle + scan_link_title(&subj->input, starttitle); - endall = endtitle + scan_spacechars(subj->buffer, endtitle); + endall = endtitle + scan_spacechars(&subj->input, endtitle); - if (gh_buf_at(subj->buffer, endall) == ')') { + if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; - url = chunk_buf(subj->buffer, starturl, endurl - starturl); - title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle); + url = chunk_dup(&subj->input, starturl, endurl - starturl); + title = chunk_dup(&subj->input, starttitle, endtitle - starttitle); lab = parse_chunk_inlines(&rawlabel, NULL); return make_link(lab, url, title); @@ -850,7 +806,7 @@ static inl* handle_left_bracket(subject* subj) // Check for reference link. // First, see if there's another label: - subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); + subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel); reflabel = rawlabel; // if followed by a nonempty link label, we change reflabel to it: @@ -892,8 +848,8 @@ static inl* handle_newline(subject *subj) advance(subj); } if (nlpos > 1 && - gh_buf_at(subj->buffer, nlpos - 1) == ' ' && - gh_buf_at(subj->buffer, nlpos - 2) == ' ') { + peek_at(subj, nlpos - 1) == ' ' && + peek_at(subj, nlpos - 2) == ' ') { return make_linebreak(); } else { return make_softbreak(); @@ -917,30 +873,22 @@ extern inl* parse_inlines_while(subject* subj, int (*f)(subject*)) inl *parse_chunk_inlines(chunk *chunk, reference** refmap) { - inl *result; subject subj; - gh_buf full_chunk = GH_BUF_INIT; - - gh_buf_set(&full_chunk, chunk->data, chunk->len); - init_subject(&subj, &full_chunk, 0, refmap); - result = parse_inlines_while(&subj, not_eof); - - gh_buf_free(&full_chunk); - return result; + subject_from_chunk(&subj, chunk, refmap); + return parse_inlines_while(&subj, not_eof); } -static int find_special_char(subject *subj) +static int subject_find_special_char(subject *subj) { int n = 
subj->pos + 1; - int size = (int)gh_buf_len(subj->buffer); - while (n < size) { - if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n))) + while (n < subj->input.len) { + if (strchr("\n\\`&_*[]<!", subj->input.data[n])) return n; n++; } - return -1; + return subj->input.len; } // Parse an inline, advancing subject, and add it to last element. @@ -973,11 +921,13 @@ static int parse_inline(subject* subj, inl ** last) new = handle_pointy_brace(subj); break; case '_': - if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) || - gh_buf_at(subj->buffer, subj->pos - 1) == '_')) { - new = make_str(chunk_literal("_")); - advance(subj); - break; + if (subj->pos > 0) { + unsigned char prev = peek_at(subj, subj->pos - 1); + if (isalnum(prev) || prev == '_') { + new = make_str(chunk_literal("_")); + advance(subj); + break; + } } new = handle_strong_emph(subj, '_'); @@ -1002,18 +952,13 @@ static int parse_inline(subject* subj, inl ** last) } break; default: - text_literal: - endpos = find_special_char(subj); - if (endpos < 0) { - endpos = gh_buf_len(subj->buffer); - } - - contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos); + endpos = subject_find_special_char(subj); + contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; // if we're at a newline, strip trailing spaces. if (peek_char(subj) == '\n') { - chunk_trim(&contents); + chunk_rtrim(&contents); } new = make_str(contents); @@ -1026,10 +971,10 @@ static int parse_inline(subject* subj, inl ** last) return 1; } -extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap) +extern inl* parse_inlines(gh_buf *input, reference** refmap) { subject subj; - init_subject(&subj, input, input_pos, refmap); + subject_from_buf(&subj, input, refmap); return parse_inlines_while(&subj, not_eof); } @@ -1048,7 +993,7 @@ void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. 
-extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) +extern int parse_reference(gh_buf *input, reference** refmap) { subject subj; @@ -1058,9 +1003,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) int matchlen = 0; int beforetitle; - reference * new = NULL; + reference *new = NULL; - init_subject(&subj, input, input_pos, NULL); + subject_from_buf(&subj, input, NULL); // parse label: if (!link_label(&subj, &lab)) @@ -1075,9 +1020,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) // parse link url: spnl(&subj); - matchlen = scan_link_url(subj.buffer, subj.pos); + matchlen = scan_link_url(&subj.input, subj.pos); if (matchlen) { - url = chunk_buf(subj.buffer, subj.pos, matchlen); + url = chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { return 0; @@ -1086,9 +1031,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) // parse optional link_title beforetitle = subj.pos; spnl(&subj); - matchlen = scan_link_title(subj.buffer, subj.pos); + matchlen = scan_link_title(&subj.input, subj.pos); if (matchlen) { - title = chunk_buf(subj.buffer, subj.pos, matchlen); + title = chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { subj.pos = beforetitle; -- cgit v1.2.3 From f5168c63ad305b3e331eb7d31efaf46b0541bba4 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 06:41:18 +0200 Subject: 368/73 --- src/inlines.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index ef27a24..ced4673 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -123,7 +123,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) e->tag = t; e->content.linkable.label = label; e->content.linkable.url = chunk_to_cstr(&url); - e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL; + e->content.linkable.title = title.len ? 
chunk_to_cstr(&title) : NULL; e->next = NULL; return e; } -- cgit v1.2.3 From 45c1d9fadb3e8aab4a01bb27a4e2ece379902d1a Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 17:26:11 +0200 Subject: 426/15 --- src/inlines.c | 105 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 50 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index ced4673..a0dcac9 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1,8 +1,8 @@ #include +#include #include #include #include -#include #include "stmd.h" #include "uthash.h" @@ -18,7 +18,7 @@ typedef struct Subject { reference* lookup_reference(reference** refmap, chunk *label); reference* make_reference(chunk *label, chunk *url, chunk *title); -static unsigned char *clean_url(chunk *url); +static unsigned char *clean_url(chunk *url, int is_email); static unsigned char *clean_title(chunk *title); inline static unsigned char *chunk_to_cstr(chunk *c); @@ -97,7 +97,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title) reference *ref; ref = malloc(sizeof(reference)); ref->label = normalize_reference(label); - ref->url = clean_url(url); + ref->url = clean_url(url, 0); ref->title = clean_title(title); return ref; } @@ -116,14 +116,25 @@ extern void add_reference(reference** refmap, reference* ref) } } +inline static inl* make_link_from_reference(inl* label, reference *ref) +{ + inl* e = (inl*) malloc(sizeof(inl)); + e->tag = INL_LINK; + e->content.linkable.label = label; + e->content.linkable.url = strdup(ref->url); + e->content.linkable.title = ref->title ? strdup(ref->title) : NULL; + e->next = NULL; + return e; +} + // Create an inline with a linkable string value. 
-inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) +inline static inl* make_link(inl* label, chunk url, chunk title, int is_email) { inl* e = (inl*) malloc(sizeof(inl)); - e->tag = t; + e->tag = INL_LINK; e->content.linkable.label = label; - e->content.linkable.url = chunk_to_cstr(&url); - e->content.linkable.title = title.len ? chunk_to_cstr(&title) : NULL; + e->content.linkable.url = clean_url(&url, is_email); + e->content.linkable.title = clean_title(&title); e->next = NULL; return e; } @@ -163,7 +174,6 @@ inline static inl* make_simple(int t) #define make_entity(s) make_literal(INL_ENTITY, s) #define make_linebreak() make_simple(INL_LINEBREAK) #define make_softbreak() make_simple(INL_SOFTBREAK) -#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title) #define make_emph(contents) make_inlines(INL_EMPH, contents) #define make_strong(contents) make_inlines(INL_STRONG, contents) @@ -309,37 +319,27 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) // space and newline characters into a single space. static void normalize_whitespace(gh_buf *s) { - /* TODO */ -#if 0 bool last_char_was_space = false; - int pos = 0; - char c; - while ((c = gh_buf_at(s, pos))) { - switch (c) { - case ' ': - if (last_char_was_space) { - bdelete(s, pos, 1); - } else { - pos++; - } - last_char_was_space = true; - break; - case '\n': - if (last_char_was_space) { - bdelete(s, pos, 1); - } else { - bdelete(s, pos, 1); - binsertch(s, pos, 1, ' '); - pos++; - } - last_char_was_space = true; + int r, w; + + for (r = 0, w = 0; r < s->size; ++r) { + switch (s->ptr[r]) { + case ' ': + case '\n': + if (last_char_was_space) break; - default: - pos++; - last_char_was_space = false; + + s->ptr[w++] = ' '; + last_char_was_space = true; + break; + + default: + s->ptr[w++] = s->ptr[r]; + last_char_was_space = false; } } -#endif + + gh_buf_truncate(s, w); } // Parse backtick code section or raw backticks, return an inline. 
@@ -593,16 +593,19 @@ extern void unescape_buffer(gh_buf *buf) // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -static unsigned char *clean_url(chunk *url) +static unsigned char *clean_url(chunk *url, int is_email) { gh_buf buf = GH_BUF_INIT; chunk_trim(url); + if (is_email) + gh_buf_puts(&buf, "mailto:"); + if (url->data[0] == '<' && url->data[url->len - 1] == '>') { - gh_buf_set(&buf, url->data + 1, url->len - 2); + gh_buf_put(&buf, url->data + 1, url->len - 2); } else { - gh_buf_set(&buf, url->data, url->len); + gh_buf_put(&buf, url->data, url->len); } unescape_buffer(&buf); @@ -613,8 +616,13 @@ static unsigned char *clean_url(chunk *url) static unsigned char *clean_title(chunk *title) { gh_buf buf = GH_BUF_INIT; - unsigned char first = title->data[0]; - unsigned char last = title->data[title->len - 1]; + unsigned char first, last; + + if (title->len == 0) + return NULL; + + first = title->data[0]; + last = title->data[title->len - 1]; // remove surrounding quotes if any: if ((first == '\'' && last == '\'') || @@ -647,25 +655,22 @@ static inl* handle_pointy_brace(subject* subj) return make_link( make_str_with_entities(&contents), contents, - chunk_literal("") + chunk_literal(""), + 0 ); } // next try to match an email autolink matchlen = scan_autolink_email(&subj->input, subj->pos); if (matchlen > 0) { - gh_buf mail_url = GH_BUF_INIT; - contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - gh_buf_puts(&mail_url, "mailto:"); - gh_buf_put(&mail_url, contents.data, contents.len); - return make_link( make_str_with_entities(&contents), - chunk_buf_detach(&mail_url), - chunk_literal("") + contents, + chunk_literal(""), + 1 ); } @@ -790,7 +795,7 @@ static inl* handle_left_bracket(subject* subj) title = chunk_dup(&subj->input, starttitle, endtitle - starttitle); lab = parse_chunk_inlines(&rawlabel, NULL); - return make_link(lab, url, title); + return make_link(lab, url, title, 0); 
} else { // if we get here, we matched a label but didn't get further: subj->pos = endlabel; @@ -821,7 +826,7 @@ static inl* handle_left_bracket(subject* subj) ref = lookup_reference(subj->reference_map, &reflabel); if (ref != NULL) { // found lab = parse_chunk_inlines(&rawlabel, NULL); - result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title)); + result = make_link_from_reference(lab, ref); } else { subj->pos = endlabel; lab = parse_chunk_inlines(&rawlabel, subj->reference_map); -- cgit v1.2.3 From 9830d3a05a374a0d05676301bd4065917b59ad53 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 17:42:12 +0200 Subject: 430/11 --- src/inlines.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index a0dcac9..599be84 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -21,7 +21,6 @@ reference* make_reference(chunk *label, chunk *url, chunk *title); static unsigned char *clean_url(chunk *url, int is_email); static unsigned char *clean_title(chunk *title); -inline static unsigned char *chunk_to_cstr(chunk *c); inline static void chunk_free(chunk *c); inline static void chunk_trim(chunk *c); @@ -37,6 +36,8 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap); static int subject_find_special_char(subject *subj); +static void normalize_whitespace(gh_buf *s); + extern void free_reference(reference *ref) { free(ref->label); free(ref->url); @@ -62,19 +63,10 @@ extern void free_reference_map(reference **refmap) { static unsigned char *normalize_reference(chunk *ref) { gh_buf normalized = GH_BUF_INIT; - int r, w; utf8proc_case_fold(&normalized, ref->data, ref->len); gh_buf_trim(&normalized); - - for (r = 0, w = 0; r < normalized.size; ++r) { - if (r && gh_buf_at(&normalized, r - 1) == ' ') { - while (gh_buf_at(&normalized, r) == ' ') - r++; - } - - 
normalized.ptr[w++] = normalized.ptr[r]; - } + normalize_whitespace(&normalized); return gh_buf_detach(&normalized); } -- cgit v1.2.3 From d8f44f1e4f0bd944ab43e6434a1579d670ed66cf Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 17:49:13 +0200 Subject: 433/8 --- src/inlines.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 599be84..8e2e683 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -591,6 +591,9 @@ static unsigned char *clean_url(chunk *url, int is_email) chunk_trim(url); + if (url->len == 0) + return NULL; + if (is_email) gh_buf_puts(&buf, "mailto:"); -- cgit v1.2.3 From 543c2c94d71adee42c7bd2f8027d75c87ed8120d Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:38:14 +0200 Subject: Rename to strbuf --- src/inlines.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 8e2e683..33973df 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -25,7 +25,7 @@ inline static void chunk_free(chunk *c); inline static void chunk_trim(chunk *c); inline static chunk chunk_literal(const char *data); -inline static chunk chunk_buf_detach(gh_buf *buf); +inline static chunk chunk_buf_detach(strbuf *buf); inline static chunk chunk_dup(const chunk *ch, int pos, int len); static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); @@ -33,10 +33,10 @@ static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); static int parse_inline(subject* subj, inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); -static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap); +static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap); static int subject_find_special_char(subject *subj); -static void normalize_whitespace(gh_buf *s); +static void normalize_whitespace(strbuf *s); 
extern void free_reference(reference *ref) { free(ref->label); @@ -62,13 +62,13 @@ extern void free_reference_map(reference **refmap) { // remove leading/trailing whitespace, case fold static unsigned char *normalize_reference(chunk *ref) { - gh_buf normalized = GH_BUF_INIT; + strbuf normalized = GH_BUF_INIT; utf8proc_case_fold(&normalized, ref->data, ref->len); - gh_buf_trim(&normalized); + strbuf_trim(&normalized); normalize_whitespace(&normalized); - return gh_buf_detach(&normalized); + return strbuf_detach(&normalized); } // Returns reference if refmap contains a reference with matching @@ -218,7 +218,7 @@ inline static inl* append_inlines(inl* a, inl* b) return a; } -static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap) +static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap) { e->input.data = buffer->ptr; e->input.len = buffer->size; @@ -309,7 +309,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) // Destructively modify string, collapsing consecutive // space and newline characters into a single space. -static void normalize_whitespace(gh_buf *s) +static void normalize_whitespace(strbuf *s) { bool last_char_was_space = false; int r, w; @@ -331,7 +331,7 @@ static void normalize_whitespace(gh_buf *s) } } - gh_buf_truncate(s, w); + strbuf_truncate(s, w); } // Parse backtick code section or raw backticks, return an inline. 
@@ -346,10 +346,10 @@ static inl* handle_backticks(subject *subj) subj->pos = startpos; // rewind return make_str(openticks); } else { - gh_buf buf = GH_BUF_INIT; + strbuf buf = GH_BUF_INIT; - gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); - gh_buf_trim(&buf); + strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); + strbuf_trim(&buf); normalize_whitespace(&buf); return make_code(chunk_buf_detach(&buf)); @@ -569,7 +569,7 @@ static inl *make_str_with_entities(chunk *content) } // Destructively unescape a string: remove backslashes before punctuation chars. -extern void unescape_buffer(gh_buf *buf) +extern void unescape_buffer(strbuf *buf) { int r, w; @@ -580,14 +580,14 @@ extern void unescape_buffer(gh_buf *buf) buf->ptr[w++] = buf->ptr[r]; } - gh_buf_truncate(buf, w); + strbuf_truncate(buf, w); } // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. static unsigned char *clean_url(chunk *url, int is_email) { - gh_buf buf = GH_BUF_INIT; + strbuf buf = GH_BUF_INIT; chunk_trim(url); @@ -595,22 +595,22 @@ static unsigned char *clean_url(chunk *url, int is_email) return NULL; if (is_email) - gh_buf_puts(&buf, "mailto:"); + strbuf_puts(&buf, "mailto:"); if (url->data[0] == '<' && url->data[url->len - 1] == '>') { - gh_buf_put(&buf, url->data + 1, url->len - 2); + strbuf_put(&buf, url->data + 1, url->len - 2); } else { - gh_buf_put(&buf, url->data, url->len); + strbuf_put(&buf, url->data, url->len); } unescape_buffer(&buf); - return gh_buf_detach(&buf); + return strbuf_detach(&buf); } // Clean a title: remove surrounding quotes and remove \ that escape punctuation. 
static unsigned char *clean_title(chunk *title) { - gh_buf buf = GH_BUF_INIT; + strbuf buf = GH_BUF_INIT; unsigned char first, last; if (title->len == 0) @@ -623,13 +623,13 @@ static unsigned char *clean_title(chunk *title) if ((first == '\'' && last == '\'') || (first == '(' && last == ')') || (first == '"' && last == '"')) { - gh_buf_set(&buf, title->data + 1, title->len - 2); + strbuf_set(&buf, title->data + 1, title->len - 2); } else { - gh_buf_set(&buf, title->data, title->len); + strbuf_set(&buf, title->data, title->len); } unescape_buffer(&buf); - return gh_buf_detach(&buf); + return strbuf_detach(&buf); } // Parse an autolink or HTML tag. @@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last) return 1; } -extern inl* parse_inlines(gh_buf *input, reference** refmap) +extern inl* parse_inlines(strbuf *input, reference** refmap) { subject subj; subject_from_buf(&subj, input, refmap); @@ -993,7 +993,7 @@ void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. 
-extern int parse_reference(gh_buf *input, reference** refmap) +extern int parse_reference(strbuf *input, reference** refmap) { subject subj; -- cgit v1.2.3 From 647b15968c95ec268d6d728eea73756c7ba648a8 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:42:49 +0200 Subject: Rename inl --- src/inlines.c | 88 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 44 insertions(+), 44 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 33973df..301125e 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -28,9 +28,9 @@ inline static chunk chunk_literal(const char *data); inline static chunk chunk_buf_detach(strbuf *buf); inline static chunk chunk_dup(const chunk *ch, int pos, int len); -static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); -static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); -static int parse_inline(subject* subj, inl ** last); +static struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static struct inl *parse_inlines_while(subject* subj, int (*f)(subject*)); +static int parse_inline(subject* subj, struct inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap); @@ -108,9 +108,9 @@ extern void add_reference(reference** refmap, reference* ref) } } -inline static inl* make_link_from_reference(inl* label, reference *ref) +inline static struct inl* make_link_from_reference(struct inl* label, reference *ref) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = INL_LINK; e->content.linkable.label = label; e->content.linkable.url = strdup(ref->url); @@ -120,9 +120,9 @@ inline static inl* make_link_from_reference(inl* label, reference *ref) } // Create an inline with a linkable string value. 
-inline static inl* make_link(inl* label, chunk url, chunk title, int is_email) +inline static struct inl* make_link(struct inl* label, chunk url, chunk title, int is_email) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = INL_LINK; e->content.linkable.label = label; e->content.linkable.url = clean_url(&url, is_email); @@ -131,9 +131,9 @@ inline static inl* make_link(inl* label, chunk url, chunk title, int is_email) return e; } -inline static inl* make_inlines(int t, inl* contents) +inline static struct inl* make_inlines(int t, struct inl* contents) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = t; e->content.inlines = contents; e->next = NULL; @@ -141,9 +141,9 @@ inline static inl* make_inlines(int t, inl* contents) } // Create an inline with a literal string value. -inline static inl* make_literal(int t, chunk s) +inline static struct inl* make_literal(int t, chunk s) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = t; e->content.literal = s; e->next = NULL; @@ -151,9 +151,9 @@ inline static inl* make_literal(int t, chunk s) } // Create an inline with no value. -inline static inl* make_simple(int t) +inline static struct inl* make_simple(int t) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = t; e->next = NULL; return e; @@ -170,9 +170,9 @@ inline static inl* make_simple(int t) #define make_strong(contents) make_inlines(INL_STRONG, contents) // Free an inline list. -extern void free_inlines(inl* e) +extern void free_inlines(struct inl* e) { - inl * next; + struct inl * next; while (e != NULL) { switch (e->tag){ case INL_STRING: @@ -205,12 +205,12 @@ extern void free_inlines(inl* e) // Append inline list b to the end of inline list a. // Return pointer to head of new list. 
-inline static inl* append_inlines(inl* a, inl* b) +inline static struct inl* append_inlines(struct inl* a, struct inl* b) { if (a == NULL) { // NULL acts like an empty list return b; } - inl* cur = a; + struct inl* cur = a; while (cur->next) { cur = cur->next; } @@ -336,7 +336,7 @@ static void normalize_whitespace(strbuf *s) // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. -static inl* handle_backticks(subject *subj) +static struct inl* handle_backticks(subject *subj) { chunk openticks = take_while(subj, isbacktick); int startpos = subj->pos; @@ -382,15 +382,15 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) // Parse strong/emph or a fallback. // Assumes the subject has '_' or '*' at the current position. -static inl* handle_strong_emph(subject* subj, char c) +static struct inl* handle_strong_emph(subject* subj, char c) { bool can_open, can_close; - inl * result = NULL; - inl ** last = malloc(sizeof(inl *)); - inl * new; - inl * il; - inl * first_head = NULL; - inl * first_close = NULL; + struct inl * result = NULL; + struct inl ** last = malloc(sizeof(struct inl *)); + struct inl * new; + struct inl * il; + struct inl * first_head = NULL; + struct inl * first_close = NULL; int first_close_delims = 0; int numdelims; @@ -508,7 +508,7 @@ done: } // Parse backslash-escape or just a backslash, returning an inline. -static inl* handle_backslash(subject *subj) +static struct inl* handle_backslash(subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); @@ -525,10 +525,10 @@ static inl* handle_backslash(subject *subj) // Parse an entity or a regular "&" string. // Assumes the subject has an '&' character at the current position. 
-static inl* handle_entity(subject* subj) +static struct inl* handle_entity(subject* subj) { int match; - inl *result; + struct inl *result; match = scan_entity(&subj->input, subj->pos); if (match) { result = make_entity(chunk_dup(&subj->input, subj->pos, match)); @@ -542,10 +542,10 @@ static inl* handle_entity(subject* subj) // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. -static inl *make_str_with_entities(chunk *content) +static struct inl *make_str_with_entities(chunk *content) { - inl *result = NULL; - inl *new; + struct inl *result = NULL; + struct inl *new; int searchpos; char c; subject subj; @@ -634,7 +634,7 @@ static unsigned char *clean_title(chunk *title) // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. -static inl* handle_pointy_brace(subject* subj) +static struct inl* handle_pointy_brace(subject* subj) { int matchlen = 0; chunk contents; @@ -693,7 +693,7 @@ static inl* handle_pointy_brace(subject* subj) static int link_label(subject* subj, chunk *raw_label) { int nestlevel = 0; - inl* tmp = NULL; + struct inl* tmp = NULL; int startpos = subj->pos; if (subj->label_nestlevel) { @@ -751,10 +751,10 @@ static int link_label(subject* subj, chunk *raw_label) } // Parse a link or the link portion of an image, or return a fallback. -static inl* handle_left_bracket(subject* subj) +static struct inl* handle_left_bracket(subject* subj) { - inl *lab = NULL; - inl *result = NULL; + struct inl *lab = NULL; + struct inl *result = NULL; reference *ref; int n; int sps; @@ -838,7 +838,7 @@ static inl* handle_left_bracket(subject* subj) // Parse a hard or soft linebreak, returning an inline. // Assumes the subject has a newline at the current position. 
-static inl* handle_newline(subject *subj) +static struct inl* handle_newline(subject *subj) { int nlpos = subj->pos; // skip over newline @@ -862,16 +862,16 @@ inline static int not_eof(subject* subj) } // Parse inlines while a predicate is satisfied. Return inlines. -extern inl* parse_inlines_while(subject* subj, int (*f)(subject*)) +extern struct inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { - inl* result = NULL; - inl** last = &result; + struct inl* result = NULL; + struct inl** last = &result; while ((*f)(subj) && parse_inline(subj, last)) { } return result; } -inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap) { subject subj; subject_from_chunk(&subj, chunk, refmap); @@ -894,9 +894,9 @@ static int subject_find_special_char(subject *subj) // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. 
-static int parse_inline(subject* subj, inl ** last) +static int parse_inline(subject* subj, struct inl ** last) { - inl* new = NULL; + struct inl* new = NULL; chunk contents; unsigned char c; int endpos; @@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last) return 1; } -extern inl* parse_inlines(strbuf *input, reference** refmap) +extern struct inl* parse_inlines(strbuf *input, reference** refmap) { subject subj; subject_from_buf(&subj, input, refmap); -- cgit v1.2.3 From 9e4855365b920c2a80b0f1ab6937280f0b504334 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:45:44 +0200 Subject: Rename `inl` --- src/inlines.c | 88 +++++++++++++++++++++++++++++------------------------------ 1 file changed, 44 insertions(+), 44 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 301125e..6bb89da 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -28,9 +28,9 @@ inline static chunk chunk_literal(const char *data); inline static chunk chunk_buf_detach(strbuf *buf); inline static chunk chunk_dup(const chunk *ch, int pos, int len); -static struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap); -static struct inl *parse_inlines_while(subject* subj, int (*f)(subject*)); -static int parse_inline(subject* subj, struct inl ** last); +static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*)); +static int parse_inline(subject* subj, node_inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap); @@ -108,9 +108,9 @@ extern void add_reference(reference** refmap, reference* ref) } } -inline static struct inl* make_link_from_reference(struct inl* label, reference *ref) +inline static node_inl* make_link_from_reference(node_inl* label, reference *ref) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + 
node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = INL_LINK; e->content.linkable.label = label; e->content.linkable.url = strdup(ref->url); @@ -120,9 +120,9 @@ inline static struct inl* make_link_from_reference(struct inl* label, reference } // Create an inline with a linkable string value. -inline static struct inl* make_link(struct inl* label, chunk url, chunk title, int is_email) +inline static node_inl* make_link(node_inl* label, chunk url, chunk title, int is_email) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = INL_LINK; e->content.linkable.label = label; e->content.linkable.url = clean_url(&url, is_email); @@ -131,9 +131,9 @@ inline static struct inl* make_link(struct inl* label, chunk url, chunk title, i return e; } -inline static struct inl* make_inlines(int t, struct inl* contents) +inline static node_inl* make_inlines(int t, node_inl* contents) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = t; e->content.inlines = contents; e->next = NULL; @@ -141,9 +141,9 @@ inline static struct inl* make_inlines(int t, struct inl* contents) } // Create an inline with a literal string value. -inline static struct inl* make_literal(int t, chunk s) +inline static node_inl* make_literal(int t, chunk s) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = t; e->content.literal = s; e->next = NULL; @@ -151,9 +151,9 @@ inline static struct inl* make_literal(int t, chunk s) } // Create an inline with no value. 
-inline static struct inl* make_simple(int t) +inline static node_inl* make_simple(int t) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = t; e->next = NULL; return e; @@ -170,9 +170,9 @@ inline static struct inl* make_simple(int t) #define make_strong(contents) make_inlines(INL_STRONG, contents) // Free an inline list. -extern void free_inlines(struct inl* e) +extern void free_inlines(node_inl* e) { - struct inl * next; + node_inl * next; while (e != NULL) { switch (e->tag){ case INL_STRING: @@ -205,12 +205,12 @@ extern void free_inlines(struct inl* e) // Append inline list b to the end of inline list a. // Return pointer to head of new list. -inline static struct inl* append_inlines(struct inl* a, struct inl* b) +inline static node_inl* append_inlines(node_inl* a, node_inl* b) { if (a == NULL) { // NULL acts like an empty list return b; } - struct inl* cur = a; + node_inl* cur = a; while (cur->next) { cur = cur->next; } @@ -336,7 +336,7 @@ static void normalize_whitespace(strbuf *s) // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. -static struct inl* handle_backticks(subject *subj) +static node_inl* handle_backticks(subject *subj) { chunk openticks = take_while(subj, isbacktick); int startpos = subj->pos; @@ -382,15 +382,15 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) // Parse strong/emph or a fallback. // Assumes the subject has '_' or '*' at the current position. 
-static struct inl* handle_strong_emph(subject* subj, char c) +static node_inl* handle_strong_emph(subject* subj, char c) { bool can_open, can_close; - struct inl * result = NULL; - struct inl ** last = malloc(sizeof(struct inl *)); - struct inl * new; - struct inl * il; - struct inl * first_head = NULL; - struct inl * first_close = NULL; + node_inl * result = NULL; + node_inl ** last = malloc(sizeof(node_inl *)); + node_inl * new; + node_inl * il; + node_inl * first_head = NULL; + node_inl * first_close = NULL; int first_close_delims = 0; int numdelims; @@ -508,7 +508,7 @@ done: } // Parse backslash-escape or just a backslash, returning an inline. -static struct inl* handle_backslash(subject *subj) +static node_inl* handle_backslash(subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); @@ -525,10 +525,10 @@ static struct inl* handle_backslash(subject *subj) // Parse an entity or a regular "&" string. // Assumes the subject has an '&' character at the current position. -static struct inl* handle_entity(subject* subj) +static node_inl* handle_entity(subject* subj) { int match; - struct inl *result; + node_inl *result; match = scan_entity(&subj->input, subj->pos); if (match) { result = make_entity(chunk_dup(&subj->input, subj->pos, match)); @@ -542,10 +542,10 @@ static struct inl* handle_entity(subject* subj) // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. -static struct inl *make_str_with_entities(chunk *content) +static node_inl *make_str_with_entities(chunk *content) { - struct inl *result = NULL; - struct inl *new; + node_inl *result = NULL; + node_inl *new; int searchpos; char c; subject subj; @@ -634,7 +634,7 @@ static unsigned char *clean_title(chunk *title) // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. 
-static struct inl* handle_pointy_brace(subject* subj) +static node_inl* handle_pointy_brace(subject* subj) { int matchlen = 0; chunk contents; @@ -693,7 +693,7 @@ static struct inl* handle_pointy_brace(subject* subj) static int link_label(subject* subj, chunk *raw_label) { int nestlevel = 0; - struct inl* tmp = NULL; + node_inl* tmp = NULL; int startpos = subj->pos; if (subj->label_nestlevel) { @@ -751,10 +751,10 @@ static int link_label(subject* subj, chunk *raw_label) } // Parse a link or the link portion of an image, or return a fallback. -static struct inl* handle_left_bracket(subject* subj) +static node_inl* handle_left_bracket(subject* subj) { - struct inl *lab = NULL; - struct inl *result = NULL; + node_inl *lab = NULL; + node_inl *result = NULL; reference *ref; int n; int sps; @@ -838,7 +838,7 @@ static struct inl* handle_left_bracket(subject* subj) // Parse a hard or soft linebreak, returning an inline. // Assumes the subject has a newline at the current position. -static struct inl* handle_newline(subject *subj) +static node_inl* handle_newline(subject *subj) { int nlpos = subj->pos; // skip over newline @@ -862,16 +862,16 @@ inline static int not_eof(subject* subj) } // Parse inlines while a predicate is satisfied. Return inlines. -extern struct inl* parse_inlines_while(subject* subj, int (*f)(subject*)) +extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { - struct inl* result = NULL; - struct inl** last = &result; + node_inl* result = NULL; + node_inl** last = &result; while ((*f)(subj) && parse_inline(subj, last)) { } return result; } -struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap) { subject subj; subject_from_chunk(&subj, chunk, refmap); @@ -894,9 +894,9 @@ static int subject_find_special_char(subject *subj) // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. 
// Return 0 if no inline can be parsed, 1 otherwise. -static int parse_inline(subject* subj, struct inl ** last) +static int parse_inline(subject* subj, node_inl ** last) { - struct inl* new = NULL; + node_inl* new = NULL; chunk contents; unsigned char c; int endpos; @@ -971,7 +971,7 @@ static int parse_inline(subject* subj, struct inl ** last) return 1; } -extern struct inl* parse_inlines(strbuf *input, reference** refmap) +extern node_inl* parse_inlines(strbuf *input, reference** refmap) { subject subj; subject_from_buf(&subj, input, refmap); -- cgit v1.2.3 From add5dd1b9a9ba8c58cdc6ca0bb62d287acb56278 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 19:40:27 +0200 Subject: Remove warnings --- src/inlines.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 6bb89da..5e0f3e5 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -108,13 +108,26 @@ extern void add_reference(reference** refmap, reference* ref) } } +static unsigned char *bufdup(const unsigned char *buf) +{ + unsigned char *new = NULL; + + if (!buf) { + int len = strlen((char *)buf); + new = malloc(len + 1); + memcpy(new, buf, len + 1); + } + + return new; +} + inline static node_inl* make_link_from_reference(node_inl* label, reference *ref) { node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = INL_LINK; e->content.linkable.label = label; - e->content.linkable.url = strdup(ref->url); - e->content.linkable.title = ref->title ? 
strdup(ref->title) : NULL; + e->content.linkable.url = bufdup(ref->url); + e->content.linkable.title = bufdup(ref->title); e->next = NULL; return e; } -- cgit v1.2.3 From 278b89d092cae8fe9cdd6346c69512886d36abbd Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 20:04:21 +0200 Subject: Silly me --- src/inlines.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 5e0f3e5..6b17027 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -112,7 +112,7 @@ static unsigned char *bufdup(const unsigned char *buf) { unsigned char *new = NULL; - if (!buf) { + if (buf) { int len = strlen((char *)buf); new = malloc(len + 1); memcpy(new, buf, len + 1); -- cgit v1.2.3 From 61e3e606e64221eaa5cf3d83dc598d5a42818d10 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Sat, 6 Sep 2014 20:48:05 +0200 Subject: UTF8-aware detabbing and entity handling --- src/inlines.c | 63 ++++++++++++++++++++++++----------------------------------- 1 file changed, 26 insertions(+), 37 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 6b17027..7b27150 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -5,6 +5,8 @@ #include #include "stmd.h" +#include "html/houdini.h" +#include "utf8.h" #include "uthash.h" #include "scanners.h" @@ -176,7 +178,6 @@ inline static node_inl* make_simple(int t) #define make_str(s) make_literal(INL_STRING, s) #define make_code(s) make_literal(INL_CODE, s) #define make_raw_html(s) make_literal(INL_RAW_HTML, s) -#define make_entity(s) make_literal(INL_ENTITY, s) #define make_linebreak() make_simple(INL_LINEBREAK) #define make_softbreak() make_simple(INL_SOFTBREAK) #define make_emph(contents) make_inlines(INL_EMPH, contents) @@ -191,7 +192,6 @@ extern void free_inlines(node_inl* e) case INL_STRING: case INL_RAW_HTML: case INL_CODE: - case INL_ENTITY: chunk_free(&e->content.literal); break; case INL_LINEBREAK: @@ -540,45 +540,34 @@ static node_inl* 
handle_backslash(subject *subj) // Assumes the subject has an '&' character at the current position. static node_inl* handle_entity(subject* subj) { - int match; - node_inl *result; - match = scan_entity(&subj->input, subj->pos); - if (match) { - result = make_entity(chunk_dup(&subj->input, subj->pos, match)); - subj->pos += match; - } else { - advance(subj); - result = make_str(chunk_literal("&")); - } - return result; + strbuf ent = GH_BUF_INIT; + size_t len; + + advance(subj); + + len = houdini_unescape_ent(&ent, + subj->input.data + subj->pos, + subj->input.len - subj->pos + ); + + if (len == 0) + return make_str(chunk_literal("&")); + + subj->pos += len; + return make_str(chunk_buf_detach(&ent)); } // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. static node_inl *make_str_with_entities(chunk *content) { - node_inl *result = NULL; - node_inl *new; - int searchpos; - char c; - subject subj; - - subject_from_chunk(&subj, content, NULL); + strbuf unescaped = GH_BUF_INIT; - while ((c = peek_char(&subj))) { - switch (c) { - case '&': - new = handle_entity(&subj); - break; - default: - searchpos = chunk_strchr(&subj.input, '&', subj.pos); - new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos)); - subj.pos = searchpos; - } - result = append_inlines(result, new); + if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { + return make_str(chunk_buf_detach(&unescaped)); + } else { + return make_str(*content); } - - return result; } // Destructively unescape a string: remove backslashes before punctuation chars. 
@@ -611,9 +600,9 @@ static unsigned char *clean_url(chunk *url, int is_email) strbuf_puts(&buf, "mailto:"); if (url->data[0] == '<' && url->data[url->len - 1] == '>') { - strbuf_put(&buf, url->data + 1, url->len - 2); + houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); } else { - strbuf_put(&buf, url->data, url->len); + houdini_unescape_html_f(&buf, url->data, url->len); } unescape_buffer(&buf); @@ -636,9 +625,9 @@ static unsigned char *clean_title(chunk *title) if ((first == '\'' && last == '\'') || (first == '(' && last == ')') || (first == '"' && last == '"')) { - strbuf_set(&buf, title->data + 1, title->len - 2); + houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); } else { - strbuf_set(&buf, title->data, title->len); + houdini_unescape_html_f(&buf, title->data, title->len); } unescape_buffer(&buf); -- cgit v1.2.3 From 798f58a2b614280201141b398c8e498cecc8ab5e Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Sat, 6 Sep 2014 21:17:23 +0200 Subject: This is going well --- src/inlines.c | 68 +++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 27 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 7b27150..aa0e13e 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -20,8 +20,9 @@ typedef struct Subject { reference* lookup_reference(reference** refmap, chunk *label); reference* make_reference(chunk *label, chunk *url, chunk *title); -static unsigned char *clean_url(chunk *url, int is_email); +static unsigned char *clean_url(chunk *url); static unsigned char *clean_title(chunk *title); +static unsigned char *clean_autolink(chunk *url, int is_email); inline static void chunk_free(chunk *c); inline static void chunk_trim(chunk *c); @@ -91,7 +92,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title) reference *ref; ref = malloc(sizeof(reference)); ref->label = normalize_reference(label); - ref->url = clean_url(url, 0); + ref->url = clean_url(url); 
ref->title = clean_title(title); return ref; } @@ -123,27 +124,31 @@ static unsigned char *bufdup(const unsigned char *buf) return new; } -inline static node_inl* make_link_from_reference(node_inl* label, reference *ref) +static inline node_inl *make_link_(node_inl *label, unsigned char *url, unsigned char *title) { node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = INL_LINK; e->content.linkable.label = label; - e->content.linkable.url = bufdup(ref->url); - e->content.linkable.title = bufdup(ref->title); + e->content.linkable.url = url; + e->content.linkable.title = title; e->next = NULL; return e; } +inline static node_inl* make_ref_link(node_inl* label, reference *ref) +{ + return make_link_(label, bufdup(ref->url), bufdup(ref->title)); +} + +inline static node_inl* make_autolink(node_inl* label, chunk url, int is_email) +{ + return make_link_(label, clean_autolink(&url, is_email), NULL); +} + // Create an inline with a linkable string value. -inline static node_inl* make_link(node_inl* label, chunk url, chunk title, int is_email) +inline static node_inl* make_link(node_inl* label, chunk url, chunk title) { - node_inl* e = (node_inl*) malloc(sizeof(node_inl)); - e->tag = INL_LINK; - e->content.linkable.label = label; - e->content.linkable.url = clean_url(&url, is_email); - e->content.linkable.title = clean_title(&title); - e->next = NULL; - return e; + return make_link_(label, clean_url(&url), clean_title(&title)); } inline static node_inl* make_inlines(int t, node_inl* contents) @@ -587,7 +592,7 @@ extern void unescape_buffer(strbuf *buf) // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. 
-static unsigned char *clean_url(chunk *url, int is_email) +static unsigned char *clean_url(chunk *url) { strbuf buf = GH_BUF_INIT; @@ -596,9 +601,6 @@ static unsigned char *clean_url(chunk *url, int is_email) if (url->len == 0) return NULL; - if (is_email) - strbuf_puts(&buf, "mailto:"); - if (url->data[0] == '<' && url->data[url->len - 1] == '>') { houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); } else { @@ -609,6 +611,22 @@ static unsigned char *clean_url(chunk *url, int is_email) return strbuf_detach(&buf); } +static unsigned char *clean_autolink(chunk *url, int is_email) +{ + strbuf buf = GH_BUF_INIT; + + chunk_trim(url); + + if (url->len == 0) + return NULL; + + if (is_email) + strbuf_puts(&buf, "mailto:"); + + houdini_unescape_html_f(&buf, url->data, url->len); + return strbuf_detach(&buf); +} + // Clean a title: remove surrounding quotes and remove \ that escape punctuation. static unsigned char *clean_title(chunk *title) { @@ -649,11 +667,9 @@ static node_inl* handle_pointy_brace(subject* subj) contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_link( + return make_autolink( make_str_with_entities(&contents), - contents, - chunk_literal(""), - 0 + contents, 0 ); } @@ -663,11 +679,9 @@ static node_inl* handle_pointy_brace(subject* subj) contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; - return make_link( + return make_autolink( make_str_with_entities(&contents), - contents, - chunk_literal(""), - 1 + contents, 1 ); } @@ -792,7 +806,7 @@ static node_inl* handle_left_bracket(subject* subj) title = chunk_dup(&subj->input, starttitle, endtitle - starttitle); lab = parse_chunk_inlines(&rawlabel, NULL); - return make_link(lab, url, title, 0); + return make_link(lab, url, title); } else { // if we get here, we matched a label but didn't get further: subj->pos = endlabel; @@ -823,7 +837,7 @@ static node_inl* handle_left_bracket(subject* subj) ref = 
lookup_reference(subj->reference_map, &reflabel); if (ref != NULL) { // found lab = parse_chunk_inlines(&rawlabel, NULL); - result = make_link_from_reference(lab, ref); + result = make_ref_link(lab, ref); } else { subj->pos = endlabel; lab = parse_chunk_inlines(&rawlabel, subj->reference_map); -- cgit v1.2.3 From 94a79a605f3e76a43f1f87a5044f6761b99e5ca5 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 10 Sep 2014 18:33:27 +0200 Subject: Cleanup reference implementation --- src/inlines.c | 176 ++++++++-------------------------------------------------- 1 file changed, 22 insertions(+), 154 deletions(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index aa0e13e..3040f09 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -7,110 +7,23 @@ #include "stmd.h" #include "html/houdini.h" #include "utf8.h" -#include "uthash.h" #include "scanners.h" typedef struct Subject { chunk input; int pos; - int label_nestlevel; - reference** reference_map; + int label_nestlevel; + reference_map *refmap; } subject; -reference* lookup_reference(reference** refmap, chunk *label); -reference* make_reference(chunk *label, chunk *url, chunk *title); - -static unsigned char *clean_url(chunk *url); -static unsigned char *clean_title(chunk *title); -static unsigned char *clean_autolink(chunk *url, int is_email); - -inline static void chunk_free(chunk *c); -inline static void chunk_trim(chunk *c); - -inline static chunk chunk_literal(const char *data); -inline static chunk chunk_buf_detach(strbuf *buf); -inline static chunk chunk_dup(const chunk *ch, int pos, int len); - -static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*)); static int parse_inline(subject* subj, node_inl ** last); -static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); -static void subject_from_buf(subject *e, 
strbuf *buffer, reference** refmap); +static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap); +static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap); static int subject_find_special_char(subject *subj); -static void normalize_whitespace(strbuf *s); - -extern void free_reference(reference *ref) { - free(ref->label); - free(ref->url); - free(ref->title); - free(ref); -} - -extern void free_reference_map(reference **refmap) { - /* free the hash table contents */ - reference *s; - reference *tmp; - if (refmap != NULL) { - HASH_ITER(hh, *refmap, s, tmp) { - HASH_DEL(*refmap, s); - free_reference(s); - } - free(refmap); - } -} - -// normalize reference: collapse internal whitespace to single space, -// remove leading/trailing whitespace, case fold -static unsigned char *normalize_reference(chunk *ref) -{ - strbuf normalized = GH_BUF_INIT; - - utf8proc_case_fold(&normalized, ref->data, ref->len); - strbuf_trim(&normalized); - normalize_whitespace(&normalized); - - return strbuf_detach(&normalized); -} - -// Returns reference if refmap contains a reference with matching -// label, otherwise NULL. 
-extern reference* lookup_reference(reference** refmap, chunk *label) -{ - reference *ref = NULL; - unsigned char *norm = normalize_reference(label); - if (refmap != NULL) { - HASH_FIND_STR(*refmap, (char*)norm, ref); - } - free(norm); - return ref; -} - -extern reference* make_reference(chunk *label, chunk *url, chunk *title) -{ - reference *ref; - ref = malloc(sizeof(reference)); - ref->label = normalize_reference(label); - ref->url = clean_url(url); - ref->title = clean_title(title); - return ref; -} - -extern void add_reference(reference** refmap, reference* ref) -{ - reference * t = NULL; - const char *label = (const char *)ref->label; - - HASH_FIND(hh, *refmap, label, strlen(label), t); - - if (t == NULL) { - HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref); - } else { - free_reference(ref); // we free this now since it won't be in the refmap - } -} - static unsigned char *bufdup(const unsigned char *buf) { unsigned char *new = NULL; @@ -236,26 +149,26 @@ inline static node_inl* append_inlines(node_inl* a, node_inl* b) return a; } -static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap) +static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap) { e->input.data = buffer->ptr; e->input.len = buffer->size; e->input.alloc = 0; e->pos = 0; e->label_nestlevel = 0; - e->reference_map = refmap; + e->refmap = refmap; chunk_rtrim(&e->input); } -static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap) +static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap) { e->input.data = chunk->data; e->input.len = chunk->len; e->input.alloc = 0; e->pos = 0; e->label_nestlevel = 0; - e->reference_map = refmap; + e->refmap = refmap; chunk_rtrim(&e->input); } @@ -325,33 +238,6 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) return (subj->pos); } -// Destructively modify string, collapsing consecutive -// space and newline characters into a single space. 
-static void normalize_whitespace(strbuf *s) -{ - bool last_char_was_space = false; - int r, w; - - for (r = 0, w = 0; r < s->size; ++r) { - switch (s->ptr[r]) { - case ' ': - case '\n': - if (last_char_was_space) - break; - - s->ptr[w++] = ' '; - last_char_was_space = true; - break; - - default: - s->ptr[w++] = s->ptr[r]; - last_char_was_space = false; - } - } - - strbuf_truncate(s, w); -} - // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. static node_inl* handle_backticks(subject *subj) @@ -368,7 +254,7 @@ static node_inl* handle_backticks(subject *subj) strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); strbuf_trim(&buf); - normalize_whitespace(&buf); + strbuf_normalize_whitespace(&buf); return make_code(chunk_buf_detach(&buf)); } @@ -575,24 +461,9 @@ static node_inl *make_str_with_entities(chunk *content) } } -// Destructively unescape a string: remove backslashes before punctuation chars. -extern void unescape_buffer(strbuf *buf) -{ - int r, w; - - for (r = 0, w = 0; r < buf->size; ++r) { - if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1])) - continue; - - buf->ptr[w++] = buf->ptr[r]; - } - - strbuf_truncate(buf, w); -} - // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -static unsigned char *clean_url(chunk *url) +unsigned char *clean_url(chunk *url) { strbuf buf = GH_BUF_INIT; @@ -607,11 +478,11 @@ static unsigned char *clean_url(chunk *url) houdini_unescape_html_f(&buf, url->data, url->len); } - unescape_buffer(&buf); + strbuf_unescape(&buf); return strbuf_detach(&buf); } -static unsigned char *clean_autolink(chunk *url, int is_email) +unsigned char *clean_autolink(chunk *url, int is_email) { strbuf buf = GH_BUF_INIT; @@ -628,7 +499,7 @@ static unsigned char *clean_autolink(chunk *url, int is_email) } // Clean a title: remove surrounding quotes and remove \ that escape punctuation. 
-static unsigned char *clean_title(chunk *title) +unsigned char *clean_title(chunk *title) { strbuf buf = GH_BUF_INIT; unsigned char first, last; @@ -648,7 +519,7 @@ static unsigned char *clean_title(chunk *title) houdini_unescape_html_f(&buf, title->data, title->len); } - unescape_buffer(&buf); + strbuf_unescape(&buf); return strbuf_detach(&buf); } @@ -810,7 +681,7 @@ static node_inl* handle_left_bracket(subject* subj) } else { // if we get here, we matched a label but didn't get further: subj->pos = endlabel; - lab = parse_chunk_inlines(&rawlabel, subj->reference_map); + lab = parse_chunk_inlines(&rawlabel, subj->refmap); result = append_inlines(make_str(chunk_literal("[")), append_inlines(lab, make_str(chunk_literal("]")))); @@ -834,13 +705,13 @@ static node_inl* handle_left_bracket(subject* subj) } // lookup rawlabel in subject->reference_map: - ref = lookup_reference(subj->reference_map, &reflabel); + ref = reference_lookup(subj->refmap, &reflabel); if (ref != NULL) { // found lab = parse_chunk_inlines(&rawlabel, NULL); result = make_ref_link(lab, ref); } else { subj->pos = endlabel; - lab = parse_chunk_inlines(&rawlabel, subj->reference_map); + lab = parse_chunk_inlines(&rawlabel, subj->refmap); result = append_inlines(make_str(chunk_literal("[")), append_inlines(lab, make_str(chunk_literal("]")))); } @@ -887,7 +758,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) return result; } -node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap) { subject subj; subject_from_chunk(&subj, chunk, refmap); @@ -987,7 +858,7 @@ static int parse_inline(subject* subj, node_inl ** last) return 1; } -extern node_inl* parse_inlines(strbuf *input, reference** refmap) +extern node_inl* parse_inlines(strbuf *input, reference_map *refmap) { subject subj; subject_from_buf(&subj, input, refmap); @@ -1009,7 +880,7 @@ void spnl(subject* subj) // Modify refmap if a reference is 
encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -extern int parse_reference(strbuf *input, reference** refmap) +int parse_reference_inline(strbuf *input, reference_map *refmap) { subject subj; @@ -1019,7 +890,6 @@ extern int parse_reference(strbuf *input, reference** refmap) int matchlen = 0; int beforetitle; - reference *new = NULL; subject_from_buf(&subj, input, NULL); @@ -1065,9 +935,7 @@ extern int parse_reference(strbuf *input, reference** refmap) return 0; } // insert reference into refmap - new = make_reference(&lab, &url, &title); - add_reference(refmap, new); - + reference_create(refmap, &lab, &url, &title); return subj.pos; } -- cgit v1.2.3 From 7c2a062cdf9c0514cdf32f4f8bd07cf52d183c8b Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 10 Sep 2014 19:46:34 +0200 Subject: Do not use strchr for span searches Strchr will return a valid pointer for '\0' when searching a static string, as the NULL byte is part of the string. 
--- src/inlines.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index 3040f09..cd2d124 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -767,10 +767,13 @@ node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap) static int subject_find_special_char(subject *subj) { + static const char CHARS[] = "\n\\`&_*[]<!"; + static const size_t CHARS_SIZE = sizeof(CHARS) - 1; + int n = subj->pos + 1; while (n < subj->input.len) { - if (strchr("\n\\`&_*[]<!", subj->input.data[n])) + if (memchr(CHARS, subj->input.data[n], CHARS_SIZE)) return n; n++; } -- cgit v1.2.3 From 118e3d3c39242225baa876319cdbfbb1adadc77b Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Mon, 15 Sep 2014 15:28:49 +0200 Subject: Cleanup external APIs --- src/inlines.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/inlines.c') diff --git a/src/inlines.c b/src/inlines.c index cd2d124..145825c 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -8,6 +8,7 @@ #include "html/houdini.h" #include "utf8.h" #include "scanners.h" +#include "inlines.h" typedef struct Subject { chunk input; -- cgit v1.2.3