summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/blocks.c270
-rw-r--r--src/buffer.c15
-rw-r--r--src/chunk.h9
-rw-r--r--src/cmark.h (renamed from src/stmd.h)12
-rw-r--r--src/html/html.c4
-rw-r--r--src/inlines.c518
-rw-r--r--src/main.c23
-rw-r--r--src/print.c38
-rw-r--r--src/references.c47
-rw-r--r--src/scanners.h2
-rw-r--r--src/scanners.re2
-rw-r--r--src/utf8.c39
-rw-r--r--src/utf8.h4
13 files changed, 500 insertions, 483 deletions
diff --git a/src/blocks.c b/src/blocks.c
index 5b38116..ae106d2 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -4,7 +4,7 @@
#include <stdbool.h>
#include <ctype.h>
-#include "stmd.h"
+#include "cmark.h"
#include "utf8.h"
#include "scanners.h"
#include "inlines.h"
@@ -19,15 +19,15 @@ static node_block* make_block(int tag, int start_line, int start_column)
{
node_block* e;
- e = malloc(sizeof(node_block));
- memset(e, 0x0, sizeof(*e));
-
- e->tag = tag;
- e->open = true;
- e->start_line = start_line;
- e->start_column = start_column;
- e->end_line = start_line;
- strbuf_init(&e->string_content, 32);
+ e = calloc(1, sizeof(*e));
+ if(e != NULL) {
+ e->tag = tag;
+ e->open = true;
+ e->start_line = start_line;
+ e->start_column = start_column;
+ e->end_line = start_line;
+ strbuf_init(&e->string_content, 32);
+ }
return e;
}
@@ -47,13 +47,13 @@ bool is_blank(strbuf *s, int offset)
{
while (offset < s->size) {
switch (s->ptr[offset]) {
- case '\n':
- return true;
- case ' ':
- offset++;
- break;
- default:
- return false;
+ case '\n':
+ return true;
+ case ' ':
+ offset++;
+ break;
+ default:
+ return false;
}
}
@@ -63,17 +63,17 @@ bool is_blank(strbuf *s, int offset)
static inline bool can_contain(int parent_type, int child_type)
{
return ( parent_type == BLOCK_DOCUMENT ||
- parent_type == BLOCK_BQUOTE ||
- parent_type == BLOCK_LIST_ITEM ||
- (parent_type == BLOCK_LIST && child_type == BLOCK_LIST_ITEM) );
+ parent_type == BLOCK_BQUOTE ||
+ parent_type == BLOCK_LIST_ITEM ||
+ (parent_type == BLOCK_LIST && child_type == BLOCK_LIST_ITEM) );
}
static inline bool accepts_lines(int block_type)
{
return (block_type == BLOCK_PARAGRAPH ||
- block_type == BLOCK_ATX_HEADER ||
- block_type == BLOCK_INDENTED_CODE ||
- block_type == BLOCK_FENCED_CODE);
+ block_type == BLOCK_ATX_HEADER ||
+ block_type == BLOCK_INDENTED_CODE ||
+ block_type == BLOCK_FENCED_CODE);
}
static void add_line(node_block* node_block, chunk *ch, int offset)
@@ -87,7 +87,7 @@ static void remove_trailing_blank_lines(strbuf *ln)
int i;
for (i = ln->size - 1; i >= 0; --i) {
- char c = ln->ptr[i];
+ unsigned char c = ln->ptr[i];
if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
break;
@@ -156,77 +156,77 @@ static void finalize(node_block* b, int line_number)
}
switch (b->tag) {
- case BLOCK_PARAGRAPH:
- pos = 0;
- while (strbuf_at(&b->string_content, 0) == '[' &&
- (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
-
- strbuf_drop(&b->string_content, pos);
- }
- if (is_blank(&b->string_content, 0)) {
- b->tag = BLOCK_REFERENCE_DEF;
- }
- break;
+ case BLOCK_PARAGRAPH:
+ pos = 0;
+ while (strbuf_at(&b->string_content, 0) == '[' &&
+ (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
- case BLOCK_INDENTED_CODE:
- remove_trailing_blank_lines(&b->string_content);
- strbuf_putc(&b->string_content, '\n');
- break;
-
- case BLOCK_FENCED_CODE:
- // first line of contents becomes info
- firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
-
- strbuf_init(&b->as.code.info, 0);
- houdini_unescape_html_f(
- &b->as.code.info,
- b->string_content.ptr,
- firstlinelen
+ strbuf_drop(&b->string_content, pos);
+ }
+ if (is_blank(&b->string_content, 0)) {
+ b->tag = BLOCK_REFERENCE_DEF;
+ }
+ break;
+
+ case BLOCK_INDENTED_CODE:
+ remove_trailing_blank_lines(&b->string_content);
+ strbuf_putc(&b->string_content, '\n');
+ break;
+
+ case BLOCK_FENCED_CODE:
+ // first line of contents becomes info
+ firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
+
+ strbuf_init(&b->as.code.info, 0);
+ houdini_unescape_html_f(
+ &b->as.code.info,
+ b->string_content.ptr,
+ firstlinelen
);
- strbuf_drop(&b->string_content, firstlinelen + 1);
+ strbuf_drop(&b->string_content, firstlinelen + 1);
- strbuf_trim(&b->as.code.info);
- strbuf_unescape(&b->as.code.info);
- break;
+ strbuf_trim(&b->as.code.info);
+ strbuf_unescape(&b->as.code.info);
+ break;
- case BLOCK_LIST: // determine tight/loose status
- b->as.list.tight = true; // tight by default
- item = b->children;
+ case BLOCK_LIST: // determine tight/loose status
+ b->as.list.tight = true; // tight by default
+ item = b->children;
- while (item) {
- // check for non-final non-empty list item ending with blank line:
- if (item->last_line_blank && item->next) {
+ while (item) {
+ // check for non-final non-empty list item ending with blank line:
+ if (item->last_line_blank && item->next) {
+ b->as.list.tight = false;
+ break;
+ }
+ // recurse into children of list item, to see if there are
+ // spaces between them:
+ subitem = item->children;
+ while (subitem) {
+ if (ends_with_blank_line(subitem) &&
+ (item->next || subitem->next)) {
b->as.list.tight = false;
break;
}
- // recurse into children of list item, to see if there are
- // spaces between them:
- subitem = item->children;
- while (subitem) {
- if (ends_with_blank_line(subitem) &&
- (item->next || subitem->next)) {
- b->as.list.tight = false;
- break;
- }
- subitem = subitem->next;
- }
- if (!(b->as.list.tight)) {
- break;
- }
- item = item->next;
+ subitem = subitem->next;
+ }
+ if (!(b->as.list.tight)) {
+ break;
}
+ item = item->next;
+ }
- break;
+ break;
- default:
- break;
+ default:
+ break;
}
}
// Add a node_block as child of another. Return pointer to child.
static node_block* add_child(node_block* parent,
- int block_type, int start_line, int start_column)
+ int block_type, int start_line, int start_column)
{
assert(parent);
@@ -253,7 +253,7 @@ static node_block* add_child(node_block* parent,
}
// Free a node_block list and any children.
-void stmd_free_nodes(node_block *e)
+void cmark_free_nodes(node_block *e)
{
node_block * next;
while (e != NULL) {
@@ -265,7 +265,7 @@ void stmd_free_nodes(node_block *e)
} else if (e->tag == BLOCK_DOCUMENT) {
reference_map_free(e->as.document.refmap);
}
- stmd_free_nodes(e->children);
+ cmark_free_nodes(e->children);
free(e);
e = next;
}
@@ -276,14 +276,14 @@ void stmd_free_nodes(node_block *e)
void process_inlines(node_block* cur, reference_map *refmap)
{
switch (cur->tag) {
- case BLOCK_PARAGRAPH:
- case BLOCK_ATX_HEADER:
- case BLOCK_SETEXT_HEADER:
- cur->inline_content = parse_inlines(&cur->string_content, refmap);
- break;
-
- default:
- break;
+ case BLOCK_PARAGRAPH:
+ case BLOCK_ATX_HEADER:
+ case BLOCK_SETEXT_HEADER:
+ cur->inline_content = parse_inlines(&cur->string_content, refmap);
+ break;
+
+ default:
+ break;
}
node_block *child = cur->children;
@@ -310,14 +310,17 @@ static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr)
if (!isspace(peek_at(input, pos))) {
return 0;
}
- data = malloc(sizeof(struct ListData));
- data->marker_offset = 0; // will be adjusted later
- data->list_type = bullet;
- data->bullet_char = c;
- data->start = 1;
- data->delimiter = period;
- data->tight = false;
-
+ data = calloc(1, sizeof(*data));
+ if(data == NULL) {
+ return 0;
+ } else {
+ data->marker_offset = 0; // will be adjusted later
+ data->list_type = bullet;
+ data->bullet_char = c;
+ data->start = 1;
+ data->delimiter = period;
+ data->tight = false;
+ }
} else if (isdigit(c)) {
int start = 0;
@@ -332,13 +335,17 @@ static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr)
if (!isspace(peek_at(input, pos))) {
return 0;
}
- data = malloc(sizeof(struct ListData));
- data->marker_offset = 0; // will be adjusted later
- data->list_type = ordered;
- data->bullet_char = 0;
- data->start = start;
- data->delimiter = (c == '.' ? period : parens);
- data->tight = false;
+ data = calloc(1, sizeof(*data));
+ if(data == NULL) {
+ return 0;
+ } else {
+ data->marker_offset = 0; // will be adjusted later
+ data->list_type = ordered;
+ data->bullet_char = 0;
+ data->start = start;
+ data->delimiter = (c == '.' ? period : parens);
+ data->tight = false;
+ }
} else {
return 0;
}
@@ -355,9 +362,9 @@ static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr)
static int lists_match(struct ListData *list_data, struct ListData *item_data)
{
return (list_data->list_type == item_data->list_type &&
- list_data->delimiter == item_data->delimiter &&
- // list_data->marker_offset == item_data.marker_offset &&
- list_data->bullet_char == item_data->bullet_char);
+ list_data->delimiter == item_data->delimiter &&
+ // list_data->marker_offset == item_data.marker_offset &&
+ list_data->bullet_char == item_data->bullet_char);
}
static node_block *finalize_document(node_block *document, int linenum)
@@ -373,7 +380,7 @@ static node_block *finalize_document(node_block *document, int linenum)
return document;
}
-extern node_block *stmd_parse_file(FILE *f)
+extern node_block *cmark_parse_file(FILE *f)
{
strbuf line = GH_BUF_INIT;
unsigned char buffer[4096];
@@ -391,7 +398,7 @@ extern node_block *stmd_parse_file(FILE *f)
return finalize_document(document, linenum);
}
-extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len)
+extern node_block *cmark_parse_document(const unsigned char *buffer, size_t len)
{
strbuf line = GH_BUF_INIT;
int linenum = 1;
@@ -453,6 +460,10 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
int indent;
chunk input;
+ // Add a newline to the end if not present:
+ if (line->ptr[line->size - 1] != '\n') {
+ strbuf_putc(line, '\n');
+ }
input.data = line->ptr;
input.len = line->size;
@@ -486,7 +497,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
} else if (container->tag == BLOCK_LIST_ITEM) {
if (indent >= container->as.list.marker_offset +
- container->as.list.padding) {
+ container->as.list.padding) {
offset += container->as.list.marker_offset +
container->as.list.padding;
} else if (blank) {
@@ -506,7 +517,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
}
} else if (container->tag == BLOCK_ATX_HEADER ||
- container->tag == BLOCK_SETEXT_HEADER) {
+ container->tag == BLOCK_SETEXT_HEADER) {
// a header can never contain more than one line
all_matched = false;
@@ -550,7 +561,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
// unless last matched container is code node_block, try new container starts:
while (container->tag != BLOCK_FENCED_CODE && container->tag != BLOCK_INDENTED_CODE &&
- container->tag != BLOCK_HTML) {
+ container->tag != BLOCK_HTML) {
first_nonspace = offset;
while (peek_at(&input, first_nonspace) == ' ')
@@ -603,17 +614,17 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
// note, we don't adjust offset because the tag is part of the text
} else if (container->tag == BLOCK_PARAGRAPH &&
- (lev = scan_setext_header_line(&input, first_nonspace)) &&
- // check that there is only one line in the paragraph:
- strbuf_strrchr(&container->string_content, '\n',
- strbuf_len(&container->string_content) - 2) < 0) {
+ (lev = scan_setext_header_line(&input, first_nonspace)) &&
+ // check that there is only one line in the paragraph:
+ strbuf_strrchr(&container->string_content, '\n',
+ strbuf_len(&container->string_content) - 2) < 0) {
container->tag = BLOCK_SETEXT_HEADER;
container->as.header.level = lev;
offset = input.len - 1;
} else if (!(container->tag == BLOCK_PARAGRAPH && !all_matched) &&
- (matched = scan_hrule(&input, first_nonspace))) {
+ (matched = scan_hrule(&input, first_nonspace))) {
// it's only now that we know the line is not part of a setext header:
container = add_child(container, BLOCK_HRULE, line_number, first_nonspace + 1);
@@ -646,16 +657,16 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
data->marker_offset = indent;
if (container->tag != BLOCK_LIST ||
- !lists_match(&container->as.list, data)) {
+ !lists_match(&container->as.list, data)) {
container = add_child(container, BLOCK_LIST, line_number,
- first_nonspace + 1);
+ first_nonspace + 1);
memcpy(&container->as.list, data, sizeof(*data));
}
// add the list item
container = add_child(container, BLOCK_LIST_ITEM, line_number,
- first_nonspace + 1);
+ first_nonspace + 1);
/* TODO: static */
memcpy(&container->as.list, data, sizeof(*data));
free(data);
@@ -684,11 +695,11 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
// lists or breaking out of lists. we also don't set last_line_blank
// on an empty list item.
container->last_line_blank = (blank &&
- container->tag != BLOCK_BQUOTE &&
- container->tag != BLOCK_FENCED_CODE &&
- !(container->tag == BLOCK_LIST_ITEM &&
- container->children == NULL &&
- container->start_line == line_number));
+ container->tag != BLOCK_BQUOTE &&
+ container->tag != BLOCK_FENCED_CODE &&
+ !(container->tag == BLOCK_LIST_ITEM &&
+ container->children == NULL &&
+ container->start_line == line_number));
node_block *cont = container;
while (cont->parent) {
@@ -697,10 +708,10 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
}
if (cur != last_matched_container &&
- container == last_matched_container &&
- !blank &&
- cur->tag == BLOCK_PARAGRAPH &&
- strbuf_len(&cur->string_content) > 0) {
+ container == last_matched_container &&
+ !blank &&
+ cur->tag == BLOCK_PARAGRAPH &&
+ strbuf_len(&cur->string_content) > 0) {
add_line(cur, &input, offset);
@@ -721,7 +732,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
matched = 0;
if (indent <= 3 &&
- peek_at(&input, first_nonspace) == container->as.code.fence_char) {
+ peek_at(&input, first_nonspace) == container->as.code.fence_char) {
int fence_len = scan_close_code_fence(&input, first_nonspace);
if (fence_len > container->as.code.fence_length)
matched = 1;
@@ -767,4 +778,3 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr)
*curptr = container;
}
}
-
diff --git a/src/buffer.c b/src/buffer.c
index 7c2b86b..1cdcae8 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -15,10 +15,14 @@
unsigned char strbuf__initbuf[1];
unsigned char strbuf__oom[1];
-#define ENSURE_SIZE(b, d) \
- if ((d) > buf->asize && strbuf_grow(b, (d)) < 0)\
+#define ENSURE_SIZE(b, d) \
+ if ((d) > buf->asize && strbuf_grow(b, (d)) < 0) \
return -1;
+#ifndef MIN
+#define MIN(x,y) ((x<y) ? x : y)
+#endif
+
void strbuf_init(strbuf *buf, int initial_size)
{
buf->asize = 0;
@@ -111,8 +115,8 @@ int strbuf_set(strbuf *buf, const unsigned char *data, int len)
int strbuf_sets(strbuf *buf, const char *string)
{
return strbuf_set(buf,
- (const unsigned char *)string,
- string ? strlen(string) : 0);
+ (const unsigned char *)string,
+ string ? strlen(string) : 0);
}
int strbuf_putc(strbuf *buf, int c)
@@ -155,7 +159,7 @@ int strbuf_vprintf(strbuf *buf, const char *format, va_list ap)
(char *)buf->ptr + buf->size,
buf->asize - buf->size,
format, args
- );
+ );
if (len < 0) {
free(buf->ptr);
@@ -351,4 +355,3 @@ extern void strbuf_unescape(strbuf *buf)
strbuf_truncate(buf, w);
}
-
diff --git a/src/chunk.h b/src/chunk.h
index f37a2f3..015bbf9 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -59,10 +59,11 @@ static inline unsigned char *chunk_to_cstr(chunk *c)
{
unsigned char *str;
- str = malloc(c->len + 1);
- memcpy(str, c->data, c->len);
- str[c->len] = 0;
-
+ str = calloc(c->len + 1, sizeof(*str));
+ if(str != NULL) {
+ memcpy(str, c->data, c->len);
+ str[c->len] = 0;
+ }
return str;
}
diff --git a/src/stmd.h b/src/cmark.h
index c6473a6..ff2f9a2 100644
--- a/src/stmd.h
+++ b/src/cmark.h
@@ -56,7 +56,7 @@ struct ListData {
struct FencedCodeData {
int fence_length;
int fence_offset;
- char fence_char;
+ unsigned char fence_char;
strbuf info;
};
@@ -104,12 +104,12 @@ struct node_block {
typedef struct node_block node_block;
-node_block *stmd_parse_document(const unsigned char *buffer, size_t len);
-node_block *stmd_parse_file(FILE *f);
+node_block *cmark_parse_document(const unsigned char *buffer, size_t len);
+node_block *cmark_parse_file(FILE *f);
-void stmd_free_nodes(node_block *e);
+void cmark_free_nodes(node_block *e);
-void stmd_debug_print(node_block *root);
-void stmd_render_html(strbuf *html, node_block *root);
+void cmark_debug_print(node_block *root);
+void cmark_render_html(strbuf *html, node_block *root);
#endif
diff --git a/src/html/html.c b/src/html/html.c
index ab6fc35..fde1cb4 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -4,7 +4,7 @@
#include <string.h>
#include <assert.h>
-#include "stmd.h"
+#include "cmark.h"
#include "debug.h"
#include "html/houdini.h"
@@ -222,7 +222,7 @@ static void blocks_to_html(strbuf *html, node_block *b, bool tight)
}
}
-void stmd_render_html(strbuf *html, node_block *root)
+void cmark_render_html(strbuf *html, node_block *root)
{
blocks_to_html(html, root, false);
}
diff --git a/src/inlines.c b/src/inlines.c
index 71d75e9..7a7f08a 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -4,17 +4,25 @@
#include <stdbool.h>
#include <ctype.h>
-#include "stmd.h"
+#include "cmark.h"
#include "html/houdini.h"
#include "utf8.h"
#include "scanners.h"
#include "inlines.h"
+typedef struct InlineStack {
+ struct InlineStack *previous;
+ node_inl *first_inline;
+ int delim_count;
+ unsigned char delim_char;
+} inline_stack;
+
typedef struct Subject {
chunk input;
int pos;
int label_nestlevel;
reference_map *refmap;
+ inline_stack *emphasis_openers;
} subject;
static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
@@ -31,8 +39,10 @@ static unsigned char *bufdup(const unsigned char *buf)
if (buf) {
int len = strlen((char *)buf);
- new = malloc(len + 1);
- memcpy(new, buf, len + 1);
+ new = calloc(len + 1, sizeof(*new));
+ if(new != NULL) {
+ memcpy(new, buf, len + 1);
+ }
}
return new;
@@ -40,12 +50,14 @@ static unsigned char *bufdup(const unsigned char *buf)
static inline node_inl *make_link_(node_inl *label, unsigned char *url, unsigned char *title)
{
- node_inl* e = (node_inl*) malloc(sizeof(node_inl));
- e->tag = INL_LINK;
- e->content.linkable.label = label;
- e->content.linkable.url = url;
- e->content.linkable.title = title;
- e->next = NULL;
+ node_inl* e = calloc(1, sizeof(*e));
+ if(e != NULL) {
+ e->tag = INL_LINK;
+ e->content.linkable.label = label;
+ e->content.linkable.url = url;
+ e->content.linkable.title = title;
+ e->next = NULL;
+ }
return e;
}
@@ -67,29 +79,35 @@ inline static node_inl* make_link(node_inl* label, chunk url, chunk title)
inline static node_inl* make_inlines(int t, node_inl* contents)
{
- node_inl* e = (node_inl*) malloc(sizeof(node_inl));
- e->tag = t;
- e->content.inlines = contents;
- e->next = NULL;
+ node_inl * e = calloc(1, sizeof(*e));
+ if(e != NULL) {
+ e->tag = t;
+ e->content.inlines = contents;
+ e->next = NULL;
+ }
return e;
}
// Create an inline with a literal string value.
inline static node_inl* make_literal(int t, chunk s)
{
- node_inl* e = (node_inl*) malloc(sizeof(node_inl));
- e->tag = t;
- e->content.literal = s;
- e->next = NULL;
+ node_inl * e = calloc(1, sizeof(*e));
+ if(e != NULL) {
+ e->tag = t;
+ e->content.literal = s;
+ e->next = NULL;
+ }
return e;
}
// Create an inline with no value.
inline static node_inl* make_simple(int t)
{
- node_inl* e = (node_inl*) malloc(sizeof(node_inl));
- e->tag = t;
- e->next = NULL;
+ node_inl* e = calloc(1, sizeof(*e));
+ if(e != NULL) {
+ e->tag = t;
+ e->next = NULL;
+ }
return e;
}
@@ -108,26 +126,26 @@ extern void free_inlines(node_inl* e)
node_inl * next;
while (e != NULL) {
switch (e->tag){
- case INL_STRING:
- case INL_RAW_HTML:
- case INL_CODE:
- chunk_free(&e->content.literal);
- break;
- case INL_LINEBREAK:
- case INL_SOFTBREAK:
- break;
- case INL_LINK:
- case INL_IMAGE:
- free(e->content.linkable.url);
- free(e->content.linkable.title);
- free_inlines(e->content.linkable.label);
- break;
- case INL_EMPH:
- case INL_STRONG:
- free_inlines(e->content.inlines);
- break;
- default:
- break;
+ case INL_STRING:
+ case INL_RAW_HTML:
+ case INL_CODE:
+ chunk_free(&e->content.literal);
+ break;
+ case INL_LINEBREAK:
+ case INL_SOFTBREAK:
+ break;
+ case INL_LINK:
+ case INL_IMAGE:
+ free(e->content.linkable.url);
+ free(e->content.linkable.title);
+ free_inlines(e->content.linkable.label);
+ break;
+ case INL_EMPH:
+ case INL_STRONG:
+ free_inlines(e->content.inlines);
+ break;
+ default:
+ break;
}
next = e->next;
free(e);
@@ -158,6 +176,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
e->pos = 0;
e->label_nestlevel = 0;
e->refmap = refmap;
+ e->emphasis_openers = NULL;
chunk_rtrim(&e->input);
}
@@ -170,6 +189,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
e->pos = 0;
e->label_nestlevel = 0;
e->refmap = refmap;
+ e->emphasis_openers = NULL;
chunk_rtrim(&e->input);
}
@@ -221,7 +241,7 @@ inline static chunk take_while(subject* subj, int (*f)(int))
static int scan_to_closing_backticks(subject* subj, int openticklength)
{
// read non backticks
- char c;
+ unsigned char c;
while ((c = peek_char(subj)) && c != '`') {
advance(subj);
}
@@ -262,12 +282,11 @@ static node_inl* handle_backticks(subject *subj)
}
// Scan ***, **, or * and return number scanned, or 0.
-// Don't advance position.
-static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
+// Advances position.
+static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
{
int numdelims = 0;
- char char_before, char_after;
- int startpos = subj->pos;
+ unsigned char char_before, char_after;
char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
while (peek_char(subj) == c) {
@@ -281,135 +300,109 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
*can_open = *can_open && !isalnum(char_before);
*can_close = *can_close && !isalnum(char_after);
}
- subj->pos = startpos;
return numdelims;
}
+static void free_openers(subject* subj, inline_stack* istack)
+{
+ inline_stack * tempstack;
+ while (subj->emphasis_openers != istack) {
+ tempstack = subj->emphasis_openers;
+ subj->emphasis_openers = subj->emphasis_openers->previous;
+ free(tempstack);
+ }
+}
+
// Parse strong/emph or a fallback.
// Assumes the subject has '_' or '*' at the current position.
-static node_inl* handle_strong_emph(subject* subj, char c)
+static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **last)
{
bool can_open, can_close;
- node_inl * result = NULL;
- node_inl ** last = malloc(sizeof(node_inl *));
- node_inl * new;
- node_inl * il;
- node_inl * first_head = NULL;
- node_inl * first_close = NULL;
- int first_close_delims = 0;
int numdelims;
-
- *last = NULL;
+ int useDelims;
+ inline_stack * istack;
+ node_inl * inl;
+ node_inl * emph;
+ node_inl * inl_text;
numdelims = scan_delims(subj, c, &can_open, &can_close);
- subj->pos += numdelims;
-
- new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
- *last = new;
- first_head = new;
- result = new;
-
- if (!can_open || numdelims == 0) {
- goto done;
- }
-
- switch (numdelims) {
- case 1:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (numdelims >= 1 && can_close) {
- subj->pos += 1;
- first_head->tag = INL_EMPH;
- chunk_free(&first_head->content.literal);
- first_head->content.inlines = first_head->next;
- first_head->next = NULL;
- goto done;
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- case 2:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (numdelims >= 2 && can_close) {
- subj->pos += 2;
- first_head->tag = INL_STRONG;
- chunk_free(&first_head->content.literal);
- first_head->content.inlines = first_head->next;
- first_head->next = NULL;
- goto done;
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- case 3:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (can_close && numdelims >= 1 && numdelims <= 3 &&
- numdelims != first_close_delims) {
- new = make_str(chunk_dup(&subj->input, subj->pos, numdelims));
- append_inlines(*last, new);
- *last = new;
- if (first_close_delims == 1 && numdelims > 2) {
- numdelims = 2;
- } else if (first_close_delims == 2) {
- numdelims = 1;
- } else if (numdelims == 3) {
- // If we opened with ***, we interpret it as ** followed by *
- // giving us <strong><em>
- numdelims = 1;
- }
- subj->pos += numdelims;
- if (first_close) {
- first_head->tag = first_close_delims == 1 ? INL_STRONG : INL_EMPH;
- chunk_free(&first_head->content.literal);
- first_head->content.inlines =
- make_inlines(first_close_delims == 1 ? INL_EMPH : INL_STRONG,
- first_head->next);
-
- il = first_head->next;
- while (il->next && il->next != first_close) {
- il = il->next;
- }
- il->next = NULL;
-
- first_head->content.inlines->next = first_close->next;
-
- il = first_head->content.inlines;
- while (il->next && il->next != *last) {
- il = il->next;
- }
- il->next = NULL;
- free_inlines(*last);
-
- first_close->next = NULL;
- free_inlines(first_close);
- first_head->next = NULL;
- goto done;
- } else {
- first_close = *last;
- first_close_delims = numdelims;
- }
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- default:
- goto done;
+
+ if (can_close)
+ {
+ // walk the stack and find a matching opener, if there is one
+ istack = subj->emphasis_openers;
+ while (true)
+ {
+ if (istack == NULL)
+ goto cannotClose;
+
+ if (istack->delim_char == c)
+ break;
+
+ istack = istack->previous;
+ }
+
+ // calculate the actual number of delimeters used from this closer
+ useDelims = istack->delim_count;
+ if (useDelims == 3) useDelims = numdelims == 3 ? 1 : numdelims;
+ else if (useDelims > numdelims) useDelims = 1;
+
+ if (istack->delim_count == useDelims)
+ {
+ // the opener is completely used up - remove the stack entry and reuse the inline element
+ inl = istack->first_inline;
+ inl->tag = useDelims == 1 ? INL_EMPH : INL_STRONG;
+ chunk_free(&inl->content.literal);
+ inl->content.inlines = inl->next;
+ inl->next = NULL;
+
+ // remove this opener and all later ones from stack:
+ free_openers(subj, istack->previous);
+ *last = inl;
+ }
+ else
+ {
+ // the opener will only partially be used - stack entry remains (truncated) and a new inline is added.
+ inl = istack->first_inline;
+ istack->delim_count -= useDelims;
+ inl->content.literal.len = istack->delim_count;
+
+ emph = useDelims == 1 ? make_emph(inl->next) : make_strong(inl->next);
+ inl->next = emph;
+
+ // remove all later openers from stack:
+ free_openers(subj, istack);
+
+ *last = emph;
+ }
+
+ // if the closer was not fully used, move back a char or two and try again.
+ if (useDelims < numdelims)
+ {
+ subj->pos = subj->pos - numdelims + useDelims;
+ return handle_strong_emph(subj, c, last);
+ }
+
+ return NULL; // make_str(chunk_literal(""));
+ }
+
+cannotClose:
+ inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
+
+ if (can_open)
+ {
+ istack = (inline_stack*)malloc(sizeof(inline_stack));
+ if (istack == NULL) {
+ return NULL;
+ }
+ istack->delim_count = numdelims;
+ istack->delim_char = c;
+ istack->first_inline = inl_text;
+ istack->previous = subj->emphasis_openers;
+ subj->emphasis_openers = istack;
}
-done:
- free(last);
- return result;
+ return inl_text;
}
// Parse backslash-escape or just a backslash, returning an inline.
@@ -438,9 +431,9 @@ static node_inl* handle_entity(subject* subj)
advance(subj);
len = houdini_unescape_ent(&ent,
- subj->input.data + subj->pos,
- subj->input.len - subj->pos
- );
+ subj->input.data + subj->pos,
+ subj->input.len - subj->pos
+ );
if (len == 0)
return make_str(chunk_literal("&"));
@@ -513,8 +506,8 @@ unsigned char *clean_title(chunk *title)
// remove surrounding quotes if any:
if ((first == '\'' && last == '\'') ||
- (first == '(' && last == ')') ||
- (first == '"' && last == '"')) {
+ (first == '(' && last == ')') ||
+ (first == '"' && last == '"')) {
houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
} else {
houdini_unescape_html_f(&buf, title->data, title->len);
@@ -542,7 +535,7 @@ static node_inl* handle_pointy_brace(subject* subj)
return make_autolink(
make_str_with_entities(&contents),
contents, 0
- );
+ );
}
// next try to match an email autolink
@@ -552,9 +545,9 @@ static node_inl* handle_pointy_brace(subject* subj)
subj->pos += matchlen;
return make_autolink(
- make_str_with_entities(&contents),
- contents, 1
- );
+ make_str_with_entities(&contents),
+ contents, 1
+ );
}
// finally, try to match an html tag
@@ -595,33 +588,33 @@ static int link_label(subject* subj, chunk *raw_label)
}
advance(subj); // advance past [
- char c;
+ unsigned char c;
while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
switch (c) {
- case '`':
- tmp = handle_backticks(subj);
- free_inlines(tmp);
- break;
- case '<':
- tmp = handle_pointy_brace(subj);
- free_inlines(tmp);
- break;
- case '[': // nested []
- nestlevel++;
- advance(subj);
- break;
- case ']': // nested []
- nestlevel--;
- advance(subj);
- break;
- case '\\':
- advance(subj);
- if (ispunct(peek_char(subj))) {
- advance(subj);
- }
- break;
- default:
+ case '`':
+ tmp = handle_backticks(subj);
+ free_inlines(tmp);
+ break;
+ case '<':
+ tmp = handle_pointy_brace(subj);
+ free_inlines(tmp);
+ break;
+ case '[': // nested []
+ nestlevel++;
+ advance(subj);
+ break;
+ case ']': // nested []
+ nestlevel--;
+ advance(subj);
+ break;
+ case '\\':
+ advance(subj);
+ if (ispunct(peek_char(subj))) {
advance(subj);
+ }
+ break;
+ default:
+ advance(subj);
}
}
if (c == ']') {
@@ -657,8 +650,8 @@ static node_inl* handle_left_bracket(subject* subj)
if (found_label) {
if (peek_char(subj) == '(' &&
- ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
- ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
+ ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
// try to parse an explicit link:
starturl = subj->pos + 1 + sps; // after (
@@ -684,8 +677,8 @@ static node_inl* handle_left_bracket(subject* subj)
subj->pos = endlabel;
lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
- append_inlines(lab,
- make_str(chunk_literal("]"))));
+ append_inlines(lab,
+ make_str(chunk_literal("]"))));
return result;
}
} else {
@@ -714,7 +707,7 @@ static node_inl* handle_left_bracket(subject* subj)
subj->pos = endlabel;
lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
- append_inlines(lab, make_str(chunk_literal("]"))));
+ append_inlines(lab, make_str(chunk_literal("]"))));
}
return result;
}
@@ -736,8 +729,8 @@ static node_inl* handle_newline(subject *subj)
advance(subj);
}
if (nlpos > 1 &&
- peek_at(subj, nlpos - 1) == ' ' &&
- peek_at(subj, nlpos - 2) == ' ') {
+ peek_at(subj, nlpos - 1) == ' ' &&
+ peek_at(subj, nlpos - 2) == ' ') {
return make_linebreak();
} else {
return make_softbreak();
@@ -754,9 +747,22 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
{
node_inl* result = NULL;
node_inl** last = &result;
+ node_inl* first = NULL;
while ((*f)(subj) && parse_inline(subj, last)) {
+ if (!first) {
+ first = *last;
+ }
+ }
+
+ inline_stack* istack = subj->emphasis_openers;
+ inline_stack* temp;
+ while (istack != NULL) {
+ temp = istack->previous;
+ free(istack);
+ istack = temp;
}
- return result;
+
+ return first;
}
node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
@@ -812,69 +818,62 @@ static int parse_inline(subject* subj, node_inl ** last)
return 0;
}
switch(c){
- case '\n':
- new = handle_newline(subj);
- break;
- case '`':
- new = handle_backticks(subj);
- break;
- case '\\':
- new = handle_backslash(subj);
- break;
- case '&':
- new = handle_entity(subj);
- break;
- case '<':
- new = handle_pointy_brace(subj);
- break;
- case '_':
- if (subj->pos > 0) {
- unsigned char prev = peek_at(subj, subj->pos - 1);
- if (isalnum(prev) || prev == '_') {
- new = make_str(chunk_literal("_"));
- advance(subj);
- break;
- }
- }
-
- new = handle_strong_emph(subj, '_');
- break;
- case '*':
- new = handle_strong_emph(subj, '*');
- break;
- case '[':
+ case '\n':
+ new = handle_newline(subj);
+ break;
+ case '`':
+ new = handle_backticks(subj);
+ break;
+ case '\\':
+ new = handle_backslash(subj);
+ break;
+ case '&':
+ new = handle_entity(subj);
+ break;
+ case '<':
+ new = handle_pointy_brace(subj);
+ break;
+ case '_':
+ new = handle_strong_emph(subj, '_', last);
+ break;
+ case '*':
+ new = handle_strong_emph(subj, '*', last);
+ break;
+ case '[':
+ new = handle_left_bracket(subj);
+ break;
+ case '!':
+ advance(subj);
+ if (peek_char(subj) == '[') {
new = handle_left_bracket(subj);
- break;
- case '!':
- advance(subj);
- if (peek_char(subj) == '[') {
- new = handle_left_bracket(subj);
- if (new != NULL && new->tag == INL_LINK) {
- new->tag = INL_IMAGE;
- } else {
- new = append_inlines(make_str(chunk_literal("!")), new);
- }
+ if (new != NULL && new->tag == INL_LINK) {
+ new->tag = INL_IMAGE;
} else {
- new = make_str(chunk_literal("!"));
- }
- break;
- default:
- endpos = subject_find_special_char(subj);
- contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
- subj->pos = endpos;
-
- // if we're at a newline, strip trailing spaces.
- if (peek_char(subj) == '\n') {
- chunk_rtrim(&contents);
+ new = append_inlines(make_str(chunk_literal("!")), new);
}
+ } else {
+ new = make_str(chunk_literal("!"));
+ }
+ break;
+ default:
+ endpos = subject_find_special_char(subj);
+ contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
+ subj->pos = endpos;
+
+ // if we're at a newline, strip trailing spaces.
+ if (peek_char(subj) == '\n') {
+ chunk_rtrim(&contents);
+ }
- new = make_str(contents);
+ new = make_str(contents);
}
if (*last == NULL) {
*last = new;
- } else {
+ } else if (new) {
append_inlines(*last, new);
+ *last = new;
}
+
return 1;
}
@@ -890,8 +889,8 @@ void spnl(subject* subj)
{
bool seen_newline = false;
while (peek_char(subj) == ' ' ||
- (!seen_newline &&
- (seen_newline = peek_char(subj) == '\n'))) {
+ (!seen_newline &&
+ (seen_newline = peek_char(subj) == '\n'))) {
advance(subj);
}
}
@@ -958,4 +957,3 @@ int parse_reference_inline(strbuf *input, reference_map *refmap)
reference_create(refmap, &lab, &url, &title);
return subj.pos;
}
-
diff --git a/src/main.c b/src/main.c
index 76a0e12..b2404f6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,12 +1,12 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#include "stmd.h"
+#include "cmark.h"
#include "debug.h"
void print_usage()
{
- printf("Usage: stmd [FILE*]\n");
+ printf("Usage: cmark [FILE*]\n");
printf("Options: --help, -h Print usage information\n");
printf(" --ast Print AST instead of HTML\n");
printf(" --version Print version\n");
@@ -17,9 +17,9 @@ static void print_document(node_block *document, bool ast)
strbuf html = GH_BUF_INIT;
if (ast) {
- stmd_debug_print(document);
+ cmark_debug_print(document);
} else {
- stmd_render_html(&html, document);
+ cmark_render_html(&html, document);
printf("%s", html.ptr);
strbuf_free(&html);
}
@@ -34,11 +34,11 @@ int main(int argc, char *argv[])
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "--version") == 0) {
- printf("stmd %s", VERSION);
+ printf("cmark %s", VERSION);
printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
exit(0);
} else if ((strcmp(argv[i], "--help") == 0) ||
- (strcmp(argv[i], "-h") == 0)) {
+ (strcmp(argv[i], "-h") == 0)) {
print_usage();
exit(0);
} else if (strcmp(argv[i], "--ast") == 0) {
@@ -52,26 +52,25 @@ int main(int argc, char *argv[])
}
if (numfps == 0) {
- document = stmd_parse_file(stdin);
+ document = cmark_parse_file(stdin);
print_document(document, ast);
- stmd_free_nodes(document);
+ cmark_free_nodes(document);
} else {
for (i = 0; i < numfps; i++) {
FILE *fp = fopen(argv[files[i]], "r");
if (fp == NULL) {
fprintf(stderr, "Error opening file %s: %s\n",
- argv[files[i]], strerror(errno));
+ argv[files[i]], strerror(errno));
exit(1);
}
- document = stmd_parse_file(fp);
+ document = cmark_parse_file(fp);
print_document(document, ast);
- stmd_free_nodes(document);
+ cmark_free_nodes(document);
fclose(fp);
}
}
return 0;
}
-
diff --git a/src/print.c b/src/print.c
index 83f8daa..be3bced 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1,7 +1,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#include "stmd.h"
+#include "cmark.h"
#include "debug.h"
static void print_str(const unsigned char *s, int len)
@@ -16,17 +16,17 @@ static void print_str(const unsigned char *s, int len)
unsigned char c = s[i];
switch (c) {
- case '\n':
- printf("\\n");
- break;
- case '"':
- printf("\\\"");
- break;
- case '\\':
- printf("\\\\");
- break;
- default:
- putchar((int)c);
+ case '\n':
+ printf("\\n");
+ break;
+ case '"':
+ printf("\\\"");
+ break;
+ case '\\':
+ printf("\\\\");
+ break;
+ default:
+ putchar((int)c);
}
}
putchar('"');
@@ -116,13 +116,13 @@ static void print_blocks(node_block* b, int indent)
data = &(b->as.list);
if (data->list_type == ordered) {
printf("list (type=ordered tight=%s start=%d delim=%s)\n",
- (data->tight ? "true" : "false"),
- data->start,
- (data->delimiter == parens ? "parens" : "period"));
+ (data->tight ? "true" : "false"),
+ data->start,
+ (data->delimiter == parens ? "parens" : "period"));
} else {
printf("list (type=bullet tight=%s bullet_char=%c)\n",
- (data->tight ? "true" : "false"),
- data->bullet_char);
+ (data->tight ? "true" : "false"),
+ data->bullet_char);
}
print_blocks(b->children, indent + 2);
break;
@@ -148,7 +148,7 @@ static void print_blocks(node_block* b, int indent)
break;
case BLOCK_FENCED_CODE:
printf("fenced_code length=%d info=",
- b->as.code.fence_length);
+ b->as.code.fence_length);
print_str(b->as.code.info.ptr, -1);
putchar(' ');
print_str(b->string_content.ptr, -1);
@@ -170,7 +170,7 @@ static void print_blocks(node_block* b, int indent)
}
}
-void stmd_debug_print(node_block *root)
+void cmark_debug_print(node_block *root)
{
print_blocks(root, 0);
}
diff --git a/src/references.c b/src/references.c
index 3e54b48..04b9025 100644
--- a/src/references.c
+++ b/src/references.c
@@ -1,4 +1,4 @@
-#include "stmd.h"
+#include "cmark.h"
#include "utf8.h"
#include "references.h"
#include "inlines.h"
@@ -16,10 +16,12 @@ refhash(const unsigned char *link_ref)
static void reference_free(reference *ref)
{
- free(ref->label);
- free(ref->url);
- free(ref->title);
- free(ref);
+ if(ref != NULL) {
+ free(ref->label);
+ free(ref->url);
+ free(ref->title);
+ free(ref);
+ }
}
// normalize reference: collapse internal whitespace to single space,
@@ -31,6 +33,9 @@ static unsigned char *normalize_reference(chunk *ref)
strbuf normalized = GH_BUF_INIT;
unsigned char *result;
+ if(ref == NULL)
+ return NULL;
+
if (ref->len == 0)
return NULL;
@@ -55,7 +60,7 @@ static void add_reference(reference_map *map, reference* ref)
while (t) {
if (t->hash == ref->hash &&
- !strcmp((char *)t->label, (char *)ref->label)) {
+ !strcmp((char *)t->label, (char *)ref->label)) {
reference_free(ref);
return;
}
@@ -75,14 +80,16 @@ extern void reference_create(reference_map *map, chunk *label, chunk *url, chunk
if (reflabel == NULL)
return;
- ref = malloc(sizeof(reference));
- ref->label = reflabel;
- ref->hash = refhash(ref->label);
- ref->url = clean_url(url);
- ref->title = clean_title(title);
- ref->next = NULL;
+ ref = calloc(1, sizeof(*ref));
+ if(ref != NULL) {
+ ref->label = reflabel;
+ ref->hash = refhash(ref->label);
+ ref->url = clean_url(url);
+ ref->title = clean_title(title);
+ ref->next = NULL;
- add_reference(map, ref);
+ add_reference(map, ref);
+ }
}
// Returns reference if refmap contains a reference with matching
@@ -92,10 +99,10 @@ reference* reference_lookup(reference_map *map, chunk *label)
reference *ref = NULL;
unsigned char *norm;
unsigned int hash;
-
+
if (map == NULL)
return NULL;
-
+
norm = normalize_reference(label);
if (norm == NULL)
return NULL;
@@ -105,7 +112,7 @@ reference* reference_lookup(reference_map *map, chunk *label)
while (ref) {
if (ref->hash == hash &&
- !strcmp((char *)ref->label, (char *)norm))
+ !strcmp((char *)ref->label, (char *)norm))
break;
ref = ref->next;
}
@@ -118,6 +125,9 @@ void reference_map_free(reference_map *map)
{
unsigned int i;
+ if(map == NULL)
+ return;
+
for (i = 0; i < REFMAP_SIZE; ++i) {
reference *ref = map->table[i];
reference *next;
@@ -134,8 +144,5 @@ void reference_map_free(reference_map *map)
reference_map *reference_map_new(void)
{
- reference_map *map = malloc(sizeof(reference_map));
- memset(map, 0x0, sizeof(reference_map));
- return map;
+ return calloc(1, sizeof(reference_map));
}
-
diff --git a/src/scanners.h b/src/scanners.h
index 785d424..86dbcf5 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -1,4 +1,4 @@
-#include "stmd.h"
+#include "cmark.h"
extern int _scan_autolink_uri(const unsigned char *p);
extern int _scan_autolink_email(const unsigned char *p);
diff --git a/src/scanners.re b/src/scanners.re
index 28aba9d..5ec1bf3 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -36,7 +36,7 @@
htmlcomment = "!--" ([^-\x00]+ | [-][^-\x00]+)* "-->";
- processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00])* "?>";
+ processinginstruction = "?" ([^?>\x00]+ | [?][^>\x00] | [>])* "?>";
declaration = "!" [A-Z]+ spacechar+ [^>\x00]* ">";
diff --git a/src/utf8.c b/src/utf8.c
index 6b34831..8a786b7 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -103,24 +103,24 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
return -1;
switch (length) {
- case 1:
- uc = str[0];
- break;
- case 2:
- uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
- if (uc < 0x80) uc = -1;
- break;
- case 3:
- uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
- + (str[2] & 0x3F);
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
- (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
- break;
- case 4:
- uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
- + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
- if (uc < 0x10000 || uc >= 0x110000) uc = -1;
- break;
+ case 1:
+ uc = str[0];
+ break;
+ case 2:
+ uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
+ if (uc < 0x80) uc = -1;
+ break;
+ case 3:
+ uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
+ + (str[2] & 0x3F);
+ if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
+ (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
+ break;
+ case 4:
+ uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
+ + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
+ if (uc < 0x10000 || uc >= 0x110000) uc = -1;
+ break;
}
if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE))
@@ -173,7 +173,7 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
{
int32_t c;
-#define bufpush(x) \
+#define bufpush(x) \
utf8proc_encode_char(x, dest)
while (len > 0) {
@@ -190,4 +190,3 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
len -= char_len;
}
}
-
diff --git a/src/utf8.h b/src/utf8.h
index c971250..496eae9 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -1,5 +1,5 @@
-#ifndef _H_STMD_UTF8_
-#define _H_STMD_UTF8_
+#ifndef _H_cmark_UTF8_
+#define _H_cmark_UTF8_
#include <stdint.h>
#include "buffer.h"