diff options
-rw-r--r-- | api_test/main.c | 6 | ||||
-rw-r--r-- | man/man3/cmark.3 | 4 | ||||
-rw-r--r-- | src/blocks.c | 31 | ||||
-rw-r--r-- | src/buffer.c | 154 | ||||
-rw-r--r-- | src/buffer.h | 44 | ||||
-rw-r--r-- | src/chunk.h | 15 | ||||
-rw-r--r-- | src/cmark.c | 2 | ||||
-rw-r--r-- | src/cmark.h | 2 | ||||
-rw-r--r-- | src/commonmark.c | 44 | ||||
-rw-r--r-- | src/houdini.h | 19 | ||||
-rw-r--r-- | src/houdini_href_e.c | 4 | ||||
-rw-r--r-- | src/houdini_html_e.c | 6 | ||||
-rw-r--r-- | src/houdini_html_u.c | 14 | ||||
-rw-r--r-- | src/html.c | 25 | ||||
-rw-r--r-- | src/inlines.c | 62 | ||||
-rw-r--r-- | src/inlines.h | 2 | ||||
-rw-r--r-- | src/parser.h | 6 | ||||
-rw-r--r-- | src/scanners.c | 84 | ||||
-rw-r--r-- | src/scanners.h | 30 | ||||
-rw-r--r-- | src/scanners.re | 76 | ||||
-rw-r--r-- | src/utf8.c | 20 | ||||
-rw-r--r-- | src/utf8.h | 6 | ||||
-rw-r--r-- | src/xml.c | 9 |
23 files changed, 344 insertions, 321 deletions
diff --git a/api_test/main.c b/api_test/main.c index 01df51d..132d48c 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -643,16 +643,16 @@ test_incomplete_char(test_batch_runner *runner, const char *utf8, static void test_continuation_byte(test_batch_runner *runner, const char *utf8) { - int len = strlen(utf8); + size_t len = strlen(utf8); - for (int pos = 1; pos < len; ++pos) { + for (size_t pos = 1; pos < len; ++pos) { char buf[20]; sprintf(buf, "((((%s))))", utf8); buf[4+pos] = '\x20'; char expected[50]; strcpy(expected, "<p>((((" UTF8_REPL "\x20"); - for (int i = pos + 1; i < len; ++i) { + for (size_t i = pos + 1; i < len; ++i) { strcat(expected, UTF8_REPL); } strcat(expected, "))))</p>\n"); diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 5b68ecb..82c34cd 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -1,4 +1,4 @@ -.TH cmark 3 "March 21, 2015" "LOCAL" "Library Functions Manual" +.TH cmark 3 "June 07, 2015" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -10,7 +10,7 @@ DESCRIPTION Simple Interface .PP -\fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIint len\f[], \fIint options\f[]) +\fIchar *\f[] \fBcmark_markdown_to_html\f[](\fIconst char *text\f[], \fIsize_t len\f[], \fIint options\f[]) .PP Convert \f[I]text\f[] (assumed to be a UTF\-8 encoded string with length diff --git a/src/blocks.c b/src/blocks.c index b72c256..a3ac712 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -30,7 +30,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, static void S_process_line(cmark_parser *parser, const unsigned char *buffer, - size_t bytes); + bufsize_t bytes); static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column) { @@ -95,7 +95,7 @@ static cmark_node* finalize(cmark_parser *parser, cmark_node* b); // Returns true if line has only space characters, else false. -static bool is_blank(cmark_strbuf *s, int offset) +static bool is_blank(cmark_strbuf *s, bufsize_t offset) { while (offset < s->size) { switch (s->ptr[offset]) { @@ -128,7 +128,7 @@ static inline bool accepts_lines(cmark_node_type block_type) block_type == NODE_CODE_BLOCK); } -static void add_line(cmark_node* node, cmark_chunk *ch, int offset) +static void add_line(cmark_node* node, cmark_chunk *ch, bufsize_t offset) { assert(node->open); cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset); @@ -136,7 +136,7 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset) static void remove_trailing_blank_lines(cmark_strbuf *ln) { - int i; + bufsize_t i; unsigned char c; for (i = ln->size - 1; i >= 0; --i) { @@ -204,7 +204,7 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr) static cmark_node* finalize(cmark_parser *parser, cmark_node* b) { - int pos; + bufsize_t pos; cmark_node* item; cmark_node* subitem; cmark_node* parent; @@ -367,10 +367,10 @@ static void process_inlines(cmark_node* root, cmark_reference_map *refmap, int o // Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. -static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr) +static bufsize_t parse_list_marker(cmark_chunk *input, bufsize_t pos, cmark_list **dataptr) { unsigned char c; - int startpos; + bufsize_t startpos; cmark_list *data; startpos = pos; @@ -497,6 +497,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, while (buffer < end) { const unsigned char *eol; size_t line_len; + bufsize_t bufsize; for (eol = buffer; eol < end; ++eol) { if (S_is_line_end_char(*eol)) @@ -514,17 +515,19 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, } else if (eof) { line_len = end - buffer; } else { - cmark_strbuf_put(parser->linebuf, buffer, end - buffer); + bufsize = cmark_strbuf_check_bufsize(end - buffer); + cmark_strbuf_put(parser->linebuf, buffer, bufsize); break; } + bufsize = cmark_strbuf_check_bufsize(line_len); if (parser->linebuf->size > 0) { - cmark_strbuf_put(parser->linebuf, buffer, line_len); + cmark_strbuf_put(parser->linebuf, buffer, bufsize); S_process_line(parser, parser->linebuf->ptr, parser->linebuf->size); cmark_strbuf_clear(parser->linebuf); } else { - S_process_line(parser, buffer, line_len); + S_process_line(parser, buffer, bufsize); } buffer += line_len; @@ -533,7 +536,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, static void chop_trailing_hashtags(cmark_chunk *ch) { - int n, orig_n; + bufsize_t n, orig_n; cmark_chunk_rtrim(ch); orig_n = n = ch->len - 1; @@ -562,10 +565,10 @@ S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) } static void -S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) +S_process_line(cmark_parser *parser, const unsigned char *buffer, bufsize_t bytes) { cmark_node* last_matched_container; - int matched = 0; + bufsize_t matched = 0; int lev = 0; int i; cmark_list *data = NULL; @@ -712,7 +715,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) parser->offset = parser->first_nonspace + matched; container = add_child(parser, container, NODE_HEADER, parser->offset + 1); - int hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); + bufsize_t hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace); int level = 0; while (peek_at(&input, hashpos) == '#') { diff --git a/src/buffer.c b/src/buffer.c index e2ebc02..7d16af8 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -4,6 +4,7 @@ #include <string.h> #include <stdio.h> #include <stdlib.h> +#include <stdint.h> #include "config.h" #include "cmark_ctype.h" @@ -14,48 +15,75 @@ */ unsigned char cmark_strbuf__initbuf[1]; -#define ENSURE_SIZE(b, d) \ - if ((d) > b->asize) \ - cmark_strbuf_grow(b, (d)); \ - #ifndef MIN #define MIN(x,y) ((x<y) ? x : y) #endif -void cmark_strbuf_init(cmark_strbuf *buf, int initial_size) +void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size) { buf->asize = 0; buf->size = 0; buf->ptr = cmark_strbuf__initbuf; - if (initial_size) + if (initial_size > 0) cmark_strbuf_grow(buf, initial_size); } -void cmark_strbuf_grow(cmark_strbuf *buf, int target_size) +void cmark_strbuf_overflow_err() { + fprintf(stderr, "String buffer overflow"); + abort(); +} + +static inline void +S_strbuf_grow_by(cmark_strbuf *buf, size_t add) { + size_t target_size = (size_t)buf->size + add; + + if (target_size < add /* Integer overflow. */ + || target_size > BUFSIZE_MAX /* Truncation overflow. */ + ) { + cmark_strbuf_overflow_err(); + return; /* unreachable */ + } + + if ((bufsize_t)target_size >= buf->asize) + cmark_strbuf_grow(buf, (bufsize_t)target_size); +} + +void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { unsigned char *new_ptr; - int new_size; - if (target_size <= buf->asize) + if (target_size < buf->asize) return; if (buf->asize == 0) { - new_size = target_size; new_ptr = NULL; } else { - new_size = buf->asize; new_ptr = buf->ptr; } - /* grow the buffer size by 1.5, until it's big enough - * to fit our target size */ - while (new_size < target_size) - new_size = (new_size << 1) - (new_size >> 1); + /* Oversize the buffer by 50% to guarantee amortized linear time + * complexity on append operations. */ + size_t new_size = (size_t)target_size + (size_t)target_size / 2; + + /* Account for terminating null byte. */ + new_size += 1; /* round allocation up to multiple of 8 */ new_size = (new_size + 7) & ~7; + if (new_size < (size_t)target_size /* Integer overflow. */ + || new_size > BUFSIZE_MAX /* Truncation overflow. */ + ) { + if (target_size >= BUFSIZE_MAX) { + /* No space for terminating null byte. */ + cmark_strbuf_overflow_err(); + return; /* unreachable */ + } + /* Oversize by the maximum possible amount. */ + new_size = BUFSIZE_MAX; + } + new_ptr = (unsigned char *)realloc(new_ptr, new_size); if (!new_ptr) { @@ -63,16 +91,11 @@ void cmark_strbuf_grow(cmark_strbuf *buf, int target_size) abort(); } - buf->asize = new_size; + buf->asize = (bufsize_t)new_size; buf->ptr = new_ptr; - - /* truncate the existing buffer size if necessary */ - if (buf->size >= buf->asize) - buf->size = buf->asize - 1; - buf->ptr[buf->size] = '\0'; } -size_t cmark_strbuf_len(const cmark_strbuf *buf) +bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } @@ -95,13 +118,14 @@ void cmark_strbuf_clear(cmark_strbuf *buf) buf->ptr[0] = '\0'; } -void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len) +void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0 || data == NULL) { cmark_strbuf_clear(buf); } else { if (data != buf->ptr) { - ENSURE_SIZE(buf, len + 1); + if (len >= buf->asize) + cmark_strbuf_grow(buf, len); memmove(buf->ptr, data, len); } buf->size = len; @@ -112,22 +136,22 @@ void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len) void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { cmark_strbuf_set(buf, (const unsigned char *)string, - string ? strlen(string) : 0); + string ? cmark_strbuf_safe_strlen(string) : 0); } void cmark_strbuf_putc(cmark_strbuf *buf, int c) { - ENSURE_SIZE(buf, buf->size + 2); + S_strbuf_grow_by(buf, 1); buf->ptr[buf->size++] = c; buf->ptr[buf->size] = '\0'; } -void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len) +void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0) return; - ENSURE_SIZE(buf, buf->size + len + 1); + S_strbuf_grow_by(buf, len); memmove(buf->ptr + buf->size, data, len); buf->size += len; buf->ptr[buf->size] = '\0'; @@ -135,21 +159,22 @@ void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len) void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { - cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); + cmark_strbuf_put(buf, (const unsigned char *)string, + cmark_strbuf_safe_strlen(string)); } void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) { - const int expected_size = buf->size + (strlen(format) * 2); - int len; - - ENSURE_SIZE(buf, expected_size); + size_t expected_size = strlen(format); + if (expected_size <= SIZE_MAX / 2) + expected_size *= 2; + S_strbuf_grow_by(buf, expected_size); while (1) { va_list args; va_copy(args, ap); - len = vsnprintf( + int len = vsnprintf( (char *)buf->ptr + buf->size, buf->asize - buf->size, format, args @@ -168,12 +193,12 @@ void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) abort(); } - if (len + 1 <= buf->asize - buf->size) { + if ((size_t)len < (size_t)(buf->asize - buf->size)) { buf->size += len; break; } - ENSURE_SIZE(buf, buf->size + len + 1); + S_strbuf_grow_by(buf, len); } } @@ -186,11 +211,13 @@ void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) va_end(ap); } -void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf) +void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf) { - int copylen; + bufsize_t copylen; - assert(data && datasize && buf); + assert(buf); + if (!data || datasize <= 0) + return; data[0] = '\0'; @@ -224,22 +251,6 @@ unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) return data; } -void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize) -{ - cmark_strbuf_free(buf); - - if (ptr) { - buf->ptr = ptr; - buf->size = strlen((char *)ptr); - if (asize) - buf->asize = (asize < buf->size) ? buf->size + 1 : asize; - else /* pass 0 to fall back on strlen + 1 */ - buf->asize = buf->size + 1; - } else { - cmark_strbuf_grow(buf, asize); - } -} - int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); @@ -247,20 +258,28 @@ int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; } -int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos) +bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { + if (pos >= buf->size) + return -1; + if (pos < 0) + pos = 0; + const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); if (!p) return -1; - return (int)(p - (const unsigned char *)buf->ptr); + return (bufsize_t)(p - (const unsigned char *)buf->ptr); } -int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos) +bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { - int i; + if (pos < 0 || buf->size == 0) + return -1; + if (pos >= buf->size) + pos = buf->size - 1; - for (i = pos; i >= 0; i--) { + for (bufsize_t i = pos; i >= 0; i--) { if (buf->ptr[i] == (unsigned char) c) return i; } @@ -268,17 +287,22 @@ int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos) return -1; } -void cmark_strbuf_truncate(cmark_strbuf *buf, int len) +void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { + if (len < 0) + len = 0; + if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; } } -void cmark_strbuf_drop(cmark_strbuf *buf, int n) +void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { if (n > 0) { + if (n > buf->size) + n = buf->size; buf->size = buf->size - n; if (buf->size) memmove(buf->ptr, buf->ptr + n, buf->size); @@ -304,7 +328,7 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf) void cmark_strbuf_trim(cmark_strbuf *buf) { - int i = 0; + bufsize_t i = 0; if (!buf->size) return; @@ -322,7 +346,7 @@ void cmark_strbuf_trim(cmark_strbuf *buf) void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { bool last_char_was_space = false; - int r, w; + bufsize_t r, w; for (r = 0, w = 0; r < s->size; ++r) { switch (s->ptr[r]) { @@ -347,7 +371,7 @@ void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) // Destructively unescape a string: remove backslashes before punctuation chars. extern void cmark_strbuf_unescape(cmark_strbuf *buf) { - int r, w; + bufsize_t r, w; for (r = 0, w = 0; r < buf->size; ++r) { if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) diff --git a/src/buffer.h b/src/buffer.h index 417df26..babd051 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -3,20 +3,25 @@ #include <stddef.h> #include <stdarg.h> +#include <string.h> +#include <limits.h> #include "config.h" #ifdef __cplusplus extern "C" { #endif +typedef int bufsize_t; + typedef struct { unsigned char *ptr; - int asize, size; + bufsize_t asize, size; } cmark_strbuf; extern unsigned char cmark_strbuf__initbuf[]; #define GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 } +#define BUFSIZE_MAX INT_MAX /** * Initialize a cmark_strbuf structure. @@ -24,23 +29,22 @@ extern unsigned char cmark_strbuf__initbuf[]; * For the cases where GH_BUF_INIT cannot be used to do static * initialization. */ -void cmark_strbuf_init(cmark_strbuf *buf, int initial_size); +void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size); /** * Grow the buffer to hold at least `target_size` bytes. */ -void cmark_strbuf_grow(cmark_strbuf *buf, int target_size); +void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size); void cmark_strbuf_free(cmark_strbuf *buf); void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); -size_t cmark_strbuf_len(const cmark_strbuf *buf); +bufsize_t cmark_strbuf_len(const cmark_strbuf *buf); int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); -void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize); unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); -void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf); +void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf); static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) { @@ -49,25 +53,41 @@ static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) #define cmark_strbuf_at(buf, n) ((buf)->ptr[n]) -void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len); +void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); void cmark_strbuf_sets(cmark_strbuf *buf, const char *string); void cmark_strbuf_putc(cmark_strbuf *buf, int c); -void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len); +void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len); void cmark_strbuf_puts(cmark_strbuf *buf, const char *string); void cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) CMARK_ATTRIBUTE((format (printf, 2, 3))); void cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap); void cmark_strbuf_clear(cmark_strbuf *buf); -int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos); -int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos); -void cmark_strbuf_drop(cmark_strbuf *buf, int n); -void cmark_strbuf_truncate(cmark_strbuf *buf, int len); +bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos); +bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos); +void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n); +void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len); void cmark_strbuf_rtrim(cmark_strbuf *buf); void cmark_strbuf_trim(cmark_strbuf *buf); void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); void cmark_strbuf_unescape(cmark_strbuf *s); +/* Print error and abort. */ +void cmark_strbuf_overflow_err(void); + +static inline bufsize_t +cmark_strbuf_check_bufsize(size_t size) { + if (size > BUFSIZE_MAX) { + cmark_strbuf_overflow_err(); + } + return (bufsize_t)size; +} + +static inline bufsize_t +cmark_strbuf_safe_strlen(const char *str) { + return cmark_strbuf_check_bufsize(strlen(str)); +} + #ifdef __cplusplus } #endif diff --git a/src/chunk.h b/src/chunk.h index a246a9d..f23a02d 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -11,8 +11,8 @@ typedef struct { unsigned char *data; - int len; - int alloc; // also implies a NULL-terminated string + bufsize_t len; + bufsize_t alloc; // also implies a NULL-terminated string } cmark_chunk; static inline void cmark_chunk_free(cmark_chunk *c) @@ -51,10 +51,10 @@ static inline void cmark_chunk_trim(cmark_chunk *c) cmark_chunk_rtrim(c); } -static inline int cmark_chunk_strchr(cmark_chunk *ch, int c, int offset) +static inline bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c, bufsize_t offset) { const unsigned char *p = (unsigned char *)memchr(ch->data + offset, c, ch->len - offset); - return p ? (int)(p - ch->data) : ch->len; + return p ? (bufsize_t)(p - ch->data) : ch->len; } static inline const char *cmark_chunk_to_cstr(cmark_chunk *c) @@ -87,7 +87,7 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) c->data = NULL; c->alloc = 0; } else { - c->len = strlen(str); + c->len = cmark_strbuf_safe_strlen(str); c->data = (unsigned char *)malloc(c->len + 1); c->alloc = 1; memcpy(c->data, str, c->len + 1); @@ -96,11 +96,12 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) static inline cmark_chunk cmark_chunk_literal(const char *data) { - cmark_chunk c = {(unsigned char *)data, data ? strlen(data) : 0, 0}; + bufsize_t len = data ? cmark_strbuf_safe_strlen(data) : 0; + cmark_chunk c = {(unsigned char *)data, len, 0}; return c; } -static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, int pos, int len) +static inline cmark_chunk cmark_chunk_dup(const cmark_chunk *ch, bufsize_t pos, bufsize_t len) { cmark_chunk c = {ch->data + pos, len, 0}; return c; diff --git a/src/cmark.c b/src/cmark.c index 79ceabf..35765b1 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -9,7 +9,7 @@ const int cmark_version = CMARK_VERSION; const char cmark_version_string[] = CMARK_VERSION_STRING; -char *cmark_markdown_to_html(const char *text, int len, int options) +char *cmark_markdown_to_html(const char *text, size_t len, int options) { cmark_node *doc; char *result; diff --git a/src/cmark.h b/src/cmark.h index 84c6f76..d86e13e 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -24,7 +24,7 @@ extern "C" { * UTF-8-encoded string. */ CMARK_EXPORT -char *cmark_markdown_to_html(const char *text, int len, int options); +char *cmark_markdown_to_html(const char *text, size_t len, int options); /** ## Node Structure */ diff --git a/src/commonmark.c b/src/commonmark.c index dba1fcf..4594748 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -20,7 +20,7 @@ struct render_state { int column; int width; int need_cr; - int last_breakable; + bufsize_t last_breakable; bool begin_line; bool no_wrap; bool in_tight_list_item; @@ -237,30 +237,29 @@ shortest_unused_backtick_sequence(cmark_chunk *code) static bool is_autolink(cmark_node *node) { - const char *title; - const char *url; + cmark_chunk *title; + cmark_chunk *url; cmark_node *link_text; if (node->type != CMARK_NODE_LINK) { return false; } - url = cmark_node_get_url(node); - if (url == NULL || - _scan_scheme((unsigned char *)url) == 0) { + url = &node->as.link.url; + if (url->len == 0 || scan_scheme(url, 0) == 0) { return false; } - title = cmark_node_get_title(node); + title = &node->as.link.title; // if it has a title, we can't treat it as an autolink: - if (title != NULL && strlen(title) > 0) { + if (title->len > 0) { return false; } link_text = node->first_child; cmark_consolidate_text_nodes(link_text); - return ((int)strlen(url) == link_text->as.literal.len && - strncmp(url, + return (url->len == link_text->as.literal.len && + strncmp((char*)url->data, (char*)link_text->as.literal.data, link_text->as.literal.len) == 0); } @@ -289,11 +288,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, int numticks; int i; bool entering = (ev_type == CMARK_EVENT_ENTER); - const char *info; - const char *title; + cmark_chunk *info; + cmark_chunk *title; cmark_strbuf listmarker = GH_BUF_INIT; char *emph_delim; - int marker_width; + bufsize_t marker_width; // Don't adjust tight list status til we've started the list. // Otherwise we loose the blank line between a paragraph and @@ -396,12 +395,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_CODE_BLOCK: blankline(state); - info = cmark_node_get_fence_info(node); + info = &node->as.code.info; code = &node->as.code.literal; // use indented form if no info, and code doesn't // begin or end with a blank line, and code isn't // first thing in a list item - if ((info == NULL || strlen(info) == 0) && + if (info->len == 0 && (code->len > 2 && !isspace(code->data[0]) && !(isspace(code->data[code->len - 1]) && @@ -422,7 +421,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, lit(state, "`", false); } lit(state, " ", false); - out(state, cmark_chunk_literal(info), false, LITERAL); + out(state, *info, false, LITERAL); cr(state); out(state, node->as.code.literal, false, LITERAL); cr(state); @@ -542,11 +541,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, URL); - title = cmark_node_get_title(node); - if (title && strlen(title) > 0) { + title = &node->as.link.title; + if (title->len > 0) { lit(state, " \"", true); - out(state, cmark_chunk_literal(title), - false, TITLE); + out(state, *title, false, TITLE); lit(state, "\"", false); } lit(state, ")", false); @@ -560,10 +558,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, } else { lit(state, "](", false); out(state, cmark_chunk_literal(cmark_node_get_url(node)), false, URL); - title = cmark_node_get_title(node); - if (title && strlen(title) > 0) { + title = &node->as.link.title; + if (title->len > 0) { lit(state, " \"", true); - out(state, cmark_chunk_literal(title), false, TITLE); + out(state, *title, false, TITLE); lit(state, "\"", false); } lit(state, ")", false); diff --git a/src/houdini.h b/src/houdini.h index 9f00f6d..b926cf3 100644 --- a/src/houdini.h +++ b/src/houdini.h @@ -31,19 +31,12 @@ extern "C" { #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10) #define HOUDINI_UNESCAPED_SIZE(x) (x) -extern size_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure); -extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_xml(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_uri(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_url(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_js(cmark_strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_js(cmark_strbuf *ob, const uint8_t *src, size_t size); +extern bufsize_t houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure); +extern int houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); +extern int houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size); #ifdef __cplusplus } diff --git a/src/houdini_href_e.c b/src/houdini_href_e.c index 7527780..7fb958a 100644 --- a/src/houdini_href_e.c +++ b/src/houdini_href_e.c @@ -49,10 +49,10 @@ static const char HREF_SAFE[] = { }; int -houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_escape_href(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { static const uint8_t hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; + bufsize_t i = 0, org; uint8_t hex_str[3]; hex_str[0] = '%'; diff --git a/src/houdini_html_e.c b/src/houdini_html_e.c index 1a4c3e1..7f4b91f 100644 --- a/src/houdini_html_e.c +++ b/src/houdini_html_e.c @@ -45,9 +45,9 @@ static const char *HTML_ESCAPES[] = { }; int -houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secure) +houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, bufsize_t size, int secure) { - size_t i = 0, org, esc = 0; + bufsize_t i = 0, org, esc = 0; while (i < size) { org = i; @@ -75,7 +75,7 @@ houdini_escape_html0(cmark_strbuf *ob, const uint8_t *src, size_t size, int secu } int -houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_escape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { return houdini_escape_html0(ob, src, size, 1); } diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c index eaf295e..e57894d 100644 --- a/src/houdini_html_u.c +++ b/src/houdini_html_u.c @@ -7,10 +7,10 @@ #include "utf8.h" #include "html_unescape.h" -size_t -houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) +bufsize_t +houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { - size_t i = 0; + bufsize_t i = 0; if (size >= 3 && src[0] == '#') { int codepoint = 0; @@ -68,7 +68,7 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) const struct html_ent *entity = find_entity((char *)src, i); if (entity != NULL) { - int len = 0; + bufsize_t len = 0; while (len < 4 && entity->utf8[len] != '\0') { ++len; } @@ -85,9 +85,9 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) } int -houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) +houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { - size_t i = 0, org, ent; + bufsize_t i = 0, org, ent; while (i < size) { org = i; @@ -122,7 +122,7 @@ houdini_unescape_html(cmark_strbuf *ob, const uint8_t *src, size_t size) return 1; } -void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, size_t size) +void houdini_unescape_html_f(cmark_strbuf *ob, const uint8_t *src, bufsize_t size) { if (!houdini_unescape_html(ob, src, size)) cmark_strbuf_put(ob, src, size); @@ -11,20 +11,9 @@ // Functions to convert cmark_nodes to HTML strings. -static void escape_html(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_html(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { - if (length < 0) - length = strlen((char *)source); - - houdini_escape_html0(dest, source, (size_t)length, 0); -} - -static void escape_href(cmark_strbuf *dest, const unsigned char *source, int length) -{ - if (length < 0) - length = strlen((char *)source); - - houdini_escape_href(dest, source, (size_t)length); + houdini_escape_html0(dest, source, length, 0); } static inline void cr(cmark_strbuf *html) @@ -165,7 +154,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, S_render_sourcepos(node, html, options); cmark_strbuf_puts(html, "><code>"); } else { - int first_tag = 0; + bufsize_t first_tag = 0; while (first_tag < node->as.code.info.len && node->as.code.info.data[first_tag] != ' ') { first_tag += 1; @@ -261,8 +250,8 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LINK: if (entering) { cmark_strbuf_puts(html, "<a href=\""); - escape_href(html, node->as.link.url.data, - node->as.link.url.len); + houdini_escape_href(html, node->as.link.url.data, + node->as.link.url.len); if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); @@ -279,8 +268,8 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_IMAGE: if (entering) { cmark_strbuf_puts(html, "<img src=\""); - escape_href(html, node->as.link.url.data, - node->as.link.url.len); + houdini_escape_href(html, node->as.link.url.data, + node->as.link.url.len); cmark_strbuf_puts(html, "\" alt=\""); state->plain = node; diff --git a/src/inlines.c b/src/inlines.c index 8a1ee44..7e8f806 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -36,7 +36,7 @@ typedef struct delimiter { struct delimiter *previous; struct delimiter *next; cmark_node *inl_text; - int position; + bufsize_t position; unsigned char delim_char; bool can_open; bool can_close; @@ -45,7 +45,7 @@ typedef struct delimiter { typedef struct { cmark_chunk input; - int pos; + bufsize_t pos; cmark_reference_map *refmap; delimiter *last_delim; } subject; @@ -57,7 +57,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options); static void subject_from_buf(subject *e, cmark_strbuf *buffer, cmark_reference_map *refmap); -static int subject_find_special_char(subject *subj, int options); +static bufsize_t subject_find_special_char(subject *subj, int options); static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email) { @@ -143,7 +143,7 @@ static inline cmark_node* make_simple(cmark_node_type t) static cmark_chunk chunk_clone(cmark_chunk *src) { cmark_chunk c; - int len = src->len; + bufsize_t len = src->len; c.len = len; c.data = (unsigned char *)malloc(len + 1); @@ -177,7 +177,7 @@ static inline unsigned char peek_char(subject *subj) return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; } -static inline unsigned char peek_at(subject *subj, int pos) +static inline unsigned char peek_at(subject *subj, bufsize_t pos) { return subj->input.data[pos]; } @@ -195,8 +195,8 @@ static inline int is_eof(subject* subj) static inline cmark_chunk take_while(subject* subj, int (*f)(int)) { unsigned char c; - int startpos = subj->pos; - int len = 0; + bufsize_t startpos = subj->pos; + bufsize_t len = 0; while ((c = peek_char(subj)) && (*f)(c)) { advance(subj); @@ -211,7 +211,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int)) // parsed). Return 0 if you don't find matching closing // backticks, otherwise return the position in the subject // after the closing backticks. -static int scan_to_closing_backticks(subject* subj, int openticklength) +static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength) { // read non backticks unsigned char c; @@ -221,7 +221,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) if (is_eof(subj)) { return 0; // did not find closing ticks, return 0 } - int numticks = 0; + bufsize_t numticks = 0; while (peek_char(subj) == '`') { advance(subj); numticks++; @@ -237,8 +237,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) static cmark_node* handle_backticks(subject *subj) { cmark_chunk openticks = take_while(subj, isbacktick); - int startpos = subj->pos; - int endpos = scan_to_closing_backticks(subj, openticks.len); + bufsize_t startpos = subj->pos; + bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len); if (endpos == 0) { // not found subj->pos = startpos; // rewind @@ -260,7 +260,7 @@ static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) { int numdelims = 0; - int before_char_pos; + bufsize_t before_char_pos; int32_t after_char = 0; int32_t before_char = 0; int len; @@ -376,7 +376,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open, // Assumes the subject has a c at the current position. static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart) { - int numdelims; + bufsize_t numdelims; cmark_node * inl_text; bool can_open, can_close; cmark_chunk contents; @@ -500,11 +500,11 @@ static delimiter* S_insert_emph(subject *subj, delimiter *opener, delimiter *closer) { delimiter *delim, *tmp_delim; - int use_delims; + bufsize_t use_delims; cmark_node *opener_inl = opener->inl_text; cmark_node *closer_inl = closer->inl_text; - int opener_num_chars = opener_inl->as.literal.len; - int closer_num_chars = closer_inl->as.literal.len; + bufsize_t opener_num_chars = opener_inl->as.literal.len; + bufsize_t closer_num_chars = closer_inl->as.literal.len; cmark_node *tmp, *emph, *first_child, *last_child; // calculate the actual number of characters used from this closer @@ -596,7 +596,7 @@ static cmark_node* handle_backslash(subject *subj) static cmark_node* handle_entity(subject* subj) { cmark_strbuf ent = GH_BUF_INIT; - size_t len; + bufsize_t len; advance(subj); @@ -618,7 +618,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content) { cmark_strbuf unescaped = GH_BUF_INIT; - if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { + if (houdini_unescape_html(&unescaped, content->data, content->len)) { return make_str(cmark_chunk_buf_detach(&unescaped)); } else { return make_str(*content); @@ -678,7 +678,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title) // Assumes the subject has a '<' character at the current position. static cmark_node* handle_pointy_brace(subject* subj) { - int matchlen = 0; + bufsize_t matchlen = 0; cmark_chunk contents; advance(subj); // advance past first < @@ -725,7 +725,7 @@ static cmark_node* handle_pointy_brace(subject* subj) // encountered. Backticks in labels do not start code spans. static int link_label(subject* subj, cmark_chunk *raw_label) { - int startpos = subj->pos; + bufsize_t startpos = subj->pos; int length = 0; unsigned char c; @@ -769,10 +769,10 @@ noMatch: // Return a link, an image, or a literal close bracket. static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) { - int initial_pos; - int starturl, endurl, starttitle, endtitle, endall; - int n; - int sps; + bufsize_t initial_pos; + bufsize_t starturl, endurl, starttitle, endtitle, endall; + bufsize_t n; + bufsize_t sps; cmark_reference *ref; bool is_image = false; cmark_chunk url_chunk, title_chunk; @@ -922,7 +922,7 @@ match: // Assumes the subject has a newline at the current position. static cmark_node* handle_newline(subject *subj) { - int nlpos = subj->pos; + bufsize_t nlpos = subj->pos; // skip over newline advance(subj); // skip spaces at beginning of line @@ -938,7 +938,7 @@ static cmark_node* handle_newline(subject *subj) } } -static int subject_find_special_char(subject *subj, int options) +static bufsize_t subject_find_special_char(subject *subj, int options) { // "\r\n\\`&_*[]<!" static const int8_t SPECIAL_CHARS[256] = { @@ -980,7 +980,7 @@ static int subject_find_special_char(subject *subj, int options) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - int n = subj->pos + 1; + bufsize_t n = subj->pos + 1; while (n < subj->input.len) { if (SPECIAL_CHARS[subj->input.data[n]]) @@ -1001,7 +1001,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options) cmark_node* new_inl = NULL; cmark_chunk contents; unsigned char c; - int endpos; + bufsize_t endpos; c = peek_char(subj); if (c == 0) { return 0; @@ -1098,7 +1098,7 @@ static void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) +bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) { subject subj; @@ -1106,8 +1106,8 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma cmark_chunk url; cmark_chunk title; - int matchlen = 0; - int beforetitle; + bufsize_t matchlen = 0; + bufsize_t beforetitle; subject_from_buf(&subj, input, NULL); diff --git a/src/inlines.h b/src/inlines.h index 534588e..f8847fc 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -10,7 +10,7 @@ cmark_chunk cmark_clean_title(cmark_chunk *title); void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options); -int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); +bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); #ifdef __cplusplus } diff --git a/src/parser.h b/src/parser.h index ccdf84b..6e18c67 100644 --- a/src/parser.h +++ b/src/parser.h @@ -16,12 +16,12 @@ struct cmark_parser { struct cmark_node* root; struct cmark_node* current; int line_number; - int offset; - int first_nonspace; + bufsize_t offset; + bufsize_t first_nonspace; int indent; bool blank; cmark_strbuf *curline; - int last_line_length; + bufsize_t last_line_length; cmark_strbuf *linebuf; int options; }; diff --git a/src/scanners.c b/src/scanners.c index 7f9ed2e..3f4ddac 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -1,11 +1,11 @@ -/* Generated by re2c 0.13.6 */ +/* Generated by re2c 0.13.5 */ #include <stdlib.h> #include "chunk.h" #include "scanners.h" -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { - int res; + bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; unsigned char lim = ptr[c->len]; @@ -19,7 +19,7 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) // Try to match a scheme including colon. -int _scan_scheme(const unsigned char *p) +bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -578,7 +578,7 @@ yy34: if (yych != ':') goto yy31; yy35: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy37: yych = *++p; if (yych == 'E') goto yy38; @@ -2919,7 +2919,7 @@ yy484: } // Try to match URI autolink after first <, returning number of chars matched. -int _scan_autolink_uri(const unsigned char *p) +bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -3517,7 +3517,7 @@ yy520: } if (yych <= '=') goto yy516; ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy524: yych = *++p; if (yych == 'E') goto yy525; @@ -5858,7 +5858,7 @@ yy971: } // Try to match email autolink after first <, returning num of chars matched. -int _scan_autolink_email(const unsigned char *p) +bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -6060,7 +6060,7 @@ yy984: } yy985: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy987: ++p; yych = *p; @@ -10803,7 +10803,7 @@ yy1230: } // Try to match an HTML tag after first <, returning num of chars matched. -int _scan_html_tag(const unsigned char *p) +bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -10964,7 +10964,7 @@ yy1242: if (yych != '>') goto yy1239; yy1243: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1245: yych = *++p; if (yych == 'C') goto yy1260; @@ -11455,7 +11455,7 @@ yy1297: // Try to match an HTML block tag including first <, // returning num of chars matched. -int _scan_html_block_tag(const unsigned char *p) +bufsize_t _scan_html_block_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -11513,7 +11513,7 @@ yy1303: goto yy1301; yy1304: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1306: yych = *++p; if (yych <= '/') { @@ -12022,7 +12022,7 @@ yy1343: } yy1344: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1346: yych = *++p; if (yych <= 'R') { @@ -12639,7 +12639,7 @@ yy1466: } yy1467: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1469: yych = *++p; if (yych <= 'R') { @@ -13243,7 +13243,7 @@ yy1585: // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -int _scan_link_url(const unsigned char *p) +bufsize_t _scan_link_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -13308,7 +13308,7 @@ int _scan_link_url(const unsigned char *p) } } yy1588: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1589: yyaccept = 0; marker = ++p; @@ -13402,7 +13402,7 @@ yy1599: yy1600: p = marker; if (yyaccept <= 1) { - if (yyaccept == 0) { + if (yyaccept <= 0) { goto yy1588; } else { goto yy1595; @@ -13490,7 +13490,7 @@ yy1607: if (yych <= ' ') goto yy1608; if (yych != ')') goto yy1603; yy1608: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1609: ++p; yych = *p; @@ -13732,7 +13732,7 @@ yy1623: // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -int _scan_link_title(const unsigned char *p) +bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -13818,13 +13818,13 @@ yy1632: yy1633: p = marker; if (yyaccept <= 1) { - if (yyaccept == 0) { + if (yyaccept <= 0) { goto yy1626; } else { goto yy1637; } } else { - if (yyaccept == 2) { + if (yyaccept <= 2) { goto yy1644; } else { goto yy1651; @@ -13842,7 +13842,7 @@ yy1634: yy1636: ++p; yy1637: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1638: yyaccept = 1; marker = ++p; @@ -13874,7 +13874,7 @@ yy1641: yy1643: ++p; yy1644: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1645: yyaccept = 2; marker = ++p; @@ -13906,7 +13906,7 @@ yy1648: yy1650: ++p; yy1651: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1652: yyaccept = 3; marker = ++p; @@ -13922,7 +13922,7 @@ yy1652: } // Match space characters, including newlines. -int _scan_spacechars(const unsigned char *p) +bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ @@ -13973,7 +13973,7 @@ int _scan_spacechars(const unsigned char *p) goto yy1659; } yy1655: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1656: yych = *++p; goto yy1658; @@ -13993,7 +13993,7 @@ yy1659: } // Match ATX header start. -int _scan_atx_header_start(const unsigned char *p) +bufsize_t _scan_atx_header_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14059,7 +14059,7 @@ yy1665: yy1666: ++p; yy1667: - { return (p - start); } + { return (bufsize_t)(p - start); } yy1668: ++p; yych = *p; @@ -14128,7 +14128,7 @@ yy1672: // Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. -int _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; @@ -14269,7 +14269,7 @@ yy1693: // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -int _scan_hrule(const unsigned char *p) +bufsize_t _scan_hrule(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14384,7 +14384,7 @@ yy1709: if (yych != '\r') goto yy1704; yy1711: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1713: ++p; yych = *p; @@ -14422,7 +14422,7 @@ yy1719: } yy1721: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1723: ++p; yych = *p; @@ -14460,13 +14460,13 @@ yy1729: } yy1731: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scan an opening code fence. -int _scan_open_code_fence(const unsigned char *p) +bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14557,7 +14557,7 @@ yy1743: yy1745: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1747: yych = *++p; if (yybm[0+yych] & 64) { @@ -14585,13 +14585,13 @@ yy1750: yy1752: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scan a closing code fence with length at least len. -int _scan_close_code_fence(const unsigned char *p) +bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14687,7 +14687,7 @@ yy1764: yy1766: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1768: yych = *++p; if (yybm[0+yych] & 128) { @@ -14725,14 +14725,14 @@ yy1771: yy1773: ++p; p = marker; - { return (p - start); } + { return (bufsize_t)(p - start); } } } // Scans an entity. // Returns number of chars matched. -int _scan_entity(const unsigned char *p) +bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -14799,7 +14799,7 @@ yy1783: } yy1784: ++p; - { return (p - start); } + { return (bufsize_t)(p - start); } yy1786: yych = *++p; if (yych <= ';') { diff --git a/src/scanners.h b/src/scanners.h index 1353f3b..bc5134e 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -5,21 +5,21 @@ extern "C" { #endif -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset); -int _scan_scheme(const unsigned char *p); -int _scan_autolink_uri(const unsigned char *p); -int _scan_autolink_email(const unsigned char *p); -int _scan_html_tag(const unsigned char *p); -int _scan_html_block_tag(const unsigned char *p); -int _scan_link_url(const unsigned char *p); -int _scan_link_title(const unsigned char *p); -int _scan_spacechars(const unsigned char *p); -int _scan_atx_header_start(const unsigned char *p); -int _scan_setext_header_line(const unsigned char *p); -int _scan_hrule(const unsigned char *p); -int _scan_open_code_fence(const unsigned char *p); -int _scan_close_code_fence(const unsigned char *p); -int _scan_entity(const unsigned char *p); +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset); +bufsize_t _scan_scheme(const unsigned char *p); +bufsize_t _scan_autolink_uri(const unsigned char *p); +bufsize_t _scan_autolink_email(const unsigned char *p); +bufsize_t _scan_html_tag(const unsigned char *p); +bufsize_t _scan_html_block_tag(const unsigned char *p); +bufsize_t _scan_link_url(const unsigned char *p); +bufsize_t _scan_link_title(const unsigned char *p); +bufsize_t _scan_spacechars(const unsigned char *p); +bufsize_t _scan_atx_header_start(const unsigned char *p); +bufsize_t _scan_setext_header_line(const unsigned char *p); +bufsize_t _scan_hrule(const unsigned char *p); +bufsize_t _scan_open_code_fence(const unsigned char *p); +bufsize_t _scan_close_code_fence(const unsigned char *p); +bufsize_t _scan_entity(const unsigned char *p); #define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n) #define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n) diff --git a/src/scanners.re b/src/scanners.re index 9411018..3722a99 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -2,9 +2,9 @@ #include "chunk.h" #include "scanners.h" -int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) +bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, bufsize_t offset) { - int res; + bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; unsigned char lim = ptr[c->len]; @@ -70,29 +70,29 @@ int _scan_at(int (*scanner)(const unsigned char *), cmark_chunk *c, int offset) */ // Try to match a scheme including colon. -int _scan_scheme(const unsigned char *p) +bufsize_t _scan_scheme(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - scheme [:] { return (p - start); } + scheme [:] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match URI autolink after first <, returning number of chars matched. -int _scan_autolink_uri(const unsigned char *p) +bufsize_t _scan_autolink_uri(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - scheme [:][^\x00-\x20<>]*[>] { return (p - start); } + scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match email autolink after first <, returning num of chars matched. -int _scan_autolink_email(const unsigned char *p) +bufsize_t _scan_autolink_email(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -101,32 +101,32 @@ int _scan_autolink_email(const unsigned char *p) [@] [a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? ([.][a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)* - [>] { return (p - start); } + [>] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match an HTML tag after first <, returning num of chars matched. -int _scan_html_tag(const unsigned char *p) +bufsize_t _scan_html_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - htmltag { return (p - start); } + htmltag { return (bufsize_t)(p - start); } .? { return 0; } */ } // Try to match an HTML block tag including first <, // returning num of chars matched. -int _scan_html_block_tag(const unsigned char *p) +bufsize_t _scan_html_block_tag(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [<] [/] blocktagname (spacechar | [>]) { return (p - start); } - [<] blocktagname (spacechar | [/>]) { return (p - start); } - [<] [!?] { return (p - start); } + [<] [/] blocktagname (spacechar | [>]) { return (bufsize_t)(p - start); } + [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); } + [<] [!?] { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -135,13 +135,13 @@ int _scan_html_block_tag(const unsigned char *p) // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -int _scan_link_url(const unsigned char *p) +bufsize_t _scan_link_url(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } - [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } + [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); } + [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -149,42 +149,42 @@ int _scan_link_url(const unsigned char *p) // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -int _scan_link_title(const unsigned char *p) +bufsize_t _scan_link_title(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ["] (escaped_char|[^"\x00])* ["] { return (p - start); } - ['] (escaped_char|[^'\x00])* ['] { return (p - start); } - [(] (escaped_char|[^)\x00])* [)] { return (p - start); } + ["] (escaped_char|[^"\x00])* ["] { return (bufsize_t)(p - start); } + ['] (escaped_char|[^'\x00])* ['] { return (bufsize_t)(p - start); } + [(] (escaped_char|[^)\x00])* [)] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Match space characters, including newlines. -int _scan_spacechars(const unsigned char *p) +bufsize_t _scan_spacechars(const unsigned char *p) { const unsigned char *start = p; \ /*!re2c - [ \t\v\f\r\n]* { return (p - start); } + [ \t\v\f\r\n]* { return (bufsize_t)(p - start); } . { return 0; } */ } // Match ATX header start. -int _scan_atx_header_start(const unsigned char *p) +bufsize_t _scan_atx_header_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [#]{1,6} ([ ]+|[\r\n]) { return (p - start); } + [#]{1,6} ([ ]+|[\r\n]) { return (bufsize_t)(p - start); } .? { return 0; } */ } // Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. -int _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; /*!re2c @@ -197,51 +197,51 @@ int _scan_setext_header_line(const unsigned char *p) // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -int _scan_hrule(const unsigned char *p) +bufsize_t _scan_hrule(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); } - ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); } - ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); } + ([*][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } + ([_][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } + ([-][ ]*){3,} [ \t]* [\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scan an opening code fence. -int _scan_open_code_fence(const unsigned char *p) +bufsize_t _scan_open_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); } - [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); } + [`]{3,} / [^`\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [^~\r\n\x00]*[\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scan a closing code fence with length at least len. -int _scan_close_code_fence(const unsigned char *p) +bufsize_t _scan_close_code_fence(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [ \t]*[\r\n] { return (p - start); } - [~]{3,} / [ \t]*[\r\n] { return (p - start); } + [`]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } + [~]{3,} / [ \t]*[\r\n] { return (bufsize_t)(p - start); } .? { return 0; } */ } // Scans an entity. // Returns number of chars matched. -int _scan_entity(const unsigned char *p) +bufsize_t _scan_entity(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] - { return (p - start); } + { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -30,7 +30,7 @@ static void encode_unknown(cmark_strbuf *buf) cmark_strbuf_put(buf, repl, 3); } -static int utf8proc_charlen(const uint8_t *str, int str_len) +static int utf8proc_charlen(const uint8_t *str, bufsize_t str_len) { int length, i; @@ -42,7 +42,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len) if (!length) return -1; - if (str_len >= 0 && length > str_len) + if (str_len >= 0 && (bufsize_t)length > str_len) return -str_len; for (i = 1; i < length; i++) { @@ -54,7 +54,7 @@ static int utf8proc_charlen(const uint8_t *str, int str_len) } // Validate a single UTF-8 character according to RFC 3629. -static int utf8proc_valid(const uint8_t *str, int str_len) +static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) { int length = utf8proc_charlen(str, str_len); @@ -109,14 +109,14 @@ static int utf8proc_valid(const uint8_t *str, int str_len) return length; } -void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size) +void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) { static const uint8_t whitespace[] = " "; - size_t i = 0, tab = 0; + bufsize_t i = 0, tab = 0; while (i < size) { - size_t org = i; + bufsize_t org = i; while (i < size && line[i] != '\t' && line[i] != '\0' && line[i] < 0x80) { @@ -151,7 +151,7 @@ void utf8proc_detab(cmark_strbuf *ob, const uint8_t *line, size_t size) } } -int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) +int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst) { int length; int32_t uc = -1; @@ -191,7 +191,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) { uint8_t dst[4]; - int len = 0; + bufsize_t len = 0; assert(uc >= 0); @@ -227,7 +227,7 @@ void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) cmark_strbuf_put(buf, dst, len); } -void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len) +void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len) { int32_t c; @@ -235,7 +235,7 @@ void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len) utf8proc_encode_char(x, dest) while (len > 0) { - int char_len = utf8proc_iterate(str, len, &c); + bufsize_t char_len = utf8proc_iterate(str, len, &c); if (char_len >= 0) { #include "case_fold_switch.inc" @@ -8,10 +8,10 @@ extern "C" { #endif -void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, int len); +void utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len); void utf8proc_encode_char(int32_t uc, cmark_strbuf *buf); -int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst); -void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, size_t size); +int utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst); +void utf8proc_detab(cmark_strbuf *dest, const uint8_t *line, bufsize_t size); int utf8proc_is_space(int32_t uc); int utf8proc_is_punctuation(int32_t uc); @@ -11,14 +11,9 @@ // Functions to convert cmark_nodes to XML strings. -static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length) +static void escape_xml(cmark_strbuf *dest, const unsigned char *source, bufsize_t length) { - if (source != NULL) { - if (length < 0) - length = strlen((char *)source); - - houdini_escape_html0(dest, source, (size_t)length, 0); - } + houdini_escape_html0(dest, source, length, 0); } struct render_state { |