diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/CMakeLists.txt | 18 | ||||
-rw-r--r-- | src/blocks.c | 23 | ||||
-rw-r--r-- | src/buffer.c | 6 | ||||
-rw-r--r-- | src/chunk.h | 21 | ||||
-rw-r--r-- | src/commonmark.c | 100 | ||||
-rw-r--r-- | src/config.h.in | 2 | ||||
-rw-r--r-- | src/houdini_html_u.c | 39 | ||||
-rw-r--r-- | src/html.c | 18 | ||||
-rw-r--r-- | src/inlines.c | 73 | ||||
-rw-r--r-- | src/inlines.h | 4 | ||||
-rw-r--r-- | src/iterator.c | 8 | ||||
-rw-r--r-- | src/main.c | 6 | ||||
-rw-r--r-- | src/man.c | 3 | ||||
-rw-r--r-- | src/node.c | 27 | ||||
-rw-r--r-- | src/node.h | 4 | ||||
-rw-r--r-- | src/references.c | 4 | ||||
-rw-r--r-- | src/references.h | 4 | ||||
-rw-r--r-- | src/utf8.c | 5 | ||||
-rw-r--r-- | src/xml.c | 6 |
19 files changed, 214 insertions, 157 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 06c13e0..716b97b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -67,8 +67,8 @@ set_target_properties(${PROGRAM} PROPERTIES COMPILE_FLAGS -DCMARK_STATIC_DEFINE) # Check integrity of node structure when compiled as debug: -set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -pg -DCMARK_DEBUG_NODES") -set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG} -pg") +set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES") +set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}") set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") @@ -76,7 +76,7 @@ set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") if (${CMAKE_MAJOR_VERSION} GREATER 1 AND ${CMAKE_MINOR_VERSION} GREATER 8) set(CMAKE_C_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) -elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang") +elseif(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden") endif () @@ -87,6 +87,8 @@ set_target_properties(${LIBRARY} PROPERTIES OUTPUT_NAME "cmark" SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} VERSION ${PROJECT_VERSION}) +set_target_properties(${STATICLIBRARY} PROPERTIES + COMPILE_FLAGS -DCMARK_STATIC_DEFINE) if (MSVC) set_target_properties(${STATICLIBRARY} PROPERTIES @@ -109,6 +111,7 @@ if (MSVC) APPEND PROPERTY LINK_FLAGS /INCREMENTAL:NO) endif(MSVC) +set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) include (InstallRequiredSystemLibraries) install(TARGETS ${PROGRAM} ${LIBRARY} RUNTIME DESTINATION bin @@ -125,6 +128,7 @@ install(FILES # Feature tests include(CheckIncludeFile) include(CheckCSourceCompiles) +include(CheckCSourceRuns) include(CheckSymbolExists) CHECK_INCLUDE_FILE(stdbool.h HAVE_STDBOOL_H) CHECK_C_SOURCE_COMPILES( @@ -134,6 +138,10 @@ CHECK_C_SOURCE_COMPILES(" int f(void) __attribute__ (()); int main() { return 0; } " HAVE___ATTRIBUTE__) +CHECK_C_SOURCE_RUNS(" + #include <stdio.h> + int main() { return snprintf(NULL, 0, \"123\") == 3 ? 0 : 1; } +" HAVE_C99_SNPRINTF) CHECK_SYMBOL_EXISTS(va_copy stdarg.h HAVE_VA_COPY) CONFIGURE_FILE( @@ -161,3 +169,7 @@ endif() if($ENV{TIMER}) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTIMER=1") endif($ENV{TIMER}) + +if(CMAKE_BUILD_TYPE STREQUAL "Ubsan") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined") +endif() diff --git a/src/blocks.c b/src/blocks.c index 777356a..8ae452e 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -554,6 +554,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) bool blank = false; int first_nonspace; int indent; + bool indented; cmark_chunk input; bool maybe_lazy; @@ -690,11 +691,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) first_nonspace++; indent = first_nonspace - offset; + indented = indent >= CODE_INDENT; blank = peek_at(&input, first_nonspace) == '\n' || peek_at(&input, first_nonspace) == '\r'; - if (indent >= CODE_INDENT) { - if (!maybe_lazy && !blank) { + if (indented && !maybe_lazy && !blank) { offset += CODE_INDENT; container = add_child(parser, container, NODE_CODE_BLOCK, offset + 1); container->as.code.fenced = false; @@ -702,11 +703,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) container->as.code.fence_length = 0; container->as.code.fence_offset = 0; container->as.code.info = cmark_chunk_literal(""); - } else { // indent > 4 in lazy line - break; - } - } else if (peek_at(&input, first_nonspace) == '>') { + } else if (!indented && peek_at(&input, first_nonspace) == '>') { offset = first_nonspace + 1; // optional following character @@ -714,7 +712,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) offset++; container = add_child(parser, container, NODE_BLOCK_QUOTE, offset + 1); - } else if ((matched = scan_atx_header_start(&input, first_nonspace))) { + } else if (!indented && (matched = scan_atx_header_start(&input, first_nonspace))) { offset = first_nonspace + matched; container = add_child(parser, container, NODE_HEADER, offset + 1); @@ -729,7 +727,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) container->as.header.level = level; container->as.header.setext = false; - } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { + } else if (!indented && (matched = scan_open_code_fence(&input, first_nonspace))) { container = add_child(parser, container, NODE_CODE_BLOCK, first_nonspace + 1); container->as.code.fenced = true; @@ -739,12 +737,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) container->as.code.info = cmark_chunk_literal(""); offset = first_nonspace + matched; - } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { + } else if (!indented && (matched = scan_html_block_tag(&input, first_nonspace))) { container = add_child(parser, container, NODE_HTML, first_nonspace + 1); // note, we don't adjust offset because the tag is part of the text - } else if (container->type == NODE_PARAGRAPH && + } else if (!indented && + container->type == NODE_PARAGRAPH && (lev = scan_setext_header_line(&input, first_nonspace)) && // check that there is only one line in the paragraph: (cmark_strbuf_strrchr(&container->string_content, '\n', @@ -757,7 +756,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) container->as.header.setext = true; offset = input.len - 1; - } else if (!(container->type == NODE_PARAGRAPH && !all_matched) && + } else if (!indented && + !(container->type == NODE_PARAGRAPH && + !all_matched) && (matched = scan_hrule(&input, first_nonspace))) { // it's only now that we know the line is not part of a setext header: diff --git a/src/buffer.c b/src/buffer.c index 5ec8b49..2b7f062 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -175,6 +175,12 @@ int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) buf->asize - buf->size, format, args ); +#ifndef HAVE_C99_SNPRINTF + // Assume we're on Windows. + if (len < 0) { + len = _vscprintf(format, args); + } +#endif va_end(args); diff --git a/src/chunk.h b/src/chunk.h index 54c4b16..4bb4980 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -7,6 +7,8 @@ #include "cmark_ctype.h" #include "buffer.h" +#define CMARK_CHUNK_EMPTY { NULL, 0, 0 } + typedef struct { unsigned char *data; int len; @@ -64,7 +66,9 @@ static inline const char *cmark_chunk_to_cstr(cmark_chunk *c) } str = (unsigned char *)malloc(c->len + 1); if(str != NULL) { - memcpy(str, c->data, c->len); + if(c->len > 0){ + memcpy(str, c->data, c->len); + } str[c->len] = 0; } c->data = str; @@ -78,10 +82,17 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str) if (c->alloc) { free(c->data); } - c->len = strlen(str); - c->data = (unsigned char *)malloc(c->len + 1); - c->alloc = 1; - memcpy(c->data, str, c->len + 1); + if (str == NULL) { + c->len = 0; + c->data = NULL; + c->alloc = 0; + } + else { + c->len = strlen(str); + c->data = (unsigned char *)malloc(c->len + 1); + c->alloc = 1; + memcpy(c->data, str, c->len + 1); + } } static inline cmark_chunk cmark_chunk_literal(const char *data) diff --git a/src/commonmark.c b/src/commonmark.c index bef92f6..47da191 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -49,35 +49,35 @@ typedef enum { static inline bool needs_escaping(escaping escape, - int32_t c, - unsigned char next_c, - struct render_state *state) + int32_t c, + unsigned char next_c, + struct render_state *state) { if (escape == NORMAL) { return (c == '*' || c == '_' || c == '[' || c == ']' || - c == '<' || c == '>' || c == '\\' || c == '`' || - (c == '&' && isalpha(next_c)) || - (c == '!' && next_c == '[') || - (state->begin_line && - (c == '-' || c == '+' || c == '#' || c == '=')) || - (c == '#' && (isspace(next_c) || next_c == '\0')) || - ((c == '.' || c == ')') && - isdigit(state->buffer->ptr[state->buffer->size - 1]))); + c == '<' || c == '>' || c == '\\' || c == '`' || + (c == '&' && isalpha(next_c)) || + (c == '!' && next_c == '[') || + (state->begin_line && + (c == '-' || c == '+' || c == '#' || c == '=')) || + (c == '#' && (isspace(next_c) || next_c == '\0')) || + ((c == '.' || c == ')') && + isdigit(state->buffer->ptr[state->buffer->size - 1]))); } else if (escape == TITLE) { return (c == '`' || c == '<' || c == '>' || c == '"' || - c == '\\'); + c == '\\'); } else if (escape == URL) { return (c == '`' || c == '<' || c == '>' || isspace(c) || - c == '\\' || c == ')' || c == '('); + c == '\\' || c == ')' || c == '('); } else { return false; } } static inline void out(struct render_state *state, - cmark_chunk str, - bool wrap, - escaping escape) + cmark_chunk str, + bool wrap, + escaping escape) { unsigned char* source = str.data; int length = str.len; @@ -100,7 +100,7 @@ static inline void out(struct render_state *state, cmark_strbuf_putc(state->buffer, '\n'); if (state->need_cr > 1) { cmark_strbuf_put(state->buffer, state->prefix->ptr, - state->prefix->size); + state->prefix->size); } } state->column = 0; @@ -111,12 +111,15 @@ static inline void out(struct render_state *state, while (i < length) { if (state->begin_line) { cmark_strbuf_put(state->buffer, state->prefix->ptr, - state->prefix->size); + state->prefix->size); // note: this assumes prefix is ascii: state->column = state->prefix->size; } len = utf8proc_iterate(source + i, length - i, &c); + if (len == -1) { // error condition + return; // return without rendering rest of string + } nextc = source[i + len]; if (c == 32 && wrap) { if (!state->begin_line) { @@ -124,7 +127,7 @@ static inline void out(struct render_state *state, state->column += 1; state->begin_line = false; state->last_breakable = state->buffer->size - - 1; + 1; // skip following spaces while (source[i + 1] == ' ') { i++; @@ -167,7 +170,7 @@ static inline void out(struct render_state *state, // add newline, prefix, and remainder cmark_strbuf_putc(state->buffer, '\n'); cmark_strbuf_put(state->buffer, state->prefix->ptr, - state->prefix->size); + state->prefix->size); cmark_strbuf_put(state->buffer, remainder.data, remainder.len); state->column = state->prefix->size + remainder.len; cmark_chunk_free(&remainder); @@ -236,6 +239,7 @@ is_autolink(cmark_node *node) { const char *title; const char *url; + cmark_node *link_text; if (node->type != CMARK_NODE_LINK) { return false; @@ -252,10 +256,13 @@ is_autolink(cmark_node *node) if (title != NULL && strlen(title) > 0) { return false; } - cmark_consolidate_text_nodes(node); - return (strncmp(url, - (char*)node->as.literal.data, - node->as.literal.len) == 0); + + link_text = node->first_child; + cmark_consolidate_text_nodes(link_text); + return ((int)strlen(url) == link_text->as.literal.len && + strncmp(url, + (char*)link_text->as.literal.data, + link_text->as.literal.len) == 0); } // if node is a block node, returns node. @@ -265,7 +272,7 @@ get_containing_block(cmark_node *node) { while (node && (node->type < CMARK_NODE_FIRST_BLOCK || - node->type > CMARK_NODE_LAST_BLOCK)) { + node->type > CMARK_NODE_LAST_BLOCK)) { node = node->parent; } return node; @@ -293,14 +300,14 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, // a following list. if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) { - tmp = get_containing_block(node); - state->in_tight_list_item = - (tmp->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent)) || - (tmp && - tmp->parent && - tmp->parent->type == CMARK_NODE_ITEM && - cmark_node_get_list_tight(tmp->parent->parent)); + tmp = get_containing_block(node); + state->in_tight_list_item = + (tmp->type == CMARK_NODE_ITEM && + cmark_node_get_list_tight(tmp->parent)) || + (tmp && + tmp->parent && + tmp->parent->type == CMARK_NODE_ITEM && + cmark_node_get_list_tight(tmp->parent->parent)); } switch (node->type) { @@ -316,7 +323,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, cmark_strbuf_puts(state->prefix, "> "); } else { cmark_strbuf_truncate(state->prefix, - state->prefix->size - 2); + state->prefix->size - 2); blankline(state); } break; @@ -348,10 +355,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, // we get nice transition from single digits // to double cmark_strbuf_printf(&listmarker, - "%d%s%s", list_number, - list_delim == CMARK_PAREN_DELIM ? - ")" : ".", - list_number < 10 ? " " : " "); + "%d%s%s", list_number, + list_delim == CMARK_PAREN_DELIM ? + ")" : ".", + list_number < 10 ? " " : " "); marker_width = listmarker.size; } if (entering) { @@ -361,14 +368,14 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, cmark_strbuf_puts(state->prefix, " "); } else { lit(state, (char *)listmarker.ptr, false); - for (i=marker_width; i--;) { + for (i = marker_width; i--;) { cmark_strbuf_putc(state->prefix, ' '); } } } else { cmark_strbuf_truncate(state->prefix, - state->prefix->size - - marker_width); + state->prefix->size - + marker_width); cr(state); } cmark_strbuf_free(&listmarker); @@ -405,7 +412,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, cmark_strbuf_puts(state->prefix, " "); out(state, node->as.code.literal, false, LITERAL); cmark_strbuf_truncate(state->prefix, - state->prefix->size - 4); + state->prefix->size - 4); } else { numticks = longest_backtick_sequence(code) + 1; if (numticks < 3) { @@ -514,7 +521,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, if (entering) { lit(state, "<", false); if (strncmp(cmark_node_get_url(node), - "mailto:", 7) == 0) { + "mailto:", 7) == 0) { lit(state, (char *)cmark_node_get_url(node) + 7, false); @@ -579,9 +586,10 @@ char *cmark_render_commonmark(cmark_node *root, int options, int width) if (CMARK_OPT_HARDBREAKS & options) { width = 0; } - struct render_state state = - { options, &commonmark, &prefix, 0, width, - 0, 0, true, false, false}; + struct render_state state = { + options, &commonmark, &prefix, 0, width, + 0, 0, true, false, false + }; cmark_node *cur; cmark_event_type ev_type; cmark_iter *iter = cmark_iter_new(root); diff --git a/src/config.h.in b/src/config.h.in index c1e9597..5960928 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -21,3 +21,5 @@ #ifndef HAVE_VA_COPY #define va_copy(dest, src) ((dest) = (src)) #endif + +#cmakedefine HAVE_C99_SNPRINTF diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c index 2cb14b4..eaf295e 100644 --- a/src/houdini_html_u.c +++ b/src/houdini_html_u.c @@ -12,32 +12,45 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size) { size_t i = 0; - if (size > 3 && src[0] == '#') { - int codepoint = 0; + if (size >= 3 && src[0] == '#') { + int codepoint = 0; + int num_digits = 0; if (_isdigit(src[1])) { for (i = 1; i < size && _isdigit(src[i]); ++i) { - int cp = (codepoint * 10) + (src[i] - '0'); + codepoint = (codepoint * 10) + (src[i] - '0'); - if (cp < codepoint) - return 0; - - codepoint = cp; + if (codepoint >= 0x110000) { + // Keep counting digits but + // avoid integer overflow. + codepoint = 0x110000; + } } + + num_digits = i - 1; } else if (src[1] == 'x' || src[1] == 'X') { for (i = 2; i < size && _isxdigit(src[i]); ++i) { - int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9); + codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); - if (cp < codepoint) - return 0; - - codepoint = cp; + if (codepoint >= 0x110000) { + // Keep counting digits but + // avoid integer overflow. + codepoint = 0x110000; + } } + + num_digits = i - 2; } - if (i < size && src[i] == ';' && codepoint) { + if (num_digits >= 1 && num_digits <= 8 && + i < size && src[i] == ';') { + if (codepoint == 0 || + (codepoint >= 0xD800 && codepoint < 0xE000) || + codepoint >= 0x110000) { + codepoint = 0xFFFD; + } utf8proc_encode_char(codepoint, ob); return i + 1; } @@ -261,12 +261,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LINK: if (entering) { cmark_strbuf_puts(html, "<a href=\""); - if (node->as.link.url) - escape_href(html, node->as.link.url, -1); + escape_href(html, node->as.link.url.data, + node->as.link.url.len); - if (node->as.link.title) { + if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); - escape_html(html, node->as.link.title, -1); + escape_html(html, node->as.link.title.data, + node->as.link.title.len); } cmark_strbuf_puts(html, "\">"); @@ -278,15 +279,16 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_IMAGE: if (entering) { cmark_strbuf_puts(html, "<img src=\""); - if (node->as.link.url) - escape_href(html, node->as.link.url, -1); + escape_href(html, node->as.link.url.data, + node->as.link.url.len); cmark_strbuf_puts(html, "\" alt=\""); state->plain = node; } else { - if (node->as.link.title) { + if (node->as.link.title.len) { cmark_strbuf_puts(html, "\" title=\""); - escape_html(html, node->as.link.title, -1); + escape_html(html, node->as.link.title.data, + node->as.link.title.len); } cmark_strbuf_puts(html, "\" />"); diff --git a/src/inlines.c b/src/inlines.c index 7175327..232fc10 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -59,31 +59,33 @@ static void subject_from_buf(subject *e, cmark_strbuf *buffer, cmark_reference_map *refmap); static int subject_find_special_char(subject *subj, int options); -static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email) +static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email) { cmark_strbuf buf = GH_BUF_INIT; cmark_chunk_trim(url); - if (url->len == 0) - return NULL; + if (url->len == 0) { + cmark_chunk result = CMARK_CHUNK_EMPTY; + return result; + } if (is_email) cmark_strbuf_puts(&buf, "mailto:"); houdini_unescape_html_f(&buf, url->data, url->len); - return cmark_strbuf_detach(&buf); + return cmark_chunk_buf_detach(&buf); } -static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title) +static inline cmark_node *make_link(cmark_node *label, cmark_chunk *url, cmark_chunk *title) { cmark_node* e = (cmark_node *)calloc(1, sizeof(*e)); if(e != NULL) { e->type = CMARK_NODE_LINK; e->first_child = label; e->last_child = label; - e->as.link.url = url; - e->as.link.title = title; + e->as.link.url = *url; + e->as.link.title = *title; e->next = NULL; label->parent = e; } @@ -92,7 +94,9 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email) { - return make_link(label, cmark_clean_autolink(&url, is_email), NULL); + cmark_chunk clean_url = cmark_clean_autolink(&url, is_email); + cmark_chunk title = CMARK_CHUNK_EMPTY; + return make_link(label, &clean_url, &title); } // Create an inline with a literal string value. @@ -134,19 +138,20 @@ static inline cmark_node* make_simple(cmark_node_type t) return e; } -static unsigned char *bufdup(const unsigned char *buf) +// Duplicate a chunk by creating a copy of the buffer not by reusing the +// buffer like cmark_chunk_dup does. +static cmark_chunk chunk_clone(cmark_chunk *src) { - unsigned char *new_buf = NULL; + cmark_chunk c; + int len = src->len; - if (buf) { - int len = strlen((char *)buf); - new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf)); - if(new_buf != NULL) { - memcpy(new_buf, buf, len + 1); - } - } + c.len = len; + c.data = (unsigned char *)malloc(len + 1); + c.alloc = 1; + memcpy(c.data, src->data, len); + c.data[len] = '\0'; - return new_buf; + return c; } static void subject_from_buf(subject *e, cmark_strbuf *buffer, @@ -301,8 +306,10 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) !utf8proc_is_space(after_char) && !utf8proc_is_punctuation(after_char)); if (c == '_') { - *can_open = left_flanking && !right_flanking; - *can_close = right_flanking && !left_flanking; + *can_open = left_flanking && + (!right_flanking || utf8proc_is_punctuation(before_char)); + *can_close = right_flanking && + (!left_flanking || utf8proc_is_punctuation(after_char)); } else if (c == '\'' || c == '"') { *can_open = left_flanking && !right_flanking; *can_close = right_flanking; @@ -620,14 +627,16 @@ static cmark_node *make_str_with_entities(cmark_chunk *content) // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -unsigned char *cmark_clean_url(cmark_chunk *url) +cmark_chunk cmark_clean_url(cmark_chunk *url) { cmark_strbuf buf = GH_BUF_INIT; cmark_chunk_trim(url); - if (url->len == 0) - return NULL; + if (url->len == 0) { + cmark_chunk result = CMARK_CHUNK_EMPTY; + return result; + } if (url->data[0] == '<' && url->data[url->len - 1] == '>') { houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); @@ -636,16 +645,18 @@ unsigned char *cmark_clean_url(cmark_chunk *url) } cmark_strbuf_unescape(&buf); - return buf.size == 0 ? NULL : cmark_strbuf_detach(&buf); + return cmark_chunk_buf_detach(&buf); } -unsigned char *cmark_clean_title(cmark_chunk *title) +cmark_chunk cmark_clean_title(cmark_chunk *title) { cmark_strbuf buf = GH_BUF_INIT; unsigned char first, last; - if (title->len == 0) - return NULL; + if (title->len == 0) { + cmark_chunk result = CMARK_CHUNK_EMPTY; + return result; + } first = title->data[0]; last = title->data[title->len - 1]; @@ -660,7 +671,7 @@ unsigned char *cmark_clean_title(cmark_chunk *title) } cmark_strbuf_unescape(&buf); - return buf.size == 0 ? NULL : cmark_strbuf_detach(&buf); + return cmark_chunk_buf_detach(&buf); } // Parse an autolink or HTML tag. @@ -764,7 +775,7 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) cmark_reference *ref; bool is_image = false; cmark_chunk url_chunk, title_chunk; - unsigned char *url, *title; + cmark_chunk url, title; delimiter *opener; cmark_node *link_text; cmark_node *inl; @@ -852,8 +863,8 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) cmark_chunk_free(&raw_label); if (ref != NULL) { // found - url = bufdup(ref->url); - title = bufdup(ref->title); + url = chunk_clone(&ref->url); + title = chunk_clone(&ref->title); goto match; } else { goto noMatch; diff --git a/src/inlines.h b/src/inlines.h index 9e56790..534588e 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -5,8 +5,8 @@ extern "C" { #endif -unsigned char *cmark_clean_url(cmark_chunk *url); -unsigned char *cmark_clean_title(cmark_chunk *title); +cmark_chunk cmark_clean_url(cmark_chunk *url); +cmark_chunk cmark_clean_title(cmark_chunk *title); void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options); diff --git a/src/iterator.c b/src/iterator.c index c6faf99..f18e3bf 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -129,18 +129,20 @@ void cmark_consolidate_text_nodes(cmark_node *root) cur->next && cur->next->type == CMARK_NODE_TEXT) { cmark_strbuf_clear(&buf); - cmark_strbuf_puts(&buf, cmark_node_get_literal(cur)); + cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len); tmp = cur->next; while (tmp && tmp->type == CMARK_NODE_TEXT) { cmark_iter_next(iter); // advance pointer - cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp)); + cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len); next = tmp->next; cmark_node_free(tmp); tmp = next; } - cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf)); + cmark_chunk_free(&cur->as.literal); + cur->as.literal = cmark_chunk_buf_detach(&buf); } } + cmark_strbuf_free(&buf); cmark_iter_free(iter); } @@ -102,13 +102,13 @@ int main(int argc, char *argv[]) width = (int)strtol(argv[i], &unparsed, 10); if (unparsed && strlen(unparsed) > 0) { fprintf(stderr, - "failed parsing width '%s' at '%s'\n", - argv[i], unparsed); + "failed parsing width '%s' at '%s'\n", + argv[i], unparsed); exit(1); } } else { fprintf(stderr, - "--width requires an argument\n"); + "--width requires an argument\n"); exit(1); } } else if ((strcmp(argv[i], "-t") == 0) || @@ -20,6 +20,9 @@ static void escape_man(cmark_strbuf *dest, const unsigned char *source, int leng while (i < length) { len = utf8proc_iterate(source + i, length - i, &c); + if (len == -1) { // error condition + return; // return without rendering anything + } switch(c) { case 46: if (beginLine) { @@ -122,12 +122,8 @@ void S_free_nodes(cmark_node *e) break; case NODE_LINK: case NODE_IMAGE: - if (e->as.link.url) { - free(e->as.link.url); - } - if (e->as.link.title) { - free(e->as.link.title); - } + cmark_chunk_free(&e->as.link.url); + cmark_chunk_free(&e->as.link.title); break; default: break; @@ -282,15 +278,6 @@ cmark_node_set_user_data(cmark_node *node, void *user_data) return 1; } -static char* -S_strdup(const char *str) -{ - size_t size = strlen(str) + 1; - char *dup = (char *)malloc(size); - memcpy(dup, str, size); - return dup; -} - const char* cmark_node_get_literal(cmark_node *node) { @@ -541,7 +528,7 @@ cmark_node_get_url(cmark_node *node) switch (node->type) { case NODE_LINK: case NODE_IMAGE: - return (char *)node->as.link.url; + return cmark_chunk_to_cstr(&node->as.link.url); default: break; } @@ -559,8 +546,7 @@ cmark_node_set_url(cmark_node *node, const char *url) switch (node->type) { case NODE_LINK: case NODE_IMAGE: - free(node->as.link.url); - node->as.link.url = (unsigned char *)S_strdup(url); + cmark_chunk_set_cstr(&node->as.link.url, url); return 1; default: break; @@ -579,7 +565,7 @@ cmark_node_get_title(cmark_node *node) switch (node->type) { case NODE_LINK: case NODE_IMAGE: - return (char *)node->as.link.title; + return cmark_chunk_to_cstr(&node->as.link.title); default: break; } @@ -597,8 +583,7 @@ cmark_node_set_title(cmark_node *node, const char *title) switch (node->type) { case NODE_LINK: case NODE_IMAGE: - free(node->as.link.title); - node->as.link.title = (unsigned char *)S_strdup(title); + cmark_chunk_set_cstr(&node->as.link.title, title); return 1; default: break; @@ -38,8 +38,8 @@ typedef struct { } cmark_header; typedef struct { - unsigned char *url; - unsigned char *title; + cmark_chunk url; + cmark_chunk title; } cmark_link; struct cmark_node { diff --git a/src/references.c b/src/references.c index 37bf4cb..1d3d56d 100644 --- a/src/references.c +++ b/src/references.c @@ -20,8 +20,8 @@ static void reference_free(cmark_reference *ref) { if(ref != NULL) { free(ref->label); - free(ref->url); - free(ref->title); + cmark_chunk_free(&ref->url); + cmark_chunk_free(&ref->title); free(ref); } } diff --git a/src/references.h b/src/references.h index 69325bb..a360cd5 100644 --- a/src/references.h +++ b/src/references.h @@ -12,8 +12,8 @@ extern "C" { struct cmark_reference { struct cmark_reference *next; unsigned char *label; - unsigned char *url; - unsigned char *title; + cmark_chunk url; + cmark_chunk title; unsigned int hash; }; @@ -172,8 +172,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) case 3: uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + (str[2] & 0x3F); - if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) || - (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; + if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) uc = -1; break; case 4: uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) @@ -182,7 +181,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) break; } - if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE)) + if (uc < 0) return -1; *dst = uc; @@ -118,10 +118,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type, case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: cmark_strbuf_puts(xml, " destination=\""); - escape_xml(xml, node->as.link.url, -1); + escape_xml(xml, node->as.link.url.data, + node->as.link.url.len); cmark_strbuf_putc(xml, '"'); cmark_strbuf_puts(xml, " title=\""); - escape_xml(xml, node->as.link.title, -1); + escape_xml(xml, node->as.link.title.data, + node->as.link.title.len); cmark_strbuf_putc(xml, '"'); break; default: |