summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/CMakeLists.txt 12
-rw-r--r-- src/blocks.c 13
-rw-r--r-- src/inlines.c 262
-rw-r--r-- src/inlines.h 2
-rw-r--r-- src/iterator.c 1
-rw-r--r-- src/latex.c 22
-rw-r--r-- src/libcmark.pc.in 2
-rw-r--r-- src/main.c 1
-rw-r--r-- src/node.h 1
-rw-r--r-- src/scanners.c 58
-rw-r--r-- src/scanners.re 2
11 files changed, 243 insertions, 133 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3197196..d5a1936 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,6 +2,8 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.3")
cmake_policy(SET CMP0063 NEW)
endif()
+include(GNUInstallDirs)
+
set(LIBRARY "libcmark")
set(STATICLIBRARY "libcmark_static")
set(HEADERS
@@ -123,19 +125,21 @@ endif(MSVC)
set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
+set(libdir lib${LIB_SUFFIX})
+
include (InstallRequiredSystemLibraries)
install(TARGETS ${PROGRAM} ${CMARK_INSTALL}
EXPORT cmark
RUNTIME DESTINATION bin
- LIBRARY DESTINATION lib${LIB_SUFFIX}
- ARCHIVE DESTINATION lib${LIB_SUFFIX}
+ LIBRARY DESTINATION ${libdir}
+ ARCHIVE DESTINATION ${libdir}
)
if(CMARK_SHARED OR CMARK_STATIC)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcmark.pc.in
${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc @ONLY)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc
- DESTINATION lib${LIB_SUFFIX}/pkgconfig)
+ DESTINATION ${libdir}/pkgconfig)
install(FILES
cmark.h
@@ -144,7 +148,7 @@ if(CMARK_SHARED OR CMARK_STATIC)
DESTINATION include
)
- install(EXPORT cmark DESTINATION lib${LIB_SUFFIX}/cmake)
+ # Install the exported targets file under the same ${libdir} used for the
+ # library, archive and pkgconfig destinations above.
+ install(EXPORT cmark DESTINATION ${libdir}/cmake)
endif()
# Feature tests
diff --git a/src/blocks.c b/src/blocks.c
index 5a293b2..7f58ffd 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -255,17 +255,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
switch (S_type(b)) {
case CMARK_NODE_PARAGRAPH:
- while (cmark_strbuf_at(node_content, 0) == '[' &&
- (pos = cmark_parse_reference_inline(parser->mem, node_content,
- parser->refmap))) {
+ {
+ cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
+ while (chunk.len && chunk.data[0] == '[' &&
+ (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {
- cmark_strbuf_drop(node_content, pos);
+ chunk.data += pos;
+ chunk.len -= pos;
}
+ cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
if (is_blank(node_content, 0)) {
// remove blank node (former reference def)
cmark_node_free(b);
}
break;
+ }
case CMARK_NODE_CODE_BLOCK:
if (!b->as.code.fenced) { // indented code
@@ -900,6 +904,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(*container)->as.heading.level = level;
(*container)->as.heading.setext = false;
+ (*container)->internal_offset = matched;
} else if (!indented && (matched = scan_open_code_fence(
input, parser->first_nonspace))) {
diff --git a/src/inlines.c b/src/inlines.c
index c95809c..d0ab253 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -22,9 +22,9 @@ static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";
static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
// Macros for creating various kinds of simple inline nodes.
-#define make_str(mem, s) make_literal(mem, CMARK_NODE_TEXT, s)
-#define make_code(mem, s) make_literal(mem, CMARK_NODE_CODE, s)
-#define make_raw_html(mem, s) make_literal(mem, CMARK_NODE_HTML_INLINE, s)
+#define make_str(subj, sc, ec, s) make_literal(subj, CMARK_NODE_TEXT, sc, ec, s)
+#define make_code(subj, sc, ec, s) make_literal(subj, CMARK_NODE_CODE, sc, ec, s)
+#define make_raw_html(subj, sc, ec, s) make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s)
#define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
#define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
#define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
@@ -55,7 +55,10 @@ typedef struct bracket {
typedef struct {
cmark_mem *mem;
cmark_chunk input;
+ int line;
bufsize_t pos;
+ int block_offset;
+ int column_offset;
cmark_reference_map *refmap;
delimiter *last_delim;
bracket *last_bracket;
@@ -72,17 +75,22 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
static int parse_inline(subject *subj, cmark_node *parent, int options);
-static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
- cmark_reference_map *refmap);
+static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
+ cmark_chunk *chunk, cmark_reference_map *refmap);
static bufsize_t subject_find_special_char(subject *subj, int options);
// Create an inline with a literal string value.
-static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t,
+static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
+ int start_column, int end_column, // positions in the subject's input; converted to columns below
cmark_chunk s) {
- cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
- cmark_strbuf_init(mem, &e->content, 0);
- e->type = t;
+ cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e)); // zero-filled node from the subject's allocator
+ cmark_strbuf_init(subj->mem, &e->content, 0);
+ e->type = (uint16_t)t; // the node's type field is declared uint16_t
e->as.literal = s;
+ e->start_line = e->end_line = subj->line; // node starts and ends on the subject's current line
+ // Columns are 1-based: add 1, then the subject's column and block offsets.
+ e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
+ e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
return e;
}
@@ -95,14 +103,15 @@ static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
}
// Like make_str, but first unescapes HTML entities in `content`.
-static cmark_node *make_str_with_entities(cmark_mem *mem,
+static cmark_node *make_str_with_entities(subject *subj,
+ int start_column, int end_column, // forwarded unchanged to make_str
cmark_chunk *content) {
- cmark_strbuf unescaped = CMARK_BUF_INIT(mem);
+ cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
if (houdini_unescape_html(&unescaped, content->data, content->len)) {
- return make_str(mem, cmark_chunk_buf_detach(&unescaped));
+ return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped)); // nonzero result: literal is the unescaped buffer
} else {
- return make_str(mem, *content);
+ return make_str(subj, start_column, end_column, *content); // zero result: literal is `content` as given
}
}
@@ -140,23 +149,28 @@ static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
return cmark_chunk_buf_detach(&buf);
}
-static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url,
- int is_email) {
- cmark_node *link = make_simple(mem, CMARK_NODE_LINK);
- link->as.link.url = cmark_clean_autolink(mem, &url, is_email);
+// Build a LINK node for an autolink whose text is `url`. `start_column` and
+// `end_column` are subject positions spanning the whole autolink as passed by
+// the caller; the TEXT child covers the positions strictly between them.
+// `is_email` is forwarded to cmark_clean_autolink.
+static CMARK_INLINE cmark_node *make_autolink(subject *subj,
+ int start_column, int end_column,
+ cmark_chunk url, int is_email) {
+ cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
+ link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
link->as.link.title = cmark_chunk_literal("");
- cmark_node_append_child(link, make_str_with_entities(mem, &url));
+ link->start_line = link->end_line = subj->line;
+ // Columns are 1-based and, as in make_literal, shifted by the subject's
+ // column and block offsets so the link and its text child use the same
+ // coordinate system.
+ link->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
+ link->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
+ cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));
return link;
}
-static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
- cmark_reference_map *refmap) {
+static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
+ cmark_chunk *chunk, cmark_reference_map *refmap) {
int i;
e->mem = mem;
- e->input.data = buffer->ptr;
- e->input.len = buffer->size;
- e->input.alloc = 0;
+ e->input = *chunk;
+ e->line = line_number;
e->pos = 0;
+ e->block_offset = block_offset;
+ e->column_offset = 0;
e->refmap = refmap;
e->last_delim = NULL;
e->last_bracket = NULL;
@@ -223,6 +237,47 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
return cmark_chunk_dup(&subj->input, startpos, len);
}
+// Count the '\n' bytes among the `len` bytes of `subj`'s input that start at
+// `from`. When at least one is found, `*since_newline` receives the number of
+// bytes that follow the last newline in the span; otherwise it is not written.
+static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
+ int newline_count = 0;
+ int tail_len = 0;
+ bufsize_t idx = from;
+ bufsize_t remaining;
+
+ for (remaining = len; remaining != 0; --remaining, ++idx) {
+ if (subj->input.data[idx] != '\n') {
+ ++tail_len;
+ continue;
+ }
+ // A newline restarts the count of bytes seen since the last one.
+ ++newline_count;
+ tail_len = 0;
+ }
+
+ if (newline_count != 0)
+ *since_newline = tail_len;
+
+ return newline_count;
+}
+
+// When CMARK_OPT_SOURCEPOS is set in `options`, account for newlines inside
+// the just-matched span of `matchlen` bytes that begins `matchlen + extra`
+// bytes before `subj->pos`: bump `subj->line` and `node->end_line`, and reset
+// `node->end_column` and `subj->column_offset`. Does nothing otherwise, or
+// when the span contains no newline.
+static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
+ int after_last_newline;
+ int newline_count;
+
+ if ((options & CMARK_OPT_SOURCEPOS) == 0)
+ return;
+
+ newline_count = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &after_last_newline);
+ if (newline_count == 0)
+ return;
+
+ subj->line += newline_count;
+ node->end_line += newline_count;
+ node->end_column = after_last_newline;
+ subj->column_offset = -subj->pos + after_last_newline + extra;
+}
+
// Try to process a backtick code span that began with a
// span of ticks of length openticklength (already
// parsed). Return 0 if you don't find matching closing
@@ -270,14 +325,14 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
-static cmark_node *handle_backticks(subject *subj) {
+static cmark_node *handle_backticks(subject *subj, int options) {
cmark_chunk openticks = take_while(subj, isbacktick);
bufsize_t startpos = subj->pos;
bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
if (endpos == 0) { // not found
subj->pos = startpos; // rewind
- return make_str(subj->mem, openticks);
+ return make_str(subj, subj->pos, subj->pos, openticks);
} else {
cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
@@ -286,7 +341,9 @@ static cmark_node *handle_backticks(subject *subj) {
cmark_strbuf_trim(&buf);
cmark_strbuf_normalize_whitespace(&buf);
- return make_code(subj->mem, cmark_chunk_buf_detach(&buf));
+ cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
+ adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
+ return node;
}
}
@@ -345,7 +402,8 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
*can_close = right_flanking &&
(!left_flanking || cmark_utf8proc_is_punctuation(after_char));
} else if (c == '\'' || c == '"') {
- *can_open = left_flanking && !right_flanking;
+ *can_open = left_flanking && !right_flanking &&
+ before_char != ']' && before_char != ')';
*can_close = right_flanking;
} else {
*can_open = left_flanking;
@@ -443,7 +501,7 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
}
- inl_text = make_str(subj->mem, contents);
+ inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
push_delimiter(subj, c, can_open, can_close, inl_text);
@@ -459,7 +517,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {
advance(subj);
if (!smart || peek_char(subj) != '-') {
- return make_str(subj->mem, cmark_chunk_literal("-"));
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
}
while (smart && peek_char(subj) == '-') {
@@ -492,7 +550,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {
cmark_strbuf_puts(&buf, ENDASH);
}
- return make_str(subj->mem, cmark_chunk_buf_detach(&buf));
+ return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf));
}
// Assumes we have a period at the current position.
@@ -502,12 +560,12 @@ static cmark_node *handle_period(subject *subj, bool smart) {
advance(subj);
if (peek_char(subj) == '.') {
advance(subj);
- return make_str(subj->mem, cmark_chunk_literal(ELLIPSES));
+ return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
} else {
- return make_str(subj->mem, cmark_chunk_literal(".."));
+ return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
}
} else {
- return make_str(subj->mem, cmark_chunk_literal("."));
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
}
}
@@ -615,7 +673,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
cmark_node *tmp, *tmpnext, *emph;
// calculate the actual number of characters used from this closer
- use_delims = (closer_num_chars >= 2 && opener_num_chars >=2) ? 2 : 1;
+ use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;
// remove used characters from associated inlines.
opener_num_chars -= use_delims;
@@ -643,6 +701,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
}
cmark_node_insert_after(opener_inl, emph);
+ emph->start_line = emph->end_line = subj->line;
+ emph->start_column = opener_inl->start_column + subj->column_offset;
+ emph->end_column = closer_inl->end_column + subj->column_offset;
+
// if opener has 0 characters, remove it and its associated inline
if (opener_num_chars == 0) {
cmark_node_free(opener_inl);
@@ -669,11 +731,11 @@ static cmark_node *handle_backslash(subject *subj) {
if (cmark_ispunct(
nextchar)) { // only ascii symbols and newline can be escaped
advance(subj);
- return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
+ return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
} else if (!is_eof(subj) && skip_line_end(subj)) {
return make_linebreak(subj->mem);
} else {
- return make_str(subj->mem, cmark_chunk_literal("\\"));
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
}
}
@@ -689,14 +751,14 @@ static cmark_node *handle_entity(subject *subj) {
subj->input.len - subj->pos);
if (len == 0)
- return make_str(subj->mem, cmark_chunk_literal("&"));
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
subj->pos += len;
- return make_str(subj->mem, cmark_chunk_buf_detach(&ent));
+ return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent));
}
-// Clean a URL: remove surrounding whitespace and surrounding <>,
-// and remove \ that escape punctuation.
+// Clean a URL: remove surrounding whitespace, and remove \ that escape
+// punctuation.
cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
cmark_strbuf buf = CMARK_BUF_INIT(mem);
@@ -707,11 +769,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
return result;
}
- if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
- houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
- } else {
houdini_unescape_html_f(&buf, url->data, url->len);
- }
cmark_strbuf_unescape(&buf);
return cmark_chunk_buf_detach(&buf);
@@ -743,7 +801,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
// Parse an autolink or HTML tag.
// Assumes the subject has a '<' character at the current position.
-static cmark_node *handle_pointy_brace(subject *subj) {
+static cmark_node *handle_pointy_brace(subject *subj, int options) {
bufsize_t matchlen = 0;
cmark_chunk contents;
@@ -755,7 +813,7 @@ static cmark_node *handle_pointy_brace(subject *subj) {
contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
- return make_autolink(subj->mem, contents, 0);
+ return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
}
// next try to match an email autolink
@@ -764,7 +822,7 @@ static cmark_node *handle_pointy_brace(subject *subj) {
contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
- return make_autolink(subj->mem, contents, 1);
+ return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
}
// finally, try to match an html tag
@@ -772,11 +830,13 @@ static cmark_node *handle_pointy_brace(subject *subj) {
if (matchlen > 0) {
contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
subj->pos += matchlen;
- return make_raw_html(subj->mem, contents);
+ cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
+ adjust_subj_node_newlines(subj, node, matchlen, 1, options);
+ return node;
}
// if nothing matches, just return the opening <:
- return make_str(subj->mem, cmark_chunk_literal("<"));
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
}
// Parse a link label. Returns 1 if successful.
@@ -824,24 +884,12 @@ noMatch:
subj->pos = startpos; // rewind
return 0;
}
-static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
+
+static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
+ cmark_chunk *output) {
bufsize_t i = offset;
size_t nb_p = 0;
- if (i < input->len && input->data[i] == '<') {
- ++i;
- while (i < input->len) {
- if (input->data[i] == '>') {
- ++i;
- break;
- } else if (input->data[i] == '\\')
- i += 2;
- else if (cmark_isspace(input->data[i]))
- return -1;
- else
- ++i;
- }
- } else {
while (i < input->len) {
if (input->data[i] == '\\' &&
i + 1 < input-> len &&
@@ -862,18 +910,53 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
else
++i;
}
+
+ if (i >= input->len)
+ return -1;
+
+ {
+ cmark_chunk result = {input->data + offset, i - offset, 0};
+ *output = result;
+ }
+ return i - offset;
+}
+
+// Scan a link destination in `input` starting at `offset`.
+// If the byte at `offset` is '<', scan a pointy-bracket destination: on
+// success `*output` is set to the bytes between '<' and '>' and the return
+// value is the number of bytes consumed, brackets included. Whitespace or a
+// second '<' before the closing '>' hands the scan, restarted at `offset`, to
+// manual_scan_link_url_2, as does any input that does not start with '<'.
+// Returns -1 when the bracketed scan reaches the end of `input` (no '>' was
+// found, or the '>' is the last byte).
+static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
+ cmark_chunk *output) {
+ bufsize_t i = offset;
+
+ if (i < input->len && input->data[i] == '<') {
+ ++i; // step past the opening '<'
+ while (i < input->len) {
+ if (input->data[i] == '>') {
+ ++i; // consume the closing '>' and stop
+ break;
+ } else if (input->data[i] == '\\')
+ i += 2; // skip the backslash and the byte after it
+ else if (cmark_isspace(input->data[i]) || input->data[i] == '<')
+ return manual_scan_link_url_2(input, offset, output);
+ else
+ ++i;
+ }
+ } else {
+ return manual_scan_link_url_2(input, offset, output);
+ }
+
+ if (i >= input->len)
+ return -1;
+
+ {
+ // The chunk excludes the surrounding '<' and '>'.
+ cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0};
+ *output = result;
+ }
+ return i - offset;
+}
+
+
// Return a link, an image, or a literal close bracket.
static cmark_node *handle_close_bracket(subject *subj) {
bufsize_t initial_pos, after_link_text_pos;
- bufsize_t starturl, endurl, starttitle, endtitle, endall;
- bufsize_t n;
- bufsize_t sps;
+ bufsize_t endurl, starttitle, endtitle, endall;
+ bufsize_t sps, n;
cmark_reference *ref = NULL;
cmark_chunk url_chunk, title_chunk;
cmark_chunk url, title;
@@ -891,13 +974,13 @@ static cmark_node *handle_close_bracket(subject *subj) {
opener = subj->last_bracket;
if (opener == NULL) {
- return make_str(subj->mem, cmark_chunk_literal("]"));
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
}
if (!opener->active) {
// take delimiter off stack
pop_bracket(subj);
- return make_str(subj->mem, cmark_chunk_literal("]"));
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
}
// If we got here, we matched a potential link/image text.
@@ -909,11 +992,11 @@ static cmark_node *handle_close_bracket(subject *subj) {
// First, look for an inline link.
if (peek_char(subj) == '(' &&
((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
- ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
+ ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
+ &url_chunk)) > -1)) {
// try to parse an explicit link:
- starturl = subj->pos + 1 + sps; // after (
- endurl = starturl + n;
+ endurl = subj->pos + 1 + sps + n;
starttitle = endurl + scan_spacechars(&subj->input, endurl);
// ensure there are spaces btw url and title
@@ -926,7 +1009,6 @@ static cmark_node *handle_close_bracket(subject *subj) {
if (peek_at(subj, endall) == ')') {
subj->pos = endall + 1;
- url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
title_chunk =
cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
url = cmark_clean_url(subj->mem, &url_chunk);
@@ -975,12 +1057,15 @@ noMatch:
// If we fall through to here, it means we didn't match a link:
pop_bracket(subj); // remove this opener from delimiter list
subj->pos = initial_pos;
- return make_str(subj->mem, cmark_chunk_literal("]"));
+ return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
match:
inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
inl->as.link.url = url;
inl->as.link.title = title;
+ inl->start_line = inl->end_line = subj->line;
+ inl->start_column = opener->inl_text->start_column;
+ inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
cmark_node_insert_before(opener->inl_text, inl);
// Add link text:
tmp = opener->inl_text->next;
@@ -1027,6 +1112,8 @@ static cmark_node *handle_newline(subject *subj) {
if (peek_at(subj, subj->pos) == '\n') {
advance(subj);
}
+ ++subj->line;
+ subj->column_offset = -subj->pos;
// skip spaces at beginning of line
skip_spaces(subj);
if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
@@ -1086,7 +1173,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
cmark_node *new_inl = NULL;
cmark_chunk contents;
unsigned char c;
- bufsize_t endpos;
+ bufsize_t startpos, endpos;
c = peek_char(subj);
if (c == 0) {
return 0;
@@ -1097,7 +1184,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
new_inl = handle_newline(subj);
break;
case '`':
- new_inl = handle_backticks(subj);
+ new_inl = handle_backticks(subj, options);
break;
case '\\':
new_inl = handle_backslash(subj);
@@ -1106,7 +1193,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
new_inl = handle_entity(subj);
break;
case '<':
- new_inl = handle_pointy_brace(subj);
+ new_inl = handle_pointy_brace(subj, options);
break;
case '*':
case '_':
@@ -1122,7 +1209,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
break;
case '[':
advance(subj);
- new_inl = make_str(subj->mem, cmark_chunk_literal("["));
+ new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
push_bracket(subj, false, new_inl);
break;
case ']':
@@ -1132,15 +1219,16 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
advance(subj);
if (peek_char(subj) == '[') {
advance(subj);
- new_inl = make_str(subj->mem, cmark_chunk_literal("!["));
+ new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
push_bracket(subj, true, new_inl);
} else {
- new_inl = make_str(subj->mem, cmark_chunk_literal("!"));
+ new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
}
break;
default:
endpos = subject_find_special_char(subj, options);
contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
+ startpos = subj->pos;
subj->pos = endpos;
// if we're at a newline, strip trailing spaces.
@@ -1148,7 +1236,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
cmark_chunk_rtrim(&contents);
}
- new_inl = make_str(subj->mem, contents);
+ new_inl = make_str(subj, startpos, endpos - 1, contents);
}
if (new_inl != NULL) {
cmark_node_append_child(parent, new_inl);
@@ -1161,7 +1249,8 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options) {
subject subj;
- subject_from_buf(mem, &subj, &parent->content, refmap);
+ cmark_chunk content = {parent->content.ptr, parent->content.size, 0};
+ subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);
cmark_chunk_rtrim(&subj.input);
while (!is_eof(&subj) && parse_inline(&subj, parent, options))
@@ -1189,7 +1278,7 @@ static void spnl(subject *subj) {
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
+bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
cmark_reference_map *refmap) {
subject subj;
@@ -1200,7 +1289,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
bufsize_t matchlen = 0;
bufsize_t beforetitle;
- subject_from_buf(mem, &subj, input, NULL);
+ subject_from_buf(mem, -1, 0, &subj, input, NULL);
// parse label:
if (!link_label(&subj, &lab) || lab.len == 0)
@@ -1215,9 +1304,8 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
// parse link url:
spnl(&subj);
- matchlen = manual_scan_link_url(&subj.input, subj.pos);
- if (matchlen > 0) {
- url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
+ if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
+ url.len > 0) {
subj.pos += matchlen;
} else {
return 0;
diff --git a/src/inlines.h b/src/inlines.h
index 52be768..39d3363 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -11,7 +11,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
cmark_reference_map *refmap, int options);
-bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
+bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
cmark_reference_map *refmap);
#ifdef __cplusplus
diff --git a/src/iterator.c b/src/iterator.c
index 24423a2..f5cd802 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -106,6 +106,7 @@ void cmark_consolidate_text_nodes(cmark_node *root) {
while (tmp && tmp->type == CMARK_NODE_TEXT) {
cmark_iter_next(iter); // advance pointer
cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
+ cur->end_column = tmp->end_column;
next = tmp->next;
cmark_node_free(tmp);
tmp = next;
diff --git a/src/latex.c b/src/latex.c
index f372a13..0d9517d 100644
--- a/src/latex.c
+++ b/src/latex.c
@@ -252,24 +252,24 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
CR();
list_number = cmark_node_get_list_start(node);
if (list_number > 1) {
- enumlevel = S_get_enumlevel(node);
- // latex normally supports only five levels
- if (enumlevel >= 1 && enumlevel <= 5) {
+ enumlevel = S_get_enumlevel(node);
+ // latex normally supports only five levels
+ if (enumlevel >= 1 && enumlevel <= 5) {
snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
list_number);
LIT("\\setcounter{enum");
- switch(enumlevel) {
- case 1: LIT("i"); break;
- case 2: LIT("ii"); break;
- case 3: LIT("iii"); break;
- case 4: LIT("iv"); break;
- case 5: LIT("v"); break;
- default: LIT("i"); break;
+ switch (enumlevel) {
+ case 1: LIT("i"); break;
+ case 2: LIT("ii"); break;
+ case 3: LIT("iii"); break;
+ case 4: LIT("iv"); break;
+ case 5: LIT("v"); break;
+ default: LIT("i"); break;
}
LIT("}{");
OUT(list_number_string, false, NORMAL);
LIT("}");
- }
+ }
CR();
}
} else {
diff --git a/src/libcmark.pc.in b/src/libcmark.pc.in
index 024ae48..0f87c30 100644
--- a/src/libcmark.pc.in
+++ b/src/libcmark.pc.in
@@ -1,6 +1,6 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=@CMAKE_INSTALL_PREFIX@
-libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@
+libdir=@CMAKE_INSTALL_PREFIX@/@libdir@
includedir=@CMAKE_INSTALL_PREFIX@/include
Name: libcmark
diff --git a/src/main.c b/src/main.c
index 9482f68..8942520 100644
--- a/src/main.c
+++ b/src/main.c
@@ -32,6 +32,7 @@ void print_usage() {
printf(" --nobreaks Render soft line breaks as spaces\n");
printf(" --safe Suppress raw HTML and dangerous URLs\n");
printf(" --smart Use smart punctuation\n");
+ printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n");
printf(" --help, -h Print usage information\n");
printf(" --version Print version\n");
}
diff --git a/src/node.h b/src/node.h
index 65d857f..13901ba 100644
--- a/src/node.h
+++ b/src/node.h
@@ -66,6 +66,7 @@ struct cmark_node {
int start_column;
int end_line;
int end_column;
+ int internal_offset;
uint16_t type;
uint16_t flags;
diff --git a/src/scanners.c b/src/scanners.c
index c96490d..b312f66 100644
--- a/src/scanners.c
+++ b/src/scanners.c
@@ -752,7 +752,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
- 128, 128, 128, 128, 128, 128, 0, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 0, 128, 0, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
@@ -839,7 +839,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) {
}
if (yych <= 0xEC) {
if (yych <= 0xC1) {
- if (yych <= ' ')
+ if (yych <= '<')
goto yy45;
if (yych <= '>')
goto yy85;
@@ -7887,35 +7887,45 @@ bufsize_t _scan_html_tag(const unsigned char *p) {
unsigned char yych;
static const unsigned char yybm[] = {
/* table 1 .. 8: 0 */
- 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, 239,
+ 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238,
239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
- 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, 239,
- 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
- 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, 255,
+ 239, 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239,
+ 239, 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
+ 239, 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, 239,
+ 255, 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239,
239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
- 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 239, 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
/* table 9 .. 11: 256 */
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 128, 0,
- 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 128, 0, 0, 0, 0, 0, 0, 160, 160, 160, 160, 160,
160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
- 160, 0, 0, 0, 0, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, 0, 128, 0, 160,
160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
- 160, 160, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
};
yych = *p;
if (yych <= '>') {
diff --git a/src/scanners.re b/src/scanners.re
index a0650f2..b20a954 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -91,7 +91,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p)
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- scheme [:][^\x00-\x20>]*[>] { return (bufsize_t)(p - start); }
+ scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); }
* { return 0; }
*/
}