18 files changed, 565 insertions, 156 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 33180e5..4eb0541 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,8 +17,8 @@ endif()
 set(PROJECT_NAME "cmark")
 
 set(PROJECT_VERSION_MAJOR 0)
-set(PROJECT_VERSION_MINOR 27)
-set(PROJECT_VERSION_PATCH 1)
+set(PROJECT_VERSION_MINOR 28)
+set(PROJECT_VERSION_PATCH 3)
 set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} )
 
 option(CMARK_TESTS "Build cmark tests and enable testing" ON)
diff --git a/README.md b/README.md
index 1c9dd69..6b0c003 100644
--- a/README.md
+++ b/README.md
@@ -110,9 +110,9 @@ To run a test for memory leaks using `valgrind`:
 
     make leakcheck
 
-To reformat source code using `astyle`:
+To reformat source code using `clang-format`:
 
-    make astyle
+    make format
 
 To run a "fuzz test" against ten long randomly generated inputs:
 
diff --git a/api_test/main.c b/api_test/main.c
index d720234..c30dc71 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -552,9 +552,9 @@ static void render_xml(test_batch_runner *runner) {
   STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
                       "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
                       "<paragraph sourcepos=\"1:1-1:9\">\n"
-                      "  <text>foo </text>\n"
-                      "  <emph>\n"
-                      "    <text>bar</text>\n"
+                      "  <text sourcepos=\"1:1-1:4\">foo </text>\n"
+                      "  <emph sourcepos=\"1:5-1:9\">\n"
+                      "    <text sourcepos=\"1:6-1:8\">bar</text>\n"
                       "  </emph>\n"
                       "</paragraph>\n",
          "render first paragraph with source pos");
@@ -883,6 +883,95 @@ static void test_feed_across_line_ending(test_batch_runner *runner) {
   cmark_node_free(document);
 }
 
+static void source_pos(test_batch_runner *runner) { // XML sourcepos for nested blocks and their inlines
+  static const char markdown[] = // heading, two-line paragraph, block quote holding an ordered list
+    "# Hi *there*.\n"
+    "\n"
+    "Hello &ldquo; <http://www.google.com>\n"
+    "there `hi` -- [okay](www.google.com (ok)).\n"
+    "\n"
+    "> 1. Okay.\n"
+    ">    Sure.\n"
+    ">\n"
+    "> 2. Yes, okay.\n"
+    ">    ![ok](hi \"yes\")\n";
+
+  cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); // sizeof - 1 excludes the terminating NUL
+  char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS); // sourcepos is requested at render time only
+  STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+                      "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
+                      "<document sourcepos=\"1:1-10:20\" xmlns=\"http://commonmark.org/xml/1.0\">\n"
+                      "  <heading sourcepos=\"1:1-1:13\" level=\"1\">\n"
+                      "    <text sourcepos=\"1:3-1:5\">Hi </text>\n"
+                      "    <emph sourcepos=\"1:6-1:12\">\n"
+                      "      <text sourcepos=\"1:7-1:11\">there</text>\n"
+                      "    </emph>\n"
+                      "    <text sourcepos=\"1:13-1:13\">.</text>\n"
+                      "  </heading>\n"
+                      "  <paragraph sourcepos=\"3:1-4:42\">\n"
+                      "    <text sourcepos=\"3:1-3:14\">Hello “ </text>\n"
+                      "    <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n"
+                      "      <text sourcepos=\"3:16-3:36\">http://www.google.com</text>\n"
+                      "    </link>\n"
+                      "    <softbreak />\n"
+                      "    <text sourcepos=\"4:1-4:6\">there </text>\n"
+                      "    <code sourcepos=\"4:8-4:9\">hi</code>\n"
+                      "    <text sourcepos=\"4:11-4:14\"> -- </text>\n"
+                      "    <link sourcepos=\"4:15-4:41\" destination=\"www.google.com\" title=\"ok\">\n"
+                      "      <text sourcepos=\"4:16-4:19\">okay</text>\n"
+                      "    </link>\n"
+                      "    <text sourcepos=\"4:42-4:42\">.</text>\n"
+                      "  </paragraph>\n"
+                      "  <block_quote sourcepos=\"6:1-10:20\">\n"
+                      "    <list sourcepos=\"6:3-10:20\" type=\"ordered\" start=\"1\" delim=\"period\" tight=\"false\">\n"
+                      "      <item sourcepos=\"6:3-8:1\">\n"
+                      "        <paragraph sourcepos=\"6:6-7:10\">\n"
+                      "          <text sourcepos=\"6:6-6:10\">Okay.</text>\n"
+                      "          <softbreak />\n"
+                      "          <text sourcepos=\"7:6-7:10\">Sure.</text>\n"
+                      "        </paragraph>\n"
+                      "      </item>\n"
+                      "      <item sourcepos=\"9:3-10:20\">\n"
+                      "        <paragraph sourcepos=\"9:6-10:20\">\n"
+                      "          <text sourcepos=\"9:6-9:15\">Yes, okay.</text>\n"
+                      "          <softbreak />\n"
+                      "          <image sourcepos=\"10:6-10:20\" destination=\"hi\" title=\"yes\">\n"
+                      "            <text sourcepos=\"10:8-10:9\">ok</text>\n"
+                      "          </image>\n"
+                      "        </paragraph>\n"
+                      "      </item>\n"
+                      "    </list>\n"
+                      "  </block_quote>\n"
+                      "</document>\n",
+         "sourcepos are as expected");
+  free(xml); // the rendered string is released here with free()
+  cmark_node_free(doc);
+}
+
+static void ref_source_pos(test_batch_runner *runner) { // XML sourcepos for a link resolved through a reference definition
+  static const char markdown[] = // paragraph using [reference], then the definition on line 3
+    "Let's try [reference] links.\n"
+    "\n"
+    "[reference]: https://github.com (GitHub)\n";
+
+  cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT); // sizeof - 1 excludes the terminating NUL
+  char *xml = cmark_render_xml(doc, CMARK_OPT_DEFAULT | CMARK_OPT_SOURCEPOS);
+  STR_EQ(runner, xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" // expected output has no node for the definition line itself
+                      "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n"
+                      "<document sourcepos=\"1:1-3:40\" xmlns=\"http://commonmark.org/xml/1.0\">\n"
+                      "  <paragraph sourcepos=\"1:1-1:28\">\n"
+                      "    <text sourcepos=\"1:1-1:10\">Let's try </text>\n"
+                      "    <link sourcepos=\"1:11-1:21\" destination=\"https://github.com\" title=\"GitHub\">\n"
+                      "      <text sourcepos=\"1:12-1:20\">reference</text>\n"
+                      "    </link>\n"
+                      "    <text sourcepos=\"1:22-1:28\"> links.</text>\n"
+                      "  </paragraph>\n"
+                      "</document>\n",
+         "sourcepos are as expected");
+  free(xml); // the rendered string is released here with free()
+  cmark_node_free(doc);
+}
+
 int main() {
   int retval;
   test_batch_runner *runner = test_batch_runner_new();
@@ -908,6 +997,8 @@ int main() {
   test_cplusplus(runner);
   test_safe(runner);
   test_feed_across_line_ending(runner);
+  source_pos(runner);
+  ref_source_pos(runner);
 
   test_print_summary(runner);
   retval = test_ok(runner) ? 0 : 1;
diff --git a/changelog.txt b/changelog.txt
index 883ef6c..33cff54 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,152 @@
+[0.28.3]
+
+  * Include GNUInstallDirs in src/CMakeLists.txt (Nick Wellnhofer, #240).
+    This fixes build problems on some cmake versions (#241).
+
+[0.28.2]
+
+  * Fixed regression in install dest for static library (#238).
+    Due to a mistake, 0.28.1 installed libcmark.a into include/.
+
+[0.28.1]
+
+  * `--smart`: open quote can never occur right after `]` or `)` (#227).
+  * Fix quadratic behavior in `finalize` (Vicent Marti).
+  * Don't use `CMAKE_INSTALL_LIBDIR` to create `libcmark.pc` (#236).
+    This wasn't getting set in processing `libcmark.pc.in`, and we
+    were getting the wrong entry in `libcmark.pc`.
+    The new approach sets an internal `libdir` variable to
+    `lib${LIB_SUFFIX}`.  This variable is used both to set the
+    install destination and in the libcmark.pc.in template.
+  * Update README.md, replace `make astyle` with `make format`
+    (Nguyễn Thái Ngọc Duy).
+
+[0.28.0]
+
+  * Update spec.
+  * Use unsigned integer when shifting (Phil Turnbull).
+    Avoids a UBSAN warning which can be triggered when handling a
+    long sequence of backticks.
+  * Avoid memcpy'ing NULL pointers (Phil Turnbull).
+    Avoids a UBSAN warning when link title is empty string.
+    The length of the memcpy is zero so the NULL pointer is not
+    dereferenced but it is still undefined behaviour.
+  * DeMorgan simplification of some tests in emphasis parser.
+    This also brings the code into closer alignment with the wording
+    of the spec (see jgm/CommonMark#467).
+  * Fixed undefined shift in commonmark writer (#211).
+    Found by google/oss-fuzz:
+    <https://oss-fuzz.com/v2/testcase-detail/4686992824598528>.
+  * latex writer:  fix memory overflow (#210).
+    We got an array overflow in enumerated lists nested more than
+    10 deep with start number =/= 1.
+    This commit also ensures that we don't try to set `enum_` counters
+    that aren't defined by LaTeX (generally up to enumv).
+    Found by google/oss-fuzz:
+    <https://oss-fuzz.com/v2/testcase-detail/5546760854306816>.
+  * Check for NULL pointer in get_link_type (Phil Turnbull).
+    `echo '[](xx:)' | ./build/src/cmark -t latex` gave a
+    segfault.
+  * Move fuzzing dictionary into single file (Phil Turnbull).
+    This allows AFL and libFuzzer to use the same dictionary.
+  * Reset bytes after UTF8 proc (Yuki Izumi, #206).
+  * Don't scan past an EOL (Yuki Izumi).
+    The existing negated character classes (`[^…]`) are careful to
+    always include `\x00` in the characters excluded, but these `.`
+    catch-alls can scan right past the terminating NUL placed
+    at the end of the buffer by `_scan_at`.  As such, buffer
+    overruns can occur.  Also, don't scan past a newline in HTML
+    block end scanners.
+  * Document cases where `get_` functions return `NULL` (#155).
+    E.g. `cmark_node_get_url` on a non-link or image.
+  * Properly handle backslashes in link destinations (#192).
+    Only ascii punctuation characters are escapable, per the spec.
+  * Fixed `cmark_node_get_list_start` to return 0 for bullet lists,
+    as documented (#202).
+  * Use `CMARK_NO_DELIM` for bullet lists (#201).
+  * Fixed code for freeing delimiter stack (#189).
+  * Removed abort outside of conditional (typo).
+  * Removed coercion in error message when aborting from buffer.
+  * Print message to stderr when we abort due to memory demands (#188).
+  * `libcmark.pc`: use `CMAKE_INSTALL_LIBDIR` (#185, Jens Petersen).
+    Needed for multilib distros like Fedora.
+  * Fixed buffer overflow error in `S_parser_feed` (#184).
+    The overflow could occur in the following condition:
+    the buffer ends with `\r` and the next memory address
+    contains `\n`.
+  * Update emphasis parsing for spec change.
+    Strong now goes inside Emph rather than the reverse,
+    when both scopes are possible.  The code is much simpler.
+    This also avoids a spec inconsistency that cmark had previously:
+    `***hi***` became (Strong (Emph "hi")) but
+    `***hi****` became (Emph (Strong "hi")) "*".
+  * Fixes for the LaTeX renderer (#182, Doeme)
+    + Don't double-output the link in latex-rendering.
+    + Prevent ligatures in dashes sensibly when rendering latex.
+      `\-` is a hyphenation, so it doesn't get displayed at all.
+  * Added a test for NULL when freeing `subj->last_delim`.
+  * Cleaned up setting of lower bounds for openers.
+    We now use a much smaller array.
+  * Fix #178, quadratic parsing bug.  Add pathological test.
+  * Slight improvement of clarity of logic in emph matching.
+  * Fix "multiple of 3" determination in emph/strong parsing.
+    We need to store the length of the original delimiter run,
+    instead of using the length of the remaining delimiters
+    after some have been subtracted.  Test case:
+    `a***b* c*`.  Thanks to Raph Levin for reporting.
+  * Correctly initialize chunk in S_process_line (Nick Wellnhofer, #170).
+    The `alloc` member wasn't initialized.  This also allows to add an
+    assertion in `chunk_rtrim` which doesn't work for alloced chunks.
+  * Added 'make newbench'.
+  * `scanners.c` generated with re2c 0.16 (68K smaller!).
+  * `scanners.re` - fixed warnings; use `*` for fallback.
+  * Fixed some warnings in `scanners.re`.
+  * Update CaseFolding to latest (Kevin Wojniak, #168).
+  * Allow balanced nested parens in link destinations (Yuki Izumi, #166)
+  * Allocate enough bytes for backticks array.
+  * Inlines: Ensure that the delimiter stack is freed in subject.
+  * Fixed pathological cases with backtick code spans:
+
+    - Removed recursion in scan_to_closing_backticks
+    - Added an array of pointers to potential backtick closers
+      to subject
+    - This array is used to avoid traversing the subject again
+      when we've already seen all the potential backtick closers.
+    - Added a max bound of 1000 for backtick code span delimiters.
+    - This helps with pathological cases like:
+
+            x
+            x `
+            x ``
+            x ```
+            x ````
+            ...
+
+    - Added pathological test case.
+
+    Thanks to Martin Mitáš for identifying the problem and for
+    discussion of solutions.
+  * Remove redundant cmake_minimum_required (#163, @kainjow).
+  * Make shared and static libraries optional (Azamat H. Hackimov).
+    Now you can enable/disable compilation and installation targets for
+    shared and static libraries via `-DCMARK_SHARED=ON/OFF` and
+    `-DCMARK_STATIC=ON/OFF`.
+  * Added support for built-in `${LIB_SUFFIX}` feature (Azamat H.
+    Hackimov).  Replaced `${LIB_INSTALL_DIR}` option with built-in
+    `${LIB_SUFFIX}` for installing for 32/64-bit systems. Normally,
+    CMake will set `${LIB_SUFFIX}` automatically for required environment.
+    If you have any issues with it, you can override this option with
+    `-DLIB_SUFFIX=64` or `-DLIB_SUFFIX=""` during configuration.
+  * Add Makefile target and harness to fuzz with libFuzzer (Phil Turnbull).
+    This can be run locally with `make libFuzzer` but the harness will be
+    integrated into oss-fuzz for large-scale fuzzing.
+  * Advertise `--validate-utf8` in usage information
+    (Nguyễn Thái Ngọc Duy).
+  * Makefile: use warnings with re2c.
+  * README: Add link to Python wrapper, prettify languages list
+    (Pavlo Kapyshin).
+  * README: Add link to cmark-scala (Tim Nieradzik, #196)
+
 [0.27.1]
 
   * Set policy for CMP0063 to avoid a warning (#162).
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3197196..d5a1936 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,6 +2,8 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.3")
     cmake_policy(SET CMP0063 NEW)
 endif()
 
+include(GNUInstallDirs)
+
 set(LIBRARY "libcmark")
 set(STATICLIBRARY "libcmark_static")
 set(HEADERS
@@ -123,19 +125,21 @@ endif(MSVC)
 
 set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
 
+set(libdir lib${LIB_SUFFIX})
+
 include (InstallRequiredSystemLibraries)
 install(TARGETS ${PROGRAM} ${CMARK_INSTALL}
   EXPORT cmark
   RUNTIME DESTINATION bin
-  LIBRARY DESTINATION lib${LIB_SUFFIX}
-  ARCHIVE DESTINATION lib${LIB_SUFFIX}
+  LIBRARY DESTINATION ${libdir}
+  ARCHIVE DESTINATION ${libdir}
   )
 
 if(CMARK_SHARED OR CMARK_STATIC)
   configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcmark.pc.in
     ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc @ONLY)
   install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc
-    DESTINATION lib${LIB_SUFFIX}/pkgconfig)
+    DESTINATION ${libdir}/pkgconfig)
 
   install(FILES
     cmark.h
@@ -144,7 +148,7 @@ if(CMARK_SHARED OR CMARK_STATIC)
     DESTINATION include
     )
 
-  install(EXPORT cmark DESTINATION lib${LIB_SUFFIX}/cmake)
+  install(EXPORT cmark DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)
 endif()
 
 # Feature tests
diff --git a/src/blocks.c b/src/blocks.c
index 5a293b2..7f58ffd 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -255,17 +255,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
 
   switch (S_type(b)) {
   case CMARK_NODE_PARAGRAPH:
-    while (cmark_strbuf_at(node_content, 0) == '[' &&
-           (pos = cmark_parse_reference_inline(parser->mem, node_content,
-                                               parser->refmap))) {
+  {
+    cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
+    while (chunk.len && chunk.data[0] == '[' &&
+           (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {
 
-      cmark_strbuf_drop(node_content, pos);
+      chunk.data += pos;
+      chunk.len -= pos;
     }
+    cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
     if (is_blank(node_content, 0)) {
       // remove blank node (former reference def)
       cmark_node_free(b);
     }
     break;
+  }
 
   case CMARK_NODE_CODE_BLOCK:
     if (!b->as.code.fenced) { // indented code
@@ -900,6 +904,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
 
       (*container)->as.heading.level = level;
       (*container)->as.heading.setext = false;
+      (*container)->internal_offset = matched;
 
     } else if (!indented && (matched = scan_open_code_fence(
                                  input, parser->first_nonspace))) {
diff --git a/src/inlines.c b/src/inlines.c
index c95809c..d0ab253 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -22,9 +22,9 @@ static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";
 static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
 
 // Macros for creating various kinds of simple.
-#define make_str(mem, s) make_literal(mem, CMARK_NODE_TEXT, s)
-#define make_code(mem, s) make_literal(mem, CMARK_NODE_CODE, s)
-#define make_raw_html(mem, s) make_literal(mem, CMARK_NODE_HTML_INLINE, s)
+#define make_str(subj, sc, ec, s) make_literal(subj, CMARK_NODE_TEXT, sc, ec, s)
+#define make_code(subj, sc, ec, s) make_literal(subj, CMARK_NODE_CODE, sc, ec, s)
+#define make_raw_html(subj, sc, ec, s) make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s)
 #define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)
 #define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)
 #define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH)
@@ -55,7 +55,10 @@ typedef struct bracket {
 typedef struct {
   cmark_mem *mem;
   cmark_chunk input;
+  int line;
   bufsize_t pos;
+  int block_offset;
+  int column_offset;
   cmark_reference_map *refmap;
   delimiter *last_delim;
   bracket *last_bracket;
@@ -72,17 +75,22 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
 
 static int parse_inline(subject *subj, cmark_node *parent, int options);
 
-static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
-                             cmark_reference_map *refmap);
+static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
+                             cmark_chunk *chunk, cmark_reference_map *refmap);
 static bufsize_t subject_find_special_char(subject *subj, int options);
 
 // Create an inline with a literal string value.
-static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t,
+static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
+                                             int start_column, int end_column,
                                              cmark_chunk s) {
-  cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e));
-  cmark_strbuf_init(mem, &e->content, 0);
-  e->type = t;
+  cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e)); // allocated zeroed from the subject's allocator
+  cmark_strbuf_init(subj->mem, &e->content, 0);
+  e->type = (uint16_t)t;
   e->as.literal = s;
+  e->start_line = e->end_line = subj->line; // both ends are placed on the subject's current line
+  // Columns are 1-based: add 1 to the given columns, then the subject's column and block offsets.
+  e->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
+  e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
   return e;
 }
 
@@ -95,14 +103,15 @@ static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {
 }
 
 // Like make_str, but parses entities.
-static cmark_node *make_str_with_entities(cmark_mem *mem,
+static cmark_node *make_str_with_entities(subject *subj,
+                                          int start_column, int end_column, // forwarded unchanged to make_str
                                           cmark_chunk *content) {
-  cmark_strbuf unescaped = CMARK_BUF_INIT(mem);
+  cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);
 
   if (houdini_unescape_html(&unescaped, content->data, content->len)) {
-    return make_str(mem, cmark_chunk_buf_detach(&unescaped));
+    return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped)); // text node takes the detached unescaped buffer
   } else {
-    return make_str(mem, *content);
+    return make_str(subj, start_column, end_column, *content); // houdini_unescape_html returned 0: use the original chunk
   }
 }
 
@@ -140,23 +149,28 @@ static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,
   return cmark_chunk_buf_detach(&buf);
 }
 
-static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url,
-                                              int is_email) {
-  cmark_node *link = make_simple(mem, CMARK_NODE_LINK);
-  link->as.link.url = cmark_clean_autolink(mem, &url, is_email);
+static CMARK_INLINE cmark_node *make_autolink(subject *subj,
+                                              int start_column, int end_column,
+                                              cmark_chunk url, int is_email) {
+  cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK);
+  link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);
   link->as.link.title = cmark_chunk_literal("");
-  cmark_node_append_child(link, make_str_with_entities(mem, &url));
+  link->start_line = link->end_line = subj->line; // 1-based columns below use the same offsets make_literal applies to the child text
+  link->start_column = start_column + 1 + subj->column_offset + subj->block_offset;
+  link->end_column = end_column + 1 + subj->column_offset + subj->block_offset;
+  cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url)); // child text spans the columns strictly inside the link's
   return link;
 }
 
-static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
-                             cmark_reference_map *refmap) {
+static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
+                             cmark_chunk *chunk, cmark_reference_map *refmap) {
   int i;
   e->mem = mem;
-  e->input.data = buffer->ptr;
-  e->input.len = buffer->size;
-  e->input.alloc = 0;
+  e->input = *chunk;
+  e->line = line_number;
   e->pos = 0;
+  e->block_offset = block_offset;
+  e->column_offset = 0;
   e->refmap = refmap;
   e->last_delim = NULL;
   e->last_bracket = NULL;
@@ -223,6 +237,47 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {
   return cmark_chunk_dup(&subj->input, startpos, len);
 }
 
+// Count the '\n' bytes in the `len`-byte span of `subj`'s input that begins
+// at `from`.  When at least one is found, `*since_newline` receives the number
+// of bytes that follow the last one; otherwise it is left untouched.
+static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) {
+  int newline_count = 0;
+  int trailing = 0;
+
+  for (bufsize_t remaining = len; remaining != 0; --remaining) {
+    if (subj->input.data[from++] != '\n') {
+      ++trailing;
+      continue;
+    }
+    ++newline_count;
+    trailing = 0;
+  }
+
+  // Only report the trailing byte count when a newline was actually seen.
+  if (newline_count > 0) {
+    *since_newline = trailing;
+  }
+  return newline_count;
+}
+
+// Adjust `node`'s `end_line`/`end_column` and `subj`'s `line`/`column_offset`
+// for newlines in the `matchlen` bytes starting at `subj->pos - matchlen -
+// extra`.  Does nothing unless `options` has CMARK_OPT_SOURCEPOS set.
+static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) {
+  if (!(options & CMARK_OPT_SOURCEPOS)) {
+    return;
+  }
+
+  int since_newline; // only written by count_newlines when it finds a newline
+  int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline);
+  if (newlines) {
+    subj->line += newlines; // make_literal stamps later nodes with subj->line
+    node->end_line += newlines;
+    node->end_column = since_newline; // bytes after the last newline in the scanned span
+    subj->column_offset = -subj->pos + since_newline + extra; // makes subj->pos + column_offset equal since_newline + extra
+  }
+}
+
 // Try to process a backtick code span that began with a
 // span of ticks of length openticklength length (already
 // parsed).  Return 0 if you don't find matching closing
@@ -270,14 +325,14 @@ static bufsize_t scan_to_closing_backticks(subject *subj,
 
 // Parse backtick code section or raw backticks, return an inline.
 // Assumes that the subject has a backtick at the current position.
-static cmark_node *handle_backticks(subject *subj) {
+static cmark_node *handle_backticks(subject *subj, int options) {
   cmark_chunk openticks = take_while(subj, isbacktick);
   bufsize_t startpos = subj->pos;
   bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
 
   if (endpos == 0) {      // not found
     subj->pos = startpos; // rewind
-    return make_str(subj->mem, openticks);
+    return make_str(subj, subj->pos, subj->pos, openticks);
   } else {
     cmark_strbuf buf = CMARK_BUF_INIT(subj->mem);
 
@@ -286,7 +341,9 @@ static cmark_node *handle_backticks(subject *subj) {
     cmark_strbuf_trim(&buf);
     cmark_strbuf_normalize_whitespace(&buf);
 
-    return make_code(subj->mem, cmark_chunk_buf_detach(&buf));
+    cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf));
+    adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options);
+    return node;
   }
 }
 
@@ -345,7 +402,8 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
     *can_close = right_flanking &&
                  (!left_flanking || cmark_utf8proc_is_punctuation(after_char));
   } else if (c == '\'' || c == '"') {
-    *can_open = left_flanking && !right_flanking;
+    *can_open = left_flanking && !right_flanking &&
+	         before_char != ']' && before_char != ')';
     *can_close = right_flanking;
   } else {
     *can_open = left_flanking;
@@ -443,7 +501,7 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
     contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
   }
 
-  inl_text = make_str(subj->mem, contents);
+  inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);
 
   if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
     push_delimiter(subj, c, can_open, can_close, inl_text);
@@ -459,7 +517,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {
   advance(subj);
 
   if (!smart || peek_char(subj) != '-') {
-    return make_str(subj->mem, cmark_chunk_literal("-"));
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));
   }
 
   while (smart && peek_char(subj) == '-') {
@@ -492,7 +550,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {
     cmark_strbuf_puts(&buf, ENDASH);
   }
 
-  return make_str(subj->mem, cmark_chunk_buf_detach(&buf));
+  return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf));
 }
 
 // Assumes we have a period at the current position.
@@ -502,12 +560,12 @@ static cmark_node *handle_period(subject *subj, bool smart) {
     advance(subj);
     if (peek_char(subj) == '.') {
       advance(subj);
-      return make_str(subj->mem, cmark_chunk_literal(ELLIPSES));
+      return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));
     } else {
-      return make_str(subj->mem, cmark_chunk_literal(".."));
+      return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));
     }
   } else {
-    return make_str(subj->mem, cmark_chunk_literal("."));
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));
   }
 }
 
@@ -615,7 +673,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
   cmark_node *tmp, *tmpnext, *emph;
 
   // calculate the actual number of characters used from this closer
-  use_delims = (closer_num_chars >= 2 && opener_num_chars >=2) ? 2 : 1;
+  use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;
 
   // remove used characters from associated inlines.
   opener_num_chars -= use_delims;
@@ -643,6 +701,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
   }
   cmark_node_insert_after(opener_inl, emph);
 
+  emph->start_line = emph->end_line = subj->line;
+  emph->start_column = opener_inl->start_column + subj->column_offset;
+  emph->end_column = closer_inl->end_column + subj->column_offset;
+
   // if opener has 0 characters, remove it and its associated inline
   if (opener_num_chars == 0) {
     cmark_node_free(opener_inl);
@@ -669,11 +731,11 @@ static cmark_node *handle_backslash(subject *subj) {
   if (cmark_ispunct(
           nextchar)) { // only ascii symbols and newline can be escaped
     advance(subj);
-    return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
+    return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
   } else if (!is_eof(subj) && skip_line_end(subj)) {
     return make_linebreak(subj->mem);
   } else {
-    return make_str(subj->mem, cmark_chunk_literal("\\"));
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));
   }
 }
 
@@ -689,14 +751,14 @@ static cmark_node *handle_entity(subject *subj) {
                              subj->input.len - subj->pos);
 
   if (len == 0)
-    return make_str(subj->mem, cmark_chunk_literal("&"));
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));
 
   subj->pos += len;
-  return make_str(subj->mem, cmark_chunk_buf_detach(&ent));
+  return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent));
 }
 
-// Clean a URL: remove surrounding whitespace and surrounding <>,
-// and remove \ that escape punctuation.
+// Clean a URL: remove surrounding whitespace, and remove \ that escape
+// punctuation.
 cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
   cmark_strbuf buf = CMARK_BUF_INIT(mem);
 
@@ -707,11 +769,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {
     return result;
   }
 
-  if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
-    houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
-  } else {
     houdini_unescape_html_f(&buf, url->data, url->len);
-  }
 
   cmark_strbuf_unescape(&buf);
   return cmark_chunk_buf_detach(&buf);
@@ -743,7 +801,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {
 
 // Parse an autolink or HTML tag.
 // Assumes the subject has a '<' character at the current position.
-static cmark_node *handle_pointy_brace(subject *subj) {
+static cmark_node *handle_pointy_brace(subject *subj, int options) {
   bufsize_t matchlen = 0;
   cmark_chunk contents;
 
@@ -755,7 +813,7 @@ static cmark_node *handle_pointy_brace(subject *subj) {
     contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
     subj->pos += matchlen;
 
-    return make_autolink(subj->mem, contents, 0);
+    return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);
   }
 
   // next try to match an email autolink
@@ -764,7 +822,7 @@ static cmark_node *handle_pointy_brace(subject *subj) {
     contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);
     subj->pos += matchlen;
 
-    return make_autolink(subj->mem, contents, 1);
+    return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);
   }
 
   // finally, try to match an html tag
@@ -772,11 +830,13 @@ static cmark_node *handle_pointy_brace(subject *subj) {
   if (matchlen > 0) {
     contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
     subj->pos += matchlen;
-    return make_raw_html(subj->mem, contents);
+    cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents);
+    adjust_subj_node_newlines(subj, node, matchlen, 1, options);
+    return node;
   }
 
   // if nothing matches, just return the opening <:
-  return make_str(subj->mem, cmark_chunk_literal("<"));
+  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));
 }
 
 // Parse a link label.  Returns 1 if successful.
@@ -824,24 +884,12 @@ noMatch:
   subj->pos = startpos; // rewind
   return 0;
 }
-static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
+
+static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset,
+                                        cmark_chunk *output) {
   bufsize_t i = offset;
   size_t nb_p = 0;
 
-  if (i < input->len && input->data[i] == '<') {
-    ++i;
-    while (i < input->len) {
-      if (input->data[i] == '>') {
-        ++i;
-        break;
-      } else if (input->data[i] == '\\')
-        i += 2;
-      else if (cmark_isspace(input->data[i]))
-        return -1;
-      else
-        ++i;
-    }
-  } else {
     while (i < input->len) {
       if (input->data[i] == '\\' &&
 	  i + 1 < input-> len &&
@@ -862,18 +910,53 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {
       else
         ++i;
     }
+
+  if (i >= input->len)
+    return -1;
+
+  {
+    cmark_chunk result = {input->data + offset, i - offset, 0};
+    *output = result;
+  }
+  return i - offset;
+}
+
+static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset,
+                                      cmark_chunk *output) {
+  bufsize_t i = offset;
+
+  if (i < input->len && input->data[i] == '<') {
+    ++i;
+    while (i < input->len) {
+      if (input->data[i] == '>') {
+        ++i;
+        break;
+      } else if (input->data[i] == '\\')
+        i += 2;
+      else if (cmark_isspace(input->data[i]) || input->data[i] == '<')
+        return manual_scan_link_url_2(input, offset, output);
+      else
+        ++i;
+    }
+  } else {
+    return manual_scan_link_url_2(input, offset, output);
   }
 
   if (i >= input->len)
     return -1;
+
+  {
+    cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0};
+    *output = result;
+  }
   return i - offset;
 }
+
 // Return a link, an image, or a literal close bracket.
 static cmark_node *handle_close_bracket(subject *subj) {
   bufsize_t initial_pos, after_link_text_pos;
-  bufsize_t starturl, endurl, starttitle, endtitle, endall;
-  bufsize_t n;
-  bufsize_t sps;
+  bufsize_t endurl, starttitle, endtitle, endall;
+  bufsize_t sps, n;
   cmark_reference *ref = NULL;
   cmark_chunk url_chunk, title_chunk;
   cmark_chunk url, title;
@@ -891,13 +974,13 @@ static cmark_node *handle_close_bracket(subject *subj) {
   opener = subj->last_bracket;
 
   if (opener == NULL) {
-    return make_str(subj->mem, cmark_chunk_literal("]"));
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
   }
 
   if (!opener->active) {
     // take delimiter off stack
     pop_bracket(subj);
-    return make_str(subj->mem, cmark_chunk_literal("]"));
+    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
   }
 
   // If we got here, we matched a potential link/image text.
@@ -909,11 +992,11 @@ static cmark_node *handle_close_bracket(subject *subj) {
   // First, look for an inline link.
   if (peek_char(subj) == '(' &&
       ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
-      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
+      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps,
+                                 &url_chunk)) > -1)) {
 
     // try to parse an explicit link:
-    starturl = subj->pos + 1 + sps; // after (
-    endurl = starturl + n;
+    endurl = subj->pos + 1 + sps + n;
     starttitle = endurl + scan_spacechars(&subj->input, endurl);
 
     // ensure there are spaces btw url and title
@@ -926,7 +1009,6 @@ static cmark_node *handle_close_bracket(subject *subj) {
     if (peek_at(subj, endall) == ')') {
       subj->pos = endall + 1;
 
-      url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);
       title_chunk =
           cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);
       url = cmark_clean_url(subj->mem, &url_chunk);
@@ -975,12 +1057,15 @@ noMatch:
   // If we fall through to here, it means we didn't match a link:
   pop_bracket(subj); // remove this opener from delimiter list
   subj->pos = initial_pos;
-  return make_str(subj->mem, cmark_chunk_literal("]"));
+  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));
 
 match:
   inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);
   inl->as.link.url = url;
   inl->as.link.title = title;
+  inl->start_line = inl->end_line = subj->line;
+  inl->start_column = opener->inl_text->start_column;
+  inl->end_column = subj->pos + subj->column_offset + subj->block_offset;
   cmark_node_insert_before(opener->inl_text, inl);
   // Add link text:
   tmp = opener->inl_text->next;
@@ -1027,6 +1112,8 @@ static cmark_node *handle_newline(subject *subj) {
   if (peek_at(subj, subj->pos) == '\n') {
     advance(subj);
   }
+  ++subj->line;
+  subj->column_offset = -subj->pos;
   // skip spaces at beginning of line
   skip_spaces(subj);
   if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' &&
@@ -1086,7 +1173,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
   cmark_node *new_inl = NULL;
   cmark_chunk contents;
   unsigned char c;
-  bufsize_t endpos;
+  bufsize_t startpos, endpos;
   c = peek_char(subj);
   if (c == 0) {
     return 0;
@@ -1097,7 +1184,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
     new_inl = handle_newline(subj);
     break;
   case '`':
-    new_inl = handle_backticks(subj);
+    new_inl = handle_backticks(subj, options);
     break;
   case '\\':
     new_inl = handle_backslash(subj);
@@ -1106,7 +1193,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
     new_inl = handle_entity(subj);
     break;
   case '<':
-    new_inl = handle_pointy_brace(subj);
+    new_inl = handle_pointy_brace(subj, options);
     break;
   case '*':
   case '_':
@@ -1122,7 +1209,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
     break;
   case '[':
     advance(subj);
-    new_inl = make_str(subj->mem, cmark_chunk_literal("["));
+    new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));
     push_bracket(subj, false, new_inl);
     break;
   case ']':
@@ -1132,15 +1219,16 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
     advance(subj);
     if (peek_char(subj) == '[') {
       advance(subj);
-      new_inl = make_str(subj->mem, cmark_chunk_literal("!["));
+      new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));
       push_bracket(subj, true, new_inl);
     } else {
-      new_inl = make_str(subj->mem, cmark_chunk_literal("!"));
+      new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));
     }
     break;
   default:
     endpos = subject_find_special_char(subj, options);
     contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
+    startpos = subj->pos;
     subj->pos = endpos;
 
     // if we're at a newline, strip trailing spaces.
@@ -1148,7 +1236,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
       cmark_chunk_rtrim(&contents);
     }
 
-    new_inl = make_str(subj->mem, contents);
+    new_inl = make_str(subj, startpos, endpos - 1, contents);
   }
   if (new_inl != NULL) {
     cmark_node_append_child(parent, new_inl);
@@ -1161,7 +1249,8 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
 extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
                                 cmark_reference_map *refmap, int options) {
   subject subj;
-  subject_from_buf(mem, &subj, &parent->content, refmap);
+  cmark_chunk content = {parent->content.ptr, parent->content.size, 0};
+  subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);
   cmark_chunk_rtrim(&subj.input);
 
   while (!is_eof(&subj) && parse_inline(&subj, parent, options))
@@ -1189,7 +1278,7 @@ static void spnl(subject *subj) {
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
+bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
                                        cmark_reference_map *refmap) {
   subject subj;
 
@@ -1200,7 +1289,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
   bufsize_t matchlen = 0;
   bufsize_t beforetitle;
 
-  subject_from_buf(mem, &subj, input, NULL);
+  subject_from_buf(mem, -1, 0, &subj, input, NULL);
 
   // parse label:
   if (!link_label(&subj, &lab) || lab.len == 0)
@@ -1215,9 +1304,8 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
 
   // parse link url:
   spnl(&subj);
-  matchlen = manual_scan_link_url(&subj.input, subj.pos);
-  if (matchlen > 0) {
-    url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
+  if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 &&
+      url.len > 0) {
     subj.pos += matchlen;
   } else {
     return 0;
diff --git a/src/inlines.h b/src/inlines.h
index 52be768..39d3363 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -11,7 +11,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
 void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
                          cmark_reference_map *refmap, int options);
 
-bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
+bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
                                        cmark_reference_map *refmap);
 
 #ifdef __cplusplus
diff --git a/src/iterator.c b/src/iterator.c
index 24423a2..f5cd802 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -106,6 +106,7 @@ void cmark_consolidate_text_nodes(cmark_node *root) {
       while (tmp && tmp->type == CMARK_NODE_TEXT) {
         cmark_iter_next(iter); // advance pointer
         cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
+        cur->end_column = tmp->end_column;
         next = tmp->next;
         cmark_node_free(tmp);
         tmp = next;
diff --git a/src/latex.c b/src/latex.c
index f372a13..0d9517d 100644
--- a/src/latex.c
+++ b/src/latex.c
@@ -252,24 +252,24 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
       CR();
       list_number = cmark_node_get_list_start(node);
       if (list_number > 1) {
-	enumlevel = S_get_enumlevel(node);
-	// latex normally supports only five levels
-	if (enumlevel >= 1 && enumlevel <= 5) {
+        enumlevel = S_get_enumlevel(node);
+        // latex normally supports only five levels
+        if (enumlevel >= 1 && enumlevel <= 5) {
           snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
                    list_number);
           LIT("\\setcounter{enum");
-          switch(enumlevel) {
-	  case 1: LIT("i"); break;
-	  case 2: LIT("ii"); break;
-	  case 3: LIT("iii"); break;
-	  case 4: LIT("iv"); break;
-	  case 5: LIT("v"); break;
-	  default: LIT("i"); break;
+          switch (enumlevel) {
+          case 1: LIT("i"); break;
+          case 2: LIT("ii"); break;
+          case 3: LIT("iii"); break;
+          case 4: LIT("iv"); break;
+          case 5: LIT("v"); break;
+          default: LIT("i"); break;
 	  }
           LIT("}{");
           OUT(list_number_string, false, NORMAL);
           LIT("}");
-	}
+        }
         CR();
       }
     } else {
diff --git a/src/libcmark.pc.in b/src/libcmark.pc.in
index 024ae48..0f87c30 100644
--- a/src/libcmark.pc.in
+++ b/src/libcmark.pc.in
@@ -1,6 +1,6 @@
 prefix=@CMAKE_INSTALL_PREFIX@
 exec_prefix=@CMAKE_INSTALL_PREFIX@
-libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@
+libdir=@CMAKE_INSTALL_PREFIX@/@libdir@
 includedir=@CMAKE_INSTALL_PREFIX@/include
 
 Name: libcmark
diff --git a/src/main.c b/src/main.c
index 9482f68..8942520 100644
--- a/src/main.c
+++ b/src/main.c
@@ -32,6 +32,7 @@ void print_usage() {
   printf("  --nobreaks       Render soft line breaks as spaces\n");
   printf("  --safe           Suppress raw HTML and dangerous URLs\n");
   printf("  --smart          Use smart punctuation\n");
+  printf("  --validate-utf8  Replace UTF-8 invalid sequences with U+FFFD\n");
   printf("  --help, -h       Print usage information\n");
   printf("  --version        Print version\n");
 }
diff --git a/src/node.h b/src/node.h
index 65d857f..13901ba 100644
--- a/src/node.h
+++ b/src/node.h
@@ -66,6 +66,7 @@ struct cmark_node {
   int start_column;
   int end_line;
   int end_column;
+  int internal_offset;
   uint16_t type;
   uint16_t flags;
 
diff --git a/src/scanners.c b/src/scanners.c
index c96490d..b312f66 100644
--- a/src/scanners.c
+++ b/src/scanners.c
@@ -752,7 +752,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) {
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   128, 128, 128, 128, 128, 128, 128, 128, 128,
         128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
-        128, 128, 128, 128, 128, 128, 0,   128, 128, 128, 128, 128, 128, 128,
+        128, 128, 128, 128, 0,   128, 0,   128, 128, 128, 128, 128, 128, 128,
         128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
         128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
         128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
@@ -839,7 +839,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) {
     }
     if (yych <= 0xEC) {
       if (yych <= 0xC1) {
-        if (yych <= ' ')
+        if (yych <= '<')
           goto yy45;
         if (yych <= '>')
           goto yy85;
@@ -7887,35 +7887,45 @@ bufsize_t _scan_html_tag(const unsigned char *p) {
     unsigned char yych;
     static const unsigned char yybm[] = {
         /* table 1 .. 8: 0 */
-        0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, 239,
+        0,   239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238,
         239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
-        239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, 239,
-        239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
-        239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, 255,
+        239, 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239,
+        239, 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
+        239, 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255,
         255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-        255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, 239,
+        255, 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239,
         239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
         239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
-        239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        239, 239, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,
         /* table 9 .. 11: 256 */
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 128, 0,
-        0, 0, 0, 0, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   64,  64,  64,  64,  64,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   64,  0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   160, 128, 0,   160, 160, 160, 160, 160, 160, 160, 160,
+        160, 160, 128, 0,   0,   0,   0,   0,   0,   160, 160, 160, 160, 160,
         160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
-        160, 0, 0, 0, 0, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160,
+        160, 160, 160, 160, 160, 160, 160, 0,   0,   0,   0,   128, 0,   160,
         160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
-        160, 160, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+        0,   0,   0,   0,
     };
     yych = *p;
     if (yych <= '>') {
diff --git a/src/scanners.re b/src/scanners.re
index a0650f2..b20a954 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -91,7 +91,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p)
   const unsigned char *marker = NULL;
   const unsigned char *start = p;
 /*!re2c
-  scheme [:][^\x00-\x20>]*[>]  { return (bufsize_t)(p - start); }
+  scheme [:][^\x00-\x20<>]*[>]  { return (bufsize_t)(p - start); }
   * { return 0; }
 */
 }
diff --git a/test/regression.txt b/test/regression.txt
index 18b7d79..a6669d0 100644
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -81,7 +81,7 @@ Issue #193 - unescaped left angle brackets in link destination
 
 [a]: <te<st>
 .
-<p><a href="te%3Cst">a</a></p>
+<p><a href="%3Cte%3Cst%3E">a</a></p>
 ````````````````````````````````
 
 Issue #192 - escaped spaces in link destination
diff --git a/test/smart_punct.txt b/test/smart_punct.txt
index 3522c94..fd55e62 100644
--- a/test/smart_punct.txt
+++ b/test/smart_punct.txt
@@ -78,6 +78,15 @@ left double quote, to facilitate this style:
 <p>“Second paragraph by same speaker, in fiction.”</p>
 ````````````````````````````````
 
+A quote following a `]` or `)` character cannot
+be an open quote:
+
+```````````````````````````````` example
+[a]'s b'
+.
+<p>[a]’s b’</p>
+````````````````````````````````
+
 Quotes that are escaped come out as literal straight
 quotes:
 
diff --git a/test/spec.txt b/test/spec.txt
index 64a60b1..9fd5841 100644
--- a/test/spec.txt
+++ b/test/spec.txt
@@ -1,8 +1,8 @@
 ---
 title: CommonMark Spec
 author: John MacFarlane
-version: 0.27
-date: '2016-11-18'
+version: 0.28
+date: '2017-08-01'
 license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
 ...
 
@@ -1645,6 +1645,15 @@ With tildes:
 </code></pre>
 ````````````````````````````````
 
+Fewer than three backticks is not enough:
+
+```````````````````````````````` example
+``
+foo
+``
+.
+<p><code>foo</code></p>
+````````````````````````````````
 
 The closing code fence must use the same character as the opening
 fence:
@@ -2033,6 +2042,37 @@ or [closing tag] (with any [tag name] other than `script`,
 or the end of the line.\
 **End condition:** line is followed by a [blank line].
 
+HTML blocks continue until they are closed by their appropriate
+[end condition], or the last line of the document or other [container block].
+This means any HTML **within an HTML block** that might otherwise be recognised
+as a start condition will be ignored by the parser and passed through as-is,
+without changing the parser's state.
+
+For instance, `<pre>` within an HTML block started by `<table>` will not affect
+the parser state; as the HTML block was started by start condition 6, it
+will end at any blank line. This can be surprising:
+
+```````````````````````````````` example
+<table><tr><td>
+<pre>
+**Hello**,
+
+_world_.
+</pre>
+</td></tr></table>
+.
+<table><tr><td>
+<pre>
+**Hello**,
+<p><em>world</em>.
+</pre></p>
+</td></tr></table>
+````````````````````````````````
+
+In this case, the HTML block is terminated by the blank line — the `**Hello**`
+text remains verbatim — and regular parsing resumes, with a paragraph,
+emphasised `world` and inline and block HTML following.
+
 All types of [HTML blocks] except type 7 may interrupt
 a paragraph.  Blocks of type 7 may not interrupt a paragraph.
 (This restriction is intended to prevent unwanted interpretation
@@ -3639,11 +3679,15 @@ The following rules define [list items]:
     If the list item is ordered, then it is also assigned a start
     number, based on the ordered list marker.
 
-    Exceptions: When the first list item in a [list] interrupts
-    a paragraph---that is, when it starts on a line that would
-    otherwise count as [paragraph continuation text]---then (a)
-    the lines *Ls* must not begin with a blank line, and (b) if
-    the list item is ordered, the start number must be 1.
+    Exceptions:
+
+    1. When the first list item in a [list] interrupts
+       a paragraph---that is, when it starts on a line that would
+       otherwise count as [paragraph continuation text]---then (a)
+       the lines *Ls* must not begin with a blank line, and (b) if
+       the list item is ordered, the start number must be 1.
+    2. If any line is a [thematic break][thematic breaks] then
+       that line is not a list item.
 
 For example, let *Ls* be the lines
 
@@ -5856,8 +5900,9 @@ for efficient parsing strategies that do not backtrack.
 
 First, some definitions.  A [delimiter run](@) is either
 a sequence of one or more `*` characters that is not preceded or
-followed by a `*` character, or a sequence of one or more `_`
-characters that is not preceded or followed by a `_` character.
+followed by a non-backslash-escaped `*` character, or a sequence
+of one or more `_` characters that is not preceded or followed by
+a non-backslash-escaped `_` character.
 
 A [left-flanking delimiter run](@) is
 a [delimiter run] that is (a) not followed by [Unicode whitespace],
@@ -7159,7 +7204,9 @@ A [link destination](@) consists of either
 - a nonempty sequence of characters that does not include
   ASCII space or control characters, and includes parentheses
   only if (a) they are backslash-escaped or (b) they are part of
-  a balanced pair of unescaped parentheses.
+  a balanced pair of unescaped parentheses.  (Implementations
+  may impose limits on parentheses nesting to avoid performance
+  issues, but at least three levels of nesting should be supported.)
 
 A [link title](@)  consists of either
 
@@ -7265,7 +7312,7 @@ Parentheses inside the link destination may be escaped:
 <p><a href="(foo)">link</a></p>
 ````````````````````````````````
 
-Any number parentheses are allowed without escaping, as long as they are
+Any number of parentheses are allowed without escaping, as long as they are
 balanced:
 
 ```````````````````````````````` example
@@ -7571,13 +7618,16 @@ that [matches] a [link reference definition] elsewhere in the document.
 A [link label](@)  begins with a left bracket (`[`) and ends
 with the first right bracket (`]`) that is not backslash-escaped.
 Between these brackets there must be at least one [non-whitespace character].
-Unescaped square bracket characters are not allowed in
-[link labels].  A link label can have at most 999
-characters inside the square brackets.
+Unescaped square bracket characters are not allowed inside the
+opening and closing square brackets of [link labels].  A link
+label can have at most 999 characters inside the square
+brackets.
 
 One label [matches](@)
 another just in case their normalized forms are equal.  To normalize a
-label, perform the *Unicode case fold* and collapse consecutive internal
+label, strip off the opening and closing brackets,
+perform the *Unicode case fold*, strip leading and trailing
+[whitespace] and collapse consecutive internal
 [whitespace] to a single space.  If there are multiple
 matching reference link definitions, the one that comes first in the
 document is used.  (It is desirable in such cases to emit a warning.)