13 files changed, 155 insertions, 15 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 968b869..2ab6a72 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,8 +9,8 @@ endif()
 set(PROJECT_NAME "cmark")
 
 set(PROJECT_VERSION_MAJOR 0)
-set(PROJECT_VERSION_MINOR 24)
-set(PROJECT_VERSION_PATCH 1)
+set(PROJECT_VERSION_MINOR 25)
+set(PROJECT_VERSION_PATCH 2)
 set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} )
 
 option(CMARK_TESTS "Build cmark tests and enable testing" ON)
diff --git a/api_test/main.c b/api_test/main.c
index bde6222..8da9ba9 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -362,6 +362,9 @@ static void create_tree(test_batch_runner *runner) {
   cmark_node *str4 = cmark_node_new(CMARK_NODE_TEXT);
   cmark_node_set_literal(str4, "brzz");
   OK(runner, cmark_node_replace(str1, str4), "replace");
+  // The replaced node is not freed
+  cmark_node_free(str1);
+
   INT_EQ(runner, cmark_node_check(doc, NULL), 0, "replace consistent");
   OK(runner, cmark_node_previous(emph) == str4, "replace works");
   INT_EQ(runner, cmark_node_replace(p, str4), 0, "replace str for p fails");
@@ -857,6 +860,17 @@ static void test_md_to_html(test_batch_runner *runner, const char *markdown,
   free(html);
 }
 
+static void test_feed_across_line_ending(test_batch_runner *runner) {
+  // See #117: the CRLF after "line1" is split across two feeds; expect one paragraph.
+  cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
+  cmark_parser_feed(parser, "line1\r", 6);
+  cmark_parser_feed(parser, "\nline2\r\n", 8);
+  cmark_node *document = cmark_parser_finish(parser);
+  OK(runner, document->first_child->next == NULL, "document has one paragraph");
+  cmark_parser_free(parser);
+  cmark_node_free(document);
+}
+
 int main() {
   int retval;
   test_batch_runner *runner = test_batch_runner_new();
@@ -881,6 +895,7 @@ int main() {
   numeric_entities(runner);
   test_cplusplus(runner);
   test_safe(runner);
+  test_feed_across_line_ending(runner);
 
   test_print_summary(runner);
   retval = test_ok(runner) ? 0 : 1;
diff --git a/changelog.txt b/changelog.txt
index 1deee77..77341b5 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,75 @@
+[0.25.2]
+
+  * Open files in binary mode (#113, Nick Wellnhofer).  Now that cmark
+    supports different line endings, files must be opened in binary mode
+    on Windows.
+  * Reset `partially_consumed_tab` on every new line (#114, Nick Wellnhofer).
+  * Handle buffer split across a CRLF line ending (#117).  Adds an internal
+    field to the parser struct to keep track of `last_buffer_ended_with_cr`.
+    Added test.
+
+[0.25.1]
+
+  * Release with no code changes.  cmark version was mistakenly set to
+    0.25.1 in the 0.25.0 release (#112), so this release just
+    ensures that this will cause no confusion later.
+
+[0.25.0]
+
+  * Fixed tabs in indentation (#101).  This patch fixes S_advance_offset
+    so that it doesn't gobble a tab character when advancing less than the
+    width of a tab.
+  * Added partially_consumed_tab to parser.  This keeps track of when we
+    have gotten partway through a tab when consuming initial indentation.
+  * Simplified add_line (only need parser parameter).
+  * Properly handle partially consumed tab.  E.g. in
+
+        - foo
+
+         <TAB><TAB>bar
+
+    we should consume two spaces from the second tab, including two spaces
+    in the code block.
+  * Properly handle tabs with blockquotes and fenced blocks.
+  * Fixed handling of tabs in lists.
+  * Clarified logic in S_advance_offset.
+  * Use an assertion to check for in-range html_block_type.
+    It's a programming error if the type is out of range.
+  * Refactored S_processLines to make the logic easier to
+    understand, and added documentation (Mathieu Duponchelle).
+  * Removed unnecessary check for empty string_content.
+  * Factored out contains_inlines.
+  * Moved the cmake minimum version to top line of CMakeLists.txt
+    (tinysun212).
+  * Fix ctype(3) usage on NetBSD (Kamil Rytarowski).  We need to cast value
+    passed to isspace(3) to unsigned char to explicitly prevent possibly
+    undefined behavior.
+  * Compile in plain C mode with MSVC 12.0 or newer (Nick Wellnhofer).
+    Under MSVC, we used to compile in C++ mode to get some C99 features
+    like mixing declarations and code. With newer MSVC versions, it's
+    possible to build in plain C mode.
+  * Switched from "inline" to "CMARK_INLINE" (Nick Wellnhofer).
+    Newer MSVC versions support enough of C99 to be able to compile cmark
+    in plain C mode. Only the "inline" keyword is still unsupported.
+    We have to use "__inline" instead.
+  * Added include guards to config.h
+  * config.h.in - added compatibility snprintf, vsnprintf for MSVC.
+  * Replaced sprintf with snprintf (Marco Benelli).
+  * config.h: include stdio.h for _vscprintf etc.
+  * Include stdarg.h when needed in config.h.
+  * Removed an unnecessary C99-ism in buffer.c.  This helps compiling on
+    systems like luarocks that don't have all the cmake configuration
+    goodness (thanks to carlmartus).
+  * Don't use variable length arrays (Nick Wellnhofer).
+    They're not supported by MSVC.
+  * Test with multiple MSVC versions under Appveyor (Nick Wellnhofer).
+  * Fix installation dir of man-pages on NetBSD (Kamil Rytarowski).
+  * Fixed typo in cmark.h comments (Chris Eidhof).
+  * Clarify in man page that cmark_node_free frees a node's children too.
+  * Fixed documentation of --width in man page.
+  * Require re2c >= 0.14.2 (#102).
+  * Generated scanners.c with more recent re2c.
+
 [0.24.1]
 
   * Commonmark renderer:
diff --git a/man/man1/cmark.1 b/man/man1/cmark.1
index 8dd9165..9ea8d4c 100644
--- a/man/man1/cmark.1
+++ b/man/man1/cmark.1
@@ -1,4 +1,4 @@
-.TH "cmark" "1" "November 30, 2014" "LOCAL" "General Commands Manual"
+.TH "cmark" "1" "March 24, 2016" "LOCAL" "General Commands Manual"
 .SH "NAME"
 \fBcmark\fR
 \- convert CommonMark formatted text to HTML
@@ -23,7 +23,7 @@ Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]xml\f[],
 .B \-\-width \f[I]WIDTH\f[]
 Specify a column width to which to wrap the output. For no wrapping, use
 the value 0 (the default).  This option currently only affects the
-commonmark renderer.
+commonmark, latex, and man renderers.
 .TP 12n
 .B \-\-sourcepos
 Include source position attribute.
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
index bffe73d..283b9cc 100644
--- a/man/man3/cmark.3
+++ b/man/man3/cmark.3
@@ -1,4 +1,4 @@
-.TH cmark 3 "February 02, 2016" "LOCAL" "Library Functions Manual"
+.TH cmark 3 "March 24, 2016" "LOCAL" "Library Functions Manual"
 .SH
 NAME
 .PP
@@ -14,7 +14,7 @@ Simple Interface
 
 .PP
 Convert \f[I]text\f[] (assumed to be a UTF\-8 encoded string with length
-\f[I]len\f[] from CommonMark Markdown to HTML, returning a
+\f[I]len\f[]) from CommonMark Markdown to HTML, returning a
 null\-terminated, UTF\-8\-encoded string.
 
 .SS
@@ -109,7 +109,7 @@ to assign.
 \fIvoid\f[] \fBcmark_node_free\f[](\fIcmark_node *node\f[])
 
 .PP
-Frees the memory allocated for a node.
+Frees the memory allocated for a node and any children.
 
 .SS
 Tree Traversal
@@ -543,7 +543,7 @@ Streaming interface:
 .nf
 \f[C]
 cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
-FILE *fp = fopen("myfile.md", "r");
+FILE *fp = fopen("myfile.md", "rb");
 while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
 	   cmark_parser_feed(parser, buffer, bytes);
 	   if (bytes < sizeof(buffer)) {
diff --git a/src/blocks.c b/src/blocks.c
index 00639cf..f49ad4e 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -90,6 +90,7 @@ cmark_parser *cmark_parser_new(int options) {
   parser->last_line_length = 0;
   parser->linebuf = buf;
   parser->options = options;
+  parser->last_buffer_ended_with_cr = false;
 
   return parser;
 }
@@ -506,6 +507,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
   const unsigned char *end = buffer + len;
   static const uint8_t repl[] = {239, 191, 189};
 
+  if (len > 0 && parser->last_buffer_ended_with_cr) {
+    // CRLF split across feeds (#117): drop the LF; an empty feed keeps the flag
+    if (*buffer == '\n') buffer++;
+    parser->last_buffer_ended_with_cr = false;
+  }
   while (buffer < end) {
     const unsigned char *eol;
     bufsize_t chunk_len;
@@ -546,8 +552,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
 
     buffer += chunk_len;
     // skip over line ending characters:
-    if (buffer < end && *buffer == '\r')
+    if (buffer < end && *buffer == '\r') {
       buffer++;
+      if (buffer == end)
+	parser->last_buffer_ended_with_cr = true;
+    }
     if (buffer < end && *buffer == '\n')
       buffer++;
   }
@@ -1124,6 +1133,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
   parser->offset = 0;
   parser->column = 0;
   parser->blank = false;
+  parser->partially_consumed_tab = false;
 
   input.data = parser->curline->ptr;
   input.len = parser->curline->size;
diff --git a/src/cmark.h b/src/cmark.h
index 95ec623..c98e18e 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -96,7 +96,7 @@ typedef struct cmark_iter cmark_iter;
  */
 CMARK_EXPORT cmark_node *cmark_node_new(cmark_node_type type);
 
-/** Frees the memory allocated for a node.
+/** Frees the memory allocated for a node and any children.
  */
 CMARK_EXPORT void cmark_node_free(cmark_node *node);
 
@@ -418,7 +418,7 @@ CMARK_EXPORT void cmark_consolidate_text_nodes(cmark_node *root);
  * Streaming interface:
  *
  *     cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT);
- *     FILE *fp = fopen("myfile.md", "r");
+ *     FILE *fp = fopen("myfile.md", "rb");
  *     while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
  *     	   cmark_parser_feed(parser, buffer, bytes);
  *     	   if (bytes < sizeof(buffer)) {
diff --git a/src/main.c b/src/main.c
index f9fa29c..fa18e03 100644
--- a/src/main.c
+++ b/src/main.c
@@ -75,6 +75,7 @@ int main(int argc, char *argv[]) {
   int options = CMARK_OPT_DEFAULT;
 
 #if defined(_WIN32) && !defined(__CYGWIN__)
+  _setmode(_fileno(stdin), _O_BINARY);
   _setmode(_fileno(stdout), _O_BINARY);
 #endif
 
@@ -145,7 +146,7 @@ int main(int argc, char *argv[]) {
 
   parser = cmark_parser_new(options);
   for (i = 0; i < numfps; i++) {
-    FILE *fp = fopen(argv[files[i]], "r");
+    FILE *fp = fopen(argv[files[i]], "rb");
     if (fp == NULL) {
       fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]],
               strerror(errno));
diff --git a/src/parser.h b/src/parser.h
index b3ff39b..ab21d0f 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -27,6 +27,7 @@ struct cmark_parser {
   bufsize_t last_line_length;
   cmark_strbuf *linebuf;
   int options;
+  bool last_buffer_ended_with_cr;
 };
 
 #ifdef __cplusplus
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index d537ab5..fbfd1d0 100755
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -53,6 +53,13 @@ IF (PYTHONINTERP_FOUND)
     "${ROUNDTRIP} ${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
     )
 
+  # Replay the issue regressions in regression.txt through the built cmark binary.
+  add_test(regressiontest_executable ${PYTHON_EXECUTABLE}
+    "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize"
+    "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/regression.txt"
+    "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
+    )
+
 
 ELSE(PYTHONINTERP_FOUND)
 
diff --git a/test/regression.txt b/test/regression.txt
new file mode 100644
index 0000000..c8a1057
--- /dev/null
+++ b/test/regression.txt
@@ -0,0 +1,34 @@
+### Regression tests
+
+Issue #113: EOL character weirdness on Windows
+(Important: first line ends with CR + CR + LF)
+
+```````````````````````````````` example
+line1
+line2
+.
+<p>line1</p>
+<p>line2</p>
+````````````````````````````````
+
+Issue #114: cmark skipping first character in line
+(Important: the blank lines around "Repeatedly" contain a tab.)
+
+```````````````````````````````` example
+By taking it apart
+
+- alternative solutions
+→
+Repeatedly solving
+→
+- how techniques
+.
+<p>By taking it apart</p>
+<ul>
+<li>alternative solutions</li>
+</ul>
+<p>Repeatedly solving</p>
+<ul>
+<li>how techniques</li>
+</ul>
+````````````````````````````````
diff --git a/test/spec.txt b/test/spec.txt
index 0309eaa..1a4a7dc 100644
--- a/test/spec.txt
+++ b/test/spec.txt
@@ -1,8 +1,8 @@
 ---
 title: CommonMark Spec
 author: John MacFarlane
-version: 0.24
-date: '2016-01-12'
+version: 0.25
+date: '2016-03-24'
 license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
 ...
 
diff --git a/test/spec_tests.py b/test/spec_tests.py
index d5b4d05..1521a82 100755
--- a/test/spec_tests.py
+++ b/test/spec_tests.py
@@ -87,7 +87,7 @@ def get_tests(specfile):
 
     header_re = re.compile('#+ ')
 
-    with open(specfile, 'r', encoding='utf-8') as specf:
+    with open(specfile, 'r', encoding='utf-8', newline='\n') as specf:
         for line in specf:
             line_number = line_number + 1
             l = line.strip()