7 files changed, 124 insertions, 119 deletions
diff --git a/src/blocks.c b/src/blocks.c
index 8ccaa25..b72c256 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -19,7 +19,7 @@
 #define peek_at(i, n) (i)->data[n]
 
 static inline bool
-is_line_end_char(char c)
+S_is_line_end_char(char c)
 {
 	return (c == '\n' || c == '\r');
 }
@@ -69,6 +69,10 @@ cmark_parser *cmark_parser_new(int options)
 	parser->root = document;
 	parser->current = document;
 	parser->line_number = 0;
+	parser->offset = 0;
+	parser->first_nonspace = 0;
+	parser->indent = 0;
+	parser->blank = false;
 	parser->curline = line;
 	parser->last_line_length = 0;
 	parser->linebuf = buf;
@@ -138,7 +142,7 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln)
 	for (i = ln->size - 1; i >= 0; --i) {
 		c = ln->ptr[i];
 
-		if (c != ' ' && c != '\t' && !is_line_end_char(c))
+		if (c != ' ' && c != '\t' && !S_is_line_end_char(c))
 			break;
 	}
 
@@ -151,7 +155,7 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln)
 	for(; i < ln->size; ++i) {
 		c = ln->ptr[i];
 
-		if (!is_line_end_char(c))
+		if (!S_is_line_end_char(c))
 			continue;
 
 		cmark_strbuf_truncate(ln, i);
@@ -249,7 +253,7 @@ finalize(cmark_parser *parser, cmark_node* b)
 
 			// first line of contents becomes info
 			for (pos = 0; pos < b->string_content.size; ++pos) {
-				if (is_line_end_char(b->string_content.ptr[pos]))
+				if (S_is_line_end_char(b->string_content.ptr[pos]))
 					break;
 			}
 			assert(pos < b->string_content.size);
@@ -495,7 +499,7 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
 		size_t line_len;
 
 		for (eol = buffer; eol < end; ++eol) {
-			if (is_line_end_char(*eol))
+			if (S_is_line_end_char(*eol))
 				break;
 		}
 		if (eol >= end)
@@ -546,29 +550,39 @@ static void chop_trailing_hashtags(cmark_chunk *ch)
 }
 
 static void
+S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input)
+{
+	// Skip spaces from parser->offset, then record position, indent and blank.
+	int pos = parser->offset;
+	while (peek_at(input, pos) == ' ')
+		pos++;
+	parser->first_nonspace = pos;
+	parser->indent = pos - parser->offset;
+	parser->blank = S_is_line_end_char(peek_at(input, pos));
+}
+
+static void
 S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 {
 	cmark_node* last_matched_container;
-	int offset = 0;
 	int matched = 0;
 	int lev = 0;
 	int i;
 	cmark_list *data = NULL;
 	bool all_matched = true;
 	cmark_node* container;
-	bool blank = false;
-	int first_nonspace;
-	int indent;
 	bool indented;
 	cmark_chunk input;
 	bool maybe_lazy;
 
 	utf8proc_detab(parser->curline, buffer, bytes);
+	parser->offset = 0;
+	parser->blank = false;
 
 	// Add a newline to the end if not present:
 	// TODO this breaks abstraction:
 	if (parser->curline->size > 0 &&
-	    !is_line_end_char(parser->curline->ptr[parser->curline->size - 1])) {
+	    !S_is_line_end_char(parser->curline->ptr[parser->curline->size - 1])) {
 		cmark_strbuf_putc(parser->curline, '\n');
 	}
 
@@ -586,32 +600,26 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 	while (container->last_child && container->last_child->open) {
 		container = container->last_child;
 
-		first_nonspace = offset;
-		while (peek_at(&input, first_nonspace) == ' ') {
-			first_nonspace++;
-		}
-
-		indent = first_nonspace - offset;
-		blank = is_line_end_char(peek_at(&input, first_nonspace));
+		S_find_first_nonspace(parser, &input);
 
 		if (container->type == NODE_BLOCK_QUOTE) {
-			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
+			matched = parser->indent <= 3 && peek_at(&input, parser->first_nonspace) == '>';
 			if (matched) {
-				offset = first_nonspace + 1;
-				if (peek_at(&input, offset) == ' ')
-					offset++;
+				parser->offset = parser->first_nonspace + 1;
+				if (peek_at(&input, parser->offset) == ' ')
+					parser->offset++;
 			} else {
 				all_matched = false;
 			}
 
 		} else if (container->type == NODE_ITEM) {
 
-			if (indent >= container->as.list.marker_offset +
+			if (parser->indent >= container->as.list.marker_offset +
 			    container->as.list.padding) {
-				offset += container->as.list.marker_offset +
+				parser->offset += container->as.list.marker_offset +
 				          container->as.list.padding;
-			} else if (blank) {
-				offset = first_nonspace;
+			} else if (parser->blank) {
+				parser->offset = parser->first_nonspace;
 			} else {
 				all_matched = false;
 			}
@@ -619,34 +627,34 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		} else if (container->type == NODE_CODE_BLOCK) {
 
 			if (!container->as.code.fenced) { // indented
-				if (indent >= CODE_INDENT) {
-					offset += CODE_INDENT;
-				} else if (blank) {
-					offset = first_nonspace;
+				if (parser->indent >= CODE_INDENT) {
+					parser->offset += CODE_INDENT;
+				} else if (parser->blank) {
+					parser->offset = parser->first_nonspace;
 				} else {
 					all_matched = false;
 				}
 			} else { // fenced
 				matched = 0;
-				if (indent <= 3 &&
-				    (peek_at(&input, first_nonspace) ==
+				if (parser->indent <= 3 &&
+				    (peek_at(&input, parser->first_nonspace) ==
 				     container->as.code.fence_char)) {
 					matched = scan_close_code_fence(&input,
-					                                first_nonspace);
+					                                parser->first_nonspace);
 				}
 				if (matched >= container->as.code.fence_length) {
 					// closing fence - and since we're at
 					// the end of a line, we can return:
 					all_matched = false;
-					offset += matched;
+					parser->offset += matched;
 					parser->current = finalize(parser, container);
 					goto finished;
 				} else {
-					// skip opt. spaces of fence offset
+					// skip optional spaces, up to the opening fence's fence_offset
 					i = container->as.code.fence_offset;
 					while (i > 0 &&
-					       peek_at(&input, offset) == ' ') {
-						offset++;
+					       peek_at(&input, parser->offset) == ' ') {
+						parser->offset++;
 						i--;
 					}
 				}
@@ -658,13 +666,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 
 		} else if (container->type == NODE_HTML) {
 
-			if (blank) {
+			if (parser->blank) {
 				all_matched = false;
 			}
 
 		} else if (container->type == NODE_PARAGRAPH) {
 
-			if (blank) {
+			if (parser->blank) {
 				all_matched = false;
 			}
 
@@ -679,7 +687,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 	last_matched_container = container;
 
 	// check to see if we've hit 2nd blank line, break out of list:
-	if (blank && container->last_line_blank) {
+	if (parser->blank && container->last_line_blank) {
 		break_out_of_lists(parser, &container);
 	}
 
@@ -688,37 +696,23 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 	while (container->type != NODE_CODE_BLOCK &&
 	       container->type != NODE_HTML) {
 
-		first_nonspace = offset;
-		while (peek_at(&input, first_nonspace) == ' ')
-			first_nonspace++;
-
-		indent = first_nonspace - offset;
-		indented = indent >= CODE_INDENT;
-		blank = is_line_end_char(peek_at(&input, first_nonspace));
+		S_find_first_nonspace(parser, &input);
+		indented = parser->indent >= CODE_INDENT;
 
-		if (indented && !maybe_lazy && !blank) {
-				offset += CODE_INDENT;
-				container = add_child(parser, container, NODE_CODE_BLOCK, offset + 1);
-				container->as.code.fenced = false;
-				container->as.code.fence_char = 0;
-				container->as.code.fence_length = 0;
-				container->as.code.fence_offset = 0;
-				container->as.code.info = cmark_chunk_literal("");
+		if (!indented && peek_at(&input, parser->first_nonspace) == '>') {
 
-		} else if (!indented && peek_at(&input, first_nonspace) == '>') {
-
-			offset = first_nonspace + 1;
+			parser->offset = parser->first_nonspace + 1;
 			// optional following character
-			if (peek_at(&input, offset) == ' ')
-				offset++;
-			container = add_child(parser, container, NODE_BLOCK_QUOTE, offset + 1);
+			if (peek_at(&input, parser->offset) == ' ')
+				parser->offset++;
+			container = add_child(parser, container, NODE_BLOCK_QUOTE, parser->offset + 1);
 
-		} else if (!indented && (matched = scan_atx_header_start(&input, first_nonspace))) {
+		} else if (!indented && (matched = scan_atx_header_start(&input, parser->first_nonspace))) {
 
-			offset = first_nonspace + matched;
-			container = add_child(parser, container, NODE_HEADER, offset + 1);
+			parser->offset = parser->first_nonspace + matched;
+			container = add_child(parser, container, NODE_HEADER, parser->offset + 1);
 
-			int hashpos = cmark_chunk_strchr(&input, '#', first_nonspace);
+			int hashpos = cmark_chunk_strchr(&input, '#', parser->first_nonspace);
 			int level = 0;
 
 			while (peek_at(&input, hashpos) == '#') {
@@ -728,24 +722,24 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 			container->as.header.level = level;
 			container->as.header.setext = false;
 
-		} else if (!indented && (matched = scan_open_code_fence(&input, first_nonspace))) {
+		} else if (!indented && (matched = scan_open_code_fence(&input, parser->first_nonspace))) {
 
-			container = add_child(parser, container, NODE_CODE_BLOCK, first_nonspace + 1);
+			container = add_child(parser, container, NODE_CODE_BLOCK, parser->first_nonspace + 1);
 			container->as.code.fenced = true;
-			container->as.code.fence_char = peek_at(&input, first_nonspace);
+			container->as.code.fence_char = peek_at(&input, parser->first_nonspace);
 			container->as.code.fence_length = matched;
-			container->as.code.fence_offset = first_nonspace - offset;
+			container->as.code.fence_offset = parser->first_nonspace - parser->offset;
 			container->as.code.info = cmark_chunk_literal("");
-			offset = first_nonspace + matched;
+			parser->offset = parser->first_nonspace + matched;
 
-		} else if (!indented && (matched = scan_html_block_tag(&input, first_nonspace))) {
+		} else if (!indented && (matched = scan_html_block_tag(&input, parser->first_nonspace))) {
 
-			container = add_child(parser, container, NODE_HTML, first_nonspace + 1);
-			// note, we don't adjust offset because the tag is part of the text
+			container = add_child(parser, container, NODE_HTML, parser->first_nonspace + 1);
+			// note, we don't adjust parser->offset because the tag is part of the text
 
 		} else if (!indented &&
-			   container->type == NODE_PARAGRAPH &&
-		           (lev = scan_setext_header_line(&input, first_nonspace)) &&
+		           container->type == NODE_PARAGRAPH &&
+		           (lev = scan_setext_header_line(&input, parser->first_nonspace)) &&
 		           // check that there is only one line in the paragraph:
 		           (cmark_strbuf_strrchr(&container->string_content, '\n',
 		                                 cmark_strbuf_len(&container->string_content) - 2) < 0)) {
@@ -753,57 +747,70 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 			container->type = NODE_HEADER;
 			container->as.header.level = lev;
 			container->as.header.setext = true;
-			offset = input.len - 1;
+			parser->offset = input.len - 1;
 
 		} else if (!indented &&
-			   !(container->type == NODE_PARAGRAPH &&
-			     !all_matched) &&
-		           (matched = scan_hrule(&input, first_nonspace))) {
+		           !(container->type == NODE_PARAGRAPH &&
+		             !all_matched) &&
+		           (matched = scan_hrule(&input, parser->first_nonspace))) {
 
 			// it's only now that we know the line is not part of a setext header:
-			container = add_child(parser, container, NODE_HRULE, first_nonspace + 1);
+			container = add_child(parser, container, NODE_HRULE, parser->first_nonspace + 1);
 			container = finalize(parser, container);
-			offset = input.len - 1;
+			parser->offset = input.len - 1;
 
-		} else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {
+		} else if ((matched = parse_list_marker(&input, parser->first_nonspace, &data)) &&
+		           (!indented || container->type == NODE_LIST)) {
+			// Note that we can have new list items starting with >= 4
+			// spaces indent, as long as the list container is still open.
 
 			// compute padding:
-			offset = first_nonspace + matched;
+			parser->offset = parser->first_nonspace + matched;
 			i = 0;
-			while (i <= 5 && peek_at(&input, offset + i) == ' ') {
+			while (i <= 5 && peek_at(&input, parser->offset + i) == ' ') {
 				i++;
 			}
 			// i = number of spaces after marker, up to 5
 			if (i >= 5 || i < 1 ||
-			    is_line_end_char(peek_at(&input, offset))) {
+			    S_is_line_end_char(peek_at(&input, parser->offset))) {
 				data->padding = matched + 1;
 				if (i > 0) {
-					offset += 1;
+					parser->offset += 1;
 				}
 			} else {
 				data->padding = matched + i;
-				offset += i;
+				parser->offset += i;
 			}
 
 			// check container; if it's a list, see if this list item
 			// can continue the list; otherwise, create a list container.
 
-			data->marker_offset = indent;
+			data->marker_offset = parser->indent;
 
 			if (container->type != NODE_LIST ||
 			    !lists_match(&container->as.list, data)) {
 				container = add_child(parser, container, NODE_LIST,
-				                      first_nonspace + 1);
+				                      parser->first_nonspace + 1);
 
 				memcpy(&container->as.list, data, sizeof(*data));
 			}
 
 			// add the list item
 			container = add_child(parser, container, NODE_ITEM,
-			                      first_nonspace + 1);
+			                      parser->first_nonspace + 1);
 			/* TODO: static */
 			memcpy(&container->as.list, data, sizeof(*data));
 			free(data);
+
+		} else if (indented && !maybe_lazy && !parser->blank) {
+			parser->offset += CODE_INDENT;
+			container = add_child(parser, container, NODE_CODE_BLOCK, parser->offset + 1);
+			container->as.code.fenced = false;
+			container->as.code.fence_char = 0;
+			container->as.code.fence_length = 0;
+			container->as.code.fence_offset = 0;
+			container->as.code.info = cmark_chunk_literal("");
+
 		} else {
 			break;
 		}
@@ -815,17 +822,12 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		maybe_lazy = false;
 	}
 
-	// what remains at offset is a text line.  add the text to the
+	// what remains at parser->offset is a text line.  add the text to the
 	// appropriate container.
 
-	first_nonspace = offset;
-	while (peek_at(&input, first_nonspace) == ' ')
-		first_nonspace++;
-
-	indent = first_nonspace - offset;
-	blank = is_line_end_char(peek_at(&input, first_nonspace));
+	S_find_first_nonspace(parser, &input);
 
-	if (blank && container->last_child) {
+	if (parser->blank && container->last_child) {
 		container->last_child->last_line_blank = true;
 	}
 
@@ -833,7 +835,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 	// and we don't count blanks in fenced code for purposes of tight/loose
 	// lists or breaking out of lists.  we also don't set last_line_blank
 	// on an empty list item.
-	container->last_line_blank = (blank &&
+	container->last_line_blank = (parser->blank &&
 	                              container->type != NODE_BLOCK_QUOTE &&
 	                              container->type != NODE_HEADER &&
 	                              !(container->type == NODE_CODE_BLOCK &&
@@ -850,11 +852,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 
 	if (parser->current != last_matched_container &&
 	    container == last_matched_container &&
-	    !blank &&
+	    !parser->blank &&
 	    parser->current->type == NODE_PARAGRAPH &&
 	    cmark_strbuf_len(&parser->current->string_content) > 0) {
 
-		add_line(parser->current, &input, offset);
+		add_line(parser->current, &input, parser->offset);
 
 	} else { // not a lazy continuation
 
@@ -867,9 +869,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 		if (container->type == NODE_CODE_BLOCK ||
 		    container->type == NODE_HTML) {
 
-			add_line(container, &input, offset);
+			add_line(container, &input, parser->offset);
 
-		} else if (blank) {
+		} else if (parser->blank) {
 
 			// ??? do nothing
 
@@ -879,12 +881,12 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
 			    container->as.header.setext == false) {
 				chop_trailing_hashtags(&input);
 			}
-			add_line(container, &input, first_nonspace);
+			add_line(container, &input, parser->first_nonspace);
 
 		} else {
 			// create paragraph container for line
-			container = add_child(parser, container, NODE_PARAGRAPH, first_nonspace + 1);
-			add_line(container, &input, first_nonspace);
+			container = add_child(parser, container, NODE_PARAGRAPH, parser->first_nonspace + 1);
+			add_line(container, &input, parser->first_nonspace);
 
 		}
 
diff --git a/src/chunk.h b/src/chunk.h
index 4bb4980..a246a9d 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -66,8 +66,8 @@ static inline const char *cmark_chunk_to_cstr(cmark_chunk *c)
 	}
 	str = (unsigned char *)malloc(c->len + 1);
 	if(str != NULL) {
-		if(c->len > 0){
-			memcpy(str, c->data, c->len);			
+		if(c->len > 0) {
+			memcpy(str, c->data, c->len);
 		}
 		str[c->len] = 0;
 	}
@@ -86,8 +86,7 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str)
 		c->len   = 0;
 		c->data  = NULL;
 		c->alloc = 0;
-	}
-	else {
+	} else {
 		c->len   = strlen(str);
 		c->data  = (unsigned char *)malloc(c->len + 1);
 		c->alloc = 1;
diff --git a/src/commonmark.c b/src/commonmark.c
index 47da191..dba1fcf 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -260,7 +260,7 @@ is_autolink(cmark_node *node)
 	link_text = node->first_child;
 	cmark_consolidate_text_nodes(link_text);
 	return ((int)strlen(url) == link_text->as.literal.len &&
-		strncmp(url,
+	        strncmp(url,
 	                (char*)link_text->as.literal.data,
 	                link_text->as.literal.len) == 0);
 }
diff --git a/src/html.c b/src/html.c
index c15f6f4..d3f9fc7 100644
--- a/src/html.c
+++ b/src/html.c
@@ -262,12 +262,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 		if (entering) {
 			cmark_strbuf_puts(html, "<a href=\"");
 			escape_href(html, node->as.link.url.data,
-				    node->as.link.url.len);
+			            node->as.link.url.len);
 
 			if (node->as.link.title.len) {
 				cmark_strbuf_puts(html, "\" title=\"");
 				escape_html(html, node->as.link.title.data,
-					    node->as.link.title.len);
+				            node->as.link.title.len);
 			}
 
 			cmark_strbuf_puts(html, "\">");
@@ -280,7 +280,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 		if (entering) {
 			cmark_strbuf_puts(html, "<img src=\"");
 			escape_href(html, node->as.link.url.data,
-				    node->as.link.url.len);
+			            node->as.link.url.len);
 
 			cmark_strbuf_puts(html, "\" alt=\"");
 			state->plain = node;
@@ -288,7 +288,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 			if (node->as.link.title.len) {
 				cmark_strbuf_puts(html, "\" title=\"");
 				escape_html(html, node->as.link.title.data,
-					    node->as.link.title.len);
+				            node->as.link.title.len);
 			}
 
 			cmark_strbuf_puts(html, "\" />");
diff --git a/src/inlines.c b/src/inlines.c
index 352ded3..8a1ee44 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -151,7 +151,7 @@ static cmark_chunk chunk_clone(cmark_chunk *src)
 	memcpy(c.data, src->data, len);
 	c.data[len] = '\0';
 
-       return c;
+	return c;
 }
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
@@ -307,9 +307,9 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 	                   !utf8proc_is_punctuation(after_char));
 	if (c == '_') {
 		*can_open = left_flanking &&
-			(!right_flanking || utf8proc_is_punctuation(before_char));
+		            (!right_flanking || utf8proc_is_punctuation(before_char));
 		*can_close = right_flanking &&
-			(!left_flanking || utf8proc_is_punctuation(after_char));
+		             (!left_flanking || utf8proc_is_punctuation(after_char));
 	} else if (c == '\'' || c == '"') {
 		*can_open = left_flanking && !right_flanking;
 		*can_close = right_flanking;
diff --git a/src/parser.h b/src/parser.h
index cbccae3..ccdf84b 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -16,6 +16,10 @@ struct cmark_parser {
 	struct cmark_node* root;
 	struct cmark_node* current;
 	int line_number;
+	int offset;
+	int first_nonspace;
+	int indent;
+	bool blank;
 	cmark_strbuf *curline;
 	int last_line_length;
 	cmark_strbuf *linebuf;
diff --git a/src/xml.c b/src/xml.c
index acb2f3d..14f6d67 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -119,11 +119,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 		case CMARK_NODE_IMAGE:
 			cmark_strbuf_puts(xml, " destination=\"");
 			escape_xml(xml, node->as.link.url.data,
-				   node->as.link.url.len);
+			           node->as.link.url.len);
 			cmark_strbuf_putc(xml, '"');
 			cmark_strbuf_puts(xml, " title=\"");
 			escape_xml(xml, node->as.link.title.data,
-				   node->as.link.title.len);
+			           node->as.link.title.len);
 			cmark_strbuf_putc(xml, '"');
 			break;
 		default: