diff options
| -rw-r--r-- | runtests.pl | 3 | ||||
| -rw-r--r-- | spec.txt | 6 | ||||
| -rw-r--r-- | src/html/html.c | 22 | ||||
| -rw-r--r-- | src/inlines.c | 105 | 
4 files changed, 69 insertions, 67 deletions
| diff --git a/runtests.pl b/runtests.pl index 2e2b795..e53938d 100644 --- a/runtests.pl +++ b/runtests.pl @@ -49,6 +49,7 @@ sub tidy        s/  */ /;        # collapse space before /> in tag        s/  *\/>/\/>/; +	  s/>\n$/>/;        # skip blank line        if (/^$/) {          next; @@ -89,8 +90,10 @@ sub dotest      print $markdown;      print "=== expected ===============\n";      print $html; +	print "\n";      print "=== got ====================\n";      print $actual; +	print "\n";      print color "black";      return 0;    } @@ -1682,7 +1682,7 @@ them.  [Foo bar]  . -<p><a href="my url" title="title">Foo bar</a></p> +<p><a href="my%20url" title="title">Foo bar</a></p>  .  The title may be omitted: @@ -1745,7 +1745,7 @@ case-insensitive (see [matches](#matches)).  [αγω]  . -<p><a href="/φου">αγω</a></p> +<p><a href="/%CF%86%CE%BF%CF%85">αγω</a></p>  .  Here is a link reference definition with no corresponding link. @@ -3688,7 +3688,7 @@ raw HTML:  .  <http://google.com?find=\*>  . -<p><a href="http://google.com?find=\*">http://google.com?find=\*</a></p> +<p><a href="http://google.com?find=%5C*">http://google.com?find=\*</a></p>  .  . diff --git a/src/html/html.c b/src/html/html.c index 2a65a63..cdccf2a 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -50,17 +50,15 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)  					cr(html);  					gh_buf_puts(html, "<p>");  					inlines_to_html(html, b->inline_content); -					gh_buf_puts(html, "</p>"); -					cr(html); +					gh_buf_puts(html, "</p>\n");  				}  				break;  			case block_quote:  				cr(html); -				gh_buf_puts(html, "<blockquote>"); +				gh_buf_puts(html, "<blockquote>\n");  				blocks_to_html(html, b->children, false); -				gh_buf_puts(html, "</blockquote>"); -				cr(html); +				gh_buf_puts(html, "</blockquote>\n");  				break;  			case list_item: @@ -68,8 +66,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)  				gh_buf_puts(html, "<li>");  				blocks_to_html(html, b->children, tight);  				gh_buf_trim(html); /* TODO: rtrim */ -				gh_buf_puts(html, "</li>"); -				cr(html); +				gh_buf_puts(html, "</li>\n");  				break;  			case list: @@ -87,7 +84,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)  				blocks_to_html(html, b->children, data->tight);  				gh_buf_puts(html, data->list_type == bullet ? "</ul>" : "</ol>"); -				cr(html); +				gh_buf_putc(html, '\n');  				break;  			case atx_header: @@ -95,8 +92,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)  				cr(html);  				gh_buf_printf(html, "<h%d>", b->attributes.header_level);  				inlines_to_html(html, b->inline_content); -				gh_buf_printf(html, "</h%d>", b->attributes.header_level); -				cr(html); +				gh_buf_printf(html, "</h%d>\n", b->attributes.header_level);  				break;  			case indented_code: @@ -122,8 +118,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)  				gh_buf_puts(html, "><code>");  				escape_html(html, b->string_content.ptr, b->string_content.size); -				gh_buf_puts(html, "</code></pre>"); -				cr(html); +				gh_buf_puts(html, "</code></pre>\n");  				break;  			case html_block: @@ -131,8 +126,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)  				break;  			case hrule: -				gh_buf_puts(html, "<hr />"); -				cr(html); +				gh_buf_puts(html, "<hr />\n");  				break;  			case reference_def: diff --git a/src/inlines.c b/src/inlines.c index ced4673..a0dcac9 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -1,8 +1,8 @@  #include <stdlib.h> +#include <string.h>  #include <stdio.h>  #include <stdbool.h>  #include <ctype.h> -#include <string.h>  #include "stmd.h"  #include "uthash.h" @@ -18,7 +18,7 @@ typedef struct Subject {  reference* lookup_reference(reference** refmap, chunk *label);  reference* make_reference(chunk *label, chunk *url, chunk *title); -static unsigned char *clean_url(chunk *url); +static unsigned char *clean_url(chunk *url, int is_email);  static unsigned char *clean_title(chunk *title);  inline static unsigned char *chunk_to_cstr(chunk *c); @@ -97,7 +97,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)  	reference *ref;  	ref = malloc(sizeof(reference));  	ref->label = normalize_reference(label); -	ref->url = clean_url(url); +	ref->url = clean_url(url, 0);  	ref->title = clean_title(title);  	return ref;  } @@ -116,14 +116,25 @@ extern void add_reference(reference** refmap, reference* ref)  	}  } +inline static inl* make_link_from_reference(inl* label, reference *ref) +{ +	inl* e = (inl*) malloc(sizeof(inl)); +	e->tag = INL_LINK; +	e->content.linkable.label = label; +	e->content.linkable.url   = strdup(ref->url); +	e->content.linkable.title = ref->title ? strdup(ref->title) : NULL; +	e->next = NULL; +	return e; +} +  // Create an inline with a linkable string value. -inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) +inline static inl* make_link(inl* label, chunk url, chunk title, int is_email)  {  	inl* e = (inl*) malloc(sizeof(inl)); -	e->tag = t; +	e->tag = INL_LINK;  	e->content.linkable.label = label; -	e->content.linkable.url   = chunk_to_cstr(&url); -	e->content.linkable.title = title.len ? chunk_to_cstr(&title) : NULL; +	e->content.linkable.url   = clean_url(&url, is_email); +	e->content.linkable.title = clean_title(&title);  	e->next = NULL;  	return e;  } @@ -163,7 +174,6 @@ inline static inl* make_simple(int t)  #define make_entity(s) make_literal(INL_ENTITY, s)  #define make_linebreak() make_simple(INL_LINEBREAK)  #define make_softbreak() make_simple(INL_SOFTBREAK) -#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title)  #define make_emph(contents) make_inlines(INL_EMPH, contents)  #define make_strong(contents) make_inlines(INL_STRONG, contents) @@ -309,37 +319,27 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)  // space and newline characters into a single space.  static void normalize_whitespace(gh_buf *s)  { -	/* TODO */ -#if 0  	bool last_char_was_space = false; -	int pos = 0; -	char c; -	while ((c = gh_buf_at(s, pos))) { -		switch (c) { -			case ' ': -				if (last_char_was_space) { -					bdelete(s, pos, 1); -				} else { -					pos++; -				} -				last_char_was_space = true; -				break; -			case '\n': -				if (last_char_was_space) { -					bdelete(s, pos, 1); -				} else { -					bdelete(s, pos, 1); -					binsertch(s, pos, 1, ' '); -					pos++; -				} -				last_char_was_space = true; +	int r, w; + +	for (r = 0, w = 0; r < s->size; ++r) { +		switch (s->ptr[r]) { +		case ' ': +		case '\n': +			if (last_char_was_space)  				break; -			default: -				pos++; -				last_char_was_space = false; + +			s->ptr[w++] = ' '; +			last_char_was_space = true; +			break; + +		default: +			s->ptr[w++] = s->ptr[r]; +			last_char_was_space = false;  		}  	} -#endif + +	gh_buf_truncate(s, w);  }  // Parse backtick code section or raw backticks, return an inline. @@ -593,16 +593,19 @@ extern void unescape_buffer(gh_buf *buf)  // Clean a URL: remove surrounding whitespace and surrounding <>,  // and remove \ that escape punctuation. -static unsigned char *clean_url(chunk *url) +static unsigned char *clean_url(chunk *url, int is_email)  {  	gh_buf buf = GH_BUF_INIT;  	chunk_trim(url); +	if (is_email) +		gh_buf_puts(&buf, "mailto:"); +  	if (url->data[0] == '<' && url->data[url->len - 1] == '>') { -		gh_buf_set(&buf, url->data + 1, url->len - 2); +		gh_buf_put(&buf, url->data + 1, url->len - 2);  	} else { -		gh_buf_set(&buf, url->data, url->len); +		gh_buf_put(&buf, url->data, url->len);  	}  	unescape_buffer(&buf); @@ -613,8 +616,13 @@ static unsigned char *clean_url(chunk *url)  static unsigned char *clean_title(chunk *title)  {  	gh_buf buf = GH_BUF_INIT; -	unsigned char first = title->data[0]; -	unsigned char last = title->data[title->len - 1]; +	unsigned char first, last; + +	if (title->len == 0) +		return NULL; + +	first = title->data[0]; +	last = title->data[title->len - 1];  	// remove surrounding quotes if any:  	if ((first == '\'' && last == '\'') || @@ -647,25 +655,22 @@ static inl* handle_pointy_brace(subject* subj)  		return make_link(  			make_str_with_entities(&contents),  			contents, -			chunk_literal("") +			chunk_literal(""), +			0  		);  	}  	// next try to match an email autolink  	matchlen = scan_autolink_email(&subj->input, subj->pos);  	if (matchlen > 0) { -		gh_buf mail_url = GH_BUF_INIT; -  		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);  		subj->pos += matchlen; -		gh_buf_puts(&mail_url, "mailto:"); -		gh_buf_put(&mail_url, contents.data, contents.len); -  		return make_link(  				make_str_with_entities(&contents), -				chunk_buf_detach(&mail_url), -				chunk_literal("") +				contents, +				chunk_literal(""), +				1  		);  	} @@ -790,7 +795,7 @@ static inl* handle_left_bracket(subject* subj)  				title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);  				lab = parse_chunk_inlines(&rawlabel, NULL); -				return make_link(lab, url, title); +				return make_link(lab, url, title, 0);  			} else {  				// if we get here, we matched a label but didn't get further:  				subj->pos = endlabel; @@ -821,7 +826,7 @@ static inl* handle_left_bracket(subject* subj)  			ref = lookup_reference(subj->reference_map, &reflabel);  			if (ref != NULL) { // found  				lab = parse_chunk_inlines(&rawlabel, NULL); -				result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title)); +				result = make_link_from_reference(lab, ref);  			} else {  				subj->pos = endlabel;  				lab = parse_chunk_inlines(&rawlabel, subj->reference_map); | 
