diff options
-rw-r--r-- | Makefile | 8 | ||||
-rw-r--r-- | TODO | 7 | ||||
-rw-r--r-- | js/lib/inlines.js | 151 | ||||
-rw-r--r-- | leakcheck.md | 1561 | ||||
-rw-r--r-- | spec.txt | 110 | ||||
-rw-r--r-- | src/blocks.c | 199 | ||||
-rw-r--r-- | src/buffer.c | 11 | ||||
-rw-r--r-- | src/inlines.c | 457 | ||||
-rw-r--r-- | src/main.c | 5 | ||||
-rw-r--r-- | src/print.c | 34 | ||||
-rw-r--r-- | src/references.c | 5 | ||||
-rw-r--r-- | src/utf8.c | 39 |
12 files changed, 2100 insertions, 487 deletions
@@ -38,6 +38,9 @@ js/stmd.js: js/lib/index.js ${JSMODULES} testjs: spec.txt node js/test.js +jshint: + jshint ${JSMODULES} + benchjs: node js/bench.js ${BENCHINP} @@ -57,14 +60,13 @@ $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt $(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@ -.PHONY: leakcheck clean fuzztest dingus upload +.PHONY: leakcheck clean fuzztest dingus upload jshint test testjs benchjs dingus: js/stmd.js cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000 leakcheck: $(PROG) - # TODO produce leaktest.md that tests everything - cat leaktest.md | valgrind --leak-check=full --dsymutil=yes $(PROG) + cat leakcheck.md | valgrind --leak-check=full --dsymutil=yes $(PROG) operf: $(PROG) operf $(PROG) <bench.md >/dev/null @@ -1 +1,8 @@ +- leakcheck reveals leak in new stmd code + Create a function to remove and free a stack entry + Use a while loop to remove and free all stack entries from top to the + one we're matching. +- use name other than subj->last_emphasis + +- in js: make a proper stack (linked list) rather than using an array? diff --git a/js/lib/inlines.js b/js/lib/inlines.js index 34f1560..5fde099 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -262,93 +262,81 @@ var Str = function(s) { // Attempt to parse emphasis or strong emphasis. var parseEmphasis = function(cc,inlines) { var startpos = this.pos; - var c ; - var first_close = 0; - c = fromCodePoint(cc); - var numdelims; - var numclosedelims; - var delimpos; - - // Get opening delimiters. - res = this.scanDelims(cc); - numdelims = res.numdelims; + var res = this.scanDelims(cc); + var numdelims = res.numdelims; if (numdelims === 0) { this.pos = startpos; return false; } - if (numdelims >= 4 || !res.can_open) { - this.pos += numdelims; - inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); - return true; - } + if (res.can_close) { - this.pos += numdelims; + // Walk the stack and find a matching opener, if possible + var opener = this.emphasis_openers; + while (opener) { - var delims_to_match = numdelims; - - var current = []; - var firstend; - var firstpos; - var state = 0; - var can_close = false; - var can_open = false; - var last_emphasis_closer = null; - while (this.last_emphasis_closer[c] >= this.pos) { - res = this.scanDelims(cc); - numclosedelims = res.numdelims; - - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; - } - if (numclosedelims === 3 && delims_to_match === 3) { - delims_to_match -= 3; - this.pos += 3; - current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}]; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; - this.pos += 2; - firstend = current.length; - firstpos = this.pos; - current = [{t: 'Strong', c: current}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; - this.pos += 1; - firstend = current.length; - firstpos = this.pos; - current = [{t: 'Emph', c: current}]; - } else { - if (!(this.parseInline(current,true))) { - break; - } - } - if (delims_to_match === 0) { - Array.prototype.push.apply(inlines, current); - return true; + if (opener.cc === cc) { // we have a match! + + if (opener.numdelims <= numdelims) { // all openers used + + this.pos += opener.numdelims; + var X; + switch (opener.numdelims) { + case 3: + X = function(x) { return Strong([Emph(x)]); }; + break; + case 2: + X = Strong; + break; + case 1: + default: + X = Emph; + break; } - } else if (!(this.parseInline(current,true))) { - break; + inlines[opener.pos] = X(inlines.slice(opener.pos + 1)); + inlines.splice(opener.pos + 1, inlines.length - (opener.pos + 1)); + // Remove entries after this, to prevent overlapping nesting: + this.emphasis_openers = opener.previous; + return true; + + } else if (opener.numdelims > numdelims) { // only some openers used + + this.pos += numdelims; + opener.numdelims -= numdelims; + inlines[opener.pos].c = + inlines[opener.pos].c.slice(0, opener.numdelims); + var X = numdelims === 2 ? Strong : Emph; + inlines[opener.pos + 1] = X(inlines.slice(opener.pos + 1)); + inlines.splice(opener.pos + 2, inlines.length - (opener.pos + 2)); + // Remove entries after this, to prevent overlapping nesting: + this.emphasis_openers = opener; + return true; + + } + } + opener = opener.previous; + } } - // we didn't match emphasis: fallback - inlines.push(Str(this.subject.slice(startpos, - startpos + delims_to_match))); - if (delims_to_match < numdelims) { - Array.prototype.push.apply(inlines, current.slice(0,firstend)); - this.pos = firstpos; - } else { // delims_to_match === numdelims - this.pos = startpos + delims_to_match; - } + // If we're here, we didn't match a closer. + + this.pos += numdelims; + inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); + + if (res.can_open) { - if (last_emphasis_closer) { - this.last_emphasis_closer[c] = last_emphasis_closer; + // Add entry to stack for this opener + this.emphasis_openers = { cc: cc, + numdelims: numdelims, + pos: inlines.length - 1, + previous: this.emphasis_openers }; } + return true; + }; // Attempt to parse link title (sans quotes), returning the string @@ -629,18 +617,11 @@ var parseReference = function(s, refmap) { }; // Parse the next inline element in subject, advancing subject position. -// If memoize is set, memoize the result. // On success, add the result to the inlines list, and return true. // On failure, return false. -var parseInline = function(inlines, memoize) { +var parseInline = function(inlines) { var startpos = this.pos; var origlen = inlines.length; - var memoized = memoize && this.memo[startpos]; - if (memoized) { - this.pos = memoized.endpos; - Array.prototype.push.apply(inlines, memoized.inline); - return true; - } var c = this.peek(); if (c === -1) { @@ -683,10 +664,6 @@ var parseInline = function(inlines, memoize) { inlines.push({t: 'Str', c: fromCodePoint(c)}); } - if (memoize) { - this.memo[startpos] = { inline: inlines.slice(origlen), - endpos: this.pos }; - } return true; }; @@ -695,10 +672,9 @@ var parseInlines = function(s, refmap) { this.subject = s; this.pos = 0; this.refmap = refmap || {}; - this.memo = {}; - this.last_emphasis_closer = { '*': s.length, '_': s.length }; + this.emphasis_openers = null; var inlines = []; - while (this.parseInline(inlines, false)) { + while (this.parseInline(inlines)) { } return inlines; }; @@ -708,10 +684,9 @@ function InlineParser(){ return { subject: '', label_nest_level: 0, // used by parseLinkLabel method - last_emphasis_closer: null, // used by parseEmphasis method + emphasis_openers: null, // used by parseEmphasis method pos: 0, refmap: {}, - memo: {}, match: match, peek: peek, spnl: spnl, diff --git a/leakcheck.md b/leakcheck.md new file mode 100644 index 0000000..06716e1 --- /dev/null +++ b/leakcheck.md @@ -0,0 +1,1561 @@ +→foo→baz→→bim + + a→a + ὐ→a + +- `one +- two` + +*** +--- +___ + ++++ + +=== + +-- +** +__ + + *** + *** + *** + + *** + +Foo + *** + +_____________________________________ + + - - - + + ** * ** * ** * ** + +- - - - + +- - - - + +_ _ _ _ a + +a------ + + *-* + +- foo +*** +- bar + +Foo +*** +bar + +Foo +--- +bar + +* Foo +* * * +* Bar + +- Foo +- * * * + +# foo +## foo +### foo +#### foo +##### foo +###### foo + +####### foo + +#5 bolt + +\## foo + +# foo *bar* \*baz\* + +# foo + + ### foo + ## foo + # foo + + # foo + +foo + # bar + +## foo ## + ### bar ### + +# foo ################################## +##### foo ## + +### foo ### + +### foo ### b + +### foo \### +## foo \#\## +# foo \# + +**** +## foo +**** + +Foo bar +# baz +Bar foo + +## +# +### ### + +Foo *bar* +========= + +Foo *bar* +--------- + +Foo +------------------------- + +Foo += + + Foo +--- + + Foo +----- + + Foo + === + + Foo + --- + + Foo +--- + +Foo + ---- + +Foo + --- + +Foo += = + +Foo +--- - + +Foo +----- + +Foo\ +---- + +`Foo +---- +` + +<a title="a lot +--- +of dashes"/> + +> Foo +--- + +Foo +Bar +--- + +Foo +Bar +=== + +--- +Foo +--- +Bar +--- +Baz + + +==== + + a simple + indented code block + + <a/> + *hi* + + - one + + chunk1 + + chunk2 + + + + chunk3 + + chunk1 + + chunk2 + +Foo + bar + + + foo +bar + +# Header + foo +Header +------ + foo +---- + + foo + bar + + + + foo + + + + foo + +``` +< + > +``` + +~~~ +< + > +~~~ + +``` +aaa +~~~ +``` + +~~~ +aaa +``` +~~~ + +```` +aaa +``` +`````` + +~~~~ +aaa +~~~ +~~~~ + +``` + +````` + +``` +aaa + +``` + + +``` + +``` +``` + + ``` + aaa +aaa +``` + + ``` +aaa + aaa +aaa + ``` + + ``` + aaa + aaa + aaa + ``` + + ``` + aaa + ``` + +``` ``` +aaa + +~~~~~~ +aaa +~~~ ~~ + +foo +``` +bar +``` +baz + +foo +--- +~~~ +bar +~~~ +# baz + +```ruby +def foo(x) + return 3 +end +``` + +~~~~ ruby startline=3 $%@#$ +def foo(x) + return 3 +end +~~~~~~~ + +````; +```` + +``` aa ``` +foo + +``` +``` aaa +``` + +<table> + <tr> + <td> + hi + </td> + </tr> +</table> + +okay. + + <div> + *hello* + <foo><a> + +<DIV CLASS="foo"> + +*Markdown* + +</DIV> + +<div></div> +``` c +int x = 33; +``` + +<!-- Foo +bar + baz --> + +<?php + echo 'foo' +?> + +<![CDATA[ +function matchwo(a,b) +{ +if (a < b && a < 0) then + { + return 1; + } +else + { + return 0; + } +} +]]> + + <!-- foo --> + + <!-- foo --> + +Foo +<div> +bar +</div> + +<div> +bar +</div> +*foo* + +<div class +foo + +<div> + +*Emphasized* text. + +</div> + +<div> +*Emphasized* text. +</div> + +<table> + +<tr> + +<td> +Hi +</td> + +</tr> + +</table> + +[foo]: /url "title" + +[foo] + + [foo]: + /url + 'the title' + +[foo] + +[Foo*bar\]]:my_(url) 'title (with parens)' + +[Foo*bar\]] + +[Foo bar]: +<my url> +'title' + +[Foo bar] + +[foo]: +/url + +[foo] + +[foo]: + +[foo] + +[foo] + +[foo]: url + +[foo] + +[foo]: first +[foo]: second + +[FOO]: /url + +[Foo] + +[ΑΓΩ]: /φου + +[αγω] + +[foo]: /url + +[foo]: /url "title" ok + + [foo]: /url "title" + +[foo] + +``` +[foo]: /url +``` + +[foo] + +Foo +[bar]: /baz + +[bar] + +# [Foo] +[foo]: /url +> bar + +[foo]: /foo-url "foo" +[bar]: /bar-url + "bar" +[baz]: /baz-url + +[foo], +[bar], +[baz] + +[foo] + +> [foo]: /url + +aaa + +bbb + +aaa +bbb + +ccc +ddd + +aaa + + +bbb + + aaa + bbb + +aaa + bbb + ccc + + aaa +bbb + + aaa +bbb + +aaa +bbb + + + +aaa + + +# aaa + + + +> # Foo +> bar +> baz + +># Foo +>bar +> baz + + > # Foo + > bar + > baz + + > # Foo + > bar + > baz + +> # Foo +> bar +baz + +> bar +baz +> foo + +> foo +--- + +> - foo +- bar + +> foo + bar + +> ``` +foo +``` + +> + +> +> +> + +> +> foo +> + +> foo + +> bar + +> foo +> bar + +> foo +> +> bar + +foo +> bar + +> aaa +*** +> bbb + +> bar +baz + +> bar + +baz + +> bar +> +baz + +> > > foo +bar + +>>> foo +> bar +>>baz + +> code + +> not code + +A paragraph +with two lines. + + indented code + +> A block quote. + +1. A paragraph + with two lines. + + indented code + + > A block quote. + +- one + + two + +- one + + two + + - one + + two + + - one + + two + + > > 1. one +>> +>> two + +>>- one +>> + > > two + +- foo + + bar + +- foo + + + bar + +- ``` + foo + + + bar + ``` + +1. foo + + ``` + bar + ``` + + baz + + > bam + +- foo + + bar + + 10. foo + + bar + + indented code + +paragraph + + more code + +1. indented code + + paragraph + + more code + +1. indented code + + paragraph + + more code + + foo + +bar + +- foo + + bar + +- foo + + bar + + 1. A paragraph + with two lines. + + indented code + + > A block quote. + + 1. A paragraph + with two lines. + + indented code + + > A block quote. + + 1. A paragraph + with two lines. + + indented code + + > A block quote. + + 1. A paragraph + with two lines. + + indented code + + > A block quote. + + 1. A paragraph +with two lines. + + indented code + + > A block quote. + + 1. A paragraph + with two lines. + +> 1. > Blockquote +continued here. + +> 1. > Blockquote +> continued here. + +- foo + - bar + - baz + +- foo + - bar + - baz + +10) foo + - bar + +10) foo + - bar + +- - foo + +1. - 2. foo + +- foo +- +- bar + +- + +- foo +- bar ++ baz + +1. foo +2. bar +3) baz + +- foo + +- bar + + +- baz + +- foo + + + bar +- baz + +- foo + - bar + - baz + + + bim + +- foo +- bar + + +- baz +- bim + +- foo + + notcode + +- foo + + + code + +- a + - b + - c + - d + - e + - f +- g + +- a +- b + +- c + +* a +* + +* c + +- a +- b + + c +- d + +- a +- b + + [ref]: /url +- d + +- a +- ``` + b + + + ``` +- c + +- a + - b + + c +- d + +* a + > b + > +* c + +- a + > b + ``` + c + ``` +- d + +- a + +- a + - b + +* foo + * bar + + baz + +- a + - b + - c + +- d + - e + - f + +`hi`lo` + +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ + +\→\A\a\ \3\φ\« + +\*not emphasized* +\<br/> not a tag +\[not a link](/foo) +\`not code` +1\. not a list +\* not a list +\# not a header +\[foo]: /url "not a reference" + +\\*emphasis* + +foo\ +bar + +`` \[\` `` + + \[\] + +~~~ +\[\] +~~~ + +<http://example.com?find=\*> + +<a href="/bar\/)"> + +[foo](/bar\* "ti\*tle") + +[foo] + +[foo]: /bar\* "ti\*tle" + +``` foo\+bar +foo +``` + + & © Æ Ď ¾ ℋ ⅆ ∲ + +# Ӓ Ϡ � + +" ആ ಫ + +  &x; &#; &#x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?; + +© + +&MadeUpEntity; + +<a href="öö.html"> + +[foo](/föö "föö") + +[foo] + +[foo]: /föö "föö" + +``` föö +foo +``` + +`föö` + + föfö + +`foo` + +`` foo ` bar `` + +` `` ` + +`` +foo +`` + +`foo bar + baz` + +`foo `` bar` + +`foo\`bar` + +*foo`*` + +[not a `link](/foo`) + +<http://foo.bar.`baz>` + +<a href="`">` + +```foo`` + +`foo + +*foo bar* + +_foo bar_ + +**foo bar** + +__foo bar__ + +*foo +bar* + +_foo +bar_ + +**foo +bar** + +__foo +bar__ + +*foo [bar](/url)* + +_foo [bar](/url)_ + +**foo [bar](/url)** + +__foo [bar](/url)__ + +*foo [bar*](/url) + +_foo [bar_](/url) + +**<a href="**"> + +__<a href="__"> + +*a `*`* + +_a `_`_ + +**a<http://foo.bar?q=**> + +__a<http://foo.bar?q=__> + +and * foo bar* + +_ foo bar_ + +and ** foo bar** + +__ foo bar__ + +and *foo bar * + +and _foo bar _ + +and **foo bar ** + +and __foo bar __ + +****hi**** + +_____hi_____ + +Sign here: _________ + +** is not an empty emphasis + +**** is not an empty strong emphasis + +*here is a \** + +__this is a double underscore (`__`)__ + +*_* + +_*_ + +*__* + +_**_ + +foo*bar*baz + +foo_bar_baz + +foo__bar__baz + +_foo_bar_baz_ + +11*15*32 + +11_15_32 + +_foo_bar_baz_ + +__foo__bar__baz__ + +***foo bar*** + +___foo bar___ + +***foo** bar* + +___foo__ bar_ + +***foo* bar** + +___foo_ bar__ + +*foo **bar*** + +_foo __bar___ + +**foo *bar*** + +__foo _bar___ + +*foo **bar*** + +_foo __bar___ + +*foo *bar* baz* + +_foo _bar_ baz_ + +**foo **bar** baz** + +__foo __bar__ baz__ + +*foo **bar** baz* + +_foo __bar__ baz_ + +**foo *bar* baz** + +__foo _bar_ baz__ + +**foo, *bar*, baz** + +__foo, _bar_, baz__ + +*foo**bar**baz* + +**foo*bar*baz** + +**foo** + +****foo**** + +*_foo_* + +**__foo__** + +*foo** + +*foo *bar** + +**foo*** + +***foo* bar*** + +***foo** bar*** + +*foo**bar*** + +*foo**** + +*foo** + +**foo* + +*foo *bar** + +**foo* bar* + +*bar*** + +***foo* + +**bar*** + +***foo** + +***foo *bar* + +[link](/uri "title") + +[link](/uri) + +[link]() + +[link](<>) + +[link](/my uri) + +[link](</my uri>) + +[link](foo +bar) + +[link]((foo)and(bar)) + +[link](foo(and(bar))) + +[link](foo(and\(bar\))) + +[link](<foo(and(bar))>) + +[link](foo\)\:) + +[link](foo%20bä) + +[link]("title") + +[link](/url "title") +[link](/url 'title') +[link](/url (title)) + +[link](/url "title \""") + +[link](/url "title "and" title") + +[link](/url 'title "and" title') + +[link]( /uri + "title" ) + +[link] (/uri) + +[foo <bar attr="](baz)"> + +[foo][bar] + +[bar]: /url "title" + +[*foo\!*][bar] + +[bar]: /url "title" + +[foo][BaR] + +[bar]: /url "title" + +[Толпой][Толпой] is a Russian word. + +[ТОЛПОЙ]: /url + +[Foo + bar]: /url + +[Baz][Foo bar] + +[foo] [bar] + +[bar]: /url "title" + +[foo] +[bar] + +[bar]: /url "title" + +[foo]: /url1 + +[foo]: /url2 + +[bar][foo] + +[bar][foo\!] + +[foo!]: /url + +[foo][] + +[foo]: /url "title" + +[*foo* bar][] + +[*foo* bar]: /url "title" + +[Foo][] + +[foo]: /url "title" + +[foo] +[] + +[foo]: /url "title" + +[foo] + +[foo]: /url "title" + +[*foo* bar] + +[*foo* bar]: /url "title" + +[[*foo* bar]] + +[*foo* bar]: /url "title" + +[Foo] + +[foo]: /url "title" + +\[foo] + +[foo]: /url "title" + +[foo*]: /url + +*[foo*] + +[foo`]: /url + +[foo`]` + +[[[foo]]] + +[[[foo]]]: /url + +[[[foo]]] + +[[[foo]]]: /url1 +[foo]: /url2 + +[\[foo] + +[\[foo]: /url + +[foo][bar] + +[foo]: /url1 +[bar]: /url2 + +[foo][bar][baz] + +[baz]: /url + +[foo][bar][baz] + +[baz]: /url1 +[bar]: /url2 + +[foo][bar][baz] + +[baz]: /url1 +[foo]: /url2 + +![foo](/url "title") + +![foo *bar*] + +[foo *bar*]: train.jpg "train & tracks" + +![foo *bar*][] + +[foo *bar*]: train.jpg "train & tracks" + +![foo *bar*][foobar] + +[FOOBAR]: train.jpg "train & tracks" + +![foo](train.jpg) + +My ![foo bar](/path/to/train.jpg "title" ) + +![foo](<url>) + +![](/url) + +![foo] [bar] + +[bar]: /url + +![foo] [bar] + +[BAR]: /url + +![foo][] + +[foo]: /url "title" + +![*foo* bar][] + +[*foo* bar]: /url "title" + +![Foo][] + +[foo]: /url "title" + +![foo] +[] + +[foo]: /url "title" + +![foo] + +[foo]: /url "title" + +![*foo* bar] + +[*foo* bar]: /url "title" + +![[foo]] + +[[foo]]: /url "title" + +![Foo] + +[foo]: /url "title" + +\!\[foo] + +[foo]: /url "title" + +\![foo] + +[foo]: /url "title" + +<http://foo.bar.baz> + +<http://foo.bar.baz?q=hello&id=22&boolean> + +<irc://foo.bar:2233/baz> + +<MAILTO:FOO@BAR.BAZ> + +<http://foo.bar/baz bim> + +<foo@bar.example.com> + +<foo+special@Bar.baz-bar0.com> + +<> + +<heck://bing.bong> + +< http://foo.bar > + +<foo.bar.baz> + +<localhost:5001/foo> + +http://example.com + +foo@bar.example.com + +<a><bab><c2c> + +<a/><b2/> + +<a /><b2 +data="foo" > + +<a foo="bar" bam = 'baz <em>"</em>' +_boolean zoop:33=zoop:33 /> + +<33> <__> + +<a h*#ref="hi"> + +<a href="hi'> <a href=hi'> + +< a>< +foo><bar/ > + +<a href='bar'title=title> + +</a> +</foo > + +</a href="foo"> + +foo <!-- this is a +comment - with hyphen --> + +foo <!-- not a comment -- two hyphens --> + +foo <?php echo $a; ?> + +foo <!ELEMENT br EMPTY> + +foo <![CDATA[>&<]]> + +<a href="ö"> + +<a href="\*"> + +<a href="\""> + +foo +baz + +foo\ +baz + +foo +baz + +foo + bar + +foo\ + bar + +*foo +bar* + +*foo\ +bar* + +`code +span` + +`code\ +span` + +<a href="foo +bar"> + +<a href="foo\ +bar"> + +foo +baz + +foo + baz + +hello $.;'there + +Foo χρῆν + +Multiple spaces + @@ -4095,21 +4095,39 @@ for efficient parsing strategies that do not backtrack: (c) it is not followed by an ASCII alphanumeric character. 9. Emphasis begins with a delimiter that [can open - emphasis](#can-open-emphasis) and includes inlines parsed - sequentially until a delimiter that [can close + emphasis](#can-open-emphasis) and ends with a delimiter that [can close emphasis](#can-close-emphasis), and that uses the same - character (`_` or `*`) as the opening delimiter, is reached. + character (`_` or `*`) as the opening delimiter. The inlines + between the open delimiter and the closing delimiter are the + contents of the emphasis inline. 10. Strong emphasis begins with a delimiter that [can open strong - emphasis](#can-open-strong-emphasis) and includes inlines parsed - sequentially until a delimiter that [can close strong - emphasis](#can-close-strong-emphasis), and that uses the - same character (`_` or `*`) as the opening delimiter, is reached. + emphasis](#can-open-strong-emphasis) and ends with a delimiter that + [can close strong emphasis](#can-close-strong-emphasis), and that uses the + same character (`_` or `*`) as the opening delimiter. The inlines + between the open delimiter and the closing delimiter are the + contents of the strong emphasis inline. -11. In case of ambiguity, strong emphasis takes precedence. Thus, - `**foo**` is `<strong>foo</strong>`, not `<em><em>foo</em></em>`, - and `***foo***` is `<strong><em>foo</em></strong>`, not - `<em><strong>foo</strong></em>` or `<em><em><em>foo</em></em></em>`. +Where rules 1--10 above are compatible with multiple parsings, +the following principles resolve ambiguity: + +11. An interpretation `<strong>...</strong>` is always preferred to + `<em><em>...</em></em>`. + +12. An interpretation `<strong><em>...</em></strong>` is always + preferred to `<em><strong>..</strong></em>`. + +13. When two potential emphasis or strong emphasis spans overlap, + the first takes precedence. Thus, for example, `*foo _bar* baz_` + is parsed as `<em>foo _bar</em> baz_` rather than + `*foo <em>bar* baz</em>`. + +14. Inline code spans, links, images, and HTML tags group more tightly + than emphasis. So, when there is a choice between an interpretation + that contains one of these elements and one that does not, the + former always wins. Thus, for example, `*[foo*](bar)` is + parsed as `*<a href="bar">foo*</a>` rather than as + `<em>[foo</em>](bar)`. These rules can be illustrated through a series of examples. @@ -4689,6 +4707,15 @@ We retain symmetry in these cases: <p><em><em>foo</em> bar</em></p> . +Note that this is not a case of strong emphasis, +since the interior `*` closes regular emphasis: + +. +**foo bar* baz** +. +<p><em><em>foo bar</em> baz</em>*</p> +. + More cases with mismatched delimiters: . @@ -4721,6 +4748,67 @@ More cases with mismatched delimiters: <p>***foo <em>bar</em></p> . +The following case illustrates rule 13: + +. +*foo _bar* baz_ +. +<p><em>foo _bar</em> baz_</p> +. + +The following cases illustrate rule 14: + +. +*[foo*](bar) +. +<p>*<a href="bar">foo*</a></p> +. + +. +*![foo*](bar) +. +<p>*<img src="bar" alt="foo*" /></p> +. + +. +*<img src="foo" title="*"/> +. +<p>*<img src="foo" title="*"/></p> +. + +. +*a`a*` +. +<p>*a<code>a*</code></p> +. + +Here is a tricky case that can be a performance problem with some +parsers: + +. +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +. +<p>*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a</p> +. + ## Links A link contains a [link label](#link-label) (the visible text), diff --git a/src/blocks.c b/src/blocks.c index 5b38116..c0c7e23 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -47,13 +47,13 @@ bool is_blank(strbuf *s, int offset) { while (offset < s->size) { switch (s->ptr[offset]) { - case '\n': - return true; - case ' ': - offset++; - break; - default: - return false; + case '\n': + return true; + case ' ': + offset++; + break; + default: + return false; } } @@ -63,17 +63,17 @@ bool is_blank(strbuf *s, int offset) static inline bool can_contain(int parent_type, int child_type) { return ( parent_type == BLOCK_DOCUMENT || - parent_type == BLOCK_BQUOTE || - parent_type == BLOCK_LIST_ITEM || - (parent_type == BLOCK_LIST && child_type == BLOCK_LIST_ITEM) ); + parent_type == BLOCK_BQUOTE || + parent_type == BLOCK_LIST_ITEM || + (parent_type == BLOCK_LIST && child_type == BLOCK_LIST_ITEM) ); } static inline bool accepts_lines(int block_type) { return (block_type == BLOCK_PARAGRAPH || - block_type == BLOCK_ATX_HEADER || - block_type == BLOCK_INDENTED_CODE || - block_type == BLOCK_FENCED_CODE); + block_type == BLOCK_ATX_HEADER || + block_type == BLOCK_INDENTED_CODE || + block_type == BLOCK_FENCED_CODE); } static void add_line(node_block* node_block, chunk *ch, int offset) @@ -156,77 +156,77 @@ static void finalize(node_block* b, int line_number) } switch (b->tag) { - case BLOCK_PARAGRAPH: - pos = 0; - while (strbuf_at(&b->string_content, 0) == '[' && - (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) { - - strbuf_drop(&b->string_content, pos); - } - if (is_blank(&b->string_content, 0)) { - b->tag = BLOCK_REFERENCE_DEF; - } - break; + case BLOCK_PARAGRAPH: + pos = 0; + while (strbuf_at(&b->string_content, 0) == '[' && + (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) { - case BLOCK_INDENTED_CODE: - remove_trailing_blank_lines(&b->string_content); - strbuf_putc(&b->string_content, '\n'); - break; - - case BLOCK_FENCED_CODE: - // first line of contents becomes info - firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); - - strbuf_init(&b->as.code.info, 0); - houdini_unescape_html_f( - &b->as.code.info, - b->string_content.ptr, - firstlinelen + strbuf_drop(&b->string_content, pos); + } + if (is_blank(&b->string_content, 0)) { + b->tag = BLOCK_REFERENCE_DEF; + } + break; + + case BLOCK_INDENTED_CODE: + remove_trailing_blank_lines(&b->string_content); + strbuf_putc(&b->string_content, '\n'); + break; + + case BLOCK_FENCED_CODE: + // first line of contents becomes info + firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); + + strbuf_init(&b->as.code.info, 0); + houdini_unescape_html_f( + &b->as.code.info, + b->string_content.ptr, + firstlinelen ); - strbuf_drop(&b->string_content, firstlinelen + 1); + strbuf_drop(&b->string_content, firstlinelen + 1); - strbuf_trim(&b->as.code.info); - strbuf_unescape(&b->as.code.info); - break; + strbuf_trim(&b->as.code.info); + strbuf_unescape(&b->as.code.info); + break; - case BLOCK_LIST: // determine tight/loose status - b->as.list.tight = true; // tight by default - item = b->children; + case BLOCK_LIST: // determine tight/loose status + b->as.list.tight = true; // tight by default + item = b->children; - while (item) { - // check for non-final non-empty list item ending with blank line: - if (item->last_line_blank && item->next) { + while (item) { + // check for non-final non-empty list item ending with blank line: + if (item->last_line_blank && item->next) { + b->as.list.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between them: + subitem = item->children; + while (subitem) { + if (ends_with_blank_line(subitem) && + (item->next || subitem->next)) { b->as.list.tight = false; break; } - // recurse into children of list item, to see if there are - // spaces between them: - subitem = item->children; - while (subitem) { - if (ends_with_blank_line(subitem) && - (item->next || subitem->next)) { - b->as.list.tight = false; - break; - } - subitem = subitem->next; - } - if (!(b->as.list.tight)) { - break; - } - item = item->next; + subitem = subitem->next; } + if (!(b->as.list.tight)) { + break; + } + item = item->next; + } - break; + break; - default: - break; + default: + break; } } // Add a node_block as child of another. Return pointer to child. static node_block* add_child(node_block* parent, - int block_type, int start_line, int start_column) + int block_type, int start_line, int start_column) { assert(parent); @@ -276,14 +276,14 @@ void stmd_free_nodes(node_block *e) void process_inlines(node_block* cur, reference_map *refmap) { switch (cur->tag) { - case BLOCK_PARAGRAPH: - case BLOCK_ATX_HEADER: - case BLOCK_SETEXT_HEADER: - cur->inline_content = parse_inlines(&cur->string_content, refmap); - break; - - default: - break; + case BLOCK_PARAGRAPH: + case BLOCK_ATX_HEADER: + case BLOCK_SETEXT_HEADER: + cur->inline_content = parse_inlines(&cur->string_content, refmap); + break; + + default: + break; } node_block *child = cur->children; @@ -355,9 +355,9 @@ static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr) static int lists_match(struct ListData *list_data, struct ListData *item_data) { return (list_data->list_type == item_data->list_type && - list_data->delimiter == item_data->delimiter && - // list_data->marker_offset == item_data.marker_offset && - list_data->bullet_char == item_data->bullet_char); + list_data->delimiter == item_data->delimiter && + // list_data->marker_offset == item_data.marker_offset && + list_data->bullet_char == item_data->bullet_char); } static node_block *finalize_document(node_block *document, int linenum) @@ -486,7 +486,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } else if (container->tag == BLOCK_LIST_ITEM) { if (indent >= container->as.list.marker_offset + - container->as.list.padding) { + container->as.list.padding) { offset += container->as.list.marker_offset + container->as.list.padding; } else if (blank) { @@ -506,7 +506,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } } else if (container->tag == BLOCK_ATX_HEADER || - container->tag == BLOCK_SETEXT_HEADER) { + container->tag == BLOCK_SETEXT_HEADER) { // a header can never contain more than one line all_matched = false; @@ -550,7 +550,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // unless last matched container is code node_block, try new container starts: while (container->tag != BLOCK_FENCED_CODE && container->tag != BLOCK_INDENTED_CODE && - container->tag != BLOCK_HTML) { + container->tag != BLOCK_HTML) { first_nonspace = offset; while (peek_at(&input, first_nonspace) == ' ') @@ -603,17 +603,17 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // note, we don't adjust offset because the tag is part of the text } else if (container->tag == BLOCK_PARAGRAPH && - (lev = scan_setext_header_line(&input, first_nonspace)) && - // check that there is only one line in the paragraph: - strbuf_strrchr(&container->string_content, '\n', - strbuf_len(&container->string_content) - 2) < 0) { + (lev = scan_setext_header_line(&input, first_nonspace)) && + // check that there is only one line in the paragraph: + strbuf_strrchr(&container->string_content, '\n', + strbuf_len(&container->string_content) - 2) < 0) { container->tag = BLOCK_SETEXT_HEADER; container->as.header.level = lev; offset = input.len - 1; } else if (!(container->tag == BLOCK_PARAGRAPH && !all_matched) && - (matched = scan_hrule(&input, first_nonspace))) { + (matched = scan_hrule(&input, first_nonspace))) { // it's only now that we know the line is not part of a setext header: container = add_child(container, BLOCK_HRULE, line_number, first_nonspace + 1); @@ -646,16 +646,16 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) data->marker_offset = indent; if (container->tag != BLOCK_LIST || - !lists_match(&container->as.list, data)) { + !lists_match(&container->as.list, data)) { container = add_child(container, BLOCK_LIST, line_number, - first_nonspace + 1); + first_nonspace + 1); memcpy(&container->as.list, data, sizeof(*data)); } // add the list item container = add_child(container, BLOCK_LIST_ITEM, line_number, - first_nonspace + 1); + first_nonspace + 1); /* TODO: static */ memcpy(&container->as.list, data, sizeof(*data)); free(data); @@ -684,11 +684,11 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // lists or breaking out of lists. we also don't set last_line_blank // on an empty list item. container->last_line_blank = (blank && - container->tag != BLOCK_BQUOTE && - container->tag != BLOCK_FENCED_CODE && - !(container->tag == BLOCK_LIST_ITEM && - container->children == NULL && - container->start_line == line_number)); + container->tag != BLOCK_BQUOTE && + container->tag != BLOCK_FENCED_CODE && + !(container->tag == BLOCK_LIST_ITEM && + container->children == NULL && + container->start_line == line_number)); node_block *cont = container; while (cont->parent) { @@ -697,10 +697,10 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } if (cur != last_matched_container && - container == last_matched_container && - !blank && - cur->tag == BLOCK_PARAGRAPH && - strbuf_len(&cur->string_content) > 0) { + container == last_matched_container && + !blank && + cur->tag == BLOCK_PARAGRAPH && + strbuf_len(&cur->string_content) > 0) { add_line(cur, &input, offset); @@ -721,7 +721,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) matched = 0; if (indent <= 3 && - peek_at(&input, first_nonspace) == container->as.code.fence_char) { + peek_at(&input, first_nonspace) == container->as.code.fence_char) { int fence_len = scan_close_code_fence(&input, first_nonspace); if (fence_len > container->as.code.fence_length) matched = 1; @@ -767,4 +767,3 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) *curptr = container; } } - diff --git a/src/buffer.c b/src/buffer.c index 7c2b86b..2e32720 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -15,8 +15,8 @@ unsigned char strbuf__initbuf[1]; unsigned char strbuf__oom[1]; -#define ENSURE_SIZE(b, d) \ - if ((d) > buf->asize && strbuf_grow(b, (d)) < 0)\ +#define ENSURE_SIZE(b, d) \ + if ((d) > buf->asize && strbuf_grow(b, (d)) < 0) \ return -1; void strbuf_init(strbuf *buf, int initial_size) @@ -111,8 +111,8 @@ int strbuf_set(strbuf *buf, const unsigned char *data, int len) int strbuf_sets(strbuf *buf, const char *string) { return strbuf_set(buf, - (const unsigned char *)string, - string ? strlen(string) : 0); + (const unsigned char *)string, + string ? strlen(string) : 0); } int strbuf_putc(strbuf *buf, int c) @@ -155,7 +155,7 @@ int strbuf_vprintf(strbuf *buf, const char *format, va_list ap) (char *)buf->ptr + buf->size, buf->asize - buf->size, format, args - ); + ); if (len < 0) { free(buf->ptr); @@ -351,4 +351,3 @@ extern void strbuf_unescape(strbuf *buf) strbuf_truncate(buf, w); } - diff --git a/src/inlines.c b/src/inlines.c index 71d75e9..07a75f9 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -10,11 +10,19 @@ #include "scanners.h" #include "inlines.h" +typedef struct InlineStack { + struct InlineStack *previous; + node_inl *first_inline; + int delim_count; + char delim_char; +} inline_stack; + typedef struct Subject { chunk input; int pos; int label_nestlevel; reference_map *refmap; + inline_stack *emphasis_openers; } subject; static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); @@ -108,26 +116,26 @@ extern void free_inlines(node_inl* e) node_inl * next; while (e != NULL) { switch (e->tag){ - case INL_STRING: - case INL_RAW_HTML: - case INL_CODE: - chunk_free(&e->content.literal); - break; - case INL_LINEBREAK: - case INL_SOFTBREAK: - break; - case INL_LINK: - case INL_IMAGE: - free(e->content.linkable.url); - free(e->content.linkable.title); - free_inlines(e->content.linkable.label); - break; - case INL_EMPH: - case INL_STRONG: - free_inlines(e->content.inlines); - break; - default: - break; + case INL_STRING: + case INL_RAW_HTML: + case INL_CODE: + chunk_free(&e->content.literal); + break; + case INL_LINEBREAK: + case INL_SOFTBREAK: + break; + case INL_LINK: + case INL_IMAGE: + free(e->content.linkable.url); + free(e->content.linkable.title); + free_inlines(e->content.linkable.label); + break; + case INL_EMPH: + case INL_STRONG: + free_inlines(e->content.inlines); + break; + default: + break; } next = e->next; free(e); @@ -158,6 +166,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap) e->pos = 0; e->label_nestlevel = 0; e->refmap = refmap; + e->emphasis_openers = NULL; chunk_rtrim(&e->input); } @@ -170,6 +179,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap) e->pos = 0; e->label_nestlevel = 0; e->refmap = refmap; + e->emphasis_openers = NULL; chunk_rtrim(&e->input); } @@ -262,12 +272,11 @@ static node_inl* handle_backticks(subject *subj) } // Scan ***, **, or * and return number scanned, or 0. -// Don't advance position. +// Advances position. static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) { int numdelims = 0; char char_before, char_after; - int startpos = subj->pos; char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1); while (peek_char(subj) == c) { @@ -281,135 +290,106 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) *can_open = *can_open && !isalnum(char_before); *can_close = *can_close && !isalnum(char_after); } - subj->pos = startpos; return numdelims; } +static void free_openers(subject* subj, inline_stack* istack) +{ + inline_stack * tempstack; + while (subj->emphasis_openers != istack) { + tempstack = subj->emphasis_openers; + subj->emphasis_openers = subj->emphasis_openers->previous; + free(tempstack); + } +} + // Parse strong/emph or a fallback. // Assumes the subject has '_' or '*' at the current position. -static node_inl* handle_strong_emph(subject* subj, char c) +static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) { bool can_open, can_close; - node_inl * result = NULL; - node_inl ** last = malloc(sizeof(node_inl *)); - node_inl * new; - node_inl * il; - node_inl * first_head = NULL; - node_inl * first_close = NULL; - int first_close_delims = 0; int numdelims; - - *last = NULL; + int useDelims; + inline_stack * istack; + node_inl * inl; + node_inl * emph; + node_inl * inl_text; numdelims = scan_delims(subj, c, &can_open, &can_close); - subj->pos += numdelims; - - new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); - *last = new; - first_head = new; - result = new; - - if (!can_open || numdelims == 0) { - goto done; - } - - switch (numdelims) { - case 1: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (numdelims >= 1 && can_close) { - subj->pos += 1; - first_head->tag = INL_EMPH; - chunk_free(&first_head->content.literal); - first_head->content.inlines = first_head->next; - first_head->next = NULL; - goto done; - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - case 2: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (numdelims >= 2 && can_close) { - subj->pos += 2; - first_head->tag = INL_STRONG; - chunk_free(&first_head->content.literal); - first_head->content.inlines = first_head->next; - first_head->next = NULL; - goto done; - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - case 3: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (can_close && numdelims >= 1 && numdelims <= 3 && - numdelims != first_close_delims) { - new = make_str(chunk_dup(&subj->input, subj->pos, numdelims)); - append_inlines(*last, new); - *last = new; - if (first_close_delims == 1 && numdelims > 2) { - numdelims = 2; - } else if (first_close_delims == 2) { - numdelims = 1; - } else if (numdelims == 3) { - // If we opened with ***, we interpret it as ** followed by * - // giving us <strong><em> - numdelims = 1; - } - subj->pos += numdelims; - if (first_close) { - first_head->tag = first_close_delims == 1 ? INL_STRONG : INL_EMPH; - chunk_free(&first_head->content.literal); - first_head->content.inlines = - make_inlines(first_close_delims == 1 ? INL_EMPH : INL_STRONG, - first_head->next); - - il = first_head->next; - while (il->next && il->next != first_close) { - il = il->next; - } - il->next = NULL; - - first_head->content.inlines->next = first_close->next; - - il = first_head->content.inlines; - while (il->next && il->next != *last) { - il = il->next; - } - il->next = NULL; - free_inlines(*last); - - first_close->next = NULL; - free_inlines(first_close); - first_head->next = NULL; - goto done; - } else { - first_close = *last; - first_close_delims = numdelims; - } - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - default: - goto done; + + if (can_close) + { + // walk the stack and find a matching opener, if there is one + istack = subj->emphasis_openers; + while (true) + { + if (istack == NULL) + goto cannotClose; + + if (istack->delim_char == c) + break; + + istack = istack->previous; + } + + // calculate the actual number of delimeters used from this closer + useDelims = istack->delim_count; + if (useDelims == 3) useDelims = numdelims == 3 ? 1 : numdelims; + else if (useDelims > numdelims) useDelims = 1; + + if (istack->delim_count == useDelims) + { + // the opener is completely used up - remove the stack entry and reuse the inline element + inl = istack->first_inline; + inl->tag = useDelims == 1 ? INL_EMPH : INL_STRONG; + chunk_free(&inl->content.literal); + inl->content.inlines = inl->next; + inl->next = NULL; + + // remove this opener and all later ones from stack: + free_openers(subj, istack->previous); + *last = inl; + } + else + { + // the opener will only partially be used - stack entry remains (truncated) and a new inline is added. + inl = istack->first_inline; + istack->delim_count -= useDelims; + inl->content.literal.len = istack->delim_count; + + emph = useDelims == 1 ? make_emph(inl->next) : make_strong(inl->next); + inl->next = emph; + + // remove all later openers from stack: + free_openers(subj, istack); + + *last = emph; + } + + // if the closer was not fully used, move back a char or two and try again. + if (useDelims < numdelims) + { + subj->pos = subj->pos - numdelims + useDelims; + return handle_strong_emph(subj, c, last); + } + + return NULL; // make_str(chunk_literal("")); + } + +cannotClose: + inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); + + if (can_open) + { + istack = (inline_stack*)malloc(sizeof(inline_stack)); + istack->delim_count = numdelims; + istack->delim_char = c; + istack->first_inline = inl_text; + istack->previous = subj->emphasis_openers; + subj->emphasis_openers = istack; } -done: - free(last); - return result; + return inl_text; } // Parse backslash-escape or just a backslash, returning an inline. @@ -438,9 +418,9 @@ static node_inl* handle_entity(subject* subj) advance(subj); len = houdini_unescape_ent(&ent, - subj->input.data + subj->pos, - subj->input.len - subj->pos - ); + subj->input.data + subj->pos, + subj->input.len - subj->pos + ); if (len == 0) return make_str(chunk_literal("&")); @@ -513,8 +493,8 @@ unsigned char *clean_title(chunk *title) // remove surrounding quotes if any: if ((first == '\'' && last == '\'') || - (first == '(' && last == ')') || - (first == '"' && last == '"')) { + (first == '(' && last == ')') || + (first == '"' && last == '"')) { houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); } else { houdini_unescape_html_f(&buf, title->data, title->len); @@ -542,7 +522,7 @@ static node_inl* handle_pointy_brace(subject* subj) return make_autolink( make_str_with_entities(&contents), contents, 0 - ); + ); } // next try to match an email autolink @@ -552,9 +532,9 @@ static node_inl* handle_pointy_brace(subject* subj) subj->pos += matchlen; return make_autolink( - make_str_with_entities(&contents), - contents, 1 - ); + make_str_with_entities(&contents), + contents, 1 + ); } // finally, try to match an html tag @@ -598,30 +578,30 @@ static int link_label(subject* subj, chunk *raw_label) char c; while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { switch (c) { - case '`': - tmp = handle_backticks(subj); - free_inlines(tmp); - break; - case '<': - tmp = handle_pointy_brace(subj); - free_inlines(tmp); - break; - case '[': // nested [] - nestlevel++; - advance(subj); - break; - case ']': // nested [] - nestlevel--; - advance(subj); - break; - case '\\': - advance(subj); - if (ispunct(peek_char(subj))) { - advance(subj); - } - break; - default: + case '`': + tmp = handle_backticks(subj); + free_inlines(tmp); + break; + case '<': + tmp = handle_pointy_brace(subj); + free_inlines(tmp); + break; + case '[': // nested [] + nestlevel++; + advance(subj); + break; + case ']': // nested [] + nestlevel--; + advance(subj); + break; + case '\\': + advance(subj); + if (ispunct(peek_char(subj))) { advance(subj); + } + break; + default: + advance(subj); } } if (c == ']') { @@ -657,8 +637,8 @@ static node_inl* handle_left_bracket(subject* subj) if (found_label) { if (peek_char(subj) == '(' && - ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && - ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { + ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && + ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { // try to parse an explicit link: starturl = subj->pos + 1 + sps; // after ( @@ -684,8 +664,8 @@ static node_inl* handle_left_bracket(subject* subj) subj->pos = endlabel; lab = parse_chunk_inlines(&rawlabel, subj->refmap); result = append_inlines(make_str(chunk_literal("[")), - append_inlines(lab, - make_str(chunk_literal("]")))); + append_inlines(lab, + make_str(chunk_literal("]")))); return result; } } else { @@ -714,7 +694,7 @@ static node_inl* handle_left_bracket(subject* subj) subj->pos = endlabel; lab = parse_chunk_inlines(&rawlabel, subj->refmap); result = append_inlines(make_str(chunk_literal("[")), - append_inlines(lab, make_str(chunk_literal("]")))); + append_inlines(lab, make_str(chunk_literal("]")))); } return result; } @@ -736,8 +716,8 @@ static node_inl* handle_newline(subject *subj) advance(subj); } if (nlpos > 1 && - peek_at(subj, nlpos - 1) == ' ' && - peek_at(subj, nlpos - 2) == ' ') { + peek_at(subj, nlpos - 1) == ' ' && + peek_at(subj, nlpos - 2) == ' ') { return make_linebreak(); } else { return make_softbreak(); @@ -754,9 +734,22 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { node_inl* result = NULL; node_inl** last = &result; + node_inl* first = NULL; while ((*f)(subj) && parse_inline(subj, last)) { + if (!first) { + first = *last; + } + } + + inline_stack* istack = subj->emphasis_openers; + inline_stack* temp; + while (istack != NULL) { + temp = istack->previous; + free(istack); + istack = temp; } - return result; + + return first; } node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap) @@ -812,69 +805,62 @@ static int parse_inline(subject* subj, node_inl ** last) return 0; } switch(c){ - case '\n': - new = handle_newline(subj); - break; - case '`': - new = handle_backticks(subj); - break; - case '\\': - new = handle_backslash(subj); - break; - case '&': - new = handle_entity(subj); - break; - case '<': - new = handle_pointy_brace(subj); - break; - case '_': - if (subj->pos > 0) { - unsigned char prev = peek_at(subj, subj->pos - 1); - if (isalnum(prev) || prev == '_') { - new = make_str(chunk_literal("_")); - advance(subj); - break; - } - } - - new = handle_strong_emph(subj, '_'); - break; - case '*': - new = handle_strong_emph(subj, '*'); - break; - case '[': + case '\n': + new = handle_newline(subj); + break; + case '`': + new = handle_backticks(subj); + break; + case '\\': + new = handle_backslash(subj); + break; + case '&': + new = handle_entity(subj); + break; + case '<': + new = handle_pointy_brace(subj); + break; + case '_': + new = handle_strong_emph(subj, '_', last); + break; + case '*': + new = handle_strong_emph(subj, '*', last); + break; + case '[': + new = handle_left_bracket(subj); + break; + case '!': + advance(subj); + if (peek_char(subj) == '[') { new = handle_left_bracket(subj); - break; - case '!': - advance(subj); - if (peek_char(subj) == '[') { - new = handle_left_bracket(subj); - if (new != NULL && new->tag == INL_LINK) { - new->tag = INL_IMAGE; - } else { - new = append_inlines(make_str(chunk_literal("!")), new); - } + if (new != NULL && new->tag == INL_LINK) { + new->tag = INL_IMAGE; } else { - new = make_str(chunk_literal("!")); - } - break; - default: - endpos = subject_find_special_char(subj); - contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); - subj->pos = endpos; - - // if we're at a newline, strip trailing spaces. - if (peek_char(subj) == '\n') { - chunk_rtrim(&contents); + new = append_inlines(make_str(chunk_literal("!")), new); } + } else { + new = make_str(chunk_literal("!")); + } + break; + default: + endpos = subject_find_special_char(subj); + contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); + subj->pos = endpos; + + // if we're at a newline, strip trailing spaces. + if (peek_char(subj) == '\n') { + chunk_rtrim(&contents); + } - new = make_str(contents); + new = make_str(contents); } if (*last == NULL) { *last = new; - } else { + } else if (new) { append_inlines(*last, new); + *last = new; } + return 1; } @@ -890,8 +876,8 @@ void spnl(subject* subj) { bool seen_newline = false; while (peek_char(subj) == ' ' || - (!seen_newline && - (seen_newline = peek_char(subj) == '\n'))) { + (!seen_newline && + (seen_newline = peek_char(subj) == '\n'))) { advance(subj); } } @@ -958,4 +944,3 @@ int parse_reference_inline(strbuf *input, reference_map *refmap) reference_create(refmap, &lab, &url, &title); return subj.pos; } - @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) printf(" - CommonMark converter (c) 2014 John MacFarlane\n"); exit(0); } else if ((strcmp(argv[i], "--help") == 0) || - (strcmp(argv[i], "-h") == 0)) { + (strcmp(argv[i], "-h") == 0)) { print_usage(); exit(0); } else if (strcmp(argv[i], "--ast") == 0) { @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) if (fp == NULL) { fprintf(stderr, "Error opening file %s: %s\n", - argv[files[i]], strerror(errno)); + argv[files[i]], strerror(errno)); exit(1); } @@ -74,4 +74,3 @@ int main(int argc, char *argv[]) return 0; } - diff --git a/src/print.c b/src/print.c index 83f8daa..f3bd8e5 100644 --- a/src/print.c +++ b/src/print.c @@ -16,17 +16,17 @@ static void print_str(const unsigned char *s, int len) unsigned char c = s[i]; switch (c) { - case '\n': - printf("\\n"); - break; - case '"': - printf("\\\""); - break; - case '\\': - printf("\\\\"); - break; - default: - putchar((int)c); + case '\n': + printf("\\n"); + break; + case '"': + printf("\\\""); + break; + case '\\': + printf("\\\\"); + break; + default: + putchar((int)c); } } putchar('"'); @@ -116,13 +116,13 @@ static void print_blocks(node_block* b, int indent) data = &(b->as.list); if (data->list_type == ordered) { printf("list (type=ordered tight=%s start=%d delim=%s)\n", - (data->tight ? "true" : "false"), - data->start, - (data->delimiter == parens ? "parens" : "period")); + (data->tight ? "true" : "false"), + data->start, + (data->delimiter == parens ? "parens" : "period")); } else { printf("list (type=bullet tight=%s bullet_char=%c)\n", - (data->tight ? "true" : "false"), - data->bullet_char); + (data->tight ? "true" : "false"), + data->bullet_char); } print_blocks(b->children, indent + 2); break; @@ -148,7 +148,7 @@ static void print_blocks(node_block* b, int indent) break; case BLOCK_FENCED_CODE: printf("fenced_code length=%d info=", - b->as.code.fence_length); + b->as.code.fence_length); print_str(b->as.code.info.ptr, -1); putchar(' '); print_str(b->string_content.ptr, -1); diff --git a/src/references.c b/src/references.c index 3e54b48..975bf81 100644 --- a/src/references.c +++ b/src/references.c @@ -55,7 +55,7 @@ static void add_reference(reference_map *map, reference* ref) while (t) { if (t->hash == ref->hash && - !strcmp((char *)t->label, (char *)ref->label)) { + !strcmp((char *)t->label, (char *)ref->label)) { reference_free(ref); return; } @@ -105,7 +105,7 @@ reference* reference_lookup(reference_map *map, chunk *label) while (ref) { if (ref->hash == hash && - !strcmp((char *)ref->label, (char *)norm)) + !strcmp((char *)ref->label, (char *)norm)) break; ref = ref->next; } @@ -138,4 +138,3 @@ reference_map *reference_map_new(void) memset(map, 0x0, sizeof(reference_map)); return map; } - @@ -103,24 +103,24 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) return -1; switch (length) { - case 1: - uc = str[0]; - break; - case 2: - uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F); - if (uc < 0x80) uc = -1; - break; - case 3: - uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) - + (str[2] & 0x3F); - if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) || - (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; - break; - case 4: - uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) - + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F); - if (uc < 0x10000 || uc >= 0x110000) uc = -1; - break; + case 1: + uc = str[0]; + break; + case 2: + uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F); + if (uc < 0x80) uc = -1; + break; + case 3: + uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + + (str[2] & 0x3F); + if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) || + (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; + break; + case 4: + uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) + + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F); + if (uc < 0x10000 || uc >= 0x110000) uc = -1; + break; } if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE)) @@ -173,7 +173,7 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len) { int32_t c; -#define bufpush(x) \ +#define bufpush(x) \ utf8proc_encode_char(x, dest) while (len > 0) { @@ -190,4 +190,3 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len) len -= char_len; } } - |