From 95a1be5747a430ff408bf88f50452fe936c785b2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Jan 2015 11:48:20 -0800 Subject: Moved continuation checks & finalizers into blocks property. This is a first step towards keeping the code for each kind of block in a central place, rather than spread all over the code base. This is preparatory for a more modular structure, where each type of block has a record describing how it is parsed and finalized. Eventually this will also contain functions for checking for a block start, and metadata that determines how line data should be handled. There is a small performance penalty (about 3%?) but it seems worth it. --- js/lib/blocks.js | 324 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 183 insertions(+), 141 deletions(-) diff --git a/js/lib/blocks.js b/js/lib/blocks.js index cf64652..472fa58 100644 --- a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -8,6 +8,8 @@ var C_NEWLINE = 10; var C_SPACE = 32; var C_OPEN_BRACKET = 91; +var CODE_INDENT = 4; + var InlineParser = require('./inlines'); var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; @@ -223,6 +225,176 @@ var closeUnmatchedBlocks = function() { return true; }; +// 'finalize' is run when the block is closed. +// 'continue' is run to check whether the block is continuing +// at a certain line and offset (e.g. whether a block quote +// contains a `>`). It returns 0 for matched, 1 for not matched, +// and 2 for "we've dealt with this line completely, go to next." 
+var blocks = { + Document: { + continue: function(parser, container, ln, first_nonspace) { + return 0; + }, + finalize: function(parser, block) { + return; + } + }, + List: { + continue: function(parser, container, ln, first_nonspace) { + return 0; + }, + finalize: function(parser, block) { + var item = block._firstChild; + while (item) { + // check for non-final list item ending with blank line: + if (endsWithBlankLine(item) && item._next) { + block._listData.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between any of them: + var subitem = item._firstChild; + while (subitem) { + if (endsWithBlankLine(subitem) && + (item._next || subitem._next)) { + block._listData.tight = false; + break; + } + subitem = subitem._next; + } + item = item._next; + } + } + }, + BlockQuote: { + continue: function(parser, container, ln, first_nonspace) { + if (first_nonspace - parser.offset <= 3 && + ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + parser.offset = first_nonspace + 1; + if (ln.charCodeAt(parser.offset) === C_SPACE) { + parser.offset++; + } + } else { + return 1; + } + return 0; + }, + finalize: function(parser, block) { + return; + } + }, + Item: { + continue: function(parser, container, ln, first_nonspace) { + if (first_nonspace === ln.length) { // blank + parser.offset = first_nonspace; + } else if (first_nonspace - parser.offset >= + container._listData.markerOffset + + container._listData.padding) { + parser.offset += container._listData.markerOffset + + container._listData.padding; + } else { + return 1; + } + return 0; + }, + finalize: function(parser, block) { + return; + } + }, + Header: { + continue: function(parser, container, ln, first_nonspace) { + // a header can never contain > 1 line, so fail to match: + return 1; + }, + finalize: function(parser, block) { + block._string_content = block._strings.join('\n'); + } + }, + HorizontalRule: { + continue: function(parser, container, ln, 
first_nonspace) { + // an hrule can never contain > 1 line, so fail to match: + return 1; + }, + finalize: function(parser, block) { + return; + } + }, + CodeBlock: { + continue: function(parser, container, ln, first_nonspace) { + var indent = first_nonspace - parser.offset; + if (container._isFenced) { // fenced + var match = (indent <= 3 && + ln.charAt(first_nonspace) === container._fenceChar && + ln.slice(first_nonspace).match(reClosingCodeFence)); + if (match && match[0].length >= container._fenceLength) { + // closing fence - we're at end of line, so we can return + parser.finalize(container, parser.lineNumber); + return 2; + } else { + // skip optional spaces of fence offset + var i = container._fenceOffset; + while (i > 0 && ln.charCodeAt(parser.offset) === C_SPACE) { + parser.offset++; + i--; + } + } + } else { // indented + if (indent >= CODE_INDENT) { + parser.offset += CODE_INDENT; + } else if (first_nonspace === ln.length) { // blank + parser.offset = first_nonspace; + } else { + return 1; + } + } + return 0; + }, + finalize: function(parser, block) { + if (block._isFenced) { // fenced + // first line becomes info string + block.info = unescapeString(block._strings[0].trim()); + if (block._strings.length === 1) { + block._literal = ''; + } else { + block._literal = block._strings.slice(1).join('\n') + '\n'; + } + } else { // indented + stripFinalBlankLines(block._strings); + block._literal = block._strings.join('\n') + '\n'; + } + } + }, + HtmlBlock: { + continue: function(parser, container, ln, first_nonspace) { + return (first_nonspace === ln.length ? 1 : 0); + }, + finalize: function(parser, block) { + block._literal = block._strings.join('\n'); + } + }, + Paragraph: { + continue: function(parser, container, ln, first_nonspace) { + return (first_nonspace === ln.length ? 
1 : 0); + }, + finalize: function(parser, block) { + var pos; + block._string_content = block._strings.join('\n'); + + // try parsing the beginning as link reference definitions: + while (block._string_content.charCodeAt(0) === C_OPEN_BRACKET && + (pos = + parser.inlineParser.parseReference(block._string_content, + parser.refmap))) { + block._string_content = block._string_content.slice(pos); + if (isBlank(block._string_content)) { + block.unlink(); + break; + } + } + } + } +}; + // Analyze a line of text and update the document appropriately. // We parse markdown text by calling this on each line of input, // then finalizing the document. @@ -234,7 +406,6 @@ var incorporateLine = function(ln) { var blank; var indent; var i; - var CODE_INDENT = 4; var allClosed; var container = this.doc; @@ -260,94 +431,29 @@ var incorporateLine = function(ln) { match = matchAt(reNonSpace, ln, this.offset); if (match === -1) { first_nonspace = ln.length; - blank = true; } else { first_nonspace = match; - blank = false; } - indent = first_nonspace - this.offset; - - switch (container.type) { - case 'BlockQuote': - if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { - this.offset = first_nonspace + 1; - if (ln.charCodeAt(this.offset) === C_SPACE) { - this.offset++; - } - } else { - all_matched = false; - } - break; - case 'Item': - if (blank) { - this.offset = first_nonspace; - } else if (indent >= container._listData.markerOffset + - container._listData.padding) { - this.offset += container._listData.markerOffset + - container._listData.padding; - } else { - all_matched = false; - } + switch (this.blocks[container.type].continue(this, container, ln, first_nonspace)) { + case 0: // we've matched, keep going break; - - case 'Header': - case 'HorizontalRule': - // a header can never container > 1 line, so fail to match: + case 1: // we've failed to match a block all_matched = false; break; - - case 'CodeBlock': - if (container._isFenced) { // fenced - match = (indent 
<= 3 && - ln.charAt(first_nonspace) === container._fenceChar && - ln.slice(first_nonspace).match(reClosingCodeFence)); - if (match && match[0].length >= container._fenceLength) { - // closing fence - we're at end of line, so we can return - all_matched = false; - this.finalize(container, this.lineNumber); - this.lastLineLength = ln.length - 1; // -1 for newline - return; - } else { - // skip optional spaces of fence offset - i = container._fenceOffset; - while (i > 0 && ln.charCodeAt(this.offset) === C_SPACE) { - this.offset++; - i--; - } - } - } else { // indented - if (indent >= CODE_INDENT) { - this.offset += CODE_INDENT; - } else if (blank) { - this.offset = first_nonspace; - } else { - all_matched = false; - } - } - break; - - case 'HtmlBlock': - if (blank) { - all_matched = false; - } - break; - - case 'Paragraph': - if (blank) { - all_matched = false; - } - break; - + case 2: // we've hit end of line for fenced code close and can return + return; default: + throw 'continue returned illegal value, must be 0, 1, or 2'; } - if (!all_matched) { container = container._parent; // back up to last matching block break; } } + blank = first_nonspace === ln.length; + allClosed = (container === this.oldtip); this.lastMatchedContainer = container; @@ -552,76 +658,11 @@ var incorporateLine = function(ln) { // of paragraphs for reference definitions. Reset the tip to the // parent of the closed block. 
var finalize = function(block, lineNumber) { - var pos; - var above = block._parent; + var above = block._parent || this.top; block._open = false; block.sourcepos[1] = [lineNumber, this.lastLineLength + 1]; - switch (block.type) { - case 'Paragraph': - block._string_content = block._strings.join('\n'); - - // try parsing the beginning as link reference definitions: - while (block._string_content.charCodeAt(0) === C_OPEN_BRACKET && - (pos = this.inlineParser.parseReference(block._string_content, - this.refmap))) { - block._string_content = block._string_content.slice(pos); - if (isBlank(block._string_content)) { - block.unlink(); - break; - } - } - break; - - case 'Header': - block._string_content = block._strings.join('\n'); - break; - - case 'HtmlBlock': - block._literal = block._strings.join('\n'); - break; - - case 'CodeBlock': - if (block._isFenced) { // fenced - // first line becomes info string - block.info = unescapeString(block._strings[0].trim()); - if (block._strings.length === 1) { - block._literal = ''; - } else { - block._literal = block._strings.slice(1).join('\n') + '\n'; - } - } else { // indented - stripFinalBlankLines(block._strings); - block._literal = block._strings.join('\n') + '\n'; - } - break; - - case 'List': - var item = block._firstChild; - while (item) { - // check for non-final list item ending with blank line: - if (endsWithBlankLine(item) && item._next) { - block._listData.tight = false; - break; - } - // recurse into children of list item, to see if there are - // spaces between any of them: - var subitem = item._firstChild; - while (subitem) { - if (endsWithBlankLine(subitem) && - (item._next || subitem._next)) { - block._listData.tight = false; - break; - } - subitem = subitem._next; - } - item = item._next; - } - break; - - default: - break; - } + this.blocks[block.type].finalize(this, block); this.tip = above; }; @@ -679,6 +720,7 @@ var parse = function(input) { function Parser(options){ return { doc: new Document(), + blocks: 
blocks, tip: this.doc, oldtip: this.doc, lineNumber: 0, -- cgit v1.2.3