From e564e0af5c2f7b9732e89707655068e507579a89 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 8 Jan 2015 10:21:30 -0800 Subject: Use linked list instead of arrays for AST. Use the same doubly linked node structure that cmark uses. The primary advantages of this change are (a) simplified code, especially in the renderers, and (b) elimination of the need for recursion, so we can render deeply-nested structures without a stack overflow. A node walker has also been added, for easy AST traversal. * Added js/lib/node.js for nodes. Includes a node walker. * All modules updated to use node structures. * Regularized position information into pos property. * Performance is slightly worse than before, but only marginally, and no doubt there are more optimizations that can be done. --- js/lib/blocks.js | 127 +++++++------------- js/lib/html-renderer.js | 311 +++++++++++++++++++++++++++++------------------- js/lib/index.js | 1 + js/lib/inlines.js | 257 +++++++++++++++++++-------------------- js/lib/node.js | 195 ++++++++++++++++++++++++++++++ 5 files changed, 552 insertions(+), 339 deletions(-) create mode 100644 js/lib/node.js (limited to 'js/lib') diff --git a/js/lib/blocks.js b/js/lib/blocks.js index 786c099..cc2263a 100644 --- a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -1,3 +1,5 @@ +Node = require('./node'); + var C_GREATERTHAN = 62; var C_SPACE = 32; var C_OPEN_BRACKET = 91; @@ -48,19 +50,14 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // These are methods of a DocParser object, defined below. var makeBlock = function(tag, start_line, start_column) { - return { t: tag, - open: true, - last_line_blank: false, - start_line: start_line, - start_column: start_column, - end_line: start_line, - children: [], - parent: null, - // string_content is formed by concatenating strings, in finalize: - string_content: "", - strings: [], - children: [] - }; + var b = new Node(tag); + b.pos.start = [start_line, start_column]; + b.pos.end = []; // assigned in finalization step + b.open = true; + b.last_line_blank = false; + b.string_content = ""; + b.strings = []; + return b; }; // Returns true if parent block can contain child block. @@ -81,14 +78,17 @@ var acceptsLines = function(block_type) { // Returns true if block ends with a blank line, descending if needed // into lists and sublists. var endsWithBlankLine = function(block) { - if (block.last_line_blank) { + while (block) { + if (block.last_line_blank) { return true; + } + if (block.t === 'List' || block.t === 'ListItem') { + block = block.lastChild; + } else { + break; + } } - if ((block.t === 'List' || block.t === 'ListItem') && block.children.length > 0) { - return endsWithBlankLine(block.children[block.children.length - 1]); - } else { - return false; - } + return false; }; // Break out of all containing lists, resetting the tip of the @@ -135,8 +135,7 @@ var addChild = function(tag, line_number, offset) { var column_number = offset + 1; // offset 0 = column 1 var newBlock = makeBlock(tag, line_number, column_number); - this.tip.children.push(newBlock); - newBlock.parent = this.tip; + this.tip.appendChild(newBlock); this.tip = newBlock; return newBlock; }; @@ -190,7 +189,6 @@ var listsMatch = function(list_data, item_data) { var incorporateLine = function(ln, line_number) { var all_matched = true; - var last_child; var first_nonspace; var offset = 0; var match; @@ -210,12 +208,11 @@ var incorporateLine = function(ln, line_number) { // For each containing block, try to parse the associated line start. // Bail out on failure: container will point to the last matching block. // Set all_matched to false if not all containers match. - while (container.children.length > 0) { - last_child = container.children[container.children.length - 1]; - if (!last_child.open) { + while (container.lastChild) { + if (!container.lastChild.open) { break; } - container = last_child; + container = container.lastChild; match = matchAt(/[^ ]/, ln, offset); if (match === -1) { @@ -472,8 +469,8 @@ var incorporateLine = function(ln, line_number) { container.t === 'Header' || container.t === 'FencedCode' || (container.t === 'ListItem' && - container.children.length === 0 && - container.start_line === line_number)); + !container.firstChild && + container.pos.start[0] === line_number)); var cont = container; while (cont.parent) { @@ -537,10 +534,10 @@ var finalize = function(block, line_number) { return 0; } block.open = false; - if (line_number > block.start_line) { - block.end_line = line_number - 1; + if (line_number > block.pos.start[0]) { + block.pos.end = [line_number - 1]; // TODO end column } else { - block.end_line = line_number; + block.pos.end = [line_number]; } switch (block.t) { @@ -584,30 +581,24 @@ var finalize = function(block, line_number) { case 'List': block.tight = true; // tight by default - var numitems = block.children.length; - var i = 0; - while (i < numitems) { - var item = block.children[i]; + var item = block.firstChild; + while (item) { // check for non-final list item ending with blank line: - var last_item = i === numitems - 1; - if (endsWithBlankLine(item) && !last_item) { + if (endsWithBlankLine(item) && item.next) { block.tight = false; break; } // recurse into children of list item, to see if there are // spaces between any of them: - var numsubitems = item.children.length; - var j = 0; - while (j < numsubitems) { - var subitem = item.children[j]; - var last_subitem = j === numsubitems - 1; - if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { + var subitem = item.firstChild; + while (subitem) { + if (endsWithBlankLine(subitem) && (item.next || subitem.next)) { block.tight = false; break; } - j++; + subitem = subitem.next; } - i++; + item = item.next; } break; @@ -621,45 +612,14 @@ var finalize = function(block, line_number) { // Walk through a block & children recursively, parsing string content // into inline content where appropriate. Returns new object. var processInlines = function(block) { - var newblock = {}; - newblock.t = block.t; - newblock.start_line = block.start_line; - newblock.start_column = block.start_column; - newblock.end_line = block.end_line; - - switch(block.t) { - case 'Paragraph': - newblock.children = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - break; - case 'Header': - newblock.children = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - newblock.level = block.level; - break; - case 'List': - newblock.list_data = block.list_data; - newblock.tight = block.tight; - break; - case 'CodeBlock': - newblock.string_content = block.string_content; - newblock.info = block.info; - break; - case 'HtmlBlock': - newblock.string_content = block.string_content; - break; - default: - break; - } - - if (block.children && !newblock.children) { - var newchildren = []; - for (var i = 0; i < block.children.length; i++) { - newchildren.push(this.processInlines(block.children[i])); + var node, event; + var walker = block.walker(); + while (event = walker.next()) { + node = event.node; + if (!event.entering && (node.t == 'Paragraph' || node.t == 'Header')) { + this.inlineParser.parse(node, this.refmap); } - newblock.children = newchildren; } - return newblock; }; // The main parsing function. Returns a parsed document AST. @@ -675,7 +635,8 @@ var parse = function(input) { while (this.tip) { this.finalize(this.tip, len - 1); } - return this.processInlines(this.doc); + this.processInlines(this.doc); + return this.doc; }; diff --git a/js/lib/html-renderer.js b/js/lib/html-renderer.js index f6efe32..e7953cf 100644 --- a/js/lib/html-renderer.js +++ b/js/lib/html-renderer.js @@ -1,6 +1,6 @@ -// Helper function to produce content in a pair of HTML tags. -var inTags = function(tag, attribs, contents, selfclosing) { - var result = '<' + tag; +// Helper function to produce an HTML tag. +var tag = function(name, attribs, selfclosing) { + var result = '<' + name; if (attribs) { var i = 0; var attrib; @@ -9,132 +9,207 @@ var inTags = function(tag, attribs, contents, selfclosing) { i++; } } - if (contents) { - result = result.concat('>', contents, ''); - } else if (selfclosing) { - result = result + ' />'; - } else { - result = result.concat('>'); - } - return result; -}; - -// Render an inline element as HTML. -var renderInline = function(inline) { - var attrs; - switch (inline.t) { - case 'Text': - return this.escape(inline.c); - case 'Softbreak': - return this.softbreak; - case 'Hardbreak': - return inTags('br', [], "", true) + '\n'; - case 'Emph': - return inTags('em', [], this.renderInlines(inline.children)); - case 'Strong': - return inTags('strong', [], this.renderInlines(inline.children)); - case 'Html': - return inline.c; - case 'Link': - attrs = [['href', this.escape(inline.destination, true)]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('a', attrs, this.renderInlines(inline.children)); - case 'Image': - attrs = [['src', this.escape(inline.destination, true)], - ['alt', this.renderInlines(inline.children). - replace(/\<[^>]*alt="([^"]*)"[^>]*\>/g, '$1'). - replace(/\<[^>]*\>/g, '')]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('img', attrs, "", true); - case 'Code': - return inTags('code', [], this.escape(inline.c)); - default: - console.log("Unknown inline type " + inline.t); - return ""; - } -}; + if (selfclosing) + result += ' /'; -// Render a list of inlines. -var renderInlines = function(inlines) { - var result = ''; - for (var i = 0; i < inlines.length; i++) { - result = result + this.renderInline(inlines[i]); - } + result += '>'; return result; }; -// Render a single block element. -var renderBlock = function(block, in_tight_list) { - var tag; - var attr; +var renderNodes = function(block) { + + var attrs; var info_words; - switch (block.t) { - case 'Document': - var whole_doc = this.renderBlocks(block.children); - return (whole_doc === '' ? '' : whole_doc + '\n'); - case 'Paragraph': - if (in_tight_list) { - return this.renderInlines(block.children); + var tagname; + var walker = block.walker(); + var event, node, entering; + var buffer = []; + var disableTags = 0; + var grandparent; + var out = function(s) { + if (disableTags > 0) { + buffer.push(s.replace(/\<[^>]*\>/g, '')); } else { - return inTags('p', [], this.renderInlines(block.children)); - } - break; - case 'BlockQuote': - var filling = this.renderBlocks(block.children); - return inTags('blockquote', [], filling === '' ? this.innersep : - this.innersep + filling + this.innersep); - case 'ListItem': - var contents = this.renderBlocks(block.children, in_tight_list); - if (/^[<]/.test(contents)) { - contents = '\n' + contents; + buffer.push(s); } - if (/[>]$/.test(contents)) { - contents = contents + '\n'; + } + var esc = this.escape; + var cr = function() { + if (buffer.length > 0 && buffer[buffer.length - 1] !== '\n') { + out('\n'); } - return inTags('li', [], contents, false).trim(); - case 'List': - tag = block.list_data.type === 'Bullet' ? 'ul' : 'ol'; - attr = (!block.list_data.start || block.list_data.start === 1) ? - [] : [['start', block.list_data.start.toString()]]; - return inTags(tag, attr, this.innersep + - this.renderBlocks(block.children, block.tight) + - this.innersep); - case 'Header': - tag = 'h' + block.level; - return inTags(tag, [], this.renderInlines(block.children)); - case 'CodeBlock': - info_words = block.info ? block.info.split(/ +/) : []; - attr = (info_words.length === 0 || info_words[0].length === 0) ? - [] : [['class', 'language-' + this.escape(info_words[0], true)]]; - return inTags('pre', [], - inTags('code', attr, this.escape(block.string_content))); - case 'HtmlBlock': - return block.string_content; - case 'ReferenceDef': - return ""; - case 'HorizontalRule': - return inTags('hr', [], "", true); - default: - console.log("Unknown block type " + block.t); - return ""; } -}; -// Render a list of block elements, separated by this.blocksep. -var renderBlocks = function(blocks, in_tight_list) { - var result = []; - for (var i = 0; i < blocks.length; i++) { - if (blocks[i].t !== 'ReferenceDef') { - result.push(this.renderBlock(blocks[i], in_tight_list)); + while (event = walker.next()) { + entering = event.entering; + node = event.node; + + switch (node.t) { + case 'Text': + out(esc(node.c)); + break; + + case 'Softbreak': + out(this.softbreak); + break; + + case 'Hardbreak': + out(tag('br', [], true)); + cr(); + break; + + case 'Emph': + out(tag(entering ? 'em' : '/em')); + break; + + case 'Strong': + out(tag(entering ? 'strong' : '/strong')); + break; + + case 'Emph': + out(tag(entering ? 'strong' : '/strong')); + break; + + case 'Html': + out(node.c); + break; + + case 'Link': + if (entering) { + attrs = [['href', esc(node.destination, true)]]; + if (node.title) { + attrs.push(['title', esc(node.title, true)]); + } + out(tag('a', attrs)); + } else { + out(tag('/a')); + } + break; + + case 'Image': + if (entering) { + if (disableTags == 0) { + out('');
+                }
+                disableTags += 1;
+            } else {
+                disableTags -= 1;
+                if (disableTags == 0) {
+                    if (node.title) {
+                        out(''); + } + } + break; + + case 'Code': + out(tag('code') + esc(node.c) + tag('/code')); + break; + + case 'Document': + break; + + case 'Paragraph': + grandparent = node.parent.parent; + if (grandparent !== null && + grandparent.t === 'List') { + if (grandparent.tight) + break; + } + if (entering) { + cr(); + out(tag('p')); + } else { + out(tag('/p')); + cr(); + } + break; + + case 'BlockQuote': + if (entering) { + cr(); + out(tag('blockquote')); + cr(); + } else { + cr(); + out(tag('/blockquote')); + cr(); + } + break; + + case 'ListItem': + if (entering) { + out(tag('li')); + } else { + out(tag('/li')); + cr(); + } + break; + + case 'List': + tagname = node.list_data.type === 'Bullet' ? 'ul' : 'ol'; + if (entering) { + attr = (!node.list_data.start || node.list_data.start === 1) ? + [] : [['start', node.list_data.start.toString()]]; + cr(); + out(tag(tagname, attr)); + cr(); + } else { + cr(); + out(tag('/' + tagname)); + cr(); + } + break; + + case 'Header': + tagname = 'h' + node.level; + if (entering) { + cr(); + out(tag(tagname)); + } else { + out(tag('/' + tagname)); + cr(); + } + break; + + case 'CodeBlock': + info_words = node.info ? node.info.split(/ +/) : []; + attr = (info_words.length === 0 || info_words[0].length === 0) + ? [] : [['class', 'language-' + esc(info_words[0], true)]]; + cr(); + out(tag('pre') + tag('code', attr)); + out(this.escape(node.string_content)); + out(tag('/code') + tag('/pre')); + cr(); + break; + + case 'HtmlBlock': + cr(); + out(node.string_content); + cr(); + break; + + case 'HorizontalRule': + cr(); + out(tag('hr', [], true)); + cr(); + break; + + + case 'ReferenceDef': + break; + + default: + console.log("Unknown node type " + node.t); } + } - return result.join(this.blocksep); + return buffer.join(''); }; + // The HtmlRenderer object. function HtmlRenderer(){ return { @@ -157,11 +232,7 @@ function HtmlRenderer(){ .replace(/["]/g, '"'); } }, - renderInline: renderInline, - renderInlines: renderInlines, - renderBlock: renderBlock, - renderBlocks: renderBlocks, - render: renderBlock + render: renderNodes }; } diff --git a/js/lib/index.js b/js/lib/index.js index 4e04532..b2388ce 100755 --- a/js/lib/index.js +++ b/js/lib/index.js @@ -17,6 +17,7 @@ var renderAST = function(tree) { return util.inspect(tree, {depth: null}); }; +module.exports.Node = require('./node'); module.exports.DocParser = require('./blocks'); module.exports.HtmlRenderer = require('./html-renderer'); module.exports.ASTRenderer = renderAST; diff --git a/js/lib/inlines.js b/js/lib/inlines.js index ef7a00f..23b2b1e 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -85,6 +85,12 @@ var normalizeReference = function(s) { .toUpperCase(); }; +var text = function(s) { + var node = new Node('Text'); + node.c = s; + return node; +} + // INLINE PARSER // These are methods of an InlineParser object, defined below. @@ -123,9 +129,9 @@ var spnl = function() { // in the subject. If they succeed in matching anything, they // return the inline matched, advancing the subject. -// Attempt to parse backticks, returning either a backtick code span or a +// Attempt to parse backticks, adding either a backtick code span or a // literal sequence of backticks. -var parseBackticks = function(inlines) { +var parseBackticks = function(block) { var ticks = this.match(/^`+/); if (!ticks) { return 0; @@ -133,37 +139,42 @@ var parseBackticks = function(inlines) { var afterOpenTicks = this.pos; var foundCode = false; var matched; + var node; while (!foundCode && (matched = this.match(/`+/m))) { if (matched === ticks) { - inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, - this.pos - ticks.length) - .replace(/[ \n]+/g, ' ') - .trim() }); + node = new Node('Code'); + node.c = this.subject.slice(afterOpenTicks, + this.pos - ticks.length) + .replace(/[ \n]+/g, ' ') + .trim(); + block.appendChild(node); return true; } } // If we got here, we didn't match a closing backtick sequence. this.pos = afterOpenTicks; - inlines.push({ t: 'Text', c: ticks }); + block.appendChild(text(ticks)); return true; }; // Parse a backslash-escaped special character, adding either the escaped // character, a hard line break (if the backslash is followed by a newline), -// or a literal backslash to the 'inlines' list. -var parseBackslash = function(inlines) { +// or a literal backslash to the block's children. +var parseBackslash = function(block) { var subj = this.subject, pos = this.pos; + var node; if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: 'Hardbreak' }); + node = new Node('Hardbreak'); + block.appendChild(node); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: 'Text', c: subj.charAt(pos + 1) }); + block.appendChild(text(subj.charAt(pos + 1))); } else { this.pos++; - inlines.push({t: 'Text', c: '\\'}); + block.appendChild(text('\\')); } return true; } else { @@ -172,22 +183,23 @@ var parseBackslash = function(inlines) { }; // Attempt to parse an autolink (URL or email in pointy brackets). -var parseAutolink = function(inlines) { +var parseAutolink = function(block) { var m; var dest; + var node; if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1, -1); - inlines.push( - {t: 'Link', - children: [{ t: 'Text', c: dest }], - destination: 'mailto:' + encodeURI(unescape(dest)) }); + node = new Node('Link'); + node.destination = 'mailto:' + encodeURI(unescape(dest)); + node.appendChild(text(dest)); + block.appendChild(node); return true; } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1, -1); - inlines.push({ - t: 'Link', - children: [{ t: 'Text', c: dest }], - destination: encodeURI(unescape(dest)) }); + node = new Node('Link'); + node.destination = encodeURI(unescape(dest)); + node.appendChild(text(dest)); + block.appendChild(node); return true; } else { return false; @@ -195,10 +207,13 @@ var parseAutolink = function(inlines) { }; // Attempt to parse a raw HTML tag. -var parseHtmlTag = function(inlines) { +var parseHtmlTag = function(block) { var m = this.match(reHtmlTag); + var node; if (m) { - inlines.push({ t: 'Html', c: m }); + node = new Node('Html'); + node.c = m; + block.appendChild(node); return true; } else { return false; @@ -247,20 +262,8 @@ var scanDelims = function(cc) { can_close: can_close }; }; -var Emph = function(ils) { - return {t: 'Emph', children: ils}; -}; - -var Strong = function(ils) { - return {t: 'Strong', children: ils}; -}; - -var Str = function(s) { - return {t: 'Text', c: s}; -}; - // Attempt to parse emphasis or strong emphasis. -var parseEmphasis = function(cc, inlines) { +var parseEmphasis = function(cc, block) { var res = this.scanDelims(cc); var numdelims = res.numdelims; @@ -271,12 +274,13 @@ var parseEmphasis = function(cc, inlines) { } this.pos += numdelims; - inlines.push(Str(this.subject.slice(startpos, this.pos))); + var node = text(this.subject.slice(startpos, this.pos)); + block.appendChild(node); // Add entry to stack for this opener this.delimiters = { cc: cc, numdelims: numdelims, - pos: inlines.length - 1, + node: node, previous: this.delimiters, next: null, can_open: res.can_open, @@ -302,28 +306,14 @@ var removeDelimiter = function(delim) { } }; -var removeGaps = function(inlines) { - // remove gaps from inlines - var i, j; - j = 0; - for (i = 0 ; i < inlines.length; i++) { - if (inlines[i] !== null) { - inlines[j] = inlines[i]; - j++; - } - } - inlines.splice(j); -}; - -var processEmphasis = function(inlines, stack_bottom) { +var processEmphasis = function(block, stack_bottom) { "use strict"; var opener, closer; var opener_inl, closer_inl; var nextstack, tempstack; var use_delims; var contents; - var emph; - var i; + var tmp, next; // find first closer above stack_bottom: closer = this.delimiters; @@ -350,8 +340,8 @@ var processEmphasis = function(inlines, stack_bottom) { use_delims = closer.numdelims % 2 === 0 ? 2 : 1; } - opener_inl = inlines[opener.pos]; - closer_inl = inlines[closer.pos]; + opener_inl = opener.node; + closer_inl = closer.node; // remove used delimiters from stack elts and inlines opener.numdelims -= use_delims; @@ -360,17 +350,18 @@ var processEmphasis = function(inlines, stack_bottom) { closer_inl.c = closer_inl.c.slice(0, closer_inl.c.length - use_delims); // build contents for new emph element - contents = inlines.slice(opener.pos + 1, closer.pos); - removeGaps(contents); - - emph = use_delims === 1 ? Emph(contents) : Strong(contents); - - // insert into list of inlines - inlines[opener.pos + 1] = emph; - for (i = opener.pos + 2; i < closer.pos; i++) { - inlines[i] = null; + var emph = new Node(use_delims === 1 ? 'Emph' : 'Strong'); + + tmp = opener_inl.next; + while (tmp && tmp !== closer_inl) { + next = tmp.next; + tmp.unlink(); + emph.appendChild(tmp); + tmp = next; } + opener_inl.insertAfter(emph); + // remove elts btw opener and closer in delimiters stack tempstack = closer.previous; while (tempstack !== null && tempstack !== opener) { @@ -381,18 +372,17 @@ var processEmphasis = function(inlines, stack_bottom) { // if opener has 0 delims, remove it and the inline if (opener.numdelims === 0) { - inlines[opener.pos] = null; + opener_inl.unlink(); this.removeDelimiter(opener); } if (closer.numdelims === 0) { - inlines[closer.pos] = null; + closer_inl.unlink(); tempstack = closer.next; this.removeDelimiter(closer); closer = tempstack; } - } else { closer = closer.next; } @@ -403,8 +393,6 @@ var processEmphasis = function(inlines, stack_bottom) { } - removeGaps(inlines); - // remove all delimiters while (this.delimiters !== stack_bottom) { this.removeDelimiter(this.delimiters); @@ -445,18 +433,19 @@ var parseLinkLabel = function() { return m === null ? 0 : m.length; }; -// Add open bracket to delimiter stack and add a Str to inlines. -var parseOpenBracket = function(inlines) { +// Add open bracket to delimiter stack and add a text node to block's children. +var parseOpenBracket = function(block) { var startpos = this.pos; this.pos += 1; - inlines.push(Str("[")); + var node = text('['); + block.appendChild(node); // Add entry to stack for this opener this.delimiters = { cc: C_OPEN_BRACKET, numdelims: 1, - pos: inlines.length - 1, + node: node, previous: this.delimiters, next: null, can_open: true, @@ -472,19 +461,21 @@ var parseOpenBracket = function(inlines) { }; // IF next character is [, and ! delimiter to delimiter stack and -// add a Str to inlines. Otherwise just add a Str. -var parseBang = function(inlines) { +// add a text node to block's children. Otherwise just add a text node. +var parseBang = function(block) { var startpos = this.pos; this.pos += 1; if (this.peek() === C_OPEN_BRACKET) { this.pos += 1; - inlines.push(Str("![")); + + var node = text('!['); + block.appendChild(node); // Add entry to stack for this opener this.delimiters = { cc: C_BANG, numdelims: 1, - pos: inlines.length - 1, + node: node, previous: this.delimiters, next: null, can_open: true, @@ -495,22 +486,21 @@ var parseBang = function(inlines) { this.delimiters.previous.next = this.delimiters; } } else { - inlines.push(Str("!")); + block.appendChild(text('!')); } return true; }; // Try to match close bracket against an opening in the delimiter // stack. Add either a link or image, or a plain [ character, -// to the inlines stack. If there is a matching delimiter, +// to block's children. If there is a matching delimiter, // remove it from the delimiter stack. -var parseCloseBracket = function(inlines) { +var parseCloseBracket = function(block) { var startpos; var is_image; var dest; var title; var matched = false; - var link_text; var i; var reflabel; var opener; @@ -530,13 +520,13 @@ var parseCloseBracket = function(inlines) { if (opener === null) { // no matched opener, just return a literal - inlines.push(Str("]")); + block.appendChild(text(']')); return true; } if (!opener.active) { // no matched opener, just return a literal - inlines.push(Str("]")); + block.appendChild(text(']')); // take opener off emphasis stack this.removeDelimiter(opener); return true; @@ -544,15 +534,6 @@ var parseCloseBracket = function(inlines) { // If we got here, open is a potential opener is_image = opener.cc === C_BANG; - // instead of copying a slice, we null out the - // parts of inlines that don't correspond to link_text; - // later, we'll collapse them. This is awkward, and could - // be simplified if we made inlines a linked list rather than - // an array: - link_text = inlines.slice(0); - for (i = 0; i < opener.pos + 1; i++) { - link_text[i] = null; - } // Check to see if we have a link/image @@ -597,13 +578,22 @@ var parseCloseBracket = function(inlines) { } if (matched) { - this.processEmphasis(link_text, opener.previous); - - // remove the part of inlines that became link_text. - // see note above on why we need to do this instead of splice: - for (i = opener.pos; i < inlines.length; i++) { - inlines[i] = null; + var node = new Node(is_image ? 'Image' : 'Link'); + node.destination = dest; + node.title = title; + + var tmp, next; + tmp = opener.node.next; + while (tmp) { + next = tmp.next; + tmp.unlink(); + node.appendChild(tmp); + tmp = next; } + block.appendChild(node); + this.processEmphasis(node, opener.previous); + + opener.node.unlink(); // processEmphasis will remove this and later delimiters. // Now, for a link, we also deactivate earlier link openers. @@ -618,27 +608,23 @@ var parseCloseBracket = function(inlines) { } } - inlines.push({t: is_image ? 'Image' : 'Link', - destination: dest, - title: title, - children: link_text}); return true; } else { // no match this.removeDelimiter(opener); // remove this opener from stack this.pos = startpos; - inlines.push(Str("]")); + block.appendChild(text(']')); return true; } }; // Attempt to parse an entity, return Entity object if successful. -var parseEntity = function(inlines) { +var parseEntity = function(block) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: 'Text', c: entityToChar(m) }); + block.appendChild(text(entityToChar(m))); return true; } else { return false; @@ -646,11 +632,11 @@ var parseEntity = function(inlines) { }; // Parse a run of ordinary characters, or a single character with -// a special meaning in markdown, as a plain string, adding to inlines. -var parseString = function(inlines) { +// a special meaning in markdown, as a plain string. +var parseString = function(block) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: 'Text', c: m }); + block.appendChild(text(m)); return true; } else { return false; @@ -659,17 +645,15 @@ var parseString = function(inlines) { // Parse a newline. If it was preceded by two spaces, return a hard // line break; otherwise a soft line break. -var parseNewline = function(inlines) { +var parseNewline = function(block) { var m = this.match(/^ *\n/); if (m) { - if (m.length > 2) { - inlines.push({ t: 'Hardbreak' }); - } else if (m.length > 0) { - inlines.push({ t: 'Softbreak' }); - } + var node = new Node(m.length > 2 ? 'Hardbreak' : 'Softbreak'); + block.appendChild(node); return true; + } else { + return false; } - return false; }; // Attempt to parse a link reference, modifying refmap. @@ -731,9 +715,9 @@ var parseReference = function(s, refmap) { }; // Parse the next inline element in subject, advancing subject position. -// On success, add the result to the inlines list, and return true. +// On success, add the result to block's children and return true. // On failure, return false. -var parseInline = function(inlines) { +var parseInline = function(block) { "use strict"; var c = this.peek(); if (c === -1) { @@ -743,56 +727,57 @@ var parseInline = function(inlines) { switch(c) { case C_NEWLINE: case C_SPACE: - res = this.parseNewline(inlines); + res = this.parseNewline(block); break; case C_BACKSLASH: - res = this.parseBackslash(inlines); + res = this.parseBackslash(block); break; case C_BACKTICK: - res = this.parseBackticks(inlines); + res = this.parseBackticks(block); break; case C_ASTERISK: case C_UNDERSCORE: - res = this.parseEmphasis(c, inlines); + res = this.parseEmphasis(c, block); break; case C_OPEN_BRACKET: - res = this.parseOpenBracket(inlines); + res = this.parseOpenBracket(block); break; case C_BANG: - res = this.parseBang(inlines); + res = this.parseBang(block); break; case C_CLOSE_BRACKET: - res = this.parseCloseBracket(inlines); + res = this.parseCloseBracket(block); break; case C_LESSTHAN: - res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); + res = this.parseAutolink(block) || this.parseHtmlTag(block); break; case C_AMPERSAND: - res = this.parseEntity(inlines); + res = this.parseEntity(block); break; default: - res = this.parseString(inlines); + res = this.parseString(block); break; } if (!res) { this.pos += 1; - inlines.push({t: 'Text', c: fromCodePoint(c)}); + var textnode = new Node('Text'); + textnode.c = fromCodePoint(c); + block.appendChild(textnode); } return true; }; -// Parse s as a list of inlines, using refmap to resolve references. -var parseInlines = function(s, refmap) { - this.subject = s; +// Parse string_content in block into inline children, +// using refmap to resolve references. +var parseInlines = function(block, refmap) { + this.subject = block.string_content.trim(); this.pos = 0; this.refmap = refmap || {}; this.delimiters = null; - var inlines = []; - while (this.parseInline(inlines)) { + while (this.parseInline(block)) { } - this.processEmphasis(inlines, null); - return inlines; + this.processEmphasis(block, null); }; // The InlineParser object. diff --git a/js/lib/node.js b/js/lib/node.js new file mode 100644 index 0000000..f96d65c --- /dev/null +++ b/js/lib/node.js @@ -0,0 +1,195 @@ +util = require('util'); + +function isContainer(node) { + var t = node.t; + return (t == 'Document' || + t == 'BlockQuote' || + t == 'List' || + t == 'ListItem' || + t == 'Paragraph' || + t == 'Header' || + t == 'Emph' || + t == 'Strong' || + t == 'Link' || + t == 'Image'); +} + +function NodeWalker(root) { + this.current = root; + this.root = root; + this.entering = true; +} + +NodeWalker.prototype.resumeAt = function(node, entering) { + this.current = node; + this.entering = (entering === true); +}; + +NodeWalker.prototype.next = function(){ + var cur = this.current; + var entering = this.entering; + + if (!cur) + return null; + + var container = isContainer(cur); + + if (entering && container) { + if (cur.firstChild) { + this.current = cur.firstChild; + this.entering = true; + } else { + // stay on node but exit + this.entering = false; + } + + } else if (!entering && cur == this.root) { + // don't move past root + this.current = null; + + } else if (cur.next) { + this.current = cur.next; + this.entering = true; + + } else { + this.current = cur.parent; + this.entering = false; + } + + return {entering: entering, node: cur}; +}; + +function Node(nodeType) { + this.t = nodeType; + this.parent = null; + this.firstChild = null; + this.lastChild = null; + this.prev = null; + this.next = null; + this.pos = {}; +} + +Node.prototype.isContainer = function() { + return isContainer(this); +}; + +Node.prototype.appendChild = function(child) { + child.unlink(); + child.parent = this; + if (this.lastChild) { + this.lastChild.next = child; + child.prev = this.lastChild; + this.lastChild = child; + } else { + this.firstChild = child; + this.lastChild = child; + } +}; + +Node.prototype.prependChild = function(child) { + child.unlink(); + child.parent = this; + if (this.firstChild) { + this.firstChild.prev = child; + child.next = this.firstChild; + this.firstChild = child; + } else { + this.firstChild = child; + this.lastChild = child; + } +}; + +Node.prototype.unlink = function() { + if (this.prev) { + this.prev.next = this.next; + } else if (this.parent) { + this.parent.firstChild = this.next; + } + if (this.next) { + this.next.prev = this.prev; + } else if (this.parent) { + this.parent.lastChild = this.prev; + } + this.parent = null; + this.next = null; + this.prev = null; +}; + +Node.prototype.insertAfter = function(sibling) { + sibling.unlink(); + sibling.next = this.next; + if (sibling.next) { + sibling.next.prev = sibling; + } + sibling.prev = this; + this.next = sibling; + sibling.parent = this.parent; + if (!sibling.next) { + sibling.parent.lastChild = sibling; + } +}; + +Node.prototype.insertBefore = function(sibling) { + sibling.unlink(); + sibling.prev = this.prev; + if (sibling.prev) { + sibling.prev.next = sibling; + } + sibling.next = this; + this.prev = sibling; + sibling.parent = this.parent; + if (!sibling.prev) { + sibling.parent.firstChild = sibling; + } +}; + +Node.prototype.walker = function() { + var walker = new NodeWalker(this); + return walker; +}; + +Node.prototype.toAST = function() { + var children; + var cur; + var result = { t: this.t }; + + var propsToShow = ['t', 'c', 'list_data', 'string_content', + 'pos', 'start_column', 'end_column', + 'tight', 'info']; + + for (var i = 0; i < propsToShow.length; i++) { + var prop = propsToShow[i]; + if (this[prop] !== undefined) { + result[prop] = this[prop]; + } + } + + if (isContainer(this)) { + children = []; + if (this.firstChild) { + cur = this.firstChild; + while (cur) { + // TODO avoid recursion here... + children.push(cur.toAST()); + cur = cur.next; + } + result.children = children; + } + } + return result; + +}; + +module.exports = Node; + + +/* Example of use of walker: + + var walker = w.walker(); + var event; + + while (event = walker.next()) { + console.log(event.entering, event.node.t); + } + +*/ -- cgit v1.2.3