diff options
Diffstat (limited to 'src/scanners.re')
-rw-r--r-- | src/scanners.re | 23 |
1 files changed, 11 insertions, 12 deletions
diff --git a/src/scanners.re b/src/scanners.re index 75417a1..8b1c91e 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -6,10 +6,9 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, { bufsize_t res; unsigned char *ptr = (unsigned char *)c->data; - unsigned char zero = '\0'; - if (ptr == NULL) { - res = scanner(&zero); + if (ptr == NULL || offset > c->len) { + return 0; } else { unsigned char lim = ptr[c->len]; @@ -72,7 +71,7 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, in_single_quotes = ['] (escaped_char|[^'\x00])* [']; in_parens = [(] (escaped_char|[^)\x00])* [)]; - scheme = 'coap'|'doi'|'javascript'|'aaa'|'aaas'|'about'|'acap'|'cap'|'cid'|'crid'|'data'|'dav'|'dict'|'dns'|'file'|'ftp'|'geo'|'go'|'gopher'|'h323'|'http'|'https'|'iax'|'icap'|'im'|'imap'|'info'|'ipp'|'iris'|'iris.beep'|'iris.xpc'|'iris.xpcs'|'iris.lwz'|'ldap'|'mailto'|'mid'|'msrp'|'msrps'|'mtqp'|'mupdate'|'news'|'nfs'|'ni'|'nih'|'nntp'|'opaquelocktoken'|'pop'|'pres'|'rtsp'|'service'|'session'|'shttp'|'sieve'|'sip'|'sips'|'sms'|'snmp'|'soap.beep'|'soap.beeps'|'tag'|'tel'|'telnet'|'tftp'|'thismessage'|'tn3270'|'tip'|'tv'|'urn'|'vemmi'|'ws'|'wss'|'xcon'|'xcon-userid'|'xmlrpc.beep'|'xmlrpc.beeps'|'xmpp'|'z39.50r'|'z39.50s'|'adiumxtra'|'afp'|'afs'|'aim'|'apt'|'attachment'|'aw'|'beshare'|'bitcoin'|'bolo'|'callto'|'chrome'|'chrome-extension'|'com-eventbrite-attendee'|'content'|'cvs'|'dlna-playsingle'|'dlna-playcontainer'|'dtn'|'dvb'|'ed2k'|'facetime'|'feed'|'finger'|'fish'|'gg'|'git'|'gizmoproject'|'gtalk'|'hcp'|'icon'|'ipn'|'irc'|'irc6'|'ircs'|'itms'|'jar'|'jms'|'keyparc'|'lastfm'|'ldaps'|'magnet'|'maps'|'market'|'message'|'mms'|'ms-help'|'msnim'|'mumble'|'mvn'|'notes'|'oid'|'palm'|'paparazzi'|'platform'|'proxy'|'psyc'|'query'|'res'|'resource'|'rmi'|'rsync'|'rtmp'|'secondlife'|'sftp'|'sgn'|'skype'|'smb'|'soldat'|'spotify'|'ssh'|'steam'|'svn'|'teamspeak'|'things'|'udp'|'unreal'|'ut2004'|'ventrilo'|'view-source'|'webcal'|'wtai'|'wyciwyg'|'xfire'|'xri'|'ymsgr'; + scheme = [A-Za-z ][A-Za-z.+-]{1,31}; */ // Try to match a scheme including colon. @@ -215,8 +214,8 @@ bufsize_t _scan_link_url(const unsigned char *p) const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); } - [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp | [\\][^()])* { return (bufsize_t)(p - start); } + [ \r\n]* [<] ([^<> \t\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); } + [ \r\n]* (reg_char+ | escaped_char | [\\] [^() \t\v\f\r\n] | in_parens_nosp)* [\\]? { return (bufsize_t)(p - start); } .? { return 0; } */ } @@ -247,8 +246,8 @@ bufsize_t _scan_spacechars(const unsigned char *p) */ } -// Match ATX header start. -bufsize_t _scan_atx_header_start(const unsigned char *p) +// Match ATX heading start. +bufsize_t _scan_atx_heading_start(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; @@ -258,9 +257,9 @@ bufsize_t _scan_atx_header_start(const unsigned char *p) */ } -// Match setext header line. Return 1 for level-1 header, +// Match setext heading line. Return 1 for level-1 heading, // 2 for level-2, 0 for no match. -bufsize_t _scan_setext_header_line(const unsigned char *p) +bufsize_t _scan_setext_heading_line(const unsigned char *p) { const unsigned char *marker = NULL; /*!re2c @@ -270,10 +269,10 @@ bufsize_t _scan_setext_header_line(const unsigned char *p) */ } -// Scan a horizontal rule line: "...three or more hyphens, asterisks, +// Scan a thematic break line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -bufsize_t _scan_hrule(const unsigned char *p) +bufsize_t _scan_thematic_break(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; |