diff options
| -rw-r--r-- | src/inlines.c | 90 | 
1 files changed, 52 insertions, 38 deletions
diff --git a/src/inlines.c b/src/inlines.c index b864e12..f223baf 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -695,8 +695,8 @@ static cmark_node *handle_entity(subject *subj) {    return make_str(subj->mem, cmark_chunk_buf_detach(&ent));  } -// Clean a URL: remove surrounding whitespace and surrounding <>, -// and remove \ that escape punctuation. +// Clean a URL: remove surrounding whitespace, and remove \ that escape +// punctuation.  cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {    cmark_strbuf buf = CMARK_BUF_INIT(mem); @@ -707,11 +707,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {      return result;    } -  if (url->data[0] == '<' && url->data[url->len - 1] == '>') { -    houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); -  } else { -    houdini_unescape_html_f(&buf, url->data, url->len); -  } +  houdini_unescape_html_f(&buf, url->data, url->len);    cmark_strbuf_unescape(&buf);    return cmark_chunk_buf_detach(&buf); @@ -824,10 +820,43 @@ noMatch:    subj->pos = startpos; // rewind    return 0;  } -static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) { + +static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset, cmark_chunk *output) {    bufsize_t i = offset;    size_t nb_p = 0; +  while (i < input->len) { +    if (input->data[i] == '\\' && +        i + 1 < input-> len && +        cmark_ispunct(input->data[i+1])) +      i += 2; +    else if (input->data[i] == '(') { +      ++nb_p; +      ++i; +    } else if (input->data[i] == ')') { +      if (nb_p == 0) +        break; +      --nb_p; +      ++i; +    } else if (cmark_isspace(input->data[i])) +      break; +    else +      ++i; +  } + +  if (i >= input->len) +    return -1; + +  { +    cmark_chunk result = {input->data + offset, i - offset, 0}; +    *output = result; +  } +  return i - offset; +} + +static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset, cmark_chunk *output) { +  bufsize_t i = offset; +    if (i < input->len && input->data[i] == '<') {      ++i;      while (i < input->len) { @@ -836,42 +865,30 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {          break;        } else if (input->data[i] == '\\')          i += 2; -      else if (cmark_isspace(input->data[i])) -        return -1; +      else if (cmark_isspace(input->data[i]) || input->data[i] == '<') +        return manual_scan_link_url_2(input, offset, output);        else          ++i;      }    } else { -    while (i < input->len) { -      if (input->data[i] == '\\' && -	  i + 1 < input-> len && -          cmark_ispunct(input->data[i+1])) -        i += 2; -      else if (input->data[i] == '(') { -        ++nb_p; -        ++i; -      } else if (input->data[i] == ')') { -        if (nb_p == 0) -          break; -        --nb_p; -        ++i; -      } else if (cmark_isspace(input->data[i])) -        break; -      else -        ++i; -    } +    return manual_scan_link_url_2(input, offset, output);    }    if (i >= input->len)      return -1; + +  { +    cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0}; +    *output = result; +  }    return i - offset;  } +  // Return a link, an image, or a literal close bracket.  static cmark_node *handle_close_bracket(subject *subj) {    bufsize_t initial_pos, after_link_text_pos; -  bufsize_t starturl, endurl, starttitle, endtitle, endall; -  bufsize_t n; -  bufsize_t sps; +  bufsize_t endurl, starttitle, endtitle, endall; +  bufsize_t sps, n;    cmark_reference *ref = NULL;    cmark_chunk url_chunk, title_chunk;    cmark_chunk url, title; @@ -907,11 +924,10 @@ static cmark_node *handle_close_bracket(subject *subj) {    // First, look for an inline link.    if (peek_char(subj) == '(' &&        ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && -      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { +      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, &url_chunk)) > -1)) {      // try to parse an explicit link: -    starturl = subj->pos + 1 + sps; // after ( -    endurl = starturl + n; +    endurl = subj->pos + 1 + sps + n;      starttitle = endurl + scan_spacechars(&subj->input, endurl);      // ensure there are spaces btw url and title @@ -924,7 +940,6 @@ static cmark_node *handle_close_bracket(subject *subj) {      if (peek_at(subj, endall) == ')') {        subj->pos = endall + 1; -      url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);        title_chunk =            cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);        url = cmark_clean_url(subj->mem, &url_chunk); @@ -1213,9 +1228,8 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,    // parse link url:    spnl(&subj); -  matchlen = manual_scan_link_url(&subj.input, subj.pos); -  if (matchlen > 0) { -    url = cmark_chunk_dup(&subj.input, subj.pos, matchlen); +  if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 && +      url.len > 0) {      subj.pos += matchlen;    } else {      return 0;  | 
