#include <stdarg.h> #include <string.h> #include <assert.h> #include <string.h> #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include "config.h" #include "cmark_ctype.h" #include "buffer.h" /* Used as default value for cmark_strbuf->ptr so that people can always * assume ptr is non-NULL and zero terminated even for new cmark_strbufs. */ unsigned char cmark_strbuf__initbuf[1]; #ifndef MIN #define MIN(x, y) ((x < y) ? x : y) #endif void cmark_strbuf_init(cmark_strbuf *buf, bufsize_t initial_size) { buf->asize = 0; buf->size = 0; buf->ptr = cmark_strbuf__initbuf; if (initial_size > 0) cmark_strbuf_grow(buf, initial_size); } void cmark_strbuf_overflow_err() { fprintf(stderr, "String buffer overflow"); abort(); } static inline void S_strbuf_grow_by(cmark_strbuf *buf, size_t add) { size_t target_size = (size_t)buf->size + add; if (target_size < add /* Integer overflow. */ || target_size > BUFSIZE_MAX /* Truncation overflow. */ ) { cmark_strbuf_overflow_err(); return; /* unreachable */ } if ((bufsize_t)target_size >= buf->asize) cmark_strbuf_grow(buf, (bufsize_t)target_size); } void cmark_strbuf_grow(cmark_strbuf *buf, bufsize_t target_size) { unsigned char *new_ptr; if (target_size < buf->asize) return; if (buf->asize == 0) { new_ptr = NULL; } else { new_ptr = buf->ptr; } /* Oversize the buffer by 50% to guarantee amortized linear time * complexity on append operations. */ size_t new_size = (size_t)target_size + (size_t)target_size / 2; /* Account for terminating null byte. */ new_size += 1; /* round allocation up to multiple of 8 */ new_size = (new_size + 7) & ~7; if (new_size < (size_t)target_size /* Integer overflow. */ || new_size > BUFSIZE_MAX /* Truncation overflow. */ ) { if (target_size >= BUFSIZE_MAX) { /* No space for terminating null byte. */ cmark_strbuf_overflow_err(); return; /* unreachable */ } /* Oversize by the maximum possible amount. 
*/ new_size = BUFSIZE_MAX; } new_ptr = (unsigned char *)realloc(new_ptr, new_size); if (!new_ptr) { perror("realloc in cmark_strbuf_grow"); abort(); } buf->asize = (bufsize_t)new_size; buf->ptr = new_ptr; } bufsize_t cmark_strbuf_len(const cmark_strbuf *buf) { return buf->size; } void cmark_strbuf_free(cmark_strbuf *buf) { if (!buf) return; if (buf->ptr != cmark_strbuf__initbuf) free(buf->ptr); cmark_strbuf_init(buf, 0); } void cmark_strbuf_clear(cmark_strbuf *buf) { buf->size = 0; if (buf->asize > 0) buf->ptr[0] = '\0'; } void cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0 || data == NULL) { cmark_strbuf_clear(buf); } else { if (data != buf->ptr) { if (len >= buf->asize) cmark_strbuf_grow(buf, len); memmove(buf->ptr, data, len); } buf->size = len; buf->ptr[buf->size] = '\0'; } } void cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { cmark_strbuf_set(buf, (const unsigned char *)string, string ? cmark_strbuf_safe_strlen(string) : 0); } void cmark_strbuf_putc(cmark_strbuf *buf, int c) { S_strbuf_grow_by(buf, 1); buf->ptr[buf->size++] = (unsigned char)(c & 0xFF); buf->ptr[buf->size] = '\0'; } void cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, bufsize_t len) { if (len <= 0) return; S_strbuf_grow_by(buf, len); memmove(buf->ptr + buf->size, data, len); buf->size += len; buf->ptr[buf->size] = '\0'; } void cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { cmark_strbuf_put(buf, (const unsigned char *)string, cmark_strbuf_safe_strlen(string)); } void cmark_strbuf_copy_cstr(char *data, bufsize_t datasize, const cmark_strbuf *buf) { bufsize_t copylen; assert(buf); if (!data || datasize <= 0) return; data[0] = '\0'; if (buf->size == 0 || buf->asize <= 0) return; copylen = buf->size; if (copylen > datasize - 1) copylen = datasize - 1; memmove(data, buf->ptr, copylen); data[copylen] = '\0'; } void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) { cmark_strbuf t = *buf_a; *buf_a = *buf_b; 
*buf_b = t; } unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { unsigned char *data = buf->ptr; if (buf->asize == 0) { /* return an empty string */ return (unsigned char *)calloc(1, 1); } cmark_strbuf_init(buf, 0); return data; } int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); return (result != 0) ? result : (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; } bufsize_t cmark_strbuf_strchr(const cmark_strbuf *buf, int c, bufsize_t pos) { if (pos >= buf->size) return -1; if (pos < 0) pos = 0; const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); if (!p) return -1; return (bufsize_t)(p - (const unsigned char *)buf->ptr); } bufsize_t cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, bufsize_t pos) { if (pos < 0 || buf->size == 0) return -1; if (pos >= buf->size) pos = buf->size - 1; bufsize_t i; for (i = pos; i >= 0; i--) { if (buf->ptr[i] == (unsigned char)c) return i; } return -1; } void cmark_strbuf_truncate(cmark_strbuf *buf, bufsize_t len) { if (len < 0) len = 0; if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; } } void cmark_strbuf_drop(cmark_strbuf *buf, bufsize_t n) { if (n > 0) { if (n > buf->size) n = buf->size; buf->size = buf->size - n; if (buf->size) memmove(buf->ptr, buf->ptr + n, buf->size); buf->ptr[buf->size] = '\0'; } } void cmark_strbuf_rtrim(cmark_strbuf *buf) { if (!buf->size) return; while (buf->size > 0) { if (!cmark_isspace(buf->ptr[buf->size - 1])) break; buf->size--; } buf->ptr[buf->size] = '\0'; } void cmark_strbuf_trim(cmark_strbuf *buf) { bufsize_t i = 0; if (!buf->size) return; while (i < buf->size && cmark_isspace(buf->ptr[i])) i++; cmark_strbuf_drop(buf, i); cmark_strbuf_rtrim(buf); } // Destructively modify string, collapsing consecutive // space and newline characters into a single space. 
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
  /* In-place compaction: `src` scans every byte, `dst` marks where the next
   * kept byte goes. `dst` never runs ahead of `src`, so no unread byte is
   * overwritten. */
  bufsize_t src = 0;
  bufsize_t dst = 0;
  bool in_space_run = false;

  while (src < s->size) {
    unsigned char ch = s->ptr[src];
    src += 1;

    if (!cmark_isspace(ch)) {
      s->ptr[dst] = ch;
      dst += 1;
      in_space_run = false;
      continue;
    }

    /* Only the first byte of a whitespace run emits anything, and what it
     * emits is always a plain space. */
    if (!in_space_run) {
      s->ptr[dst] = ' ';
      dst += 1;
      in_space_run = true;
    }
  }

  cmark_strbuf_truncate(s, dst);
}

/* Destructively unescape a string: every backslash that is immediately
 * followed by a byte for which cmark_ispunct() is true is dropped, keeping
 * the punctuation byte itself. All other bytes are kept as they are.
 *
 * The lookahead for a backslash in the last position reads the byte at
 * index `size`; this assumes the buffer is NUL-terminated there. */
extern void cmark_strbuf_unescape(cmark_strbuf *buf) {
  bufsize_t src = 0;
  bufsize_t dst = 0;

  while (src < buf->size) {
    if (buf->ptr[src] == '\\' && cmark_ispunct(buf->ptr[src + 1])) {
      /* Step over the backslash so the escaped byte is what gets copied. */
      src += 1;
    }

    buf->ptr[dst] = buf->ptr[src];
    dst += 1;
    src += 1;
  }

  cmark_strbuf_truncate(buf, dst);
}