summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Vicent Marti <tanoku@gmail.com> 2014-09-10 18:33:27 +0200
committer Vicent Marti <tanoku@gmail.com> 2014-09-10 18:33:27 +0200
commit 94a79a605f3e76a43f1f87a5044f6761b99e5ca5 (patch)
tree 76a24ad88117cf4948eebaf6b42c86d75fe7d1e2 /src
parent 9d86d2f32303ae0048f6a5daa552bacceb9b12ea (diff)
Cleanup reference implementation
Diffstat (limited to 'src')
-rw-r--r-- src/blocks.c 16
-rw-r--r-- src/buffer.c 43
-rw-r--r-- src/buffer.h 2
-rw-r--r-- src/inlines.c 176
-rw-r--r-- src/references.c 109
-rw-r--r-- src/references.h 27
-rw-r--r-- src/stmd.h 26
-rw-r--r-- src/utf8.c 10
-rw-r--r-- src/utf8.h 5
9 files changed, 224 insertions, 190 deletions
diff --git a/src/blocks.c b/src/blocks.c
index 72b2dc2..30a8284 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,7 +8,6 @@
#include "utf8.h"
#include "html/houdini.h"
#include "scanners.h"
-#include "uthash.h"
#define peek_at(i, n) (i)->data[n]
@@ -36,12 +35,7 @@ static node_block* make_block(int tag, int start_line, int start_column)
extern node_block* make_document()
{
node_block *e = make_block(BLOCK_DOCUMENT, 1, 1);
- reference *map = NULL;
- reference ** refmap;
-
- refmap = (reference**) malloc(sizeof(reference*));
- *refmap = map;
- e->as.document.refmap = refmap;
+ e->as.document.refmap = reference_map_new();
e->top = e;
return e;
@@ -164,7 +158,7 @@ static void finalize(node_block* b, int line_number)
case BLOCK_PARAGRAPH:
pos = 0;
while (strbuf_at(&b->string_content, 0) == '[' &&
- (pos = parse_reference(&b->string_content, b->top->as.document.refmap))) {
+ (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
strbuf_drop(&b->string_content, pos);
}
@@ -192,7 +186,7 @@ static void finalize(node_block* b, int line_number)
strbuf_drop(&b->string_content, firstlinelen + 1);
strbuf_trim(&b->as.code.info);
- unescape_buffer(&b->as.code.info);
+ strbuf_unescape(&b->as.code.info);
break;
case BLOCK_LIST: // determine tight/loose status
@@ -268,7 +262,7 @@ extern void free_blocks(node_block* e)
if (e->tag == BLOCK_FENCED_CODE) {
strbuf_free(&e->as.code.info);
} else if (e->tag == BLOCK_DOCUMENT) {
- free_reference_map(e->as.document.refmap);
+ reference_map_free(e->as.document.refmap);
}
free_blocks(e->children);
free(e);
@@ -278,7 +272,7 @@ extern void free_blocks(node_block* e)
// Walk through node_block and all children, recursively, parsing
// string content into inline content where appropriate.
-void process_inlines(node_block* cur, reference** refmap)
+void process_inlines(node_block* cur, reference_map *refmap)
{
switch (cur->tag) {
case BLOCK_PARAGRAPH:
diff --git a/src/buffer.c b/src/buffer.c
index 90c2186..cdf8ca0 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -308,3 +308,46 @@ void strbuf_trim(strbuf *buf)
buf->ptr[buf->size] = '\0';
}
+
+// Collapse every run of space and/or newline bytes in `s` into one space.
+// The buffer is rewritten in place and then truncated to the number of
+// bytes that were kept.
+void strbuf_normalize_whitespace(strbuf *s)
+{
+	bool in_run = false;	// previous byte was a space or newline
+	int src, dst = 0;
+
+	for (src = 0; src < s->size; ++src) {
+		const bool is_blank = (s->ptr[src] == ' ' || s->ptr[src] == '\n');
+
+		// Non-blank bytes are copied verbatim; only the first blank of a
+		// run is emitted (always as ' '), the rest of the run is dropped.
+		if (!is_blank)
+			s->ptr[dst++] = s->ptr[src];
+		else if (!in_run)
+			s->ptr[dst++] = ' ';
+
+		in_run = is_blank;
+	}
+
+	strbuf_truncate(s, dst);
+}
+
+// Destructively unescape a string: remove backslashes before punctuation chars.
+//
+// A backslash followed by a punctuation character is dropped and the escaped
+// character is copied through verbatim. The escaped character is consumed
+// together with its backslash, so two backslashes in a row produce exactly
+// one literal backslash, and that backslash never acts as the escape for the
+// byte that follows it. A backslash before any other byte, or at the very end
+// of the buffer, is kept as is.
+extern void strbuf_unescape(strbuf *buf)
+{
+	int r, w;
+
+	for (r = 0, w = 0; r < buf->size; ++r) {
+		// Look ahead only while the next byte is still inside the buffer.
+		if (buf->ptr[r] == '\\' && r + 1 < buf->size &&
+		    ispunct(buf->ptr[r + 1]))
+			++r;	// step over the backslash; the escaped byte is copied below
+
+		buf->ptr[w++] = buf->ptr[r];
+	}
+
+	strbuf_truncate(buf, w);
+}
+
diff --git a/src/buffer.h b/src/buffer.h
index 6f45cbb..1bc1eee 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -108,5 +108,7 @@ int strbuf_strrchr(const strbuf *buf, int c, int pos);
void strbuf_drop(strbuf *buf, int n);
void strbuf_truncate(strbuf *buf, int len);
void strbuf_trim(strbuf *buf);
+void strbuf_normalize_whitespace(strbuf *s);
+void strbuf_unescape(strbuf *s);
#endif
diff --git a/src/inlines.c b/src/inlines.c
index aa0e13e..3040f09 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -7,110 +7,23 @@
#include "stmd.h"
#include "html/houdini.h"
#include "utf8.h"
-#include "uthash.h"
#include "scanners.h"
typedef struct Subject {
chunk input;
int pos;
- int label_nestlevel;
- reference** reference_map;
+ int label_nestlevel;
+ reference_map *refmap;
} subject;
-reference* lookup_reference(reference** refmap, chunk *label);
-reference* make_reference(chunk *label, chunk *url, chunk *title);
-
-static unsigned char *clean_url(chunk *url);
-static unsigned char *clean_title(chunk *title);
-static unsigned char *clean_autolink(chunk *url, int is_email);
-
-inline static void chunk_free(chunk *c);
-inline static void chunk_trim(chunk *c);
-
-inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(strbuf *buf);
-inline static chunk chunk_dup(const chunk *ch, int pos, int len);
-
-static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, node_inl ** last);
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
static int subject_find_special_char(subject *subj);
-static void normalize_whitespace(strbuf *s);
-
-extern void free_reference(reference *ref) {
- free(ref->label);
- free(ref->url);
- free(ref->title);
- free(ref);
-}
-
-extern void free_reference_map(reference **refmap) {
- /* free the hash table contents */
- reference *s;
- reference *tmp;
- if (refmap != NULL) {
- HASH_ITER(hh, *refmap, s, tmp) {
- HASH_DEL(*refmap, s);
- free_reference(s);
- }
- free(refmap);
- }
-}
-
-// normalize reference: collapse internal whitespace to single space,
-// remove leading/trailing whitespace, case fold
-static unsigned char *normalize_reference(chunk *ref)
-{
- strbuf normalized = GH_BUF_INIT;
-
- utf8proc_case_fold(&normalized, ref->data, ref->len);
- strbuf_trim(&normalized);
- normalize_whitespace(&normalized);
-
- return strbuf_detach(&normalized);
-}
-
-// Returns reference if refmap contains a reference with matching
-// label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, chunk *label)
-{
- reference *ref = NULL;
- unsigned char *norm = normalize_reference(label);
- if (refmap != NULL) {
- HASH_FIND_STR(*refmap, (char*)norm, ref);
- }
- free(norm);
- return ref;
-}
-
-extern reference* make_reference(chunk *label, chunk *url, chunk *title)
-{
- reference *ref;
- ref = malloc(sizeof(reference));
- ref->label = normalize_reference(label);
- ref->url = clean_url(url);
- ref->title = clean_title(title);
- return ref;
-}
-
-extern void add_reference(reference** refmap, reference* ref)
-{
- reference * t = NULL;
- const char *label = (const char *)ref->label;
-
- HASH_FIND(hh, *refmap, label, strlen(label), t);
-
- if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
- } else {
- free_reference(ref); // we free this now since it won't be in the refmap
- }
-}
-
static unsigned char *bufdup(const unsigned char *buf)
{
unsigned char *new = NULL;
@@ -236,26 +149,26 @@ inline static node_inl* append_inlines(node_inl* a, node_inl* b)
return a;
}
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
{
e->input.data = buffer->ptr;
e->input.len = buffer->size;
e->input.alloc = 0;
e->pos = 0;
e->label_nestlevel = 0;
- e->reference_map = refmap;
+ e->refmap = refmap;
chunk_rtrim(&e->input);
}
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
{
e->input.data = chunk->data;
e->input.len = chunk->len;
e->input.alloc = 0;
e->pos = 0;
e->label_nestlevel = 0;
- e->reference_map = refmap;
+ e->refmap = refmap;
chunk_rtrim(&e->input);
}
@@ -325,33 +238,6 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
return (subj->pos);
}
-// Destructively modify string, collapsing consecutive
-// space and newline characters into a single space.
-static void normalize_whitespace(strbuf *s)
-{
- bool last_char_was_space = false;
- int r, w;
-
- for (r = 0, w = 0; r < s->size; ++r) {
- switch (s->ptr[r]) {
- case ' ':
- case '\n':
- if (last_char_was_space)
- break;
-
- s->ptr[w++] = ' ';
- last_char_was_space = true;
- break;
-
- default:
- s->ptr[w++] = s->ptr[r];
- last_char_was_space = false;
- }
- }
-
- strbuf_truncate(s, w);
-}
-
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
static node_inl* handle_backticks(subject *subj)
@@ -368,7 +254,7 @@ static node_inl* handle_backticks(subject *subj)
strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
strbuf_trim(&buf);
- normalize_whitespace(&buf);
+ strbuf_normalize_whitespace(&buf);
return make_code(chunk_buf_detach(&buf));
}
@@ -575,24 +461,9 @@ static node_inl *make_str_with_entities(chunk *content)
}
}
-// Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(strbuf *buf)
-{
- int r, w;
-
- for (r = 0, w = 0; r < buf->size; ++r) {
- if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
- continue;
-
- buf->ptr[w++] = buf->ptr[r];
- }
-
- strbuf_truncate(buf, w);
-}
-
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url)
+unsigned char *clean_url(chunk *url)
{
strbuf buf = GH_BUF_INIT;
@@ -607,11 +478,11 @@ static unsigned char *clean_url(chunk *url)
houdini_unescape_html_f(&buf, url->data, url->len);
}
- unescape_buffer(&buf);
+ strbuf_unescape(&buf);
return strbuf_detach(&buf);
}
-static unsigned char *clean_autolink(chunk *url, int is_email)
+unsigned char *clean_autolink(chunk *url, int is_email)
{
strbuf buf = GH_BUF_INIT;
@@ -628,7 +499,7 @@ static unsigned char *clean_autolink(chunk *url, int is_email)
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static unsigned char *clean_title(chunk *title)
+unsigned char *clean_title(chunk *title)
{
strbuf buf = GH_BUF_INIT;
unsigned char first, last;
@@ -648,7 +519,7 @@ static unsigned char *clean_title(chunk *title)
houdini_unescape_html_f(&buf, title->data, title->len);
}
- unescape_buffer(&buf);
+ strbuf_unescape(&buf);
return strbuf_detach(&buf);
}
@@ -810,7 +681,7 @@ static node_inl* handle_left_bracket(subject* subj)
} else {
// if we get here, we matched a label but didn't get further:
subj->pos = endlabel;
- lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
append_inlines(lab,
make_str(chunk_literal("]"))));
@@ -834,13 +705,13 @@ static node_inl* handle_left_bracket(subject* subj)
}
// lookup rawlabel in subject->reference_map:
- ref = lookup_reference(subj->reference_map, &reflabel);
+ ref = reference_lookup(subj->refmap, &reflabel);
if (ref != NULL) { // found
lab = parse_chunk_inlines(&rawlabel, NULL);
result = make_ref_link(lab, ref);
} else {
subj->pos = endlabel;
- lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
append_inlines(lab, make_str(chunk_literal("]"))));
}
@@ -887,7 +758,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
return result;
}
-node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
{
subject subj;
subject_from_chunk(&subj, chunk, refmap);
@@ -987,7 +858,7 @@ static int parse_inline(subject* subj, node_inl ** last)
return 1;
}
-extern node_inl* parse_inlines(strbuf *input, reference** refmap)
+extern node_inl* parse_inlines(strbuf *input, reference_map *refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
@@ -1009,7 +880,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(strbuf *input, reference** refmap)
+int parse_reference_inline(strbuf *input, reference_map *refmap)
{
subject subj;
@@ -1019,7 +890,6 @@ extern int parse_reference(strbuf *input, reference** refmap)
int matchlen = 0;
int beforetitle;
- reference *new = NULL;
subject_from_buf(&subj, input, NULL);
@@ -1065,9 +935,7 @@ extern int parse_reference(strbuf *input, reference** refmap)
return 0;
}
// insert reference into refmap
- new = make_reference(&lab, &url, &title);
- add_reference(refmap, new);
-
+ reference_create(refmap, &lab, &url, &title);
return subj.pos;
}
diff --git a/src/references.c b/src/references.c
new file mode 100644
index 0000000..ff64b00
--- /dev/null
+++ b/src/references.c
@@ -0,0 +1,109 @@
+#include "stmd.h"
+#include "utf8.h"
+#include "references.h"
+
+// Hash a NUL-terminated byte string. Each byte is folded in as
+// hash = hash * 65599 + byte, using unsigned (wrapping) arithmetic.
+static unsigned int
+refhash(const unsigned char *link_ref)
+{
+	unsigned int acc = 0;
+	const unsigned char *p;
+
+	for (p = link_ref; *p != '\0'; ++p)
+		acc = acc * 65599u + *p;
+
+	return acc;
+}
+
+// Build the canonical form of a reference label: the label bytes are
+// case-folded into a fresh buffer, which is then trimmed and has its
+// whitespace runs collapsed to single spaces. The detached buffer is
+// returned; callers in this file release it with free().
+static unsigned char *normalize_reference(chunk *ref)
+{
+	strbuf key = GH_BUF_INIT;
+	unsigned char *detached;
+
+	utf8proc_case_fold(&key, ref->data, ref->len);
+	strbuf_trim(&key);
+	strbuf_normalize_whitespace(&key);
+
+	detached = strbuf_detach(&key);
+	return detached;
+}
+
+static void reference_free(reference *ref);
+
+// Link `ref` into its hash bucket unless the map already holds a reference
+// with the same normalized label. The earliest definition of a label wins,
+// as it did with the previous hash table, which discarded later duplicates;
+// a duplicate `ref` is therefore freed here instead of shadowing the
+// existing entry.
+//
+// Returns the reference stored in the map for that label: `ref` itself, or
+// the older entry when `ref` turned out to be a duplicate.
+static reference *add_reference(reference_map *map, reference* ref)
+{
+	reference **bucket = &map->table[ref->hash % REFMAP_SIZE];
+	reference *cur;
+
+	for (cur = *bucket; cur != NULL; cur = cur->next) {
+		// Compare the cached hashes first so strcmp only runs on likely matches.
+		if (cur->hash == ref->hash &&
+		    !strcmp((char *)cur->label, (char *)ref->label)) {
+			reference_free(ref);
+			return cur;
+		}
+	}
+
+	ref->next = *bucket;
+	*bucket = ref;
+	return ref;
+}
+
+// Create a reference from `label`, `url` and `title` and register it in `map`.
+//
+// The label is normalized and hashed; the url and title are cleaned with
+// clean_url() and clean_title(). The map owns the resulting reference.
+//
+// Returns the reference the map holds for the label (an earlier definition
+// if one already existed), or NULL if `map` is NULL, allocation fails, or
+// the label could not be normalized.
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
+{
+	reference *ref;
+
+	if (map == NULL)
+		return NULL;
+
+	ref = malloc(sizeof(*ref));
+	if (ref == NULL)
+		return NULL;
+
+	ref->label = normalize_reference(label);
+	// NOTE(review): strbuf_detach() is not visible here; guard in case it
+	// yields NULL for an empty label, which refhash() would dereference.
+	if (ref->label == NULL) {
+		free(ref);
+		return NULL;
+	}
+
+	ref->hash = refhash(ref->label);
+	ref->url = clean_url(url);
+	ref->title = clean_title(title);
+	ref->next = NULL;
+
+	return add_reference(map, ref);
+}
+
+// Returns reference if refmap contains a reference with matching
+// label, otherwise NULL.
+//
+// `label` is normalized the same way as the stored labels before comparing.
+// A NULL `map` yields NULL. The returned reference stays owned by the map.
+reference* reference_lookup(reference_map *map, chunk *label)
+{
+	reference *ref;
+	unsigned char *norm;
+	unsigned int hash;
+
+	if (map == NULL)
+		return NULL;
+
+	norm = normalize_reference(label);
+	// NOTE(review): strbuf_detach() is not visible here; guard in case it
+	// yields NULL for an empty label rather than dereferencing it below.
+	if (norm == NULL)
+		return NULL;
+
+	hash = refhash(norm);
+
+	// Walk the bucket chain; the cached hash filters out most non-matching
+	// entries before the full string comparison.
+	for (ref = map->table[hash % REFMAP_SIZE]; ref != NULL; ref = ref->next) {
+		if (ref->hash == hash &&
+		    !strcmp((char *)ref->label, (char *)norm))
+			break;
+	}
+
+	free(norm);
+	return ref;
+}
+
+// Release one reference: the three strings it owns first, then the
+// node itself.
+static void reference_free(reference *ref)
+{
+	free(ref->title);
+	free(ref->url);
+	free(ref->label);
+
+	free(ref);
+}
+
+// Free a reference map together with every reference stored in it.
+// Passing NULL is a no-op.
+void reference_map_free(reference_map *map)
+{
+	unsigned int i;
+
+	if (map == NULL)
+		return;
+
+	for (i = 0; i < REFMAP_SIZE; ++i) {
+		reference *ref = map->table[i];
+
+		while (ref) {
+			// Read the successor before the node is released.
+			reference *next = ref->next;
+
+			reference_free(ref);
+			ref = next;
+		}
+	}
+
+	// `table` is an array embedded in the map struct, not a separate
+	// allocation, so the map itself is the only block left to free.
+	free(map);
+}
+
+// Allocate a reference map whose buckets all start out empty.
+// Returns NULL when the allocation fails.
+reference_map *reference_map_new(void)
+{
+	// calloc zero-fills the table and, unlike malloc followed by memset,
+	// never writes through a NULL pointer when the allocation fails.
+	return calloc(1, sizeof(reference_map));
+}
+
diff --git a/src/references.h b/src/references.h
new file mode 100644
index 0000000..78fffe7
--- /dev/null
+++ b/src/references.h
@@ -0,0 +1,27 @@
+// Link reference definitions and the hash map that stores them.
+#ifndef _REFERENCES_H_
+#define _REFERENCES_H_
+
+// Number of hash buckets in a reference_map.
+#define REFMAP_SIZE 16
+
+// One link reference definition, stored as a node of a bucket chain.
+struct reference {
+ // Next reference in the same bucket, or NULL at the end of the chain.
+ struct reference *next;
+ // Normalized label; this is the key that lookups compare against.
+ unsigned char *label;
+ // Link destination, as produced by clean_url().
+ unsigned char *url;
+ // Link title, as produced by clean_title().
+ unsigned char *title;
+ // Hash of `label`; `hash % REFMAP_SIZE` selects the bucket.
+ unsigned int hash;
+};
+
+typedef struct reference reference;
+
+// Fixed-size hash table of references; each slot heads a chain linked
+// through reference.next.
+struct reference_map {
+ reference *table[REFMAP_SIZE];
+};
+
+typedef struct reference_map reference_map;
+
+// Allocates a new map with every bucket empty.
+reference_map *reference_map_new(void);
+// Frees `map` and the references stored in it.
+void reference_map_free(reference_map *map);
+// Normalizes `label` and returns the matching reference in `map`, or NULL
+// when there is none or when `map` is NULL.
+reference* reference_lookup(reference_map *map, chunk *label);
+// Builds a reference from `label`, `url` and `title`, registers it in `map`
+// and returns a pointer to a reference.
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title);
+
+#endif
diff --git a/src/stmd.h b/src/stmd.h
index 21a86b0..4e21e6c 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -5,7 +5,7 @@
#include <stdio.h>
#include "buffer.h"
#include "chunk.h"
-#include "uthash.h"
+#include "references.h"
#define VERSION "0.1"
#define CODE_INDENT 4
@@ -36,17 +36,7 @@ struct node_inl {
typedef struct node_inl node_inl;
-struct reference {
- unsigned char *label;
- unsigned char *url;
- unsigned char *title;
- UT_hash_handle hh; // used by uthash
-};
-
-typedef struct reference reference;
-
// Types for blocks
-
struct ListData {
enum {
bullet,
@@ -104,7 +94,7 @@ struct node_block {
int level;
} header;
struct {
- reference** refmap;
+ reference_map *refmap;
} document;
} as;
@@ -114,14 +104,10 @@ struct node_block {
typedef struct node_block node_block;
-node_inl* parse_inlines(strbuf *input, reference** refmap);
+node_inl* parse_inlines(strbuf *input, reference_map *refmap);
void free_inlines(node_inl* e);
-int parse_reference(strbuf *input, reference** refmap);
-void free_reference(reference *ref);
-void free_reference_map(reference **refmap);
-
-void add_reference(reference** refmap, reference* ref);
+int parse_reference_inline(strbuf *input, reference_map *refmap);
void unescape_buffer(strbuf *buf);
extern node_block* make_document();
@@ -138,4 +124,8 @@ void print_blocks(node_block* blk, int indent);
void blocks_to_html(strbuf *html, node_block *b, bool tight);
void inlines_to_html(strbuf *html, node_inl *b);
+unsigned char *clean_url(chunk *url);
+unsigned char *clean_autolink(chunk *url, int is_email);
+unsigned char *clean_title(chunk *title);
+
#endif
diff --git a/src/utf8.c b/src/utf8.c
index 12d7ba5..c65aec6 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -25,7 +25,7 @@ static const int8_t utf8proc_utf8class[256] = {
static void encode_unknown(strbuf *buf)
{
- static const unsigned char repl[] = {239, 191, 189};
+ static const uint8_t repl[] = {239, 191, 189};
strbuf_put(buf, repl, 3);
}
@@ -52,9 +52,9 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
return length;
}
-void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size)
+void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
{
- static const unsigned char whitespace[] = " ";
+ static const uint8_t whitespace[] = " ";
size_t i = 0, tab = 0;
@@ -132,7 +132,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
void utf8proc_encode_char(int32_t uc, strbuf *buf)
{
- unsigned char dst[4];
+ uint8_t dst[4];
int len = 0;
assert(uc >= 0);
@@ -169,7 +169,7 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
strbuf_put(buf, dst, len);
}
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
{
int32_t c;
diff --git a/src/utf8.h b/src/utf8.h
index 1e4e556..9506b75 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -1,12 +1,13 @@
#ifndef _H_STMD_UTF8_
#define _H_STMD_UTF8_
+#include <stdint.h>
#include "buffer.h"
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len);
void utf8proc_encode_char(int32_t uc, strbuf *buf);
ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst);
ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len);
-void utf8proc_detab(strbuf *dest, const unsigned char *line, size_t size);
+void utf8proc_detab(strbuf *dest, const uint8_t *line, size_t size);
#endif