diff options
Diffstat (limited to 'src/detab.c')
-rw-r--r-- | src/detab.c | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/src/detab.c b/src/detab.c new file mode 100644 index 0000000..e03fcf7 --- /dev/null +++ b/src/detab.c @@ -0,0 +1,48 @@ +#include "bstrlib.h" + +// UTF-8 aware detab: assumes s has no newlines, or only a final newline. +// Return 0 on success, BSTR_ERR if invalid UTF-8. +extern int bdetab(bstring s, int utf8) +{ + unsigned char c; + int pos = 0; // a count of characters + int byte = 0; // a count of bytes + int high_chars_to_skip = 0; + int numspaces = 0; + while ((c = bchar(s, byte))) { + if (utf8 && high_chars_to_skip > 0) { + if (c >= 0x80) { + high_chars_to_skip--; + byte++; + } else { + return BSTR_ERR; // invalid utf-8 + } + } else if (c == '\t') { + bdelete(s, byte, 1); // delete tab character + numspaces = 4 - (pos % 4); + binsertch(s, byte, numspaces, ' '); + byte += numspaces; + pos += numspaces; + } else if (c <= 0x80 || !utf8) { + byte++; + pos++; + } else { // multibyte utf8 sequences + if (c >> 1 == 0176) { + high_chars_to_skip = 5; + } else if (c >> 2 == 076) { + high_chars_to_skip = 4; + } else if (c >> 3 == 036) { + high_chars_to_skip = 3; + } else if (c >> 4 == 016) { + high_chars_to_skip = 2; + } else if (c >> 5 == 06) { + high_chars_to_skip = 1; + } else { + return BSTR_ERR; // invalid utf-8 + } + pos++; + byte++; + } + } + return 0; +} |