diff options
Diffstat (limited to 'src/spreed.c')
| -rwxr-xr-x | src/spreed.c | 143 |
1 files changed, 143 insertions, 0 deletions
#if 0
${CC:-gcc} -s -O2 -std=c99 -Wall -o ${SPREED_INSTALL_DIR:-.}/spreed src/spreed.c
exit
#endif

/*
 * spreed - read text from stdin and print it one word per line, each word
 * positioned so that one chosen character always lands in the same terminal
 * column, where it is drawn in colour.
 *
 * The "#if 0" block above holds a compiler command line followed by "exit";
 * the C preprocessor skips it. NOTE(review): this looks like a self-building
 * trick for running the file through a shell - confirm before moving it, as
 * it would have to stay at the very top of the file.
 */

/* TODO
 * contractions don't work
 * - result of trying to separate a+b
 */

#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>

#define CLR_FG L"\033[31m"
#define CLR_RESET L"\033[0m"

enum {
    WORD_CAPACITY = 48, /* size of the word buffer, in wide characters */
    LINE_WIDTH    = 48, /* columns written for every rendered word */
    MAX_WORD_LEN  = 37, /* a word reaching this length is flushed early */
    FOCUS_COLUMN  = 10  /* zero-based column of the highlighted character */
};

/*
 * Return the zero-based index of the character in a word of `len`
 * characters that is placed on the focus column.
 *
 * Lengths 1..9 are looked up in a table; lengths of 10 and above map to 3.
 * A length of 0 has no characters to point at and yields 0 instead of
 * indexing the table at (size_t)-1.
 */
uint8_t get_optimal_recognition_point(size_t len) {
    static const uint8_t orps[9] = {0,0,0,1,1,2,2,2,2};

    if (len == 0) return 0;
    if (len >= 10) return 3;
    return orps[len-1];
}

/*
 * Write one word as a fixed-width line on stdout: a carriage return,
 * LINE_WIDTH columns, then a newline. The word is left-padded with spaces
 * so that the character selected by get_optimal_recognition_point() sits on
 * FOCUS_COLUMN, wrapped in CLR_FG / CLR_RESET; the rest of the line is
 * filled with spaces.
 */
static void render_word(const wchar_t *word, size_t len)
{
    size_t pad = FOCUS_COLUMN - get_optimal_recognition_point(len);
    size_t next = 0;

    putwchar(L'\r');
    for (size_t col = 0; col < LINE_WIDTH; ++col) {
        if (col < pad || next >= len) {
            putwchar(L' ');
            continue;
        }
        if (col == FOCUS_COLUMN) {
            fwprintf(stdout, L"%ls", CLR_FG);
        }
        putwchar(word[next++]);
        if (col == FOCUS_COLUMN) {
            fwprintf(stdout, L"%ls", CLR_RESET);
        }
    }
    putwchar(L'\n');
}

/*
 * Read bytes from stdin, decode them in the locale's multibyte encoding,
 * split the text into words and render each word on its own line.
 *
 * A word ends at whitespace, at a NUL character, at a change between
 * punctuation and non-punctuation, at end of input, or once it reaches
 * MAX_WORD_LEN characters.
 *
 * Returns 0 on success, 1 on a multibyte encoding error, 2 if an empty
 * word would have to be rendered.
 */
int main(int argc, char *argv[])
{
    wchar_t word[WORD_CAPACITY] = {0};
    size_t word_len = 0;
    mbstate_t decode_state;

    (void)argc;
    (void)argv;

    /* An all-zero mbstate_t is the initial conversion state. */
    memset(&decode_state, 0, sizeof decode_state);

    setlocale(LC_ALL, "");
    fwide(stdout, 1);

    /* NOTE(review): this marker presumably should sit above FOCUS_COLUMN;
     * the padding may have been collapsed when the file was extracted -
     * confirm against the repository. */
    fwprintf(stdout, L" V\n");

    for (;;) {
        wchar_t wc = L'\0';
        int word_break = 0;
        int c = getchar();
        int at_eof = (c == EOF);

        if (at_eof) {
            /* Flush whatever is still pending. */
            word_break = word_len > 0;
        } else {
            /* Feed one byte at a time; decode_state carries partial
             * sequences between calls, so no fixed-size byte buffer (and no
             * assumption about the maximum sequence length) is needed. */
            char byte = (char)c;
            size_t res = mbrtowc(&wc, &byte, 1, &decode_state);

            if (res == (size_t)-2) {
                // incomplete character, keep reading bytes
                continue;
            }
            if (res == (size_t)-1) {
                // the conversion state is unspecified after an encoding
                // error, so stop here instead of retrying
                fwprintf(stderr, L"\nmbrtowc encoding error\n");
                return 1;
            }

            if (res == 0) {
                // null char: swallow between words, otherwise end the word
                if (word_len == 0) {
                    continue;
                }
                word_break = 1;
            } else {
                // not a printable character, swallow
                if (!iswprint(wc) && word_len == 0) {
                    continue;
                }

                if (iswspace(wc)) {
                    if (word_len == 0) {
                        continue;
                    }
                    word_break = 1;
                }

                // iswpunct() only promises zero / non-zero, so normalise
                // both sides before comparing the two classes
                if (word_len > 0 &&
                    (!iswpunct(word[word_len-1]) != !iswpunct(wc))) {
                    word_break = 1;
                }
            }
        }

        if (word_break || word_len == MAX_WORD_LEN) {
            if (word_len == 0) {
                fwprintf(stderr, L"\n0 length word\n");
                return 2;
            }
            render_word(word, word_len);

            word_len = 0;
            memset(word, 0, sizeof word);
        }

        if (at_eof) {
            break;
        }

        // Separators (whitespace, NUL) are never stored; any other
        // character that ended the previous word starts the next one.
        if (wc != L'\0' && !iswspace(wc)) {
            word[word_len++] = wc;
        }
    }
    putwchar(L'\n');

    return 0;
}
