summary | refs | log | tree | commit | diff
path: root/src/spreed.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/spreed.c')
-rwxr-xr-x src/spreed.c 143
1 files changed, 143 insertions, 0 deletions
diff --git a/src/spreed.c b/src/spreed.c
new file mode 100755
index 0000000..933bec7
--- /dev/null
+++ b/src/spreed.c
@@ -0,0 +1,143 @@
+#if 0
+${CC:-gcc} -s -O2 -std=c99 -Wall -o ${SPREED_INSTALL_DIR:-.}/spreed src/spreed.c
+exit
+#endif
+
+/* TODO
+ * contractions don't work: a word like "don't" is split at the apostrophe
+ * - side effect of breaking words at punctuation in order to separate a+b
+ */
+
+#include <locale.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#define CLR_FG L"\033[31m"
+#define CLR_RESET L"\033[0m"
+
+/* Index of the letter to highlight in a word of len characters; 0 when len is 0. */
+uint8_t get_optimal_recognition_point(size_t len) {
+    static const uint8_t orps[] = {0,0,0,0,1,1,2,2,2,2}; /* indexed by len; entry 0 covers the empty word */
+    return len >= 10 ? 3 : orps[len];
+}
+
+/* Output geometry: columns per line, marker column, longest buffered word. */
+enum { LINE_WIDTH = 48, MARK_COL = 10, MAX_WORD = 37 };
+
+/*
+ * Print one word on its own line. The word is shifted right so that the
+ * character at its optimal recognition point lands in column MARK_COL, that
+ * character is wrapped in CLR_FG/CLR_RESET, and the rest of the line is
+ * filled with spaces up to LINE_WIDTH columns.
+ */
+static void print_word(const wchar_t *word, size_t len)
+{
+    size_t orp = get_optimal_recognition_point(len);
+    size_t next = 0;
+
+    putwchar(L'\r');
+    for (size_t col = 0; col < LINE_WIDTH; ++col) {
+        /* left padding before the word, right padding after it */
+        if (col + orp < MARK_COL || next >= len) {
+            putwchar(L' ');
+            continue;
+        }
+        if (col == MARK_COL) {
+            fwprintf(stdout, L"%ls", CLR_FG);
+        }
+        putwchar(word[next++]);
+        if (col == MARK_COL) {
+            fwprintf(stdout, L"%ls", CLR_RESET);
+        }
+    }
+    putwchar(L'\n');
+}
+
+/*
+ * Read locale-encoded text from stdin and print it one word per line, each
+ * word shifted so its optimal recognition point sits under the "V" marker.
+ *
+ * A word ends at whitespace, at a NUL character, at a switch between
+ * punctuation and non-punctuation, or once it holds MAX_WORD characters.
+ *
+ * Returns 0 at end of input, or 1 when stdin contains a byte sequence that
+ * mbrtowc() rejects in the current locale.
+ */
+int main(int argc, char *argv[])
+{
+    wchar_t word[LINE_WIDTH] = {0};
+    size_t iword = 0;
+    mbstate_t state;
+    int c;
+
+    /* The command line is not used. */
+    (void)argc;
+    (void)argv;
+    /* An all-zero mbstate_t is the initial conversion state. */
+    memset(&state, 0, sizeof state);
+
+    setlocale(LC_ALL, "");
+    fwide(stdout, 1);
+
+    /* Marker line: put the "V" in the column of the highlighted letter. */
+    fwprintf(stdout, L"%*lsV\n", MARK_COL, L"");
+
+    while ((c = getchar()) != EOF) {
+        char byte = (char)c;
+        wchar_t wc;
+        size_t res;
+
+        /*
+         * Decode one byte at a time. `state` remembers partial sequences
+         * between calls, so bytes are never submitted twice and no call is
+         * made after an error has left the conversion state undefined.
+         */
+        res = mbrtowc(&wc, &byte, 1, &state);
+        if (res == (size_t)-2) {
+            // incomplete wchar_t, keep reading bytes
+            continue;
+        }
+        if (res == (size_t)-1) {
+            fwprintf(stderr, L"\nmbrtowc encoding error\n");
+            return 1;
+        }
+
+        /* Separators (NUL or whitespace) end the word and are not stored. */
+        if (res == 0 || iswspace(wc)) {
+            if (iword > 0) {
+                print_word(word, iword);
+                iword = 0;
+            }
+            continue;
+        }
+
+        // not a printable character at the start of a word, swallow
+        if (iword == 0 && !iswprint(wc)) {
+            continue;
+        }
+
+        /*
+         * Start a new word when the buffer is full or the text switches
+         * between punctuation and non-punctuation. iswpunct() only promises
+         * zero or non-zero, so normalise with `!` before comparing.
+         */
+        if (iword > 0 &&
+            (iword == MAX_WORD || !iswpunct(word[iword - 1]) != !iswpunct(wc))) {
+            print_word(word, iword);
+            iword = 0;
+        }
+
+        word[iword++] = wc;
+    }
+
+    /* Flush the word that was still pending when input ended. */
+    if (iword > 0) {
+        print_word(word, iword);
+    }
+    putwchar(L'\n');
+
+    return 0;
+}