From 975e972e65fc14c6e383acd5a34e6006bd557c2a Mon Sep 17 00:00:00 2001 From: Jonathan Bradley Date: Fri, 24 Apr 2026 08:35:41 -0400 Subject: cleanup, add more test cases --- src/spreed.c | 47 +++++++++++++++++++---------------------------- test.txt | 4 ++++ 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/src/spreed.c b/src/spreed.c index 933bec7..623fe4f 100755 --- a/src/spreed.c +++ b/src/spreed.c @@ -3,11 +3,6 @@ ${CC:-gcc} -s -O2 -std=c99 -Wall -o ${SPREED_INSTALL_DIR:-.}/spreed src/spreed.c exit #endif -/* TODO - * contractions don't work - * - result of trying to separate a+b - */ - #include #include #include @@ -18,24 +13,25 @@ exit #define CLR_FG L"\033[31m" #define CLR_RESET L"\033[0m" -uint8_t get_optimal_recognition_point(size_t len) { - static uint8_t orps[9] = {0,0,0,1,1,2,2,2,2}; +const uint8_t orps[9] = {0,0,0,1,1,2,2,2,2}; + +inline uint8_t get_optimal_recognition_point(size_t len) { if (len >= 10) return 3; return orps[len-1]; } int main(int argc, char *argv[]) { - wchar_t word[48]; + wchar_t word[37]; wchar_t wc; - size_t res; int c; - char chararr[4] = {0}; - uint8_t u, uu; - size_t ichar = 0, iword = 0; - uint8_t b_word_break = 0; + char chararr[4]; + uint8_t u, uu, ichar, iword, b_word_break; - memset(word, 0, 48 * sizeof(wchar_t)); + ichar = 0; + iword = 0; + b_word_break = 0; + memset(word, 0, 37 * sizeof(wchar_t)); memset(chararr, 0, sizeof(chararr)); setlocale(LC_ALL, ""); @@ -51,8 +47,7 @@ int main(int argc, char *argv[]) } chararr[ichar++] = (char)c; - res = mbrtowc(&wc, chararr, 4, NULL); - switch(res) { + switch(mbrtowc(&wc, chararr, 4, NULL)) { case 0: // null char if (iword == 0 && ichar == 1) { @@ -60,7 +55,6 @@ int main(int argc, char *argv[]) memset(chararr, 0, 4 * sizeof(char)); continue; } else { - // fwprintf(stderr, L"cowabunga: %X %X, %lX\n", ichar, c, wc); b_word_break = 1; goto END_OF_WORD; } @@ -76,31 +70,27 @@ int main(int argc, char *argv[]) continue; } - ichar = 0; - memset(chararr, 0, 4 * sizeof(char)); - // not a printable character, swallow if (!iswprint(wc) && iword == 0) { + ichar = 0; + memset(chararr, 0, 4 * sizeof(char)); continue; } if (iswspace(wc)) { if (iword == 0) { + ichar = 0; + memset(chararr, 0, 4 * sizeof(char)); continue; } b_word_break = 1; } - if (iword > 0) { - if (iswpunct(word[iword-1]) != iswpunct(wc)) { - b_word_break = 1; - } - } - END_OF_WORD: ichar = 0; + memset(chararr, 0, 4 * sizeof(char)); - if (b_word_break != 0 || iword == 37) { + if (b_word_break != 0 || iword >= 37) { if (iword == 0) { fwprintf(stderr, L"\n0 length word\n"); return 2; @@ -128,7 +118,7 @@ END_OF_WORD: putwchar('\n'); iword = 0; - memset(word, 0, 48 * sizeof(wchar_t)); + memset(word, 0, 37 * sizeof(wchar_t)); } ichar = 0; @@ -137,6 +127,7 @@ END_OF_WORD: } } while (c != EOF); + putwchar('\n'); return 0; diff --git a/test.txt b/test.txt index 1b9def7..0d07c2b 100644 --- a/test.txt +++ b/test.txt @@ -12,5 +12,9 @@ this ain't workin 001122334455 alpha10numeric +01234567891123456789212345678931234567894123456789 + 😐 (1+1)==2 + +This is a complete sentence. -- cgit v1.2.3