summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xsrc/spreed.c47
-rw-r--r--test.txt4
2 files changed, 23 insertions, 28 deletions
diff --git a/src/spreed.c b/src/spreed.c
index 933bec7..623fe4f 100755
--- a/src/spreed.c
+++ b/src/spreed.c
@@ -3,11 +3,6 @@ ${CC:-gcc} -s -O2 -std=c99 -Wall -o ${SPREED_INSTALL_DIR:-.}/spreed src/spreed.c
exit
#endif
-/* TODO
- * contractions don't work
- * - result of trying to separate a+b
- */
-
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
@@ -18,24 +13,25 @@ exit
#define CLR_FG L"\033[31m"
#define CLR_RESET L"\033[0m"
-uint8_t get_optimal_recognition_point(size_t len) {
- static uint8_t orps[9] = {0,0,0,1,1,2,2,2,2};
+const uint8_t orps[9] = {0,0,0,1,1,2,2,2,2};
+
+inline uint8_t get_optimal_recognition_point(size_t len) {
if (len >= 10) return 3;
return orps[len-1];
}
int main(int argc, char *argv[])
{
- wchar_t word[48];
+ wchar_t word[37];
wchar_t wc;
- size_t res;
int c;
- char chararr[4] = {0};
- uint8_t u, uu;
- size_t ichar = 0, iword = 0;
- uint8_t b_word_break = 0;
+ char chararr[4];
+ uint8_t u, uu, ichar, iword, b_word_break;
- memset(word, 0, 48 * sizeof(wchar_t));
+ ichar = 0;
+ iword = 0;
+ b_word_break = 0;
+ memset(word, 0, 37 * sizeof(wchar_t));
memset(chararr, 0, sizeof(chararr));
setlocale(LC_ALL, "");
@@ -51,8 +47,7 @@ int main(int argc, char *argv[])
}
chararr[ichar++] = (char)c;
- res = mbrtowc(&wc, chararr, 4, NULL);
- switch(res) {
+ switch(mbrtowc(&wc, chararr, 4, NULL)) {
case 0:
// null char
if (iword == 0 && ichar == 1) {
@@ -60,7 +55,6 @@ int main(int argc, char *argv[])
memset(chararr, 0, 4 * sizeof(char));
continue;
} else {
- // fwprintf(stderr, L"cowabunga: %X %X, %lX\n", ichar, c, wc);
b_word_break = 1;
goto END_OF_WORD;
}
@@ -76,31 +70,27 @@ int main(int argc, char *argv[])
continue;
}
- ichar = 0;
- memset(chararr, 0, 4 * sizeof(char));
-
// not a printable character, swallow
if (!iswprint(wc) && iword == 0) {
+ ichar = 0;
+ memset(chararr, 0, 4 * sizeof(char));
continue;
}
if (iswspace(wc)) {
if (iword == 0) {
+ ichar = 0;
+ memset(chararr, 0, 4 * sizeof(char));
continue;
}
b_word_break = 1;
}
- if (iword > 0) {
- if (iswpunct(word[iword-1]) != iswpunct(wc)) {
- b_word_break = 1;
- }
- }
-
END_OF_WORD:
ichar = 0;
+ memset(chararr, 0, 4 * sizeof(char));
- if (b_word_break != 0 || iword == 37) {
+ if (b_word_break != 0 || iword >= 37) {
if (iword == 0) {
fwprintf(stderr, L"\n0 length word\n");
return 2;
@@ -128,7 +118,7 @@ END_OF_WORD:
putwchar('\n');
iword = 0;
- memset(word, 0, 48 * sizeof(wchar_t));
+ memset(word, 0, 37 * sizeof(wchar_t));
}
ichar = 0;
@@ -137,6 +127,7 @@ END_OF_WORD:
}
} while (c != EOF);
+
putwchar('\n');
return 0;
diff --git a/test.txt b/test.txt
index 1b9def7..0d07c2b 100644
--- a/test.txt
+++ b/test.txt
@@ -12,5 +12,9 @@ this ain't workin
001122334455
alpha10numeric
+01234567891123456789212345678931234567894123456789
+
😐
(1+1)==2
+
+This is a complete sentence.