summaryrefslogtreecommitdiff
path: root/src/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utf8.c')
-rw-r--r--src/utf8.c66
1 files changed, 66 insertions, 0 deletions
diff --git a/src/utf8.c b/src/utf8.c
new file mode 100644
index 0000000..177f76d
--- /dev/null
+++ b/src/utf8.c
@@ -0,0 +1,66 @@
+#include "aplos.h"
+
+/*
+ * Report how many bytes the UTF-8 sequence starting at p claims to occupy,
+ * judged only from the high bits of its first byte; no other byte is read.
+ *
+ * Returns 2, 3 or 4 for the corresponding lead-byte patterns and 1 for
+ * every other byte value (ASCII, continuation bytes, anything else).
+ */
+int
+utf8_char_length(const char8_t *p)
+{
+	const uint8_t lead = *p;
+
+	if((lead & 0xE0) == 0xC0)	/* 110xxxxx */
+		return 2;
+	if((lead & 0xF0) == 0xE0)	/* 1110xxxx */
+		return 3;
+	if((lead & 0xF8) == 0xF0)	/* 11110xxx */
+		return 4;
+	return 1;
+}
+
+/*
+ * Compare at most n characters of the NUL-terminated UTF-8 strings a and b,
+ * ordering them by the values utf8_value() yields for each character.
+ *
+ * Returns -1 if a sorts before b, 1 if it sorts after, and 0 if the first
+ * n characters are equal or both strings end together before n is reached.
+ * When one string ends first, the shorter one sorts first.
+ */
+int
+utf8_cmp_n(const char8_t *a, const char8_t *b, uint64_t n)
+{
+	uint32_t aval, bval;
+	int step;
+
+	while(*a && *b && n){
+		aval = utf8_value(a);
+		bval = utf8_value(b);
+
+		if(aval < bval)
+			return -1;
+		if(aval > bval)
+			return 1;
+
+		/*
+		 * Advance one byte at a time and stop at a NUL: the length
+		 * comes from the lead byte alone, so a sequence cut short by
+		 * the end of the string must not carry the pointer past the
+		 * terminator.
+		 */
+		for(step = utf8_char_length(a); step > 0 && *a; step--)
+			a++;
+		for(step = utf8_char_length(b); step > 0 && *b; step--)
+			b++;
+		n--;
+	}
+
+	/* Character budget used up, or both strings ended together. */
+	if((n == 0) || (*a == *b))
+		return 0;
+	/* Exactly one string has bytes left; that one is the greater. */
+	return *a ? 1 : -1;
+}
+
+/*
+ * Decode the UTF-8 sequence starting at p and return its value.
+ *
+ * The sequence length is taken from utf8_char_length(p).  A byte that is
+ * not a 2-, 3- or 4-byte lead is returned unchanged.  For a multi-byte
+ * sequence, every trailing byte must have the form 10xxxxxx; if one does
+ * not (including a NUL that ends the string early), decoding stops there
+ * and U+FFFD (REPLACEMENT CHARACTER) is returned, so bytes after the
+ * offending one are never read.
+ */
+uint32_t
+utf8_value(const char8_t *p)
+{
+	uint32_t v;
+	int n = utf8_char_length(p);
+
+	/* Keep only the payload bits of the lead byte. */
+	switch(n){
+	case 2:
+		v = *p & 0b00011111;
+		break;
+	case 3:
+		v = *p & 0b00001111;
+		break;
+	case 4:
+		v = *p & 0b00000111;
+		break;
+	default:
+		return *p;
+	}
+
+	while(--n){
+		p++;
+		/* Truncated or malformed sequence: do not read any further. */
+		if((*p & 0b11000000) != 0b10000000)
+			return 0xFFFD;
+		v = (v << 6) | (*p & 0b00111111);
+	}
+	return v;
+}