diff options
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- | src/utf8.c | 66 |
1 files changed, 66 insertions, 0 deletions
/* src/utf8.c - UTF-8 length, decoding and comparison helpers. */

/*
 * aplos.h is this file's only include, so char8_t and the fixed-width
 * integer types are expected to come from it.  The guard only lets the file
 * be built stand-alone (for example by a unit test that supplies those types
 * itself) when the header is not on the include path.
 */
#if defined(__has_include)
#  if __has_include("aplos.h")
#    include "aplos.h"
#  endif
#else
#  include "aplos.h"
#endif

/*
 * Return the length in bytes (1-4) that the lead byte at p announces.
 *
 * Only the first byte is inspected.  Bytes that are not a valid 2-, 3- or
 * 4-byte lead (ASCII, stray continuation bytes, 0xF8 and above) report 1.
 */
int
utf8_char_length(const char8_t *p)
{
	uint8_t lead = *p;

	if((lead & 0xE0) == 0xC0)	/* 110xxxxx */
		return 2;
	if((lead & 0xF0) == 0xE0)	/* 1110xxxx */
		return 3;
	if((lead & 0xF8) == 0xF0)	/* 11110xxx */
		return 4;
	return 1;
}

/*
 * Decode the sequence starting at p into *value and return the number of
 * bytes actually consumed (always at least 1).
 *
 * Decoding stops at the first byte that is not a continuation byte
 * (10xxxxxx).  A NUL terminator is such a byte, so a sequence cut short at
 * the end of a string is never read through or past its terminator; *value
 * then holds the bits gathered up to that point.
 */
static int
utf8_decode_step(const char8_t *p, uint32_t *value)
{
	/* Payload mask for the lead byte, indexed by the announced length. */
	static const uint8_t lead_mask[] = { 0x00, 0xFF, 0x1F, 0x0F, 0x07 };
	int want = utf8_char_length(p);
	int used = 1;
	uint32_t v = *p & lead_mask[want];

	while(used < want && (p[used] & 0xC0) == 0x80){
		v = (v << 6) | (p[used] & 0x3F);
		used++;
	}
	*value = v;
	return used;
}

/*
 * Compare at most n characters (not bytes) of the NUL-terminated UTF-8
 * strings a and b by decoded value.
 *
 * Returns -1 if a sorts before b, 1 if it sorts after, and 0 if the first n
 * characters are equal or both strings end together.  A string that ends
 * first sorts before the longer one.
 */
int
utf8_cmp_n(const char8_t *a, const char8_t *b, uint64_t n)
{
	uint32_t aval, bval;

	for(; *a && *b && n; n--){
		/* Advance by the bytes really consumed so a truncated
		 * sequence cannot carry either pointer over its NUL. */
		a += utf8_decode_step(a, &aval);
		b += utf8_decode_step(b, &bval);
		if(aval != bval)
			return aval < bval ? -1 : 1;
	}
	if(n == 0 || *a == *b)
		return 0;
	return *a ? 1 : -1;
}

/*
 * Return the value decoded from the UTF-8 sequence starting at p.
 *
 * A byte with an announced length of 1 is returned unchanged.  For a
 * multi-byte sequence that is cut short, only the bytes that are really
 * continuation bytes contribute to the result.
 */
uint32_t
utf8_value(const char8_t *p)
{
	uint32_t v;

	utf8_decode_step(p, &v);
	return v;
}