summaryrefslogtreecommitdiff
path: root/src/utf8.c
blob: 177f76d74890651bb9a1ceb0f7a72d9e33a6b361 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include "aplos.h"

/*
 * Return the number of bytes in the UTF-8 sequence announced by the
 * byte at p.
 *
 * Only the first byte is inspected: lead bytes of the form 110xxxxx,
 * 1110xxxx and 11110xxx yield 2, 3 and 4 respectively.  Every other byte
 * value (ASCII, continuation bytes, and bytes matching no lead pattern)
 * yields 1.
 */
int
utf8_char_length(const char8_t *p)
{
	const uint8_t lead = *p;

	if((lead & 0xE0) == 0xC0)	/* 110xxxxx */
		return 2;
	else if((lead & 0xF0) == 0xE0)	/* 1110xxxx */
		return 3;
	else if((lead & 0xF8) == 0xF0)	/* 11110xxx */
		return 4;

	return 1;
}

/*
 * Compare at most n characters (decoded sequences, not bytes) of the
 * NUL-terminated UTF-8 strings a and b.
 *
 * Characters are compared by the value utf8_value() returns for them.
 * Returns -1 if a orders before b, 1 if a orders after b, and 0 if no
 * difference was found within the first n characters.  When one string
 * ends before a difference is found (and before n characters have been
 * compared), the shorter string orders first.
 */
int
utf8_cmp_n(const char8_t *a, const char8_t *b, uint64_t n)
{
	uint32_t aval, bval;
	int len;

	while(*a && *b && n){
		aval = utf8_value(a);
		bval = utf8_value(b);

		if(aval < bval)
			return -1;
		if(aval > bval)
			return 1;

		/*
		 * Step over the character one byte at a time and stop at the
		 * terminator.  A lead byte may announce more bytes than the
		 * string actually holds (truncated sequence); jumping by the
		 * announced length would move the pointer past the NUL and
		 * make the loop condition read outside the string.
		 */
		for(len = utf8_char_length(a); len > 0 && *a; len--)
			a++;
		for(len = utf8_char_length(b); len > 0 && *b; len--)
			b++;
		n--;
	}

	/* Either n characters matched, or both strings ended together. */
	if((n == 0) || (*a == *b))
		return 0;

	/* Exactly one string has ended; the one with bytes left is greater. */
	if(*a)
		return 1;
	else
		return -1;
}

/*
 * Decode the UTF-8 sequence starting at p and return its value.
 *
 * The sequence length is taken from the lead byte via utf8_char_length().
 * The payload bits of the lead byte form the high bits of the result and
 * each following byte contributes its low six bits.
 *
 * p is treated as pointing into a NUL-terminated string: if the
 * terminator is reached before the announced number of bytes has been
 * consumed (a truncated sequence), decoding stops there and the bits
 * gathered so far are returned, so no byte beyond the terminator is read.
 * Following bytes are not otherwise checked for the 10xxxxxx form.
 */
uint32_t
utf8_value(const char8_t *p)
{
	uint32_t v = 0;
	int n = utf8_char_length(p);

	/* Strip the length marker bits from the lead byte. */
	switch(n){
	case 1:
		v = *p;
		break;
	case 2:
		v = *p & 0b00011111;
		break;
	case 3:
		v = *p & 0b00001111;
		break;
	case 4:
		v = *p & 0b00000111;
		break;
	}

	/* Fold in the low six bits of each of the n-1 trailing bytes. */
	while(--n){
		p++;
		if(*p == 0)
			break;	/* truncated sequence: do not read past the NUL */
		v <<= 6;
		v |= *p & 0b00111111;
	}
	return v;
}