blob: 299c255110ff27ec90da03bffd7360d5e8e349da (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
#ifndef _IPXE_UTF8_H
#define _IPXE_UTF8_H
/** @file
*
* UTF-8 Unicode encoding
*
*/
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
#include <stdint.h>
/** Longest possible UTF-8 sequence, in bytes */
#define UTF8_MAX_LEN 4

/** Smallest value that may legally use a two-byte sequence (2^7) */
#define UTF8_MIN_TWO ( 1 << 7 )

/** Smallest value that may legally use a three-byte sequence (2^11) */
#define UTF8_MIN_THREE ( 1 << 11 )

/** Smallest value that may legally use a four-byte sequence (2^16) */
#define UTF8_MIN_FOUR ( 1 << 16 )

/** Most significant bit of a UTF-8 byte */
#define UTF8_HIGH_BIT ( 1 << 7 )
/** Number of data bits in each continuation byte */
#define UTF8_CONTINUATION_BITS 6

/** Bit mask for data bits in a continuation byte */
#define UTF8_CONTINUATION_MASK ( ( 1 << UTF8_CONTINUATION_BITS ) - 1 )

/** Non-data bits in a continuation byte */
#define UTF8_CONTINUATION 0x80

/** Check for a continuation byte
 *
 * The argument is narrowed to uint8_t before the non-data bits are
 * tested.  Without this, a negative (signed) char argument would be
 * sign-extended by integer promotion, the bits above bit 7 would
 * survive the mask, and the result could never compare equal to
 * UTF8_CONTINUATION.
 *
 * @v byte		UTF-8 byte (only the low eight bits are examined)
 * @ret is_continuation	Byte is a continuation byte
 */
#define UTF8_IS_CONTINUATION( byte ) \
	( ( ( ( uint8_t ) (byte) ) & ~UTF8_CONTINUATION_MASK ) == \
	  UTF8_CONTINUATION )
/** Test whether a byte is plain ASCII
 *
 * A byte counts as ASCII when UTF8_HIGH_BIT is clear in it.
 *
 * @v byte		UTF-8 byte
 * @ret is_ascii	Byte is an ASCII byte
 */
#define UTF8_IS_ASCII( byte ) ( ( (byte) & UTF8_HIGH_BIT ) == 0 )
/** Invalid character returned when decoding fails
 *
 * The value 0xfffd is the code point of the Unicode replacement
 * character (U+FFFD).
 */
#define UTF8_INVALID 0xfffd
/**
 * UTF-8 character accumulator
 *
 * Carries the state of a character that is still being assembled.
 */
struct utf8_accumulator {
	/** Character value assembled so far */
	unsigned int character;
	/** Count of continuation bytes still outstanding */
	unsigned int remaining;
	/** Lowest legal character value */
	unsigned int min;
};
/**
 * Pass one UTF-8 byte to a character accumulator
 *
 * Only the declaration lives in this header; the definition is
 * provided elsewhere.
 *
 * @v utf8		UTF-8 accumulator
 * @v byte		UTF-8 byte
 * @ret character	NOTE(review): presumably the decoded character,
 *			with UTF8_INVALID reported on a decoding
 *			failure - confirm against the implementation
 */
extern unsigned int
utf8_accumulate ( struct utf8_accumulator *utf8, uint8_t byte );
#endif /* _IPXE_UTF8_H */
|