diff options
Diffstat (limited to 'src/include/ipxe/utf8.h')
-rw-r--r-- | src/include/ipxe/utf8.h | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/src/include/ipxe/utf8.h b/src/include/ipxe/utf8.h new file mode 100644 index 000000000..299c25511 --- /dev/null +++ b/src/include/ipxe/utf8.h @@ -0,0 +1,69 @@ +#ifndef _IPXE_UTF8_H +#define _IPXE_UTF8_H + +/** @file + * + * UTF-8 Unicode encoding + * + */ + +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); + +#include <stdint.h> + +/** Maximum length of UTF-8 sequence */ +#define UTF8_MAX_LEN 4 + +/** Minimum legal value for two-byte UTF-8 sequence */ +#define UTF8_MIN_TWO 0x80 + +/** Minimum legal value for three-byte UTF-8 sequence */ +#define UTF8_MIN_THREE 0x800 + +/** Minimum legal value for four-byte UTF-8 sequence */ +#define UTF8_MIN_FOUR 0x10000 + +/** High bit of UTF-8 bytes */ +#define UTF8_HIGH_BIT 0x80 + +/** Number of data bits in each continuation byte */ +#define UTF8_CONTINUATION_BITS 6 + +/** Bit mask for data bits in a continuation byte */ +#define UTF8_CONTINUATION_MASK ( ( 1 << UTF8_CONTINUATION_BITS ) - 1 ) + +/** Non-data bits in a continuation byte */ +#define UTF8_CONTINUATION 0x80 + +/** Check for a continuation byte + * + * @v byte UTF-8 byte + * @ret is_continuation Byte is a continuation byte + */ +#define UTF8_IS_CONTINUATION( byte ) \ + ( ( (byte) & ~UTF8_CONTINUATION_MASK ) == UTF8_CONTINUATION ) + +/** Check for an ASCII byte + * + * @v byte UTF-8 byte + * @ret is_ascii Byte is an ASCII byte + */ +#define UTF8_IS_ASCII( byte ) ( ! ( (byte) & UTF8_HIGH_BIT ) ) + +/** Invalid character returned when decoding fails */ +#define UTF8_INVALID 0xfffd + +/** A UTF-8 character accumulator */ +struct utf8_accumulator { + /** Character in progress */ + unsigned int character; + /** Number of remaining continuation bytes */ + unsigned int remaining; + /** Minimum legal character */ + unsigned int min; +}; + +extern unsigned int utf8_accumulate ( struct utf8_accumulator *utf8, + uint8_t byte ); + +#endif /* _IPXE_UTF8_H */ |