aboutsummaryrefslogtreecommitdiffstats
path: root/src/include/ipxe/utf8.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/ipxe/utf8.h')
-rw-r--r--src/include/ipxe/utf8.h69
1 files changed, 69 insertions, 0 deletions
diff --git a/src/include/ipxe/utf8.h b/src/include/ipxe/utf8.h
new file mode 100644
index 000000000..299c25511
--- /dev/null
+++ b/src/include/ipxe/utf8.h
@@ -0,0 +1,69 @@
+#ifndef _IPXE_UTF8_H
+#define _IPXE_UTF8_H
+
+/** @file
+ *
+ * UTF-8 Unicode encoding
+ *
+ */
+
+FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
+
+#include <stdint.h>
+
+/** Maximum length of UTF-8 sequence */
+#define UTF8_MAX_LEN 4
+
+/** Minimum legal value for two-byte UTF-8 sequence */
+#define UTF8_MIN_TWO 0x80
+
+/** Minimum legal value for three-byte UTF-8 sequence */
+#define UTF8_MIN_THREE 0x800
+
+/** Minimum legal value for four-byte UTF-8 sequence */
+#define UTF8_MIN_FOUR 0x10000
+
+/** High bit of UTF-8 bytes */
+#define UTF8_HIGH_BIT 0x80
+
+/** Number of data bits in each continuation byte */
+#define UTF8_CONTINUATION_BITS 6
+
+/** Bit mask for data bits in a continuation byte */
+#define UTF8_CONTINUATION_MASK ( ( 1 << UTF8_CONTINUATION_BITS ) - 1 )
+
+/** Non-data bits in a continuation byte */
+#define UTF8_CONTINUATION 0x80
+
+/** Check for a continuation byte
+ *
+ * @v byte UTF-8 byte
+ * @ret is_continuation Byte is a continuation byte
+ */
+#define UTF8_IS_CONTINUATION( byte ) \
+ ( ( (byte) & ~UTF8_CONTINUATION_MASK ) == UTF8_CONTINUATION )
+
+/** Check for an ASCII byte
+ *
+ * @v byte UTF-8 byte
+ * @ret is_ascii Byte is an ASCII byte
+ */
+#define UTF8_IS_ASCII( byte ) ( ! ( (byte) & UTF8_HIGH_BIT ) )
+
+/** Invalid character returned when decoding fails */
+#define UTF8_INVALID 0xfffd
+
+/** A UTF-8 character accumulator */
+struct utf8_accumulator {
+ /** Character in progress */
+ unsigned int character;
+ /** Number of remaining continuation bytes */
+ unsigned int remaining;
+ /** Minimum legal character */
+ unsigned int min;
+};
+
+extern unsigned int utf8_accumulate ( struct utf8_accumulator *utf8,
+ uint8_t byte );
+
+#endif /* _IPXE_UTF8_H */