diff options
Diffstat (limited to 'src/tests/utf8_test.c')
-rw-r--r-- | src/tests/utf8_test.c | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/src/tests/utf8_test.c b/src/tests/utf8_test.c new file mode 100644 index 000000000..1996915cb --- /dev/null +++ b/src/tests/utf8_test.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2022 Michael Brown <mbrown@fensystems.co.uk>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * You can also choose to distribute this program under the terms of + * the Unmodified Binary Distribution Licence (as given in the file + * COPYING.UBDL), provided that you have satisfied its requirements. + */ + +FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); + +/** @file + * + * UTF-8 Unicode encoding tests + * + */ + +/* Forcibly enable assertions */ +#undef NDEBUG + +#include <string.h> +#include <ipxe/utf8.h> +#include <ipxe/test.h> + +/** A UTF-8 accumulation test */ +struct utf8_accumulate_test { + /** UTF-8 byte string */ + const char *bytes; + /** Expected character sequence */ + const unsigned int *expected; + /** Length */ + size_t len; +}; + +/** Define inline data */ +#define DATA(...) { __VA_ARGS__ } + +/** Define a UTF-8 accumulation test */ +#define UTF8_ACCUMULATE( name, BYTES, EXPECTED ) \ + static const char name ## _bytes[] = BYTES; \ + static const unsigned int name ## _expected[] = EXPECTED; \ + static struct utf8_accumulate_test name = { \ + .bytes = name ## _bytes, \ + .expected = name ## _expected, \ + .len = ( sizeof ( name ## _expected ) / \ + sizeof ( name ## _expected[0] ) ), \ + }; + +/** Basic ASCII test */ +UTF8_ACCUMULATE ( ascii, "Hello world!", + DATA ( 'H', 'e', 'l', 'l', 'o', ' ', + 'w', 'o', 'r', 'l', 'd', '!' ) ); + +/** Multi-byte character test */ +UTF8_ACCUMULATE ( multibyte, "Héllô wörld 🥳", + DATA ( 'H', 0, L'é', 'l', 'l', 0, L'ô', ' ', + 'w', 0, L'ö', 'r', 'l', 'd', ' ', + 0, 0, 0, 0x1f973 ) ); + +/** Stray continuation byte test */ +UTF8_ACCUMULATE ( stray_continuation, + DATA ( 'a', 0x81, 'b', 0xc3, 0x82, 0x83, 'c' ), + DATA ( 'a', 0xfffd, 'b', 0, 0xc2, 0xfffd, 'c' ) ); + +/** Missing continuation byte test */ +UTF8_ACCUMULATE ( missing_continuation, + DATA ( 'a', 0xc3, 'b', 0xe1, 0x86, 0xc3, 0x89, 'c' ), + DATA ( 'a', 0, 'b', 0, 0, 0, 0xc9, 'c' ) ); + +/** Illegal two-byte sequence test */ +UTF8_ACCUMULATE ( illegal_two, + DATA ( 'a', 0xc2, 0x80, 'b', 0xc1, 0xbf, 'c', 0xc0, 0x80, + 'd' ), + DATA ( 'a', 0, 0x80, 'b', 0, 0xfffd, 'c', 0, 0xfffd, 'd' ) ); + +/** Illegal three-byte sequence test */ +UTF8_ACCUMULATE ( illegal_three, + DATA ( 'a', 0xe0, 0xa0, 0x80, 'b', 0xe0, 0x9f, 0xbf, 'c', + 0xe0, 0x80, 0x80, 'd' ), + DATA ( 'a', 0, 0, 0x800, 'b', 0, 0, 0xfffd, 'c', + 0, 0, 0xfffd, 'd' ) ); + +/** Illegal four-byte sequence test */ +UTF8_ACCUMULATE ( illegal_four, + DATA ( 'a', 0xf0, 0x90, 0x80, 0x80, 'b', 0xf0, 0x8f, 0xbf, + 0xbf, 'c', 0xf0, 0x80, 0x80, 0x80, 'd' ), + DATA ( 'a', 0, 0, 0, 0x10000, 'b', 0, 0, 0, 0xfffd, 'c', + 0, 0, 0, 0xfffd, 'd' ) ); + +/** Illegal overlength sequence test */ +UTF8_ACCUMULATE ( illegal_length, + DATA ( 'a', 0xf8, 0xbf, 0xbf, 0xbf, 0xbf, 'b', 0xfc, 0xbf, + 0xbf, 0xbf, 0xbf, 0xbf, 'c', 0xfe, 0xbf, 0xbf, 0xbf, + 0xbf, 0xbf, 0xbf, 'd', 0xff, 0xbf, 0xbf, 0xbf, 0xbf, + 0xbf, 0xbf, 0xbf, 'e' ), + DATA ( 'a', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'b', + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 'c', + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 'd', 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 'e' ) ); + +/** + * Report UTF-8 accumulation test result + * + * @v test UTF-8 accumulation test + * @v file Test code file + * @v line Test code line + */ +static void utf8_accumulate_okx ( struct utf8_accumulate_test *test, + const char *file, unsigned int line ) { + struct utf8_accumulator utf8; + unsigned int character; + unsigned int i; + + /* Initialise accumulator */ + memset ( &utf8, 0, sizeof ( utf8 ) ); + + /* Test each byte in turn */ + for ( i = 0 ; i < test->len ; i++ ) { + character = utf8_accumulate ( &utf8, test->bytes[i] ); + DBGC ( test, "UTF8 byte %02x character %02x\n", + test->bytes[i], character ); + okx ( character == test->expected[i], file, line ); + } +} +#define utf8_accumulate_ok( test ) \ + utf8_accumulate_okx ( test, __FILE__, __LINE__ ) + +/** + * Perform UTF-8 self-test + * + */ +static void utf8_test_exec ( void ) { + + /* Accumulation tests */ + utf8_accumulate_ok ( &ascii ); + utf8_accumulate_ok ( &multibyte ); + utf8_accumulate_ok ( &stray_continuation ); + utf8_accumulate_ok ( &missing_continuation ); + utf8_accumulate_ok ( &illegal_two ); + utf8_accumulate_ok ( &illegal_three ); + utf8_accumulate_ok ( &illegal_four ); + utf8_accumulate_ok ( &illegal_length ); +} + +/** UTF-8 self-test */ +struct self_test utf8_test __self_test = { + .name = "utf8", + .exec = utf8_test_exec, +}; |