From ddbaff53da5b99563fa371db0b09544e139fdabb Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sat, 27 Feb 2021 14:08:37 +0100 Subject: lib/charset: utf8_get() should return error utf8_get() should return an error if hitting an illegal UTF-8 sequence and not silently convert the input to a question mark. Correct utf8_get() and its unit test. console_read_unicode() now will ignore illegal UTF-8 sequences. Signed-off-by: Heinrich Schuchardt --- test/unicode_ut.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'test/unicode_ut.c') diff --git a/test/unicode_ut.c b/test/unicode_ut.c index 2cc6b5fefff..154361aea7d 100644 --- a/test/unicode_ut.c +++ b/test/unicode_ut.c @@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; +static const char j4[] = {0xa1, 0x00}; static int unicode_test_u16_strlen(struct unit_test_state *uts) { @@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts) ut_asserteq(0x0001048d, code); ut_asserteq_ptr(s, d4 + 4); + /* Check illegal character */ + s = j4; + code = utf8_get((const char **)&s); + ut_asserteq(-1, code); + ut_asserteq_ptr(j4 + 1, s); + return 0; } UNICODE_TEST(unicode_test_utf8_get); -- cgit