mirror of
				https://xff.cz/git/u-boot/
				synced 2025-10-31 10:26:10 +01:00 
			
		
		
		
	lib/charset: utf8_get() should return error
utf8_get() should return an error if it hits an illegal UTF-8 sequence instead of silently converting the input to a question mark. Correct utf8_get() and its unit test. console_read_unicode() will now ignore illegal UTF-8 sequences. Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
This commit is contained in:
		| @@ -32,7 +32,7 @@ static struct capitalization_table capitalization_table[] = | |||||||
|  * |  * | ||||||
|  * @read_u8:	- stream reader |  * @read_u8:	- stream reader | ||||||
|  * @src:	- string buffer passed to stream reader, optional |  * @src:	- string buffer passed to stream reader, optional | ||||||
|  * Return:	- Unicode code point |  * Return:	- Unicode code point, or -1 | ||||||
|  */ |  */ | ||||||
| static int get_code(u8 (*read_u8)(void *data), void *data) | static int get_code(u8 (*read_u8)(void *data), void *data) | ||||||
| { | { | ||||||
| @@ -78,7 +78,7 @@ static int get_code(u8 (*read_u8)(void *data), void *data) | |||||||
| 	} | 	} | ||||||
| 	return ch; | 	return ch; | ||||||
| error: | error: | ||||||
| 	return '?'; | 	return -1; | ||||||
| } | } | ||||||
|  |  | ||||||
| /** | /** | ||||||
| @@ -120,15 +120,22 @@ static u8 read_console(void *data) | |||||||
|  |  | ||||||
| int console_read_unicode(s32 *code) | int console_read_unicode(s32 *code) | ||||||
| { | { | ||||||
|  | 	for (;;) { | ||||||
|  | 		s32 c; | ||||||
|  |  | ||||||
| 		if (!tstc()) { | 		if (!tstc()) { | ||||||
| 			/* No input available */ | 			/* No input available */ | ||||||
| 			return 1; | 			return 1; | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		/* Read Unicode code */ | 		/* Read Unicode code */ | ||||||
| 	*code = get_code(read_console, NULL); | 		c = get_code(read_console, NULL); | ||||||
|  | 		if (c > 0) { | ||||||
|  | 			*code = c; | ||||||
| 			return 0; | 			return 0; | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| s32 utf8_get(const char **src) | s32 utf8_get(const char **src) | ||||||
| { | { | ||||||
|   | |||||||
| @@ -52,6 +52,7 @@ static const char d4[] = {0xf0, 0x90, 0x92, 0x8d, 0xf0, 0x90, 0x92, 0x96, | |||||||
| static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; | static const char j1[] = {0x6a, 0x31, 0xa1, 0x6c, 0x00}; | ||||||
| static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; | static const char j2[] = {0x6a, 0x32, 0xc3, 0xc3, 0x6c, 0x00}; | ||||||
| static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; | static const char j3[] = {0x6a, 0x33, 0xf0, 0x90, 0xf0, 0x00}; | ||||||
|  | static const char j4[] = {0xa1, 0x00}; | ||||||
|  |  | ||||||
| static int unicode_test_u16_strlen(struct unit_test_state *uts) | static int unicode_test_u16_strlen(struct unit_test_state *uts) | ||||||
| { | { | ||||||
| @@ -165,6 +166,12 @@ static int unicode_test_utf8_get(struct unit_test_state *uts) | |||||||
| 	ut_asserteq(0x0001048d, code); | 	ut_asserteq(0x0001048d, code); | ||||||
| 	ut_asserteq_ptr(s, d4 + 4); | 	ut_asserteq_ptr(s, d4 + 4); | ||||||
|  |  | ||||||
|  | 	/* Check illegal character */ | ||||||
|  | 	s = j4; | ||||||
|  | 	code = utf8_get((const char **)&s); | ||||||
|  | 	ut_asserteq(-1, code); | ||||||
|  | 	ut_asserteq_ptr(j4 + 1, s); | ||||||
|  |  | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
| UNICODE_TEST(unicode_test_utf8_get); | UNICODE_TEST(unicode_test_utf8_get); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user