mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
MDEV-8416 ucs2: compare broken bytes as "greater than any non-broken character"
MDEV-8418 utf16: compare broken bytes as "greater than any non-broken character"
This commit is contained in:
@ -149,7 +149,7 @@ typedef struct
|
||||
A1A1 - MB2 or 8BIT+8BIT
|
||||
E0E0 - MB2
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_mb2_common[]=
|
||||
static STRNNCOLL_PARAM strcoll_mb2_common[]=
|
||||
{
|
||||
/* Compare two good sequences */
|
||||
{CSTR(""), CSTR(""), 0},
|
||||
@ -210,7 +210,7 @@ STRNNCOLL_PARAM strcoll_mb2_common[]=
|
||||
/*
|
||||
For character sets that have good mb2 characters A1A1 and F9FE
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
|
||||
static STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
|
||||
{
|
||||
/* Compare two good characters */
|
||||
{CSTR(""), CSTR("\xF9\xFE"), -1},
|
||||
@ -246,7 +246,7 @@ STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
|
||||
A1A1 - a good mb2 character
|
||||
F9FE - a bad sequence
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
|
||||
static STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
|
||||
{
|
||||
/* Compare a good character to an illegal or an incomplete sequence */
|
||||
{CSTR(""), CSTR("\xF9\xFE"), -1},
|
||||
@ -283,7 +283,7 @@ STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
|
||||
F9 - ILSEQ or H2
|
||||
F9FE - a bad sequence (ILSEQ+XX or H2+ILSEQ)
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
|
||||
static STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
|
||||
{
|
||||
/* Compare two good characters */
|
||||
{CSTR(""), CSTR("\xA1"), -1},
|
||||
@ -323,7 +323,7 @@ STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
|
||||
and sort in this order:
|
||||
8181 < A1 < E0E0
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
|
||||
static STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
|
||||
{
|
||||
{CSTR("\x81\x81"), CSTR("\xA1"), -1},
|
||||
{CSTR("\x81\x81"), CSTR("\xE0\xE0"), -1},
|
||||
@ -336,7 +336,7 @@ STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
|
||||
/*
|
||||
A shared test for eucjpms and ujis.
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_ujis[]=
|
||||
static STRNNCOLL_PARAM strcoll_ujis[]=
|
||||
{
|
||||
{CSTR("\x8E\xA1"), CSTR("\x8E"), -1}, /* Good MB2 vs incomplete MB2 */
|
||||
{CSTR("\x8E\xA1"), CSTR("\x8F\xA1"), -1}, /* Good MB2 vs incomplete MB3 */
|
||||
@ -347,7 +347,7 @@ STRNNCOLL_PARAM strcoll_ujis[]=
|
||||
};
|
||||
|
||||
|
||||
STRNNCOLL_PARAM strcoll_utf8mb3_common[]=
|
||||
static STRNNCOLL_PARAM strcoll_utf8mb3_common[]=
|
||||
{
|
||||
{CSTR("\xC0"), CSTR("\xC1"), -1}, /* Unused byte vs unused byte */
|
||||
{CSTR("\xC0"), CSTR("\xFF"), -1}, /* Unused byte vs unused byte */
|
||||
@ -369,7 +369,7 @@ STRNNCOLL_PARAM strcoll_utf8mb3_common[]=
|
||||
};
|
||||
|
||||
|
||||
STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
|
||||
static STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
|
||||
{
|
||||
/* Minimum four-byte character: U+10000 == _utf8 0xF0908080 */
|
||||
{CSTR("\xF0\x90\x80\x80"), CSTR("\xC0"), -1}, /* MB4 vs unused byte */
|
||||
@ -412,6 +412,101 @@ STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
|
||||
};
|
||||
|
||||
|
||||
static STRNNCOLL_PARAM strcoll_ucs2_common[]=
|
||||
{
|
||||
{CSTR("\xC0"), CSTR("\xC1"), -1}, /* Incomplete MB2 vs incomplete MB2 */
|
||||
{CSTR("\xC0"), CSTR("\xFF"), -1}, /* Incomplete MB2 vs incomplete MB2 */
|
||||
{CSTR("\xC2\xA1"), CSTR("\xC0"), -1}, /* MB2 vs incomplete MB2 */
|
||||
{CSTR("\xC2\xA1"), CSTR("\xC2"), -1}, /* MB2 vs incomplete MB2 */
|
||||
{CSTR("\xC2\xA0"), CSTR("\xC2\xA1"), -1}, /* MB2 vs MB2 */
|
||||
{CSTR("\xC2\xA1"), CSTR("\xC2\xA2"), -1}, /* MB2 vs MB2 */
|
||||
|
||||
{CSTR("\xFF\xFF"), CSTR("\x00"),-1}, /* MB2 vs incomplete */
|
||||
{CSTR("\xFF\xFF\xFF\xFF"), CSTR("\x00"),-1}, /* MB2+MB2 vs incomplete */
|
||||
{CSTR("\xFF\xFF\xFF\xFF"), CSTR("\x00\x00\x00"), 1},/* MB2+MB2 vs MB2+incomplete */
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
/* Tests that involve comparison to SPACE (explicit, or padded) */
|
||||
static STRNNCOLL_PARAM strcoll_ucs2_space[]=
|
||||
{
|
||||
{CSTR("\x00\x1F"), CSTR("\x00\x20"), -1}, /* MB2 vs MB2 */
|
||||
{CSTR("\x00\x20"), CSTR("\x00\x21"), -1}, /* MB2 vs MB2 */
|
||||
{CSTR("\x00\x1F"), CSTR(""), -1}, /* MB2 vs empty */
|
||||
{CSTR("\x00\x20"), CSTR(""), 0}, /* MB2 vs empty */
|
||||
{CSTR("\x00\x21"), CSTR(""), 1}, /* MB2 vs empty */
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
/* Tests that involve comparison to SPACE (explicit, or padded) */
|
||||
static STRNNCOLL_PARAM strcoll_utf16le_space[]=
|
||||
{
|
||||
{CSTR("\x1F\x00"), CSTR("\x20\x00"), -1}, /* MB2 vs MB2 */
|
||||
{CSTR("\x20\x00"), CSTR("\x21\x00"), -1}, /* MB2 vs MB2 */
|
||||
{CSTR("\x1F\x00"), CSTR(""), -1}, /* MB2 vs empty */
|
||||
{CSTR("\x20\x00"), CSTR(""), 0}, /* MB2 vs empty */
|
||||
{CSTR("\x21\x00"), CSTR(""), 1}, /* MB2 vs empty */
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
static STRNNCOLL_PARAM strcoll_utf16_common[]=
|
||||
{
|
||||
/* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */
|
||||
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
|
||||
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
|
||||
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDB\x00"),-1},/* MB4 vs broken MB4 */
|
||||
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xE0\x00"),-1},/* MB4 vs broken MB4 */
|
||||
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xDC\x00"), -1},/* MB4 vs broken MB2 */
|
||||
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC"), -1},/* MB4 vs incomplete MB4 */
|
||||
|
||||
/* Maximum four-byte character: U+10FFFF == _utf16 0xDBFFDFFF */
|
||||
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
|
||||
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
|
||||
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xD8\x00\xDB\x00"),-1},/* MB4 vs broken MB4 */
|
||||
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xD8\x00\xE0\x00"),-1},/* MB4 vs broken MB4 */
|
||||
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\x00"), -1},/* MB4 vs broken MB2 */
|
||||
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\xFF\xDF"), -1},/* MB4 vs incomplete MB4 */
|
||||
|
||||
/* Broken MB4 vs broken MB4 */
|
||||
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* MB4 vs MB4 (both well-formed: U+10000 vs U+10001) */
|
||||
{CSTR("\xDB\xFF\xE0\xFE"), CSTR("\xDB\xFF\xE0\xFF"),-1},/* Broken MB4 vs broken MB4 */
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
static STRNNCOLL_PARAM strcoll_utf16le_common[]=
|
||||
{
|
||||
/* Minimum four-byte character: U+10000 == _utf16le 0x00D800DC */
|
||||
{CSTR("\x00\xD8\x00\xDC"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
|
||||
{CSTR("\x00\xD8\x00\xDC"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
|
||||
{CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00\xDB"),-1},/* MB4 vs broken MB4 */
|
||||
{CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00\xD0"),-1},/* MB4 vs broken MB4 */
|
||||
{CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xDC"), -1},/* MB4 vs broken MB2 */
|
||||
{CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x00"), -1},/* MB4 vs incomplete MB4 */
|
||||
|
||||
/* Maximum four-byte character: U+10FFFF == _utf16le 0xFFDBFFDF */
|
||||
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xC0"), -1},/* MB4 vs incomplete MB2 */
|
||||
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xC2"), -1},/* MB4 vs incomplete MB2 */
|
||||
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xD8\x00\xDB"),-1},/* MB4 vs broken MB4 */
|
||||
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xD8\x00\xE0"),-1},/* MB4 vs broken MB4 */
|
||||
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\x00\xDC"), -1},/* MB4 vs broken MB2 */
|
||||
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xFF\xDC\x00"), -1},/* MB4 vs incomplete MB4 */
|
||||
|
||||
/* Broken MB4 vs broken MB4 */
|
||||
{CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"),-1},/* MB4 vs MB4 (both well-formed: U+10000 vs U+10001) */
|
||||
{CSTR("\xFF\xDB\xFE\xE0"), CSTR("\xFF\xDB\xFF\xE0"),-1},/* Broken MB4 vs broken MB4 */
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
|
||||
{
|
||||
@ -528,6 +623,12 @@ test_strcollsp()
|
||||
failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_8181_A1_E0E0);
|
||||
failed+= strcollsp(&my_charset_sjis_bin, strcoll_8181_A1_E0E0);
|
||||
#endif
|
||||
#ifdef HAVE_CHARSET_ucs2
|
||||
failed+= strcollsp(&my_charset_ucs2_general_ci, strcoll_ucs2_common);
|
||||
failed+= strcollsp(&my_charset_ucs2_general_ci, strcoll_ucs2_space);
|
||||
failed+= strcollsp(&my_charset_ucs2_bin, strcoll_ucs2_common);
|
||||
failed+= strcollsp(&my_charset_ucs2_bin, strcoll_ucs2_space);
|
||||
#endif
|
||||
#ifdef HAVE_CHARSET_ujis
|
||||
failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_mb2_common);
|
||||
failed+= strcollsp(&my_charset_ujis_bin, strcoll_mb2_common);
|
||||
@ -536,6 +637,21 @@ test_strcollsp()
|
||||
failed+= strcollsp(&my_charset_ujis_japanese_ci, strcoll_ujis);
|
||||
failed+= strcollsp(&my_charset_ujis_bin, strcoll_ujis);
|
||||
#endif
|
||||
#ifdef HAVE_CHARSET_utf16
|
||||
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_common);
|
||||
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_space);
|
||||
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_common);
|
||||
failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_common);
|
||||
failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_space);
|
||||
failed+= strcollsp(&my_charset_utf16_bin, strcoll_utf16_common);
|
||||
|
||||
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_ucs2_common);
|
||||
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_space);
|
||||
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_common);
|
||||
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_ucs2_common);
|
||||
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_space);
|
||||
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_common);
|
||||
#endif
|
||||
#ifdef HAVE_CHARSET_utf8
|
||||
failed+= strcollsp(&my_charset_utf8_general_ci, strcoll_utf8mb3_common);
|
||||
failed+= strcollsp(&my_charset_utf8_general_mysql500_ci, strcoll_utf8mb3_common);
|
||||
|
Reference in New Issue
Block a user