1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"

This commit is contained in:
Alexander Barkov
2015-07-06 10:47:39 +04:00
parent 7ab7f5327a
commit 35d8ac350d
7 changed files with 275 additions and 149 deletions

View File

@ -369,6 +369,49 @@ STRNNCOLL_PARAM strcoll_utf8mb3_common[]=
};
/*
  Comparison test cases for utf8mb4 collations that involve four-byte (MB4)
  sequences: well-formed MB4 characters versus broken or incomplete
  sequences, and broken sequences versus each other.

  Each entry lists two byte strings followed by an expected result.
  NOTE(review): -1 / 1 presumably mean "first string compares less / greater
  than the second"; confirm against the STRNNCOLL_PARAM definition and the
  strcollsp() driver, neither of which is visible here.

  In UTF-8 a continuation byte must lie in the range 0x80..0xBF, so the
  trailing bytes 0x1F, 0x7E, 0x7F and 0xC0 used below make a sequence
  "broken", while a sequence that simply ends early is "incomplete".
*/
STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
{
/*
  Minimum four-byte character: U+10000 == _utf8 0xF0908080.
  A well-formed character is expected to sort before every broken or
  incomplete sequence.
*/
{CSTR("\xF0\x90\x80\x80"), CSTR("\xC0"), -1}, /* MB4 vs unused byte (0xC0 never occurs in well-formed UTF-8) */
{CSTR("\xF0\x90\x80\x80"), CSTR("\xC2"), -1}, /* MB4 vs incomplete MB2 (lead byte only) */
{CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0\x7F"),-1}, /* MB4 vs broken MB3 (third byte below 0x80) */
{CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0\xC0"),-1}, /* MB4 vs broken MB3 (third byte above 0xBF) */
{CSTR("\xF0\x90\x80\x80"), CSTR("\xE0\xA0"), -1}, /* MB4 vs incomplete MB3 (third byte missing) */
{CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80"),-1}, /* MB4 vs incomplete MB4 (fourth byte missing) */
{CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\x7F"),-1},/* MB4 vs broken MB4 (fourth byte below 0x80) */
{CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\xC0"),-1},/* MB4 vs broken MB4 (fourth byte above 0xBF) */
/*
  Maximum four-byte character: U+10FFFF == _utf8 0xF48FBFBF.
  Same right-hand operands as above: even the largest valid character is
  expected to sort before every broken or incomplete sequence.
*/
{CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xC0"), -1}, /* MB4 vs unused byte */
{CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xC2"), -1}, /* MB4 vs incomplete MB2 */
{CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0\x7F"),-1}, /* MB4 vs broken MB3 */
{CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0\xC0"),-1}, /* MB4 vs broken MB3 */
{CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xE0\xA0"), -1}, /* MB4 vs incomplete MB3 */
{CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80"),-1}, /* MB4 vs incomplete MB4 */
{CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80\x7F"),-1},/* MB4 vs broken MB4 */
{CSTR("\xF4\x8F\xBF\xBF"), CSTR("\xF0\x90\x80\xC0"),-1},/* MB4 vs broken MB4 */
/*
  Broken MB4 vs incomplete/broken MB3.
  NOTE(review): the expected result 1 is presumably due to byte-wise
  comparison of the broken sequences (leading 0xF0 > 0xE0); confirm against
  the collation implementation.
*/
{CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0"), 1}, /* Broken MB4 vs incomplete MB3 */
{CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0\x7F"),1}, /* Broken MB4 vs broken MB3 */
{CSTR("\xF0\x90\x80\x7F"), CSTR("\xE0\xA0\xC0"),1}, /* Broken MB4 vs broken MB3 */
/*
  Broken MB4 vs incomplete MB4:
  The three leftmost bytes are compared binary, the fourth byte is compared
  to auto-padded space (0x20), so the expected sign depends on whether the
  fourth byte is below or above 0x20.
*/
{CSTR("\xF0\x90\x80\x1F"), CSTR("\xF0\x90\x80"),-1}, /* Broken MB4 vs incomplete MB4: 0x1F < 0x20 */
{CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80"),1}, /* Broken MB4 vs incomplete MB4: 0x7E > 0x20 */
/* Broken MB4 vs broken MB4: the strings differ only in the fourth byte */
{CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80\x7F"),-1},/* Broken MB4 vs broken MB4: 0x7E < 0x7F */
{CSTR("\xF0\x90\x80\x7E"), CSTR("\xF0\x90\x80\xC0"),-1},/* Broken MB4 vs broken MB4: 0x7E < 0xC0 */
/* All-NULL/zero entry; presumably the end-of-list marker for the test driver */
{NULL, 0, NULL, 0, 0}
};
static void
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
{
@ -497,6 +540,12 @@ test_strcollsp()
failed+= strcollsp(&my_charset_utf8_general_ci, strcoll_utf8mb3_common);
failed+= strcollsp(&my_charset_utf8_general_mysql500_ci, strcoll_utf8mb3_common);
failed+= strcollsp(&my_charset_utf8_bin, strcoll_utf8mb3_common);
#endif
#ifdef HAVE_CHARSET_utf8mb4
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb3_common);
failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb3_common);
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_common);
failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb4_common);
#endif
return failed;
}