mirror of
https://github.com/MariaDB/server.git
synced 2025-07-27 18:02:13 +03:00
MDEV-26743 InnoDB: CHAR+nopad does not work well
The patch for "MDEV-25440: Indexed CHAR ... broken with NO_PAD collations" fixed these scenarios from MDEV-26743:
- Basic Latin letter vs. equal accented letter
- Two letters vs. equal (but space-padded) expansion

However, this scenario was still broken:
- Basic Latin letter (but followed by an ignorable character) vs. equal accented letter

Fix: When a string with trailing ignorable characters is processed for a NOPAD collation, like:
'<non-ignorable><ignorable><ignorable>'
the string gets virtually converted to:
'<non-ignorable><ignorable><ignorable><space><space><space>...'

After the fix, the code works differently in these two cases:
1. <space> fits into the "nchars" limit
2. <space> does not fit into the "nchars" limit

Details:
1. If "nchars" is large enough (4+ in this example), return weights as follows:
'[weight-for-non-ignorable, 1 char] [weight-for-space-character, 3 chars]'
i.e. the weight for the virtual trailing space character now indicates that it corresponds to a total of 3 characters:
- two ignorable characters
- one virtual trailing space character
2. If "nchars" is small (3), then the virtual trailing space character does not fit into the "nchars" limit, so return 0x00 as the weight, e.g.:
'[weight-for-non-ignorable, 1 char] [0x00, 2 chars]'

Adding corresponding MTR tests and unit tests.
This commit is contained in:
@ -911,6 +911,19 @@ static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_ci[]=
|
||||
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, TCHAR, 0},
|
||||
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, TCHAR, 0},
|
||||
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
|
||||
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 4, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
|
||||
|
||||
{{NULL, 0}, {NULL, 0}, 0, 0, 0}
|
||||
};
|
||||
|
||||
@ -938,6 +951,19 @@ static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_nopad_ci[]=
|
||||
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, TVCHAR, 0},
|
||||
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, TVCHAR, 0},
|
||||
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, -1},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
|
||||
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, -1},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, -1},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 4, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
|
||||
|
||||
{{NULL, 0}, {NULL, 0}, 0, 0, 0}
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user