mirror of
https://github.com/MariaDB/server.git
synced 2025-07-27 18:02:13 +03:00
MDEV-26743 InnoDB: CHAR+nopad does not work well
The patch for "MDEV-25440: Indexed CHAR ... broken with NO_PAD collations" fixed these scenarios from MDEV-26743:
- Basic Latin letter vs equal accented letter
- Two letters vs equal (but space padded) expansion

However, this scenario was still broken:
- Basic Latin letter (but followed by an ignorable character) vs equal accented letter

Fix:

When processing, for a NOPAD collation, a string with trailing ignorable characters, like:
  '<non-ignorable><ignorable><ignorable>'
the string gets virtually converted to:
  '<non-ignorable><ignorable><ignorable><space><space><space>...'

After the fix the code works differently in these two cases:
1. <space> fits into the "nchars" limit
2. <space> does not fit into the "nchars" limit

Details:

1. If "nchars" is large enough (4+ in this example), return weights as follows:
  '[weight-for-non-ignorable, 1 char] [weight-for-space-character, 3 chars]'
  i.e. the weight for the virtual trailing space character now indicates that it corresponds to a total of 3 characters:
  - two ignorable characters
  - one virtual trailing space character

2. If "nchars" is small (3), then the virtual trailing space character does not fit into the "nchars" limit, so return 0x00 as weight, e.g.:
  '[weight-for-non-ignorable, 1 char] [0x00, 2 chars]'

Adding corresponding MTR tests and unit tests.
This commit is contained in:
@ -5,3 +5,49 @@ ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
|
||||
INSERT INTO t1 VALUES ('',2);
|
||||
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# MDEV-26743 InnoDB: CHAR+nopad does not work well
|
||||
#
|
||||
#
|
||||
# Basic Latin letter vs equal accented letter
|
||||
#
|
||||
SET NAMES utf8mb3;
|
||||
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
||||
INSERT INTO t1 VALUES ('a'),('ä');
|
||||
ERROR 23000: Duplicate entry 'ä' for key 'PRIMARY'
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# Two letters vs equal (but space padded) expansion
|
||||
#
|
||||
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
||||
INSERT INTO t1 VALUES ('ss'),('ß');
|
||||
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
||||
SELECT HEX(a) FROM t1;
|
||||
HEX(a)
|
||||
7373
|
||||
C39F20
|
||||
SET sql_mode=DEFAULT;
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# Basic Latin letter (but followed by an ignorable character) vs equal accented letter
|
||||
#
|
||||
SET NAMES utf8mb3;
|
||||
CREATE TABLE t1 (a CHAR(3), PRIMARY KEY(a)) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
||||
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
|
||||
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
||||
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
|
||||
HEX(a)
|
||||
610120
|
||||
C3A42020
|
||||
SET sql_mode=DEFAULT;
|
||||
DROP TABLE t1;
|
||||
SET NAMES utf8mb3;
|
||||
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
||||
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
|
||||
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
||||
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
|
||||
HEX(a)
|
||||
6101
|
||||
C3A420
|
||||
SET sql_mode=DEFAULT;
|
||||
DROP TABLE t1;
|
||||
|
@ -8,3 +8,49 @@ ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
|
||||
INSERT INTO t1 VALUES ('',2);
|
||||
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-26743 InnoDB: CHAR+nopad does not work well
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # Basic Latin letter vs equal accented letter
|
||||
--echo #
|
||||
|
||||
SET NAMES utf8mb3;
|
||||
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
||||
--error ER_DUP_ENTRY
|
||||
INSERT INTO t1 VALUES ('a'),('ä');
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # Two letters vs equal (but space padded) expansion
|
||||
--echo #
|
||||
|
||||
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
||||
INSERT INTO t1 VALUES ('ss'),('ß');
|
||||
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
||||
SELECT HEX(a) FROM t1;
|
||||
SET sql_mode=DEFAULT;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # Basic Latin letter (but followed by an ignorable character) vs equal accented letter
|
||||
--echo #
|
||||
|
||||
SET NAMES utf8mb3;
|
||||
CREATE TABLE t1 (a CHAR(3), PRIMARY KEY(a)) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
||||
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
|
||||
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
||||
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
|
||||
SET sql_mode=DEFAULT;
|
||||
DROP TABLE t1;
|
||||
|
||||
SET NAMES utf8mb3;
|
||||
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
||||
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
|
||||
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
||||
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
|
||||
SET sql_mode=DEFAULT;
|
||||
DROP TABLE t1;
|
||||
|
@ -335,8 +335,20 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
|
||||
flags & MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES ?
|
||||
my_space_weight(scanner->level) : 0;
|
||||
|
||||
res.nchars= 1;
|
||||
(*generated)++;
|
||||
res.nchars++; /* Count all ignorable characters and the padded space */
|
||||
if (res.nchars > nchars)
|
||||
{
|
||||
/*
|
||||
We scanned a number of ignorable characters at the end of the
|
||||
string and reached the "nchars" limit, so the virtual padded space
|
||||
does not fit. This is possible with CONCAT('a', x'00') with
|
||||
nchars=2 on the second iteration when we scan the x'00'.
|
||||
*/
|
||||
if (scanner->cs->state & MY_CS_NOPAD)
|
||||
res.weight= 0;
|
||||
res.nchars= (uint) nchars;
|
||||
}
|
||||
}
|
||||
else if (res.nchars > nchars)
|
||||
{
|
||||
|
@ -911,6 +911,19 @@ static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_ci[]=
|
||||
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, TCHAR, 0},
|
||||
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, TCHAR, 0},
|
||||
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
|
||||
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 4, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
|
||||
|
||||
{{NULL, 0}, {NULL, 0}, 0, 0, 0}
|
||||
};
|
||||
|
||||
@ -938,6 +951,19 @@ static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_nopad_ci[]=
|
||||
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, TVCHAR, 0},
|
||||
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, TVCHAR, 0},
|
||||
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, -1},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0},
|
||||
{{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
|
||||
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, -1},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, -1},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 4, TCHAR, 0},
|
||||
{{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0},
|
||||
|
||||
{{NULL, 0}, {NULL, 0}, 0, 0, 0}
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user