diff --git a/mysql-test/suite/innodb/r/no_pad.result b/mysql-test/suite/innodb/r/no_pad.result index 0c039c30a5e..2af5eb0207f 100644 --- a/mysql-test/suite/innodb/r/no_pad.result +++ b/mysql-test/suite/innodb/r/no_pad.result @@ -5,3 +5,49 @@ ALTER TABLE t1 ROW_FORMAT=DYNAMIC; INSERT INTO t1 VALUES ('',2); ALTER TABLE t1 ROW_FORMAT=REDUNDANT; DROP TABLE t1; +# +# MDEV-26743 InnoDB: CHAR+nopad does not work well +# +# +# Basic Latin letter vs equal accented letter +# +SET NAMES utf8mb3; +CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES ('a'),('ä'); +ERROR 23000: Duplicate entry 'ä' for key 'PRIMARY' +DROP TABLE t1; +# +# Two letters vs equal (but space padded) expansion +# +CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES ('ss'),('ß'); +SET sql_mode=PAD_CHAR_TO_FULL_LENGTH; +SELECT HEX(a) FROM t1; +HEX(a) +7373 +C39F20 +SET sql_mode=DEFAULT; +DROP TABLE t1; +# +# Basic Latin letter (but followed by an ignorable character) vs equal accented letter +# +SET NAMES utf8mb3; +CREATE TABLE t1 (a CHAR(3), PRIMARY KEY(a)) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä'); +SET sql_mode=PAD_CHAR_TO_FULL_LENGTH; +SELECT HEX(a) FROM t1 ORDER BY HEX(a); +HEX(a) +610120 +C3A42020 +SET sql_mode=DEFAULT; +DROP TABLE t1; +SET NAMES utf8mb3; +CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä'); +SET sql_mode=PAD_CHAR_TO_FULL_LENGTH; +SELECT HEX(a) FROM t1 ORDER BY HEX(a); +HEX(a) +6101 +C3A420 +SET sql_mode=DEFAULT; +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/no_pad.test b/mysql-test/suite/innodb/t/no_pad.test index 1be1972c9ca..15ab71b6c7f 100644 --- a/mysql-test/suite/innodb/t/no_pad.test +++ b/mysql-test/suite/innodb/t/no_pad.test @@ -8,3 +8,49 @@ ALTER TABLE t1 ROW_FORMAT=DYNAMIC; INSERT INTO t1 VALUES ('',2); ALTER TABLE t1 ROW_FORMAT=REDUNDANT; DROP TABLE t1; + + +--echo # +--echo # MDEV-26743 InnoDB: CHAR+nopad does not work well +--echo # + +--echo # +--echo # Basic Latin letter vs equal accented letter +--echo # + +SET NAMES utf8mb3; +CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT; +--error ER_DUP_ENTRY +INSERT INTO t1 VALUES ('a'),('ä'); +DROP TABLE t1; + +--echo # +--echo # Two letters vs equal (but space padded) expansion +--echo # + +CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES ('ss'),('ß'); +SET sql_mode=PAD_CHAR_TO_FULL_LENGTH; +SELECT HEX(a) FROM t1; +SET sql_mode=DEFAULT; +DROP TABLE t1; + +--echo # +--echo # Basic Latin letter (but followed by an ignorable character) vs equal accented letter +--echo # + +SET NAMES utf8mb3; +CREATE TABLE t1 (a CHAR(3), PRIMARY KEY(a)) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä'); +SET sql_mode=PAD_CHAR_TO_FULL_LENGTH; +SELECT HEX(a) FROM t1 ORDER BY HEX(a); +SET sql_mode=DEFAULT; +DROP TABLE t1; + +SET NAMES utf8mb3; +CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT; +INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä'); +SET sql_mode=PAD_CHAR_TO_FULL_LENGTH; +SELECT HEX(a) FROM t1 ORDER BY HEX(a); +SET sql_mode=DEFAULT; +DROP TABLE t1; diff --git a/strings/ctype-uca.inl b/strings/ctype-uca.inl index 55b2c1ef7ce..48573ae2eed 100644 --- a/strings/ctype-uca.inl +++ b/strings/ctype-uca.inl @@ -335,8 +335,20 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner, flags & MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES ? my_space_weight(scanner->level) : 0; - res.nchars= 1; (*generated)++; + res.nchars++; /* Count all ignorable characters and the padded space */ + if (res.nchars > nchars) + { + /* + We scanned a number of ignorable characters at the end of the + string and reached the "nchars" limit, so the virtual padded space + does not fit. This is possible with CONCAT('a', x'00') with + nchars=2 on the second iteration when we scan the x'00'. + */ + if (scanner->cs->state & MY_CS_NOPAD) + res.weight= 0; + res.nchars= (uint) nchars; + } } else if (res.nchars > nchars) { diff --git a/unittest/strings/strings-t.c b/unittest/strings/strings-t.c index f5563247f17..e2d2fb465db 100644 --- a/unittest/strings/strings-t.c +++ b/unittest/strings/strings-t.c @@ -911,6 +911,19 @@ static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_ci[]= {{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, TCHAR, 0}, {{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, TCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0}, + + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, 0}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 4, TCHAR, 0}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0}, + {{NULL, 0}, {NULL, 0}, 0, 0, 0} }; @@ -938,6 +951,19 @@ static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_nopad_ci[]= {{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, TVCHAR, 0}, {{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, TVCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, -1}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, 0}, + {{CSTR("a" "\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0}, + + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 0, TCHAR, 0}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 1, TCHAR, 0}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 2, TCHAR, -1}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 3, TCHAR, -1}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 4, TCHAR, 0}, + {{CSTR("a" "\x01\x01")}, {CSTR(UTF8_auml)}, 100, TCHAR, 0}, + {{NULL, 0}, {NULL, 0}, 0, 0, 0} };