mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
Fixing a bug in MDEV-8418 (utf16, utf16le) and MDEV-8417 (utf8mb4).
Fixing non-BMP characters to have the same weight, as it was before MDEV-8418 and MDEV-8417.
This commit is contained in:
@ -1808,16 +1808,21 @@ DROP TABLE t1;
|
|||||||
--echo #
|
--echo #
|
||||||
--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
||||||
--echo #
|
--echo #
|
||||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a));
|
CREATE TABLE t1 (
|
||||||
INSERT INTO t1 VALUES (0x61);
|
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
INSERT INTO t1 VALUES (0xC280),(0xDFBF);
|
a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id)
|
||||||
INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF);
|
);
|
||||||
INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF);
|
INSERT INTO t1 (a) VALUES (0x61);
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a;
|
INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF);
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a DESC;
|
INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF);
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a,id;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a;
|
SELECT id,HEX(a) FROM t1 ORDER BY a;
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a DESC;
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
|
@ -2127,3 +2127,64 @@ DEALLOCATE PREPARE stmt;
|
|||||||
#
|
#
|
||||||
# End of 10.0 tests
|
# End of 10.0 tests
|
||||||
#
|
#
|
||||||
|
#
|
||||||
|
# Start of 10.1 tests
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (
|
||||||
|
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
|
a VARCHAR(10) CHARACTER SET utf16, KEY(a,id)
|
||||||
|
);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0x61);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF);
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a,id;
|
||||||
|
id HEX(a)
|
||||||
|
1 0061
|
||||||
|
2 0080
|
||||||
|
3 07FF
|
||||||
|
4 0800
|
||||||
|
6 D800DC00
|
||||||
|
7 DBFFDFFF
|
||||||
|
5 FFFF
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
id HEX(a)
|
||||||
|
5 FFFF
|
||||||
|
7 DBFFDFFF
|
||||||
|
6 D800DC00
|
||||||
|
4 0800
|
||||||
|
3 07FF
|
||||||
|
2 0080
|
||||||
|
1 0061
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
6
|
||||||
|
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16 COLLATE utf16_bin;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a;
|
||||||
|
id HEX(a)
|
||||||
|
1 0061
|
||||||
|
2 0080
|
||||||
|
3 07FF
|
||||||
|
4 0800
|
||||||
|
5 FFFF
|
||||||
|
6 D800DC00
|
||||||
|
7 DBFFDFFF
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
id HEX(a)
|
||||||
|
7 DBFFDFFF
|
||||||
|
6 D800DC00
|
||||||
|
5 FFFF
|
||||||
|
4 0800
|
||||||
|
3 07FF
|
||||||
|
2 0080
|
||||||
|
1 0061
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
7
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
|
# End of 10.1 tests
|
||||||
|
#
|
||||||
|
@ -2319,3 +2319,64 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
|
|||||||
#
|
#
|
||||||
# End of 5.6 tests
|
# End of 5.6 tests
|
||||||
#
|
#
|
||||||
|
#
|
||||||
|
# Start of 10.1 tests
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (
|
||||||
|
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
|
a VARCHAR(10) CHARACTER SET utf16le, KEY(a,id)
|
||||||
|
);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0x61);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF);
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a,id;
|
||||||
|
id HEX(a)
|
||||||
|
1 6100
|
||||||
|
2 8000
|
||||||
|
3 FF07
|
||||||
|
4 0008
|
||||||
|
6 00D800DC
|
||||||
|
7 FFDBFFDF
|
||||||
|
5 FFFF
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
id HEX(a)
|
||||||
|
5 FFFF
|
||||||
|
7 FFDBFFDF
|
||||||
|
6 00D800DC
|
||||||
|
4 0008
|
||||||
|
3 FF07
|
||||||
|
2 8000
|
||||||
|
1 6100
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
6
|
||||||
|
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16le COLLATE utf16le_bin;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a;
|
||||||
|
id HEX(a)
|
||||||
|
1 6100
|
||||||
|
2 8000
|
||||||
|
3 FF07
|
||||||
|
4 0008
|
||||||
|
5 FFFF
|
||||||
|
6 00D800DC
|
||||||
|
7 FFDBFFDF
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
id HEX(a)
|
||||||
|
7 FFDBFFDF
|
||||||
|
6 00D800DC
|
||||||
|
5 FFFF
|
||||||
|
4 0008
|
||||||
|
3 FF07
|
||||||
|
2 8000
|
||||||
|
1 6100
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
7
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
|
# End of 10.1 tests
|
||||||
|
#
|
||||||
|
@ -2500,48 +2500,57 @@ DROP TABLE t1;
|
|||||||
#
|
#
|
||||||
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
||||||
#
|
#
|
||||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a));
|
CREATE TABLE t1 (
|
||||||
INSERT INTO t1 VALUES (0x61);
|
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
INSERT INTO t1 VALUES (0xC280),(0xDFBF);
|
a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id)
|
||||||
INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF);
|
);
|
||||||
INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF);
|
INSERT INTO t1 (a) VALUES (0x61);
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a;
|
INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF);
|
||||||
HEX(a)
|
INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF);
|
||||||
61
|
INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF);
|
||||||
C280
|
SELECT id,HEX(a) FROM t1 ORDER BY a,id;
|
||||||
DFBF
|
id HEX(a)
|
||||||
E0A080
|
1 61
|
||||||
EFBFBF
|
2 C280
|
||||||
F0908080
|
3 DFBF
|
||||||
F48FBFBF
|
4 E0A080
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a DESC;
|
6 F0908080
|
||||||
HEX(a)
|
7 F48FBFBF
|
||||||
F48FBFBF
|
5 EFBFBF
|
||||||
F0908080
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
EFBFBF
|
id HEX(a)
|
||||||
E0A080
|
5 EFBFBF
|
||||||
DFBF
|
7 F48FBFBF
|
||||||
C280
|
6 F0908080
|
||||||
61
|
4 E0A080
|
||||||
|
3 DFBF
|
||||||
|
2 C280
|
||||||
|
1 61
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
6
|
||||||
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a;
|
SELECT id,HEX(a) FROM t1 ORDER BY a;
|
||||||
HEX(a)
|
id HEX(a)
|
||||||
61
|
1 61
|
||||||
C280
|
2 C280
|
||||||
DFBF
|
3 DFBF
|
||||||
E0A080
|
4 E0A080
|
||||||
EFBFBF
|
5 EFBFBF
|
||||||
F0908080
|
6 F0908080
|
||||||
F48FBFBF
|
7 F48FBFBF
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a DESC;
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
HEX(a)
|
id HEX(a)
|
||||||
F48FBFBF
|
7 F48FBFBF
|
||||||
F0908080
|
6 F0908080
|
||||||
EFBFBF
|
5 EFBFBF
|
||||||
E0A080
|
4 E0A080
|
||||||
DFBF
|
3 DFBF
|
||||||
C280
|
2 C280
|
||||||
61
|
1 61
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
7
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
#
|
#
|
||||||
# ctype_utf8mb4.inc: End of 10.1 tests
|
# ctype_utf8mb4.inc: End of 10.1 tests
|
||||||
|
@ -2647,48 +2647,57 @@ DROP TABLE t1;
|
|||||||
#
|
#
|
||||||
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
||||||
#
|
#
|
||||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a));
|
CREATE TABLE t1 (
|
||||||
INSERT INTO t1 VALUES (0x61);
|
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
INSERT INTO t1 VALUES (0xC280),(0xDFBF);
|
a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id)
|
||||||
INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF);
|
);
|
||||||
INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF);
|
INSERT INTO t1 (a) VALUES (0x61);
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a;
|
INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF);
|
||||||
HEX(a)
|
INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF);
|
||||||
61
|
INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF);
|
||||||
C280
|
SELECT id,HEX(a) FROM t1 ORDER BY a,id;
|
||||||
DFBF
|
id HEX(a)
|
||||||
E0A080
|
1 61
|
||||||
EFBFBF
|
2 C280
|
||||||
F0908080
|
3 DFBF
|
||||||
F48FBFBF
|
4 E0A080
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a DESC;
|
6 F0908080
|
||||||
HEX(a)
|
7 F48FBFBF
|
||||||
F48FBFBF
|
5 EFBFBF
|
||||||
F0908080
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
EFBFBF
|
id HEX(a)
|
||||||
E0A080
|
5 EFBFBF
|
||||||
DFBF
|
7 F48FBFBF
|
||||||
C280
|
6 F0908080
|
||||||
61
|
4 E0A080
|
||||||
|
3 DFBF
|
||||||
|
2 C280
|
||||||
|
1 61
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
6
|
||||||
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a;
|
SELECT id,HEX(a) FROM t1 ORDER BY a;
|
||||||
HEX(a)
|
id HEX(a)
|
||||||
61
|
1 61
|
||||||
C280
|
2 C280
|
||||||
DFBF
|
3 DFBF
|
||||||
E0A080
|
4 E0A080
|
||||||
EFBFBF
|
5 EFBFBF
|
||||||
F0908080
|
6 F0908080
|
||||||
F48FBFBF
|
7 F48FBFBF
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a DESC;
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
HEX(a)
|
id HEX(a)
|
||||||
F48FBFBF
|
7 F48FBFBF
|
||||||
F0908080
|
6 F0908080
|
||||||
EFBFBF
|
5 EFBFBF
|
||||||
E0A080
|
4 E0A080
|
||||||
DFBF
|
3 DFBF
|
||||||
C280
|
2 C280
|
||||||
61
|
1 61
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
7
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
#
|
#
|
||||||
# ctype_utf8mb4.inc: End of 10.1 tests
|
# ctype_utf8mb4.inc: End of 10.1 tests
|
||||||
|
@ -2647,48 +2647,57 @@ DROP TABLE t1;
|
|||||||
#
|
#
|
||||||
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
# MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
||||||
#
|
#
|
||||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a));
|
CREATE TABLE t1 (
|
||||||
INSERT INTO t1 VALUES (0x61);
|
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
INSERT INTO t1 VALUES (0xC280),(0xDFBF);
|
a VARCHAR(10) CHARACTER SET utf8mb4, KEY(a,id)
|
||||||
INSERT INTO t1 VALUES (0xE0A080),(0xEFBFBF);
|
);
|
||||||
INSERT INTO t1 VALUES (0xF0908080),(0xF48FBFBF);
|
INSERT INTO t1 (a) VALUES (0x61);
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a;
|
INSERT INTO t1 (a) VALUES (0xC280),(0xDFBF);
|
||||||
HEX(a)
|
INSERT INTO t1 (a) VALUES (0xE0A080),(0xEFBFBF);
|
||||||
61
|
INSERT INTO t1 (a) VALUES (0xF0908080),(0xF48FBFBF);
|
||||||
C280
|
SELECT id,HEX(a) FROM t1 ORDER BY a,id;
|
||||||
DFBF
|
id HEX(a)
|
||||||
E0A080
|
1 61
|
||||||
EFBFBF
|
2 C280
|
||||||
F0908080
|
3 DFBF
|
||||||
F48FBFBF
|
4 E0A080
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a DESC;
|
6 F0908080
|
||||||
HEX(a)
|
7 F48FBFBF
|
||||||
F48FBFBF
|
5 EFBFBF
|
||||||
F0908080
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
EFBFBF
|
id HEX(a)
|
||||||
E0A080
|
5 EFBFBF
|
||||||
DFBF
|
7 F48FBFBF
|
||||||
C280
|
6 F0908080
|
||||||
61
|
4 E0A080
|
||||||
|
3 DFBF
|
||||||
|
2 C280
|
||||||
|
1 61
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
6
|
||||||
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a;
|
SELECT id,HEX(a) FROM t1 ORDER BY a;
|
||||||
HEX(a)
|
id HEX(a)
|
||||||
61
|
1 61
|
||||||
C280
|
2 C280
|
||||||
DFBF
|
3 DFBF
|
||||||
E0A080
|
4 E0A080
|
||||||
EFBFBF
|
5 EFBFBF
|
||||||
F0908080
|
6 F0908080
|
||||||
F48FBFBF
|
7 F48FBFBF
|
||||||
SELECT HEX(a) FROM t1 ORDER BY a DESC;
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
HEX(a)
|
id HEX(a)
|
||||||
F48FBFBF
|
7 F48FBFBF
|
||||||
F0908080
|
6 F0908080
|
||||||
EFBFBF
|
5 EFBFBF
|
||||||
E0A080
|
4 E0A080
|
||||||
DFBF
|
3 DFBF
|
||||||
C280
|
2 C280
|
||||||
61
|
1 61
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
7
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
#
|
#
|
||||||
# ctype_utf8mb4.inc: End of 10.1 tests
|
# ctype_utf8mb4.inc: End of 10.1 tests
|
||||||
|
@ -860,3 +860,31 @@ DEALLOCATE PREPARE stmt;
|
|||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.0 tests
|
--echo # End of 10.0 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # Start of 10.1 tests
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
||||||
|
--echo #
|
||||||
|
CREATE TABLE t1 (
|
||||||
|
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
|
a VARCHAR(10) CHARACTER SET utf16, KEY(a,id)
|
||||||
|
);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0x61);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF);
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a,id;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16 COLLATE utf16_bin;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.1 tests
|
||||||
|
--echo #
|
||||||
|
@ -744,3 +744,31 @@ SET NAMES utf8, collation_connection=utf16le_bin;
|
|||||||
--echo #
|
--echo #
|
||||||
--echo # End of 5.6 tests
|
--echo # End of 5.6 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # Start of 10.1 tests
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-8417 utf8mb4: compare broken bytes as "greater than any non-broken character"
|
||||||
|
--echo #
|
||||||
|
CREATE TABLE t1 (
|
||||||
|
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||||
|
a VARCHAR(10) CHARACTER SET utf16le, KEY(a,id)
|
||||||
|
);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0x61);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xC280),(_utf8mb4 0xDFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xE0A080),(_utf8mb4 0xEFBFBF);
|
||||||
|
INSERT INTO t1 (a) VALUES (_utf8mb4 0xF0908080),(_utf8mb4 0xF48FBFBF);
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a,id;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET utf16le COLLATE utf16le_bin;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a;
|
||||||
|
SELECT id,HEX(a) FROM t1 ORDER BY a DESC,id DESC;
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.1 tests
|
||||||
|
--echo #
|
||||||
|
@ -1216,7 +1216,7 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
|
|||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
|
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1)
|
||||||
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3))
|
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin
|
||||||
@ -1665,7 +1665,7 @@ struct charset_info_st my_charset_utf16_bin=
|
|||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci
|
||||||
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
|
||||||
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
|
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0)
|
||||||
#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2))
|
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
|
||||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin
|
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin
|
||||||
|
@ -7775,10 +7775,9 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
|
|||||||
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8_general_ci(b0,b1)
|
#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf8_general_ci(b0,b1)
|
||||||
#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8_general_ci(b0,b1,b2)
|
#define WEIGHT_MB3(b0,b1,b2) my_weight_mb3_utf8_general_ci(b0,b1,b2)
|
||||||
/*
|
/*
|
||||||
There is no mapping between code point and weight for non-BMP characters
|
All non-BMP characters have the same weight.
|
||||||
in utf8mb4_general_ci. Just using code point as weight.
|
|
||||||
*/
|
*/
|
||||||
#define WEIGHT_MB4(b0,b1,b2,b3) UTF8MB4_CODE(b0,b1,b2,b3)
|
#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER
|
||||||
#include "strcoll.ic"
|
#include "strcoll.ic"
|
||||||
|
|
||||||
|
|
||||||
|
@ -412,6 +412,18 @@ static STRNNCOLL_PARAM strcoll_utf8mb4_common[]=
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static STRNNCOLL_PARAM strcoll_utf8mb4_general_ci[]=
|
||||||
|
{
|
||||||
|
/* All non-BMP characters are equal in utf8mb4_general_ci */
|
||||||
|
{CSTR("\xF0\x90\x80\x80"), CSTR("\xF0\x90\x80\x81"),0},/* Non-BMP MB4 vs non-BMP MB4 */
|
||||||
|
{CSTR("\xF0\x90\x80\x80"), CSTR("\xF4\x8F\xBF\xBF"),0},/* Non-BMP MB4 vs non-BMP MB4 */
|
||||||
|
{CSTR("\x00"), CSTR("\xF0\x90\x80\x80"),-1},/* U+0000 vs non-BMP MB4 */
|
||||||
|
{CSTR("\x00"), CSTR("\xF0\x90\x80\x81"),-1},/* U+0000 vs non-BMP MB4 */
|
||||||
|
{CSTR("\x00"), CSTR("\xF4\x8F\xBF\xBF"),-1},/* U+0000 vs non-BMP MB4 */
|
||||||
|
{NULL, 0, NULL, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static STRNNCOLL_PARAM strcoll_ucs2_common[]=
|
static STRNNCOLL_PARAM strcoll_ucs2_common[]=
|
||||||
{
|
{
|
||||||
{CSTR("\xC0"), CSTR("\xC1"), -1}, /* Incomplete MB2 vs incomplete MB2 */
|
{CSTR("\xC0"), CSTR("\xC1"), -1}, /* Incomplete MB2 vs incomplete MB2 */
|
||||||
@ -474,13 +486,24 @@ static STRNNCOLL_PARAM strcoll_utf16_common[]=
|
|||||||
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\xFF\xDF"), -1},/* MB4 vs incomplete MB4 */
|
{CSTR("\xDB\xFF\xDF\xFF"), CSTR("\xDC\xFF\xDF"), -1},/* MB4 vs incomplete MB4 */
|
||||||
|
|
||||||
/* Broken MB4 vs broken MB4 */
|
/* Broken MB4 vs broken MB4 */
|
||||||
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* Broken MB4 vs broken MB4 */
|
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDB\x01"),-1},/* Broken MB4 vs broken MB4 */
|
||||||
{CSTR("\xDB\xFF\xE0\xFE"), CSTR("\xDB\xFF\xE0\xFF"),-1},/* Broken MB4 vs broken MB4 */
|
{CSTR("\xDB\xFF\xE0\xFE"), CSTR("\xDB\xFF\xE0\xFF"),-1},/* Broken MB4 vs broken MB4 */
|
||||||
|
|
||||||
{NULL, 0, NULL, 0, 0}
|
{NULL, 0, NULL, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static STRNNCOLL_PARAM strcoll_utf16_general_ci[]=
|
||||||
|
{
|
||||||
|
/* All non-BMP characters are compared as equal */
|
||||||
|
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xD8\x00\xDC\x01"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
|
||||||
|
{CSTR("\xD8\x00\xDC\x00"), CSTR("\xDB\xFF\xDF\xFF"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
|
||||||
|
{CSTR("\x00\x00"), CSTR("\xD8\x00\xDC\x01"),-1},/* U+0000 vs non-BMP MB4 */
|
||||||
|
{CSTR("\x00\x00"), CSTR("\xDB\xFF\xDF\xFF"),-1},/* U+0000 vs non-BMP MB4 */
|
||||||
|
{NULL, 0, NULL, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static STRNNCOLL_PARAM strcoll_utf16le_common[]=
|
static STRNNCOLL_PARAM strcoll_utf16le_common[]=
|
||||||
{
|
{
|
||||||
/* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */
|
/* Minimum four-byte character: U+10000 == _utf16 0xD800DC00 */
|
||||||
@ -500,13 +523,24 @@ static STRNNCOLL_PARAM strcoll_utf16le_common[]=
|
|||||||
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xFF\xDC\x00"), -1},/* MB4 vs incomplete MB4 */
|
{CSTR("\xFF\xDB\xFF\xDF"), CSTR("\xFF\xDC\x00"), -1},/* MB4 vs incomplete MB4 */
|
||||||
|
|
||||||
/* Broken MB4 vs broken MB4 */
|
/* Broken MB4 vs broken MB4 */
|
||||||
{CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"),-1},/* Broken MB4 vs broken MB4 */
|
{CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDB"),-1},/* Broken MB4 vs broken MB4 */
|
||||||
{CSTR("\xFF\xDB\xFE\xE0"), CSTR("\xFF\xDB\xFF\xE0"),-1},/* Broken MB4 vs broken MB4 */
|
{CSTR("\xFF\xDB\xFE\xE0"), CSTR("\xFF\xDB\xFF\xE0"),-1},/* Broken MB4 vs broken MB4 */
|
||||||
|
|
||||||
{NULL, 0, NULL, 0, 0}
|
{NULL, 0, NULL, 0, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static STRNNCOLL_PARAM strcoll_utf16le_general_ci[]=
|
||||||
|
{
|
||||||
|
/* All non-BMP characters are compared as equal */
|
||||||
|
{CSTR("\x00\xD8\x00\xDC"), CSTR("\x00\xD8\x01\xDC"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
|
||||||
|
{CSTR("\x00\xD8\x00\xDC"), CSTR("\xFF\xDB\xFF\xDF"), 0},/* Non-BMP MB4 vs non-BMP MB4 */
|
||||||
|
{CSTR("\x00\x00"), CSTR("\x00\xD8\x01\xDC"), -1},/* U+0000 vs non-BMP MB4 */
|
||||||
|
{CSTR("\x00\x00"), CSTR("\xFF\xDB\xFF\xDF"), -1},/* U+0000 vs non-BMP MB4 */
|
||||||
|
{NULL, 0, NULL, 0, 0}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
|
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
|
||||||
{
|
{
|
||||||
@ -641,6 +675,7 @@ test_strcollsp()
|
|||||||
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_common);
|
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_common);
|
||||||
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_space);
|
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_ucs2_space);
|
||||||
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_common);
|
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_common);
|
||||||
|
failed+= strcollsp(&my_charset_utf16_general_ci, strcoll_utf16_general_ci);
|
||||||
failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_common);
|
failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_common);
|
||||||
failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_space);
|
failed+= strcollsp(&my_charset_utf16_bin, strcoll_ucs2_space);
|
||||||
failed+= strcollsp(&my_charset_utf16_bin, strcoll_utf16_common);
|
failed+= strcollsp(&my_charset_utf16_bin, strcoll_utf16_common);
|
||||||
@ -648,6 +683,7 @@ test_strcollsp()
|
|||||||
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_ucs2_common);
|
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_ucs2_common);
|
||||||
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_space);
|
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_space);
|
||||||
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_common);
|
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_common);
|
||||||
|
failed+= strcollsp(&my_charset_utf16le_general_ci,strcoll_utf16le_general_ci);
|
||||||
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_ucs2_common);
|
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_ucs2_common);
|
||||||
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_space);
|
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_space);
|
||||||
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_common);
|
failed+= strcollsp(&my_charset_utf16le_bin, strcoll_utf16le_common);
|
||||||
@ -661,6 +697,7 @@ test_strcollsp()
|
|||||||
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb3_common);
|
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb3_common);
|
||||||
failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb3_common);
|
failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb3_common);
|
||||||
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_common);
|
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_common);
|
||||||
|
failed+= strcollsp(&my_charset_utf8mb4_general_ci, strcoll_utf8mb4_general_ci);
|
||||||
failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb4_common);
|
failed+= strcollsp(&my_charset_utf8mb4_bin, strcoll_utf8mb4_common);
|
||||||
#endif
|
#endif
|
||||||
return failed;
|
return failed;
|
||||||
|
Reference in New Issue
Block a user