1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-01 03:47:19 +03:00

MDEV-25829 Change default Unicode collation to uca1400_ai_ci

Step#2 - Adding a new collation derivation level for CAST and CONVERT.

Now character string cast functions:
  - CAST(string_expr AS CHAR)
  - CONVERT(expr USING charset_name)

have a new collation derivation level between:

  - string literals
  - utf8 metadata functions, e.g. user() and database()

Before the change these cast functions had collation derivation equal
to table columns, which caused more illegal mix of collation conflicts.

Note, binary string cast functions:
  - BINARY(expr)
  - CAST(string_expr AS BINARY)
  - CONVERT(expr USING binary)
did not change their collation derivation, to preserve the behaviour of
queries like these:
SELECT database()=BINARY'test';
SELECT user()=CAST('root' AS BINARY);
SELECT current_role()=CONVERT('role' USING binary);

Derivation levels after the change look as follows:

  DERIVATION_IGNORABLE= 7, // Explicit NULL

  DERIVATION_NUMERIC= 6,   // Numbers in string context,
                           // Numeric user variables
                           // CAST(numeric_expr AS CHAR)

  DERIVATION_COERCIBLE= 5, // Literals, string user variables

  DERIVATION_CAST= 4,      // CAST(string_expr AS CHAR),
                           // CONVERT(string_expr USING cs)

  DERIVATION_SYSCONST= 3,  // utf8 metadata functions, e.g. user(), database()
  DERIVATION_IMPLICIT= 2,  // Table columns, SP variables, BINARY(expr)
  DERIVATION_NONE= 1,      // A mix (e.g. CONCAT) of two differrent collations
  DERIVATION_EXPLICIT= 0   // An explicit COLLATE clause
This commit is contained in:
Alexander Barkov
2023-11-15 10:14:28 +04:00
parent 1b65cc9da7
commit a3117c7983
31 changed files with 218 additions and 121 deletions

View File

@ -608,103 +608,103 @@ select _latin1'B' COLLATE latin1_general_ci in (_latin1'a',_latin1'b' COLLATE la
ERROR HY000: Illegal mix of collations (latin1_general_ci,EXPLICIT), (latin1_swedish_ci,COERCIBLE), (latin1_bin,EXPLICIT) for operation 'in'
select collation(bin(130)), coercibility(bin(130));
collation(bin(130)) coercibility(bin(130))
latin1_swedish_ci 4
latin1_swedish_ci 5
select collation(oct(130)), coercibility(oct(130));
collation(oct(130)) coercibility(oct(130))
latin1_swedish_ci 4
latin1_swedish_ci 5
select collation(conv(130,16,10)), coercibility(conv(130,16,10));
collation(conv(130,16,10)) coercibility(conv(130,16,10))
latin1_swedish_ci 4
latin1_swedish_ci 5
select collation(hex(130)), coercibility(hex(130));
collation(hex(130)) coercibility(hex(130))
latin1_swedish_ci 4
latin1_swedish_ci 5
select collation(char(130)), coercibility(hex(130));
collation(char(130)) coercibility(hex(130))
binary 4
binary 5
select collation(format(130,10)), coercibility(format(130,10));
collation(format(130,10)) coercibility(format(130,10))
latin1_swedish_ci 4
latin1_swedish_ci 5
select collation(lcase(_latin2'a')), coercibility(lcase(_latin2'a'));
collation(lcase(_latin2'a')) coercibility(lcase(_latin2'a'))
latin2_general_ci 4
latin2_general_ci 5
select collation(ucase(_latin2'a')), coercibility(ucase(_latin2'a'));
collation(ucase(_latin2'a')) coercibility(ucase(_latin2'a'))
latin2_general_ci 4
latin2_general_ci 5
select collation(left(_latin2'a',1)), coercibility(left(_latin2'a',1));
collation(left(_latin2'a',1)) coercibility(left(_latin2'a',1))
latin2_general_ci 4
latin2_general_ci 5
select collation(right(_latin2'a',1)), coercibility(right(_latin2'a',1));
collation(right(_latin2'a',1)) coercibility(right(_latin2'a',1))
latin2_general_ci 4
latin2_general_ci 5
select collation(substring(_latin2'a',1,1)), coercibility(substring(_latin2'a',1,1));
collation(substring(_latin2'a',1,1)) coercibility(substring(_latin2'a',1,1))
latin2_general_ci 4
latin2_general_ci 5
select collation(concat(_latin2'a',_latin2'b')), coercibility(concat(_latin2'a',_latin2'b'));
collation(concat(_latin2'a',_latin2'b')) coercibility(concat(_latin2'a',_latin2'b'))
latin2_general_ci 4
latin2_general_ci 5
select collation(lpad(_latin2'a',4,_latin2'b')), coercibility(lpad(_latin2'a',4,_latin2'b'));
collation(lpad(_latin2'a',4,_latin2'b')) coercibility(lpad(_latin2'a',4,_latin2'b'))
latin2_general_ci 4
latin2_general_ci 5
select collation(lpad(_latin2'a',4)), coercibility(lpad(_latin2'a',4));
collation(lpad(_latin2'a',4)) coercibility(lpad(_latin2'a',4))
latin2_general_ci 4
latin2_general_ci 5
select collation(rpad(_latin2'a',4,_latin2'b')), coercibility(rpad(_latin2'a',4,_latin2'b'));
collation(rpad(_latin2'a',4,_latin2'b')) coercibility(rpad(_latin2'a',4,_latin2'b'))
latin2_general_ci 4
latin2_general_ci 5
select collation(rpad(_latin2'a',4)), coercibility(rpad(_latin2'a',4));
collation(rpad(_latin2'a',4)) coercibility(rpad(_latin2'a',4))
latin2_general_ci 4
latin2_general_ci 5
select collation(concat_ws(_latin2'a',_latin2'b')), coercibility(concat_ws(_latin2'a',_latin2'b'));
collation(concat_ws(_latin2'a',_latin2'b')) coercibility(concat_ws(_latin2'a',_latin2'b'))
latin2_general_ci 4
latin2_general_ci 5
select collation(make_set(255,_latin2'a',_latin2'b',_latin2'c')), coercibility(make_set(255,_latin2'a',_latin2'b',_latin2'c'));
collation(make_set(255,_latin2'a',_latin2'b',_latin2'c')) coercibility(make_set(255,_latin2'a',_latin2'b',_latin2'c'))
latin2_general_ci 4
latin2_general_ci 5
select collation(export_set(255,_latin2'y',_latin2'n',_latin2' ')), coercibility(export_set(255,_latin2'y',_latin2'n',_latin2' '));
collation(export_set(255,_latin2'y',_latin2'n',_latin2' ')) coercibility(export_set(255,_latin2'y',_latin2'n',_latin2' '))
latin2_general_ci 4
latin2_general_ci 5
select collation(trim(_latin2' a ')), coercibility(trim(_latin2' a '));
collation(trim(_latin2' a ')) coercibility(trim(_latin2' a '))
latin2_general_ci 4
latin2_general_ci 5
select collation(ltrim(_latin2' a ')), coercibility(ltrim(_latin2' a '));
collation(ltrim(_latin2' a ')) coercibility(ltrim(_latin2' a '))
latin2_general_ci 4
latin2_general_ci 5
select collation(rtrim(_latin2' a ')), coercibility(rtrim(_latin2' a '));
collation(rtrim(_latin2' a ')) coercibility(rtrim(_latin2' a '))
latin2_general_ci 4
latin2_general_ci 5
select collation(trim(LEADING _latin2' ' FROM _latin2'a')), coercibility(trim(LEADING _latin2'a' FROM _latin2'a'));
collation(trim(LEADING _latin2' ' FROM _latin2'a')) coercibility(trim(LEADING _latin2'a' FROM _latin2'a'))
latin2_general_ci 4
latin2_general_ci 5
select collation(trim(TRAILING _latin2' ' FROM _latin2'a')), coercibility(trim(TRAILING _latin2'a' FROM _latin2'a'));
collation(trim(TRAILING _latin2' ' FROM _latin2'a')) coercibility(trim(TRAILING _latin2'a' FROM _latin2'a'))
latin2_general_ci 4
latin2_general_ci 5
select collation(trim(BOTH _latin2' ' FROM _latin2'a')), coercibility(trim(BOTH _latin2'a' FROM _latin2'a'));
collation(trim(BOTH _latin2' ' FROM _latin2'a')) coercibility(trim(BOTH _latin2'a' FROM _latin2'a'))
latin2_general_ci 4
latin2_general_ci 5
select collation(repeat(_latin2'a',10)), coercibility(repeat(_latin2'a',10));
collation(repeat(_latin2'a',10)) coercibility(repeat(_latin2'a',10))
latin2_general_ci 4
latin2_general_ci 5
select collation(reverse(_latin2'ab')), coercibility(reverse(_latin2'ab'));
collation(reverse(_latin2'ab')) coercibility(reverse(_latin2'ab'))
latin2_general_ci 4
latin2_general_ci 5
select collation(quote(_latin2'ab')), coercibility(quote(_latin2'ab'));
collation(quote(_latin2'ab')) coercibility(quote(_latin2'ab'))
latin2_general_ci 4
latin2_general_ci 5
select collation(soundex(_latin2'ab')), coercibility(soundex(_latin2'ab'));
collation(soundex(_latin2'ab')) coercibility(soundex(_latin2'ab'))
latin2_general_ci 4
latin2_general_ci 5
select collation(substring(_latin2'ab',1)), coercibility(substring(_latin2'ab',1));
collation(substring(_latin2'ab',1)) coercibility(substring(_latin2'ab',1))
latin2_general_ci 4
latin2_general_ci 5
select collation(insert(_latin2'abcd',2,3,_latin2'ef')), coercibility(insert(_latin2'abcd',2,3,_latin2'ef'));
collation(insert(_latin2'abcd',2,3,_latin2'ef')) coercibility(insert(_latin2'abcd',2,3,_latin2'ef'))
latin2_general_ci 4
latin2_general_ci 5
select collation(replace(_latin2'abcd',_latin2'b',_latin2'B')), coercibility(replace(_latin2'abcd',_latin2'b',_latin2'B'));
collation(replace(_latin2'abcd',_latin2'b',_latin2'B')) coercibility(replace(_latin2'abcd',_latin2'b',_latin2'B'))
latin2_general_ci 4
latin2_general_ci 5
select collation(encode('abcd','ab')), coercibility(encode('abcd','ab'));
collation(encode('abcd','ab')) coercibility(encode('abcd','ab'))
binary 4
binary 5
create table t1
select
bin(130),
@ -787,7 +787,7 @@ latin2 latin2_general_ci 2
drop table t1;
select charset(null), collation(null), coercibility(null);
charset(null) collation(null) coercibility(null)
binary binary 6
binary binary 7
CREATE TABLE t1 (a int, b int);
CREATE TABLE t2 (a int, b int);
INSERT INTO t1 VALUES (1,1),(2,2);
@ -803,7 +803,7 @@ a b a b
1 1 NULL NULL
2 2 2 2
select t1.*,t2.* from t1 left join t2 on (t1.b=t2.b)
where coercibility(t2.a) = 5 order by t1.a,t2.a;
where coercibility(t2.a) = 6 order by t1.a,t2.a;
a b a b
1 1 NULL NULL
2 2 2 2
@ -5663,3 +5663,32 @@ CONV(-9223372036854775808, -10, -62)
#
# End of 11.4 tests
#
#
# Start of 11.5 tests
#
#
# MDEV-25829 Change default collation to utf8mb4_1400_ai_ci
#
SELECT
coercibility(CAST(1 AS CHAR)),
coercibility(CONVERT('a' USING latin1));
coercibility(CAST(1 AS CHAR)) 4
coercibility(CONVERT('a' USING latin1)) 4
SELECT
coercibility(CAST(1 AS BINARY)),
coercibility(CONVERT('a' USING binary)),
coercibility(binary'a');
coercibility(CAST(1 AS BINARY)) 2
coercibility(CONVERT('a' USING binary)) 2
coercibility(binary'a') 2
EXPLAIN EXTENDED SELECT
CAST(1 AS BINARY) AS c,
CONVERT('a' USING binary),
BINARY'a';
id select_type table type possible_keys key key_len ref rows filtered Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1003 select cast(1 as char charset binary) AS `c`,convert('a' using binary) AS `CONVERT('a' USING binary)`,cast('a' as char charset binary) AS `BINARY'a'`
#
# End of 11.5 tests
#