From 1b65cc9da71bf267724ac13c9825b9ce1c75b78e Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 15 Nov 2023 06:09:41 +0400 Subject: [PATCH] MDEV-25829 Change default Unicode collation to uca1400_ai_ci Step#1 - Changing collation derivation for string user variables from IMPLICIT to COERCIBLE. Rationale: Without this preparatory change, switching the default collation for Unicode character sets from xxx_general_ci to uca1400_ai_ci would cause "Illegal mix of collations" errors in scenarios comparing a column with a non-default collation to a string user variable. This is especially important for queries to INFORMATION_SCHEMA tables, whose columns use utf8mb3_general_ci. See the description of MDEV-25829 for more details and SQL script examples. --- mysql-test/main/func_regexp_pcre.result | 15 +++++++++++++-- mysql-test/main/func_regexp_pcre.test | 8 +++++++- mysql-test/main/get_diagnostics.result | 4 ++-- mysql-test/main/user_var.result | 14 +++++++------- sql/item_func.cc | 8 ++++---- 5 files changed, 33 insertions(+), 16 deletions(-) diff --git a/mysql-test/main/func_regexp_pcre.result b/mysql-test/main/func_regexp_pcre.result index 36f2fced1db..1f25f03db73 100644 --- a/mysql-test/main/func_regexp_pcre.result +++ b/mysql-test/main/func_regexp_pcre.result @@ -859,11 +859,22 @@ REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this and \\2t # Testing a warning SET NAMES latin1; SET @regCheck= '\\xE0\\x01'; -SELECT 0xE001 REGEXP @regCheck; -0xE001 REGEXP @regCheck +SELECT 0xE001 REGEXP @regCheck COLLATE latin1_bin; +0xE001 REGEXP @regCheck COLLATE latin1_bin 0 Warnings: Warning 1139 Regex error 'UTF-8 error: 1 byte missing at end' +SELECT 0xE001 REGEXP CAST(@regCheck AS CHAR); +0xE001 REGEXP CAST(@regCheck AS CHAR) +0 +Warnings: +Warning 1139 Regex error 'UTF-8 error: 1 byte missing at end' +# Since 11.4 user variables have DERIVATION_COERCIBLE +# so a user variable and a literal in the pattern give equal results +SET @regCheck= '\\xE0\\x01'; 
+SELECT 0xE001 REGEXP '\\xE0\\x01' AS c1, 0xE001 REGEXP @regCheck AS c2; +c1 c2 +1 1 # Testing workaround N1: This makes the pattern to be a binary string: SET NAMES latin1; SET @regCheck= X'E001'; diff --git a/mysql-test/main/func_regexp_pcre.test b/mysql-test/main/func_regexp_pcre.test index 8c2408f5763..37da65f296d 100644 --- a/mysql-test/main/func_regexp_pcre.test +++ b/mysql-test/main/func_regexp_pcre.test @@ -428,7 +428,13 @@ SELECT REGEXP_REPLACE('1 foo and bar', '(\\d+) foo and (\\d+ )?bar', '\\1 this a --echo # Testing a warning SET NAMES latin1; SET @regCheck= '\\xE0\\x01'; -SELECT 0xE001 REGEXP @regCheck; +SELECT 0xE001 REGEXP @regCheck COLLATE latin1_bin; +SELECT 0xE001 REGEXP CAST(@regCheck AS CHAR); + +--echo # Since 11.4 user variables have DERIVATION_COERCIBLE +--echo # so a user variable and a literal in the pattern give equal results +SET @regCheck= '\\xE0\\x01'; +SELECT 0xE001 REGEXP '\\xE0\\x01' AS c1, 0xE001 REGEXP @regCheck AS c2; --echo # Testing workaround N1: This makes the pattern to be a binary string: SET NAMES latin1; diff --git a/mysql-test/main/get_diagnostics.result b/mysql-test/main/get_diagnostics.result index 48eab8e0734..b6ba0bb3263 100644 --- a/mysql-test/main/get_diagnostics.result +++ b/mysql-test/main/get_diagnostics.result @@ -774,10 +774,10 @@ Warning 1916 Got overflow when converting '-19999999999999999999' to INT. 
Value GET DIAGNOSTICS CONDITION 1 @var1 = MESSAGE_TEXT, @var2 = CLASS_ORIGIN; SELECT CHARSET(@var1), COLLATION(@var1), COERCIBILITY(@var1); CHARSET(@var1) COLLATION(@var1) COERCIBILITY(@var1) -utf8mb3 utf8mb3_general_ci 2 +utf8mb3 utf8mb3_general_ci 4 SELECT CHARSET(@var2), COLLATION(@var2), COERCIBILITY(@var2); CHARSET(@var2) COLLATION(@var2) COERCIBILITY(@var2) -utf8mb3 utf8mb3_general_ci 2 +utf8mb3 utf8mb3_general_ci 4 # # Command statistics # diff --git a/mysql-test/main/user_var.result b/mysql-test/main/user_var.result index 26bf2853dd4..4bdf3a18cf2 100644 --- a/mysql-test/main/user_var.result +++ b/mysql-test/main/user_var.result @@ -137,7 +137,7 @@ drop table t1; set @a=_latin2'test'; select charset(@a),collation(@a),coercibility(@a); charset(@a) collation(@a) coercibility(@a) -latin2 latin2_general_ci 2 +latin2 latin2_general_ci 4 select @a=_latin2'TEST'; @a=_latin2'TEST' 1 @@ -147,7 +147,7 @@ select @a=_latin2'TEST' collate latin2_bin; set @a=_latin2'test' collate latin2_general_ci; select charset(@a),collation(@a),coercibility(@a); charset(@a) collation(@a) coercibility(@a) -latin2 latin2_general_ci 2 +latin2 latin2_general_ci 4 select @a=_latin2'TEST'; @a=_latin2'TEST' 1 @@ -162,19 +162,19 @@ collation(@a:=_latin2'test') latin2_general_ci select coercibility(@a:=_latin2'test'); coercibility(@a:=_latin2'test') -2 +4 select collation(@a:=_latin2'test' collate latin2_bin); collation(@a:=_latin2'test' collate latin2_bin) latin2_bin select coercibility(@a:=_latin2'test' collate latin2_bin); coercibility(@a:=_latin2'test' collate latin2_bin) -2 +4 select (@a:=_latin2'test' collate latin2_bin) = _latin2'TEST'; (@a:=_latin2'test' collate latin2_bin) = _latin2'TEST' 0 select charset(@a),collation(@a),coercibility(@a); charset(@a) collation(@a) coercibility(@a) -latin2 latin2_bin 2 +latin2 latin2_bin 4 select (@a:=_latin2'test' collate latin2_bin) = _latin2'TEST' collate latin2_general_ci; (@a:=_latin2'test' collate latin2_bin) = _latin2'TEST' collate 
latin2_general_ci 1 @@ -184,11 +184,11 @@ my_column 0 select @v, coercibility(@v); @v coercibility(@v) -NULL 2 +NULL 4 set @v1=null, @v2=1, @v3=1.1, @v4=now(); select coercibility(@v1),coercibility(@v2),coercibility(@v3),coercibility(@v4); coercibility(@v1) coercibility(@v2) coercibility(@v3) coercibility(@v4) -2 5 5 2 +4 5 5 4 set session @honk=99; ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '@honk=99' at line 1 select @@local.max_allowed_packet; diff --git a/sql/item_func.cc b/sql/item_func.cc index 8ded25def49..ec900aadaa2 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -4734,7 +4734,7 @@ bool Item_func_set_user_var::fix_fields(THD *thd, Item **ref) &my_charset_numeric : args[0]->collation.collation); collation.set(m_var_entry->charset(), args[0]->collation.derivation == DERIVATION_NUMERIC ? - DERIVATION_NUMERIC : DERIVATION_IMPLICIT); + DERIVATION_NUMERIC : DERIVATION_COERCIBLE); switch (args[0]->result_type()) { case STRING_RESULT: case TIME_RESULT: @@ -4799,7 +4799,7 @@ Item_func_set_user_var::fix_length_and_dec(THD *thd) } else { - collation.set(DERIVATION_IMPLICIT); + collation.set(DERIVATION_COERCIBLE); fix_length_and_charset(args[0]->max_char_length(), args[0]->collation.collation); } @@ -5676,7 +5676,7 @@ bool Item_func_get_user_var::fix_length_and_dec(THD *thd) set_handler(&type_handler_slonglong); break; case STRING_RESULT: - collation.set(m_var_entry->charset(), DERIVATION_IMPLICIT); + collation.set(m_var_entry->charset(), DERIVATION_COERCIBLE); max_length= MAX_BLOB_WIDTH - 1; set_handler(&type_handler_long_blob); if (m_var_entry->type_handler()->field_type() == MYSQL_TYPE_GEOMETRY) @@ -5696,7 +5696,7 @@ bool Item_func_get_user_var::fix_length_and_dec(THD *thd) } else { - collation.set(&my_charset_bin, DERIVATION_IMPLICIT); + collation.set(&my_charset_bin, DERIVATION_COERCIBLE); null_value= 1; set_handler(&type_handler_long_blob); 
max_length= MAX_BLOB_WIDTH;