From efa7ee357377daceb91db8dceae3b86119abfccc Mon Sep 17 00:00:00 2001 From: "bar@mysql.com/bar.myoffice.izhnet.ru" <> Date: Mon, 11 Feb 2008 16:28:33 +0400 Subject: [PATCH] Bug#27877 incorrect german order in utf8_general_ci Problem: incorrect sort order for "U+00DF SHARP S". Fix: changing sort order for U+00DF to be equal to 's', like the manual says. --- mysql-test/include/ctype_german.inc | 40 +++++++++++++++++++++++++++++ mysql-test/r/ctype_latin1_de.result | 35 +++++++++++++++++++++++++ mysql-test/r/ctype_uca.result | 35 +++++++++++++++++++++++++ mysql-test/r/ctype_ucs.result | 35 +++++++++++++++++++++++++ mysql-test/r/ctype_utf8.result | 35 +++++++++++++++++++++++++ mysql-test/t/ctype_latin1_de.test | 1 + mysql-test/t/ctype_uca.test | 1 + mysql-test/t/ctype_ucs.test | 1 + mysql-test/t/ctype_utf8.test | 1 + strings/ctype-utf8.c | 2 +- 10 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 mysql-test/include/ctype_german.inc diff --git a/mysql-test/include/ctype_german.inc b/mysql-test/include/ctype_german.inc new file mode 100644 index 00000000000..14d09358bea --- /dev/null +++ b/mysql-test/include/ctype_german.inc @@ -0,0 +1,40 @@ +# +# Bug #27877 incorrect german order in utf8_general_ci +# +# Testing if "SHARP S" is equal to "S", +# like in latin1_german1_ci, utf8_general_ci, ucs2_general_ci +# Or if "SHART S" is equal to "SS", +# like in latin1_german2_ci, utf8_unicode_ci, ucs2_unicode_ci +# +# Also testing A-uml, O-uml, U-uml +# + +--disable_warnings +drop table if exists t1; +--enable_warnings + +# +# Create a table with a varchar(x) column, +# using current values of +# @@character_set_connection and @@collation_connection. +# + +create table t1 as select repeat(' ', 64) as s1; +select collation(s1) from t1; +delete from t1; + +# +# Populate data +# + +insert into t1 values ('a'),('ae'),(_latin1 0xE4); +insert into t1 values ('o'),('oe'),(_latin1 0xF6); +insert into t1 values ('s'),('ss'),(_latin1 0xDF); +insert into t1 values ('u'),('ue'),(_latin1 0xFC); + +# +# Check order +# +select s1, hex(s1) from t1 order by s1, binary s1; +select group_concat(s1 order by binary s1) from t1 group by s1; +drop table t1; diff --git a/mysql-test/r/ctype_latin1_de.result b/mysql-test/r/ctype_latin1_de.result index 5733877237d..f826199bb1f 100644 --- a/mysql-test/r/ctype_latin1_de.result +++ b/mysql-test/r/ctype_latin1_de.result @@ -326,6 +326,41 @@ latin1_german2_ci 6109 latin1_german2_ci 61 latin1_german2_ci 6120 drop table t1; +drop table if exists t1; +create table t1 as select repeat(' ', 64) as s1; +select collation(s1) from t1; +collation(s1) +latin1_german2_ci +delete from t1; +insert into t1 values ('a'),('ae'),(_latin1 0xE4); +insert into t1 values ('o'),('oe'),(_latin1 0xF6); +insert into t1 values ('s'),('ss'),(_latin1 0xDF); +insert into t1 values ('u'),('ue'),(_latin1 0xFC); +select s1, hex(s1) from t1 order by s1, binary s1; +s1 hex(s1) +a 61 +ae 6165 +ä E4 +o 6F +oe 6F65 +ö F6 +s 73 +ss 7373 +ß DF +u 75 +ue 7565 +ü FC +select group_concat(s1 order by binary s1) from t1 group by s1; +group_concat(s1 order by binary s1) +a +ae,ä +o +oe,ö +s +ss,ß +u +ue,ü +drop table t1; SET NAMES latin1; CREATE TABLE t1 ( col1 varchar(255) NOT NULL default '' diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result index 92b76802d0b..999c20ce9e8 100644 --- a/mysql-test/r/ctype_uca.result +++ b/mysql-test/r/ctype_uca.result @@ -2647,6 +2647,41 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; c2h ab_def drop table t1; +drop table if exists t1; +create table t1 as select repeat(' ', 64) as s1; +select collation(s1) from t1; +collation(s1) +utf8_unicode_ci +delete from t1; +insert into t1 values ('a'),('ae'),(_latin1 0xE4); +insert into t1 values ('o'),('oe'),(_latin1 0xF6); +insert into t1 values ('s'),('ss'),(_latin1 0xDF); +insert into t1 values ('u'),('ue'),(_latin1 0xFC); +select s1, hex(s1) from t1 order by s1, binary s1; +s1 hex(s1) +a 61 +ä C3A4 +ae 6165 +o 6F +ö C3B6 +oe 6F65 +s 73 +ss 7373 +ß C39F +u 75 +ü C3BC +ue 7565 +select group_concat(s1 order by binary s1) from t1 group by s1; +group_concat(s1 order by binary s1) +a,ä +ae +o,ö +oe +s +ss,ß +u,ü +ue +drop table t1; CREATE TABLE t1 (id int, a varchar(30) character set utf8); INSERT INTO t1 VALUES (1, _ucs2 0x01310069), (2, _ucs2 0x01310131); INSERT INTO t1 VALUES (3, _ucs2 0x00690069), (4, _ucs2 0x01300049); diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index bd76f2a859f..8ec32d5bace 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -613,6 +613,41 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; c2h ab_def drop table t1; +drop table if exists t1; +create table t1 as select repeat(' ', 64) as s1; +select collation(s1) from t1; +collation(s1) +ucs2_general_ci +delete from t1; +insert into t1 values ('a'),('ae'),(_latin1 0xE4); +insert into t1 values ('o'),('oe'),(_latin1 0xF6); +insert into t1 values ('s'),('ss'),(_latin1 0xDF); +insert into t1 values ('u'),('ue'),(_latin1 0xFC); +select s1, hex(s1) from t1 order by s1, binary s1; +s1 hex(s1) +a 0061 +ä 00E4 +ae 00610065 +o 006F +ö 00F6 +oe 006F0065 +s 0073 +ß 00DF +ss 00730073 +u 0075 +ü 00FC +ue 00750065 +select group_concat(s1 order by binary s1) from t1 group by s1; +group_concat(s1 order by binary s1) +a,ä +ae +o,ö +oe +s,ß +ss +u,ü +ue +drop table t1; SET NAMES latin1; SET collation_connection='ucs2_bin'; create table t1 select repeat('a',4000) a; diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 5c90c3b5e0b..12e6c9bb492 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -939,6 +939,41 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; c2h ab_def drop table t1; +drop table if exists t1; +create table t1 as select repeat(' ', 64) as s1; +select collation(s1) from t1; +collation(s1) +utf8_general_ci +delete from t1; +insert into t1 values ('a'),('ae'),(_latin1 0xE4); +insert into t1 values ('o'),('oe'),(_latin1 0xF6); +insert into t1 values ('s'),('ss'),(_latin1 0xDF); +insert into t1 values ('u'),('ue'),(_latin1 0xFC); +select s1, hex(s1) from t1 order by s1, binary s1; +s1 hex(s1) +a 61 +ä C3A4 +ae 6165 +o 6F +ö C3B6 +oe 6F65 +s 73 +ß C39F +ss 7373 +u 75 +ü C3BC +ue 7565 +select group_concat(s1 order by binary s1) from t1 group by s1; +group_concat(s1 order by binary s1) +a,ä +ae +o,ö +oe +s,ß +ss +u,ü +ue +drop table t1; SET collation_connection='utf8_bin'; create table t1 select repeat('a',4000) a; delete from t1; diff --git a/mysql-test/t/ctype_latin1_de.test b/mysql-test/t/ctype_latin1_de.test index d6a11a22857..1efbfa4a3ae 100644 --- a/mysql-test/t/ctype_latin1_de.test +++ b/mysql-test/t/ctype_latin1_de.test @@ -116,6 +116,7 @@ SELECT FIELD('ue',s1), FIELD(' DROP TABLE t1; -- source include/ctype_filesort.inc +-- source include/ctype_german.inc # # Bug#7878 with utf8_general_ci, equals (=) has problem with diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test index 0d917428efb..5749c7e4861 100644 --- a/mysql-test/t/ctype_uca.test +++ b/mysql-test/t/ctype_uca.test @@ -458,6 +458,7 @@ drop table t1; SET collation_connection='utf8_unicode_ci'; -- source include/ctype_filesort.inc -- source include/ctype_like_escape.inc +-- source include/ctype_german.inc # End of 4.1 tests diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index 3af5bfa54f9..08b3cb45395 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -373,6 +373,7 @@ drop table t1; SET collation_connection='ucs2_general_ci'; -- source include/ctype_filesort.inc -- source include/ctype_like_escape.inc +-- source include/ctype_german.inc SET NAMES latin1; SET collation_connection='ucs2_bin'; -- source include/ctype_filesort.inc diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index d18a7d22a0e..2e23ac3661c 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -721,6 +721,7 @@ select hex(soundex(_utf8 0xD091D092D093)); SET collation_connection='utf8_general_ci'; -- source include/ctype_filesort.inc -- source include/ctype_like_escape.inc +-- source include/ctype_german.inc SET collation_connection='utf8_bin'; -- source include/ctype_filesort.inc -- source include/ctype_like_escape.inc diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 2b248f1eed5..2afb1930f3d 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -155,7 +155,7 @@ static MY_UNICASE_INFO plane00[]={ {0x00D8,0x00F8,0x00D8}, {0x00D9,0x00F9,0x0055}, {0x00DA,0x00FA,0x0055}, {0x00DB,0x00FB,0x0055}, {0x00DC,0x00FC,0x0055}, {0x00DD,0x00FD,0x0059}, - {0x00DE,0x00FE,0x00DE}, {0x00DF,0x00DF,0x00DF}, + {0x00DE,0x00FE,0x00DE}, {0x00DF,0x00DF,0x0053}, {0x00C0,0x00E0,0x0041}, {0x00C1,0x00E1,0x0041}, {0x00C2,0x00E2,0x0041}, {0x00C3,0x00E3,0x0041}, {0x00C4,0x00E4,0x0041}, {0x00C5,0x00E5,0x0041},