From 36b49259b1965743f12cd072bd033162becfbd10 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 13 Jun 2006 19:57:50 -0700 Subject: [PATCH] Fixed bug #14896. This bug in Field_string::cmp resulted in a wrong comparison with keys in partial indexes over multi-byte character fields. Given field a is declared as a varchar(16) collate utf8_unicode_ci INDEX(a(4)) gives us an example of such an index. Wrong key comparisons could lead to wrong result sets if the selected query execution plan used a range scan by a partial index over a utf8 character field. This also caused wrong results in many other cases. mysql-test/r/ctype_utf8.result: Added test cases for bug #14896. mysql-test/t/ctype_utf8.test: Added test cases for bug #14896. --- mysql-test/r/ctype_utf8.result | 40 ++++++++++++++++++++++++++++++++++ mysql-test/t/ctype_utf8.test | 26 ++++++++++++++++++++++ sql/field.cc | 20 ++++++----------- 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 69d7577ee77..cc271176dfc 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -1124,3 +1124,43 @@ check table t1; Table Op Msg_type Msg_text test.t1 check status OK drop table t1; +SET NAMES utf8; +CREATE TABLE t1 (id int PRIMARY KEY, +a varchar(16) collate utf8_unicode_ci NOT NULL default '', +b int, +f varchar(128) default 'XXX', +INDEX (a(4)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +INSERT INTO t1(id, a, b) VALUES +(1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30), +(4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40), +(7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50), +(10, 'eeeee', 40), (11, 'bbbbbb', 60); +SELECT id, a, b FROM t1; +id a b +1 cccc 50 +2 cccc 70 +3 cccc 30 +4 cccc 30 +5 cccc 20 +6 bbbbbb 40 +7 dddd 30 +8 aaaa 10 +9 aaaa 50 +10 eeeee 40 +11 bbbbbb 60 +SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb'; +id a b +8 aaaa 10 +9 aaaa 50 +6 bbbbbb 40 +11 bbbbbb 60 +SELECT id, a FROM t1 WHERE a='bbbbbb'; +id a +6 bbbbbb +11 bbbbbb +SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b; +id a +6 bbbbbb +11 bbbbbb +DROP TABLE t1; diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 5044f7979f1..9b8e6590999 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -926,4 +926,30 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb check table t1; drop table t1; +# +# Bug#14896: Comparison with a key in a partial index over mb chararacter field +# + +SET NAMES utf8; +CREATE TABLE t1 (id int PRIMARY KEY, + a varchar(16) collate utf8_unicode_ci NOT NULL default '', + b int, + f varchar(128) default 'XXX', + INDEX (a(4)) +) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; +INSERT INTO t1(id, a, b) VALUES + (1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30), + (4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40), + (7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50), + (10, 'eeeee', 40), (11, 'bbbbbb', 60); + +SELECT id, a, b FROM t1; + +SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb'; + +SELECT id, a FROM t1 WHERE a='bbbbbb'; +SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b; + +DROP TABLE t1; + # End of 4.1 tests diff --git a/sql/field.cc b/sql/field.cc index a64eaad7308..6eea006fcc8 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -5072,17 +5072,6 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr) { uint a_len, b_len; - if (field_charset->strxfrm_multiply > 1) - { - /* - We have to remove end space to be able to compare multi-byte-characters - like in latin_de 'ae' and 0xe4 - */ - return field_charset->coll->strnncollsp(field_charset, - (const uchar*) a_ptr, field_length, - (const uchar*) b_ptr, - field_length); - } if (field_charset->mbmaxlen != 1) { uint char_len= field_length/field_charset->mbmaxlen; @@ -5091,8 +5080,13 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr) } else a_len= b_len= field_length; - return my_strnncoll(field_charset,(const uchar*) a_ptr, a_len, - (const uchar*) b_ptr, b_len); + /* + We have to remove end space to be able to compare multi-byte-characters + like in latin_de 'ae' and 0xe4 + */ + return field_charset->coll->strnncollsp(field_charset, + (const uchar*) a_ptr, a_len, + (const uchar*) b_ptr, b_len); }