From fad7a5643aea8afc7572902ad7263d910c3ebbf2 Mon Sep 17 00:00:00 2001 From: "bar@mysql.com" <> Date: Thu, 12 Aug 2004 20:39:24 +0500 Subject: [PATCH] #4521: unique key prefix interacts poorly with utf8 Fix for MyISAM with prefix compressed keys. --- myisam/mi_key.c | 39 ++++++++++++++++++++++++++++++++++ mysql-test/r/ctype_utf8.result | 33 ++++++++++++++++++++++++++++ mysql-test/t/ctype_utf8.test | 22 +++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/myisam/mi_key.c b/myisam/mi_key.c index d19a3130a86..a9b5a8b279f 100644 --- a/myisam/mi_key.c +++ b/myisam/mi_key.c @@ -32,6 +32,9 @@ static int _mi_put_key_in_record(MI_INFO *info,uint keynr,byte *record); ** Ret: Length of key */ +#define my_charpos(cs, b, e, num)\ + (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num)) + uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, const byte *record, my_off_t filepos) { @@ -57,6 +60,8 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, { enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; uint length=keyseg->length; + uint char_length; + CHARSET_INFO *cs; if (keyseg->null_bit) { @@ -68,6 +73,15 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, *key++=1; /* Not NULL */ } + char_length= (cs= keyseg->charset) && (cs->mbmaxlen > 1) ? + length / cs->mbmaxlen : 0; + + if (info->s->keyinfo[keynr].flag & HA_FULLTEXT) + { + /* Ask Serg to make a better fix */ + char_length= 0; + } + pos= (byte*) record+keyseg->start; if (keyseg->flag & HA_SPACE_PACK) { @@ -83,6 +97,11 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, pos++; } length=(uint) (end-pos); + if (char_length && length > char_length) + { + char_length= my_charpos(cs, pos, pos+length, char_length); + set_if_smaller(length, char_length); + } store_key_length_inc(key,length); memcpy((byte*) key,(byte*) pos,(size_t) length); key+=length; @@ -94,13 +113,26 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, pos+=2; /* Skip VARCHAR length */ set_if_smaller(length,tmp_length); store_key_length_inc(key,length); + memcpy((byte*) key, pos, length); + key+= length; + continue; } else if (keyseg->flag & HA_BLOB_PART) { uint tmp_length=_mi_calc_blob_length(keyseg->bit_start,pos); memcpy_fixed((byte*) &pos,pos+keyseg->bit_start,sizeof(char*)); set_if_smaller(length,tmp_length); +#if NOT_YET_BLOB_PART + if (char_length && length > char_length) + { + char_length= my_charpos(cs, pos, pos+length, char_length); + set_if_smaller(length, char_length); + } +#endif store_key_length_inc(key,length); + memcpy((byte*) key, pos, length); + key+= length; + continue; } else if (keyseg->flag & HA_SWAP_KEY) { /* Numerical column */ @@ -136,6 +168,13 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key, } continue; } +#ifdef NOT_YET_FIXED_LENGTH_KEY + if (char_length && length > char_length) + { + char_length= my_charpos(cs, pos, pos+length, char_length); + set_if_smaller(length, char_length); + } +#endif memcpy((byte*) key, pos, length); key+= length; } diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index b8ca99fe8f1..4d1b5d54bda 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -243,3 +243,36 @@ select 'zвасяz' rlike '[[:<:]]вася[[:>:]]'; CREATE TABLE t1 (a enum ('Y', 'N') DEFAULT 'N' COLLATE utf8_unicode_ci); ALTER TABLE t1 ADD COLUMN b CHAR(20); DROP TABLE t1; +create table t1 (c varchar(30) character set utf8, unique(c(10))); +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +insert into t1 values ('aaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaaa' for key 1 +insert into t1 values ('aaaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaaaa' for key 1 +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +c1 +1 +select c c2 from t1 where c='2'; +c2 +2 +select c c3 from t1 where c='3'; +c3 +3 +select c cx from t1 where c='x'; +cx +x +select c cy from t1 where c='y'; +cy +y +select c cz from t1 where c='z'; +cz +z +select c ca10 from t1 where c='aaaaaaaaaa'; +ca10 +aaaaaaaaaa +select c cb20 from t1 where c=repeat('b',20); +cb20 +bbbbbbbbbbbbbbbbbbbb +drop table t1; diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 07baee1b3bd..4e130440a24 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -165,3 +165,25 @@ select 'zвасяz' rlike '[[:<:]]вася[[:>:]]'; CREATE TABLE t1 (a enum ('Y', 'N') DEFAULT 'N' COLLATE utf8_unicode_ci); ALTER TABLE t1 ADD COLUMN b CHAR(20); DROP TABLE t1; + +# +# Bug 4521: unique key prefix interacts poorly with utf8 +# Check keys with prefix compression +# +create table t1 (c varchar(30) character set utf8, unique(c(10))); +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +--error 1062 +insert into t1 values ('aaaaaaaaaaa'); +--error 1062 +insert into t1 values ('aaaaaaaaaaaa'); +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +select c c2 from t1 where c='2'; +select c c3 from t1 where c='3'; +select c cx from t1 where c='x'; +select c cy from t1 where c='y'; +select c cz from t1 where c='z'; +select c ca10 from t1 where c='aaaaaaaaaa'; +select c cb20 from t1 where c=repeat('b',20); +drop table t1;