From 13b47b5e4340169906698054a27ac91bc76d6211 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Mar 2005 14:48:47 +0500 Subject: [PATCH 1/5] Adding Cybozu's patch. Not active by default. One needs to pass -DHAVE_CYBOZU_COLLATION to activate it. mysys/charset-def.c: Adding Cybozu's patch. Not active by default. strings/ctype-utf8.c: Adding Cybozu's patch. Not active by default. --- mysys/charset-def.c | 6 ++ strings/ctype-utf8.c | 166 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+) diff --git a/mysys/charset-def.c b/mysys/charset-def.c index 3278566788c..c7fa0ffd8e0 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci; extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci; extern CHARSET_INFO my_charset_utf8_roman_uca_ci; extern CHARSET_INFO my_charset_utf8_persian_uca_ci; +#ifdef HAVE_CYBOZU_COLLATION +extern CHARSET_INFO my_charset_utf8_general_cs; +#endif #endif #endif /* HAVE_UCA_COLLATIONS */ @@ -146,6 +149,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) #ifdef HAVE_CHARSET_utf8 add_compiled_collation(&my_charset_utf8_general_ci); add_compiled_collation(&my_charset_utf8_bin); +#ifdef HAVE_CYBOZU_COLLATION + add_compiled_collation(&my_charset_utf8_general_cs); +#endif #ifdef HAVE_UCA_COLLATIONS add_compiled_collation(&my_charset_utf8_general_uca_ci); add_compiled_collation(&my_charset_utf8_icelandic_uca_ci); diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 69371aa38c2..4d2bff5e89f 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2380,6 +2380,172 @@ CHARSET_INFO my_charset_utf8_bin= &my_collation_mb_bin_handler }; +#ifdef HAVE_CYBOZU_COLLATION + +/* + * These functions basically do the same as their original, except + * that they return 0 only when the two compared unicode strings are + * strictly the same in a case-sensitive way. See the "save_diff" local + * variable for what they actually do. 
+ */ + +static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, + const uchar *s, uint slen, + const uchar *t, uint tlen, + my_bool t_is_prefix) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + int save_diff = 0; + int diff; + + while ( s < se && t < te ) + { + int plane; + s_res=my_utf8_uni(cs,&s_wc, s, se); + t_res=my_utf8_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + if ( save_diff == 0 ) + { + save_diff = ((int)s_wc) - ((int)t_wc); + } + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; + if ( s_wc != t_wc ) + { + return ((int) s_wc) - ((int) t_wc); + } + + s+=s_res; + t+=t_res; + } + diff = ( (se-s) - (te-t) ); + return t_is_prefix ? t-te : ((diff == 0) ? save_diff : diff); +} + +static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se= s+slen; + const uchar *te= t+tlen; + int save_diff = 0; + + while ( s < se && t < te ) + { + int plane; + s_res=my_utf8_uni(cs,&s_wc, s, se); + t_res=my_utf8_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + if ( save_diff == 0 ) + { + save_diff = ((int)s_wc) - ((int)t_wc); + } + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? 
uni_plane[plane][t_wc & 0xFF].sort : t_wc; + if ( s_wc != t_wc ) + { + return ((int) s_wc) - ((int) t_wc); + } + + s+=s_res; + t+=t_res; + } + + slen= se-s; + tlen= te-t; + + if (slen != tlen) + { + int swap= 0; + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + } + /* + This following loop uses the fact that in UTF-8 + all multibyte characters are greater than space, + and all multibyte head characters are greater than + space. It means if we meet a character greater + than space, it always means that the longer string + is greater. So we can reuse the same loop from the + 8bit version, without having to process full multibyte + sequences. + */ + for ( ; s < se; s++) + { + if (*s != ' ') + return ((int)*s - (int) ' ') ^ swap; + } + } + return save_diff; +} + +static MY_COLLATION_HANDLER my_collation_cs_handler = +{ + NULL, /* init */ + my_strnncoll_utf8_cs, + my_strnncollsp_utf8_cs, + my_strnxfrm_utf8, + my_like_range_simple, + my_wildcmp_mb, + my_strcasecmp_utf8, + my_instr_mb, + my_hash_sort_utf8 +}; + +CHARSET_INFO my_charset_utf8_general_cs= +{ + 254,0,0, /* number */ + MY_CS_COMPILED|MY_CS_UNICODE, /* state */ + "utf8", /* cs name */ + "utf8_general_cs", /* name */ + "", /* comment */ + NULL, /* tailoring */ + ctype_utf8, /* ctype */ + to_lower_utf8, /* to_lower */ + to_upper_utf8, /* to_upper */ + to_upper_utf8, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* mbminlen */ + 3, /* mbmaxlen */ + 0, /* min_sort_char */ + 255, /* max_sort_char */ + &my_charset_utf8_handler, + &my_collation_cs_handler +}; +#endif /* Cybozu Hack */ + #ifdef MY_TEST_UTF8 #include From dfc8b3aecc5365982c2da91621b2f55f03ab39bb Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Mar 2005 14:30:34 +0400 Subject: [PATCH 2/5] Remove redundant my_security_attr_free() from handle_connections_shared_memory (double 
free spotted by Monty). sql/mysqld.cc: Remove excessive my_security_attr_free() --- sql/mysqld.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 05c3cb8deef..d75efbd0b00 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3990,8 +3990,6 @@ errorconn: NullS); sql_perror(buff); } - my_security_attr_free(sa_event); - my_security_attr_free(sa_mapping); if (handle_client_file_map) CloseHandle(handle_client_file_map); if (handle_client_map) From 7bd6ddc046ddecdcfecd9b864ff3736db45ba7f2 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Mar 2005 15:43:00 +0500 Subject: [PATCH 3/5] Better error message. Bugs#9057: Incorrect errormsg for too-large char field in table definition --- mysql-test/r/type_blob.result | 2 +- sql/share/english/errmsg.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/type_blob.result b/mysql-test/r/type_blob.result index b1dc895ecc5..7481dc18641 100644 --- a/mysql-test/r/type_blob.result +++ b/mysql-test/r/type_blob.result @@ -27,7 +27,7 @@ t3 CREATE TABLE `t3` ( drop table t1,t2,t3 #; CREATE TABLE t1 (a char(257) default "hello"); -ERROR 42000: Column length too big for column 'a' (max = 255); use BLOB instead +ERROR 42000: Column length too big for column 'a' (max = 255); use BLOB or TEXT instead CREATE TABLE t2 (a blob default "hello"); ERROR 42000: BLOB/TEXT column 'a' can't have a default value drop table if exists t1,t2; diff --git a/sql/share/english/errmsg.txt b/sql/share/english/errmsg.txt index 104a055417c..854adab455e 100644 --- a/sql/share/english/errmsg.txt +++ b/sql/share/english/errmsg.txt @@ -90,7 +90,7 @@ character-set=latin1 "Specified key was too long; max key length is %d bytes", "Key column '%-.64s' doesn't exist in table", "BLOB column '%-.64s' can't be used in key specification with the used table type", -"Column length too big for column '%-.64s' (max = %d); use BLOB instead", +"Column length too big for column '%-.64s' (max = %d); use BLOB or TEXT 
instead", "Incorrect table definition; there can be only one auto column and it must be defined as a key", "%s: ready for connections.\nVersion: '%s' socket: '%s' port: %d", "%s: Normal shutdown\n", From db5bb0c46c4892c75e4ec5b7db9260c70664fd72 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Mar 2005 16:19:31 +0500 Subject: [PATCH 4/5] ctype-big5.c: More readable and safer way. strings/ctype-big5.c: More readable and safer way. --- strings/ctype-big5.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index 58847a96591..e083d1371ec 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6284,11 +6284,7 @@ uint my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)), const char *emb= e - 1; /* Last possible end of an MB character */ while (pos && b < e) { - /* - Cast to int8 for extra safety. "char" can be unsigned - by default on some platforms. - */ - if (((int8)b[0]) >= 0) + if ((uchar) b[0] < 128) { /* Single byte ascii character */ b++; From 0b45706c3bb5d855d1cc71cd8610a2d9c96e9547 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 29 Mar 2005 16:23:44 +0500 Subject: [PATCH 5/5] ctype-utf8.c, ctype-sjis.c: Safer and more readable way. strings/ctype-sjis.c: Safer and more readable way. strings/ctype-utf8.c: Safer and more readable way. --- strings/ctype-sjis.c | 7 +------ strings/ctype-utf8.c | 9 ++------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index 0cb30a9b6ee..20f0081888f 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4576,12 +4576,7 @@ uint my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)), const char *b0= b; while (pos && b < e) { - /* - Cast to int8 for extra safety. - "char" can be unsigned by default - on some platforms. 
- */ - if (((int8)b[0]) >= 0) + if ((uchar) b[0] < 128) { /* Single byte ascii character */ b++; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 4d2bff5e89f..83725878a50 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2129,12 +2129,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) { my_wc_t s_wc,t_wc; - /* - Cast to int8 for extra safety. - char can be unsigned by default - on some platforms. - */ - if (((int8)s[0]) >= 0) + if ((uchar) s[0] < 128) { /* s[0] is between 0 and 127. @@ -2181,7 +2176,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) /* Do the same for the second string */ - if (((int8)t[0]) >= 0) + if ((uchar) t[0] < 128) { /* Convert single byte character into weight */ t_wc= plane00[(uchar) t[0]].tolower;