diff --git a/.gitignore b/.gitignore
index 279d3f59dc5..ef26322a19d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -198,6 +198,7 @@ storage/tokudb/PerconaFT/tools/tokudb_load
storage/tokudb/PerconaFT/tools/tokuftdump
storage/tokudb/PerconaFT/tools/tokuft_logprint
storage/tokudb/PerconaFT/xz/
+strings/conf_to_src
support-files/MySQL-shared-compat.spec
support-files/binary-configure
support-files/config.huge.ini
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 7a688f76acb..796dd630aa7 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -865,7 +865,6 @@ void my_string_metadata_get(MY_STRING_METADATA *metadata,
CHARSET_INFO *cs, const char *str, size_t len);
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
-my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
uint my_charset_repertoire(CHARSET_INFO *cs);
uint my_strxfrm_flag_normalize(uint flags, uint nlevels);
@@ -875,8 +874,6 @@ size_t my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
uchar *str, uchar *frmend, uchar *strend,
uint nweights, uint flags, uint level);
-my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
-
const MY_CONTRACTIONS *my_charset_get_contractions(CHARSET_INFO *cs,
int level);
diff --git a/mysql-test/r/ctype_ldml.result b/mysql-test/r/ctype_ldml.result
index d333c03143b..05862c92cb3 100644
--- a/mysql-test/r/ctype_ldml.result
+++ b/mysql-test/r/ctype_ldml.result
@@ -5,7 +5,7 @@ with all Unicode character sets
set names utf8;
show variables like 'character_sets_dir%';
Variable_name Value
-character_sets_dir MYSQL_TEST_DIR/std_data/
+character_sets_dir MYSQL_TEST_DIR/std_data/ldml/
show collation like 'utf8_phone_ci';
Collation Charset Id Default Compiled Sortlen
utf8_phone_ci utf8 352 8
@@ -454,10 +454,13 @@ select "foo" = "foo " collate latin1_test;
The following tests check that two-byte collation IDs work
select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
+ascii2_general_ci ascii2 320 Yes 1
+ascii2_bin ascii2 321 1
utf8mb4_test_ci utf8mb4 326 8
utf16_test_ci utf16 327 8
utf8mb4_test_400_ci utf8mb4 328 8
latin1_test2 latin1 332 1
+latin1_test2_cs latin1 333 1
utf8_bengali_standard_ci utf8 336 8
utf8_bengali_traditional_ci utf8 337 8
utf8_implicit_weights_ci utf8 338 8
@@ -478,6 +481,7 @@ show collation like '%test%';
Collation Charset Id Default Compiled Sortlen
latin1_test latin1 99 Yes 1
latin1_test2 latin1 332 1
+latin1_test2_cs latin1 333 1
utf8_test_ci utf8 353 8
ucs2_test_ci ucs2 358 8
utf8mb4_test_ci utf8mb4 326 8
@@ -1179,3 +1183,34 @@ ch w ducet
3700 FB80B700 FB80B700
3701 FB80B700 FB80B701
DROP TABLE t1;
+#
+# Testing that the MY_CS_PUREASCII flag is set properly
+#
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1);
+INSERT INTO t1 VALUES ('a','a'),('b','b');
+SELECT * FROM t1 WHERE a=b;
+a b
+a a
+b b
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin;
+SELECT * FROM t1 WHERE a=b;
+a b
+a a
+b b
+DROP TABLE t1;
+#
+# Testing that the MY_CS_CSSORT flag is set properly
+#
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test);
+INSERT INTO t1 VALUES ('a'),('A');
+SELECT * FROM t1 WHERE a RLIKE 'a';
+a
+a
+A
+DROP TABLE t1;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test2_cs);
+INSERT INTO t1 VALUES ('a'),('A');
+SELECT * FROM t1 WHERE a RLIKE 'a';
+a
+a
+DROP TABLE t1;
diff --git a/mysql-test/std_data/Index.xml b/mysql-test/std_data/ldml/Index.xml
similarity index 99%
rename from mysql-test/std_data/Index.xml
rename to mysql-test/std_data/ldml/Index.xml
index b66fdfee55c..66de0c725b7 100644
--- a/mysql-test/std_data/Index.xml
+++ b/mysql-test/std_data/ldml/Index.xml
@@ -318,6 +318,11 @@
+
+
+
+
+
Western
cp1252 West European
@@ -330,6 +335,7 @@
latin1
+
diff --git a/mysql-test/std_data/ldml/ascii2.xml b/mysql-test/std_data/ldml/ascii2.xml
new file mode 100644
index 00000000000..f1936e020be
--- /dev/null
+++ b/mysql-test/std_data/ldml/ascii2.xml
@@ -0,0 +1,121 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/mysql-test/std_data/latin1.xml b/mysql-test/std_data/ldml/latin1.xml
similarity index 87%
rename from mysql-test/std_data/latin1.xml
rename to mysql-test/std_data/ldml/latin1.xml
index fd5197254e2..3e1d021c127 100644
--- a/mysql-test/std_data/latin1.xml
+++ b/mysql-test/std_data/ldml/latin1.xml
@@ -152,6 +152,29 @@
+
+
+
+
+
+
diff --git a/mysql-test/t/ctype_ldml-master.opt b/mysql-test/t/ctype_ldml-master.opt
index d7ecd9095cb..250dd2cb5a2 100644
--- a/mysql-test/t/ctype_ldml-master.opt
+++ b/mysql-test/t/ctype_ldml-master.opt
@@ -1,2 +1,2 @@
---character-sets-dir=$MYSQL_TEST_DIR/std_data/
+--character-sets-dir=$MYSQL_TEST_DIR/std_data/ldml/
--log-error=$MYSQLTEST_VARDIR/tmp/ctype_ldml_log.err
diff --git a/mysql-test/t/ctype_ldml.test b/mysql-test/t/ctype_ldml.test
index 1ea8002a2eb..37efc775b27 100644
--- a/mysql-test/t/ctype_ldml.test
+++ b/mysql-test/t/ctype_ldml.test
@@ -409,3 +409,32 @@ INSERT INTO t1 VALUES (_ucs2 0x3400),(_ucs2 0x3560),(_ucs2 0x3561),(_ucs2 0x3600
INSERT INTO t1 VALUES (_ucs2 0x3700),(_ucs2 0x3701);
SELECT HEX(CONVERT(a USING ucs2)) AS ch, HEX(WEIGHT_STRING(a)) AS w, HEX(WEIGHT_STRING(a COLLATE utf8_unicode_ci)) AS ducet FROM t1 ORDER BY a,ch;
DROP TABLE t1;
+
+
+--echo #
+--echo # Testing that the MY_CS_PUREASCII flag is set properly
+--echo #
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1);
+INSERT INTO t1 VALUES ('a','a'),('b','b');
+# should not give "illegal collation" error
+SELECT * FROM t1 WHERE a=b;
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin;
+# should not give "illegal collation" error
+SELECT * FROM t1 WHERE a=b;
+DROP TABLE t1;
+
+
+--echo #
+--echo # Testing that the MY_CS_CSSORT flag is set properly
+--echo #
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test);
+INSERT INTO t1 VALUES ('a'),('A');
+# should be case insensitive
+SELECT * FROM t1 WHERE a RLIKE 'a';
+DROP TABLE t1;
+
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test2_cs);
+INSERT INTO t1 VALUES ('a'),('A');
+# should be case sensitive
+SELECT * FROM t1 WHERE a RLIKE 'a';
+DROP TABLE t1;
diff --git a/mysys/charset.c b/mysys/charset.c
index e46fd16a5fb..2a96ec0070d 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -297,7 +297,6 @@ static int add_collation(struct charset_info_st *cs)
}
else
{
- const uchar *sort_order= newcs->sort_order;
simple_cs_init_functions(newcs);
newcs->mbminlen= 1;
newcs->mbmaxlen= 1;
@@ -307,21 +306,6 @@ static int add_collation(struct charset_info_st *cs)
newcs->state |= MY_CS_LOADED;
}
newcs->state|= MY_CS_AVAILABLE;
-
- /*
- Check if case sensitive sort order: A < a < B.
- We need MY_CS_FLAG for regex library, and for
- case sensitivity flag for 5.0 client protocol,
- to support isCaseSensitive() method in JDBC driver
- */
- if (sort_order && sort_order['A'] < sort_order['a'] &&
- sort_order['a'] < sort_order['B'])
- newcs->state|= MY_CS_CSSORT;
-
- if (my_charset_is_8bit_pure_ascii(newcs))
- newcs->state|= MY_CS_PUREASCII;
- if (!my_charset_is_ascii_compatible(cs))
- newcs->state|= MY_CS_NONASCII;
}
}
else
diff --git a/strings/conf_to_src.c b/strings/conf_to_src.c
index 5b9793f388d..31093fe4230 100644
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@@ -193,25 +193,19 @@ static int my_read_charset_file(const char *filename)
return FALSE;
}
-static int
-is_case_sensitive(CHARSET_INFO *cs)
-{
- return (cs->sort_order &&
- cs->sort_order['A'] < cs->sort_order['a'] &&
- cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
-}
-
void dispcset(FILE *f,CHARSET_INFO *cs)
{
+ uint flags= my_8bit_charset_flags_from_data(cs) |
+ my_8bit_collation_flags_from_data(cs);
fprintf(f,"{\n");
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n",
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
- is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
- my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
- !my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
+ flags & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "",
+ flags & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "",
+ flags & MY_CS_NONASCII ? "|MY_CS_NONASCII" : "");
if (cs->name)
{
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index f405c4f327b..e6cea06b17b 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1340,9 +1340,80 @@ create_fromuni(struct charset_info_st *cs,
return FALSE;
}
+
+/*
+ Detect if a character set is 8bit,
+ and it is pure ascii, i.e. doesn't have
+ characters outside U+0000..U+007F
+ This function is shared between "conf_to_src"
+ and dynamic charsets loader in "mysqld".
+*/
+static my_bool
+my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
+{
+ size_t code;
+ if (!cs->tab_to_uni)
+ return 0;
+ for (code= 0; code < 256; code++)
+ {
+ if (cs->tab_to_uni[code] > 0x7F)
+ return 0;
+ }
+ return 1;
+}
+
+
+/*
+ Shared function between conf_to_src and mysys.
+ Check if an 8bit character set is compatible with
+ ascii on the range 0x00..0x7F.
+*/
+static my_bool
+my_charset_is_ascii_compatible(CHARSET_INFO *cs)
+{
+ uint i;
+ if (!cs->tab_to_uni)
+ return 1;
+ for (i= 0; i < 128; i++)
+ {
+ if (cs->tab_to_uni[i] != i)
+ return 0;
+ }
+ return 1;
+}
+
+
+uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs)
+{
+ uint flags= 0;
+ if (my_charset_is_8bit_pure_ascii(cs))
+ flags|= MY_CS_PUREASCII;
+ if (!my_charset_is_ascii_compatible(cs))
+ flags|= MY_CS_NONASCII;
+ return flags;
+}
+
+
+/*
+ Check if the sort order is case sensitive: A < a < B.
+ The MY_CS_CSSORT flag is needed by the regex library, and as
+ the case sensitivity flag in the 5.0 client protocol,
+ to support the isCaseSensitive() method in the JDBC driver.
+*/
+uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs)
+{
+ uint flags= 0;
+ if (cs->sort_order && cs->sort_order['A'] < cs->sort_order['a'] &&
+ cs->sort_order['a'] < cs->sort_order['B'])
+ flags|= MY_CS_CSSORT;
+ return flags;
+}
+
+
static my_bool
my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
+ cs->state|= my_8bit_charset_flags_from_data(cs);
cs->caseup_multiply= 1;
cs->casedn_multiply= 1;
cs->pad_char= ' ';
@@ -1371,6 +1442,7 @@ static void set_max_sort_char(struct charset_info_st *cs)
static my_bool my_coll_init_simple(struct charset_info_st *cs,
MY_CHARSET_LOADER *loader __attribute__((unused)))
{
+ cs->state|= my_8bit_collation_flags_from_data(cs);
set_max_sort_char(cs);
return FALSE;
}
diff --git a/strings/ctype.c b/strings/ctype.c
index 620c7e13503..be8a8cb506e 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -973,48 +973,6 @@ my_charset_is_ascii_based(CHARSET_INFO *cs)
}
-/*
- Detect if a character set is 8bit,
- and it is pure ascii, i.e. doesn't have
- characters outside U+0000..U+007F
- This functions is shared between "conf_to_src"
- and dynamic charsets loader in "mysqld".
-*/
-my_bool
-my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
-{
- size_t code;
- if (!cs->tab_to_uni)
- return 0;
- for (code= 0; code < 256; code++)
- {
- if (cs->tab_to_uni[code] > 0x7F)
- return 0;
- }
- return 1;
-}
-
-
-/*
- Shared function between conf_to_src and mysys.
- Check if a 8bit character set is compatible with
- ascii on the range 0x00..0x7F.
-*/
-my_bool
-my_charset_is_ascii_compatible(CHARSET_INFO *cs)
-{
- uint i;
- if (!cs->tab_to_uni)
- return 1;
- for (i= 0; i < 128; i++)
- {
- if (cs->tab_to_uni[i] != i)
- return 0;
- }
- return 1;
-}
-
-
/*
Convert a string between two character sets.
'to' must be large enough to store (form_length * to_cs->mbmaxlen) bytes.
diff --git a/strings/strings_def.h b/strings/strings_def.h
index fb280b6bb6b..36d3d2b2fe9 100644
--- a/strings/strings_def.h
+++ b/strings/strings_def.h
@@ -101,6 +101,11 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
return (end);
}
+
+uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
+uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
+
+
/* Macros for hashing characters */
#define MY_HASH_ADD(A, B, value) \