mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
LDML refactoring for "MDEV-9711 NO PAD collations"
- Moving detection of the MY_CS_CSSORT, MY_CS_PUREASCII, MY_CS_NONASCII flags of loadable collations from add_collation() in mysys.c to my_cset_init_8bit() and my_coll_init_simple() in ctype-simple.c. - Adding tests that these flags are set properly for loadable collations - Moving LDML test related *.xml files from mysql-test/std_data/ to mysql-test/std_data/ldml/, as there will be more *.xml test files
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -198,6 +198,7 @@ storage/tokudb/PerconaFT/tools/tokudb_load
|
||||
storage/tokudb/PerconaFT/tools/tokuftdump
|
||||
storage/tokudb/PerconaFT/tools/tokuft_logprint
|
||||
storage/tokudb/PerconaFT/xz/
|
||||
strings/conf_to_src
|
||||
support-files/MySQL-shared-compat.spec
|
||||
support-files/binary-configure
|
||||
support-files/config.huge.ini
|
||||
|
@ -865,7 +865,6 @@ void my_string_metadata_get(MY_STRING_METADATA *metadata,
|
||||
CHARSET_INFO *cs, const char *str, size_t len);
|
||||
uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
|
||||
my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
|
||||
my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
|
||||
uint my_charset_repertoire(CHARSET_INFO *cs);
|
||||
|
||||
uint my_strxfrm_flag_normalize(uint flags, uint nlevels);
|
||||
@ -875,8 +874,6 @@ size_t my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
|
||||
uchar *str, uchar *frmend, uchar *strend,
|
||||
uint nweights, uint flags, uint level);
|
||||
|
||||
my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
|
||||
|
||||
const MY_CONTRACTIONS *my_charset_get_contractions(CHARSET_INFO *cs,
|
||||
int level);
|
||||
|
||||
|
@ -5,7 +5,7 @@ with all Unicode character sets
|
||||
set names utf8;
|
||||
show variables like 'character_sets_dir%';
|
||||
Variable_name Value
|
||||
character_sets_dir MYSQL_TEST_DIR/std_data/
|
||||
character_sets_dir MYSQL_TEST_DIR/std_data/ldml/
|
||||
show collation like 'utf8_phone_ci';
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
utf8_phone_ci utf8 352 8
|
||||
@ -454,10 +454,13 @@ select "foo" = "foo " collate latin1_test;
|
||||
The following tests check that two-byte collation IDs work
|
||||
select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
|
||||
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
|
||||
ascii2_general_ci ascii2 320 Yes 1
|
||||
ascii2_bin ascii2 321 1
|
||||
utf8mb4_test_ci utf8mb4 326 8
|
||||
utf16_test_ci utf16 327 8
|
||||
utf8mb4_test_400_ci utf8mb4 328 8
|
||||
latin1_test2 latin1 332 1
|
||||
latin1_test2_cs latin1 333 1
|
||||
utf8_bengali_standard_ci utf8 336 8
|
||||
utf8_bengali_traditional_ci utf8 337 8
|
||||
utf8_implicit_weights_ci utf8 338 8
|
||||
@ -478,6 +481,7 @@ show collation like '%test%';
|
||||
Collation Charset Id Default Compiled Sortlen
|
||||
latin1_test latin1 99 Yes 1
|
||||
latin1_test2 latin1 332 1
|
||||
latin1_test2_cs latin1 333 1
|
||||
utf8_test_ci utf8 353 8
|
||||
ucs2_test_ci ucs2 358 8
|
||||
utf8mb4_test_ci utf8mb4 326 8
|
||||
@ -1179,3 +1183,34 @@ ch w ducet
|
||||
3700 FB80B700 FB80B700
|
||||
3701 FB80B700 FB80B701
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# Testing that the MY_CS_PUREASCII flag is set properly
|
||||
#
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1);
|
||||
INSERT INTO t1 VALUES ('a','a'),('b','b');
|
||||
SELECT * FROM t1 WHERE a=b;
|
||||
a b
|
||||
a a
|
||||
b b
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin;
|
||||
SELECT * FROM t1 WHERE a=b;
|
||||
a b
|
||||
a a
|
||||
b b
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# Testing that the MY_CS_CSSORT flag is set properly
|
||||
#
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test);
|
||||
INSERT INTO t1 VALUES ('a'),('A');
|
||||
SELECT * FROM t1 WHERE a RLIKE 'a';
|
||||
a
|
||||
a
|
||||
A
|
||||
DROP TABLE t1;
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test2_cs);
|
||||
INSERT INTO t1 VALUES ('a'),('A');
|
||||
SELECT * FROM t1 WHERE a RLIKE 'a';
|
||||
a
|
||||
a
|
||||
DROP TABLE t1;
|
||||
|
@ -318,6 +318,11 @@
|
||||
|
||||
</charset>
|
||||
|
||||
<charset name="ascii2">
|
||||
<collation name="ascii2_general_ci" id="320" flag="primary"/>
|
||||
<collation name="ascii2_bin" id="321" flag="binary"/>
|
||||
</charset>
|
||||
|
||||
<charset name="latin1">
|
||||
<family>Western</family>
|
||||
<description>cp1252 West European</description>
|
||||
@ -330,6 +335,7 @@
|
||||
<alias>latin1</alias>
|
||||
<collation name="latin1_test" id="99" order="test"/>
|
||||
<collation name="latin1_test2" id="332" order="test"/>
|
||||
<collation name="latin1_test2_cs" id="333"/>
|
||||
</charset>
|
||||
|
||||
<charset name="utf8">
|
121
mysql-test/std_data/ldml/ascii2.xml
Normal file
121
mysql-test/std_data/ldml/ascii2.xml
Normal file
@ -0,0 +1,121 @@
|
||||
<?xml version='1.0' encoding="utf-8"?>
|
||||
|
||||
<charsets>
|
||||
|
||||
<charset name="ascii2">
|
||||
|
||||
<ctype>
|
||||
<map>
|
||||
00
|
||||
20 20 20 20 20 20 20 20 20 28 28 28 28 28 20 20
|
||||
20 20 20 20 20 20 20 20 20 20 20 20 20 20 20 20
|
||||
48 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
|
||||
84 84 84 84 84 84 84 84 84 84 10 10 10 10 10 10
|
||||
10 81 81 81 81 81 81 01 01 01 01 01 01 01 01 01
|
||||
01 01 01 01 01 01 01 01 01 01 01 10 10 10 10 10
|
||||
10 82 82 82 82 82 82 02 02 02 02 02 02 02 02 02
|
||||
02 02 02 02 02 02 02 02 02 02 02 10 10 10 10 20
|
||||
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
</map>
|
||||
</ctype>
|
||||
|
||||
|
||||
<lower>
|
||||
<map>
|
||||
00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
|
||||
10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
|
||||
20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
|
||||
30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
|
||||
40 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
|
||||
70 71 72 73 74 75 76 77 78 79 7A 5B 5C 5D 5E 5F
|
||||
60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F
|
||||
70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F
|
||||
80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
|
||||
90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
|
||||
A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
|
||||
B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
|
||||
C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
|
||||
D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
|
||||
E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
|
||||
F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
|
||||
</map>
|
||||
</lower>
|
||||
|
||||
|
||||
<upper>
|
||||
<map>
|
||||
00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
|
||||
10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
|
||||
20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
|
||||
30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
|
||||
40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
|
||||
50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
|
||||
60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
|
||||
50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
|
||||
80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
|
||||
90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
|
||||
A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
|
||||
B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
|
||||
C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
|
||||
D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
|
||||
E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
|
||||
F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
|
||||
</map>
|
||||
</upper>
|
||||
|
||||
|
||||
<unicode>
|
||||
<map>
|
||||
0000 0001 0002 0003 0004 0005 0006 0007 0008 0009 000A 000B 000C 000D 000E 000F
|
||||
0010 0011 0012 0013 0014 0015 0016 0017 0018 0019 001A 001B 001C 001D 001E 001F
|
||||
0020 0021 0022 0023 0024 0025 0026 0027 0028 0029 002A 002B 002C 002D 002E 002F
|
||||
0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 003A 003B 003C 003D 003E 003F
|
||||
0040 0041 0042 0043 0044 0045 0046 0047 0048 0049 004A 004B 004C 004D 004E 004F
|
||||
0050 0051 0052 0053 0054 0055 0056 0057 0058 0059 005A 005B 005C 005D 005E 005F
|
||||
0060 0061 0062 0063 0064 0065 0066 0067 0068 0069 006A 006B 006C 006D 006E 006F
|
||||
0070 0071 0072 0073 0074 0075 0076 0077 0078 0079 007A 007B 007C 007D 007E 007F
|
||||
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
</map>
|
||||
</unicode>
|
||||
|
||||
|
||||
<collation name="ascii2_general_ci">
|
||||
<map>
|
||||
00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
|
||||
10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
|
||||
20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
|
||||
30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
|
||||
40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
|
||||
50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F
|
||||
60 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F
|
||||
50 51 52 53 54 55 56 57 58 59 5A 7B 7C 7D 7E 7F
|
||||
80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F
|
||||
90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F
|
||||
A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF
|
||||
B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF
|
||||
C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
|
||||
D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
|
||||
E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
|
||||
F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
|
||||
</map>
|
||||
</collation>
|
||||
|
||||
<collation name="ascii2_bin" flag="binary"/>
|
||||
|
||||
</charset>
|
||||
|
||||
</charsets>
|
@ -152,6 +152,29 @@
|
||||
</map>
|
||||
</collation>
|
||||
|
||||
|
||||
<!-- This is a copy of latin1_general_cs -->
|
||||
<collation name="latin1_test2_cs">
|
||||
<map>
|
||||
00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
|
||||
10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
|
||||
20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F
|
||||
30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F
|
||||
40 41 51 53 57 5B 65 67 69 6B 75 77 79 7B 7D 81
|
||||
8F 91 93 95 98 9A A4 A6 A8 AA AF B3 B4 B5 B6 B7
|
||||
B8 42 52 54 58 5C 66 68 6A 6C 76 78 7A 7C 7E 82
|
||||
90 92 94 96 99 9B A5 A7 A9 AB B0 B9 BA BB BC BF
|
||||
C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF
|
||||
D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF
|
||||
E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF
|
||||
F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF
|
||||
43 45 47 49 4B 4D 4F 55 5D 5F 61 63 6D 6F 71 73
|
||||
59 7F 83 85 87 89 8B BD 8D 9C 9E A0 A2 AC B1 97
|
||||
44 46 48 4A 4C 4E 50 56 5E 60 62 64 6E 70 72 74
|
||||
5A 80 84 86 88 8A 8C BE 8E 9D 9F A1 A3 AD B2 AE
|
||||
</map>
|
||||
</collation>
|
||||
|
||||
</charset>
|
||||
|
||||
</charsets>
|
@ -1,2 +1,2 @@
|
||||
--character-sets-dir=$MYSQL_TEST_DIR/std_data/
|
||||
--character-sets-dir=$MYSQL_TEST_DIR/std_data/ldml/
|
||||
--log-error=$MYSQLTEST_VARDIR/tmp/ctype_ldml_log.err
|
||||
|
@ -409,3 +409,32 @@ INSERT INTO t1 VALUES (_ucs2 0x3400),(_ucs2 0x3560),(_ucs2 0x3561),(_ucs2 0x3600
|
||||
INSERT INTO t1 VALUES (_ucs2 0x3700),(_ucs2 0x3701);
|
||||
SELECT HEX(CONVERT(a USING ucs2)) AS ch, HEX(WEIGHT_STRING(a)) AS w, HEX(WEIGHT_STRING(a COLLATE utf8_unicode_ci)) AS ducet FROM t1 ORDER BY a,ch;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # Testing that the MY_CS_PUREASCII flag is set properly
|
||||
--echo #
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ascii2, b VARCHAR(10) CHARACTER SET latin1);
|
||||
INSERT INTO t1 VALUES ('a','a'),('b','b');
|
||||
# should not give "illegal collation" error
|
||||
SELECT * FROM t1 WHERE a=b;
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ascii2 COLLATE ascii2_bin;
|
||||
# should not give "illegal collation" error
|
||||
SELECT * FROM t1 WHERE a=b;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # Testing that the MY_CS_CSSORT flag is set properly
|
||||
--echo #
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test);
|
||||
INSERT INTO t1 VALUES ('a'),('A');
|
||||
# should be case insensitive
|
||||
SELECT * FROM t1 WHERE a RLIKE 'a';
|
||||
DROP TABLE t1;
|
||||
|
||||
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1 COLLATE latin1_test2_cs);
|
||||
INSERT INTO t1 VALUES ('a'),('A');
|
||||
# should be case sensitive
|
||||
SELECT * FROM t1 WHERE a RLIKE 'a';
|
||||
DROP TABLE t1;
|
||||
|
@ -297,7 +297,6 @@ static int add_collation(struct charset_info_st *cs)
|
||||
}
|
||||
else
|
||||
{
|
||||
const uchar *sort_order= newcs->sort_order;
|
||||
simple_cs_init_functions(newcs);
|
||||
newcs->mbminlen= 1;
|
||||
newcs->mbmaxlen= 1;
|
||||
@ -307,21 +306,6 @@ static int add_collation(struct charset_info_st *cs)
|
||||
newcs->state |= MY_CS_LOADED;
|
||||
}
|
||||
newcs->state|= MY_CS_AVAILABLE;
|
||||
|
||||
/*
|
||||
Check if case sensitive sort order: A < a < B.
|
||||
We need MY_CS_CSSORT for regex library, and for
|
||||
case sensitivity flag for 5.0 client protocol,
|
||||
to support isCaseSensitive() method in JDBC driver
|
||||
*/
|
||||
if (sort_order && sort_order['A'] < sort_order['a'] &&
|
||||
sort_order['a'] < sort_order['B'])
|
||||
newcs->state|= MY_CS_CSSORT;
|
||||
|
||||
if (my_charset_is_8bit_pure_ascii(newcs))
|
||||
newcs->state|= MY_CS_PUREASCII;
|
||||
if (!my_charset_is_ascii_compatible(cs))
|
||||
newcs->state|= MY_CS_NONASCII;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -193,25 +193,19 @@ static int my_read_charset_file(const char *filename)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static int
|
||||
is_case_sensitive(CHARSET_INFO *cs)
|
||||
{
|
||||
return (cs->sort_order &&
|
||||
cs->sort_order['A'] < cs->sort_order['a'] &&
|
||||
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
void dispcset(FILE *f,CHARSET_INFO *cs)
|
||||
{
|
||||
uint flags= my_8bit_charset_flags_from_data(cs) |
|
||||
my_8bit_collation_flags_from_data(cs);
|
||||
fprintf(f,"{\n");
|
||||
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
|
||||
fprintf(f," MY_CS_COMPILED%s%s%s%s%s,\n",
|
||||
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
|
||||
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
|
||||
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
|
||||
my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "",
|
||||
!my_charset_is_ascii_compatible(cs) ? "|MY_CS_NONASCII": "");
|
||||
flags & MY_CS_CSSORT ? "|MY_CS_CSSORT" : "",
|
||||
flags & MY_CS_PUREASCII ? "|MY_CS_PUREASCII" : "",
|
||||
flags & MY_CS_NONASCII ? "|MY_CS_NONASCII" : "");
|
||||
|
||||
if (cs->name)
|
||||
{
|
||||
|
@ -1340,9 +1340,80 @@ create_fromuni(struct charset_info_st *cs,
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect if a character set is 8bit,
|
||||
and it is pure ascii, i.e. doesn't have
|
||||
characters outside U+0000..U+007F
|
||||
This function is shared between "conf_to_src"
|
||||
and dynamic charsets loader in "mysqld".
|
||||
*/
|
||||
static my_bool
|
||||
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
|
||||
{
|
||||
size_t code;
|
||||
if (!cs->tab_to_uni)
|
||||
return 0;
|
||||
for (code= 0; code < 256; code++)
|
||||
{
|
||||
if (cs->tab_to_uni[code] > 0x7F)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Shared function between conf_to_src and mysys.
|
||||
Check if a 8bit character set is compatible with
|
||||
ascii on the range 0x00..0x7F.
|
||||
*/
|
||||
static my_bool
|
||||
my_charset_is_ascii_compatible(CHARSET_INFO *cs)
|
||||
{
|
||||
uint i;
|
||||
if (!cs->tab_to_uni)
|
||||
return 1;
|
||||
for (i= 0; i < 128; i++)
|
||||
{
|
||||
if (cs->tab_to_uni[i] != i)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs)
{
  /* Start with MY_CS_PUREASCII (or nothing), then add MY_CS_NONASCII. */
  uint result= my_charset_is_8bit_pure_ascii(cs) ? MY_CS_PUREASCII : 0;
  if (!my_charset_is_ascii_compatible(cs))
    result|= MY_CS_NONASCII;
  return result;
}
|
||||
|
||||
|
||||
/*
|
||||
Check if case sensitive sort order: A < a < B.
|
||||
We need MY_CS_CSSORT for regex library, and for
|
||||
case sensitivity flag for 5.0 client protocol,
|
||||
to support isCaseSensitive() method in JDBC driver
|
||||
*/
|
||||
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs)
{
  /* No sort order table: no collation flags can be derived. */
  if (!cs->sort_order)
    return 0;
  /* MY_CS_CSSORT requires the ordering 'A' < 'a' < 'B'. */
  if (cs->sort_order['A'] >= cs->sort_order['a'])
    return 0;
  if (cs->sort_order['a'] >= cs->sort_order['B'])
    return 0;
  return MY_CS_CSSORT;
}
|
||||
|
||||
|
||||
static my_bool
|
||||
my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
|
||||
{
|
||||
cs->state|= my_8bit_charset_flags_from_data(cs);
|
||||
cs->caseup_multiply= 1;
|
||||
cs->casedn_multiply= 1;
|
||||
cs->pad_char= ' ';
|
||||
@ -1371,6 +1442,7 @@ static void set_max_sort_char(struct charset_info_st *cs)
|
||||
static my_bool my_coll_init_simple(struct charset_info_st *cs,
|
||||
MY_CHARSET_LOADER *loader __attribute__((unused)))
|
||||
{
|
||||
cs->state|= my_8bit_collation_flags_from_data(cs);
|
||||
set_max_sort_char(cs);
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -973,48 +973,6 @@ my_charset_is_ascii_based(CHARSET_INFO *cs)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect if a character set is 8bit,
|
||||
and it is pure ascii, i.e. doesn't have
|
||||
characters outside U+0000..U+007F
|
||||
This function is shared between "conf_to_src"
|
||||
and dynamic charsets loader in "mysqld".
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
|
||||
{
|
||||
size_t code;
|
||||
if (!cs->tab_to_uni)
|
||||
return 0;
|
||||
for (code= 0; code < 256; code++)
|
||||
{
|
||||
if (cs->tab_to_uni[code] > 0x7F)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Shared function between conf_to_src and mysys.
|
||||
Check if a 8bit character set is compatible with
|
||||
ascii on the range 0x00..0x7F.
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_ascii_compatible(CHARSET_INFO *cs)
|
||||
{
|
||||
uint i;
|
||||
if (!cs->tab_to_uni)
|
||||
return 1;
|
||||
for (i= 0; i < 128; i++)
|
||||
{
|
||||
if (cs->tab_to_uni[i] != i)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Convert a string between two character sets.
|
||||
'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
|
||||
|
@ -101,6 +101,11 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
|
||||
return (end);
|
||||
}
|
||||
|
||||
|
||||
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
|
||||
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
|
||||
|
||||
|
||||
/* Macros for hashing characters */
|
||||
|
||||
#define MY_HASH_ADD(A, B, value) \
|
||||
|
Reference in New Issue
Block a user