mirror of
https://github.com/MariaDB/server.git
synced 2025-08-07 00:04:31 +03:00
MDEV-11255 LDML: allow defining 2-level UCA collations
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1167,6 +1167,45 @@
|
|||||||
</rules>
|
</rules>
|
||||||
</collation>
|
</collation>
|
||||||
|
|
||||||
|
<collation name="utf8_czech_test_w2" id="370" version="5.2.0">
|
||||||
|
<settings strength="2"/>
|
||||||
|
<rules>
|
||||||
|
<reset>C</reset><p>\u010D</p><t>\u010C</t>
|
||||||
|
<reset>H</reset><p>ch</p><t>Ch</t><t>CH</t>
|
||||||
|
<reset>R</reset><p>\u0159</p><t>\u0158</t>
|
||||||
|
<reset>S</reset><p>\u0161</p><t>\u0160</t>
|
||||||
|
<reset>Z</reset><p>\u017E</p><t>\u017D</t>
|
||||||
|
</rules>
|
||||||
|
</collation>
|
||||||
|
|
||||||
|
<collation name="utf8_czech_test_nopad_w2" id="371" version="5.2.0" flag="nopad">
|
||||||
|
<settings strength="2"/>
|
||||||
|
<rules>
|
||||||
|
<reset>C</reset><p>\u010D</p><t>\u010C</t>
|
||||||
|
<reset>H</reset><p>ch</p><t>Ch</t><t>CH</t>
|
||||||
|
<reset>R</reset><p>\u0159</p><t>\u0158</t>
|
||||||
|
<reset>S</reset><p>\u0161</p><t>\u0160</t>
|
||||||
|
<reset>Z</reset><p>\u017E</p><t>\u017D</t>
|
||||||
|
</rules>
|
||||||
|
</collation>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
This collation definition is bad.
|
||||||
|
It uses Unicode-4.0.0 (the default version), and requests strength="2".
|
||||||
|
Unicode-4.0.0 does not have information about the secondary weight level.
|
||||||
|
The version="5.2.0" collation attribute was forgotten in this definition.
|
||||||
|
-->
|
||||||
|
<collation name="utf8_czech_test_bad_w2" id="372">
|
||||||
|
<settings strength="2"/>
|
||||||
|
<rules>
|
||||||
|
<reset>C</reset><p>\u010D</p><t>\u010C</t>
|
||||||
|
<reset>H</reset><p>ch</p><t>Ch</t><t>CH</t>
|
||||||
|
<reset>R</reset><p>\u0159</p><t>\u0158</t>
|
||||||
|
<reset>S</reset><p>\u0161</p><t>\u0160</t>
|
||||||
|
<reset>Z</reset><p>\u017E</p><t>\u017D</t>
|
||||||
|
</rules>
|
||||||
|
</collation>
|
||||||
|
|
||||||
</charset>
|
</charset>
|
||||||
|
|
||||||
</charsets>
|
</charsets>
|
||||||
|
@@ -419,6 +419,9 @@ ucs2_vn_ci ucs2 359 8
|
|||||||
ucs2_5624_1 ucs2 360 8
|
ucs2_5624_1 ucs2 360 8
|
||||||
utf8_5624_5 utf8 368 8
|
utf8_5624_5 utf8 368 8
|
||||||
utf8_5624_5_bad utf8 369 8
|
utf8_5624_5_bad utf8 369 8
|
||||||
|
utf8_czech_test_w2 utf8 370 4
|
||||||
|
utf8_czech_test_nopad_w2 utf8 371 4
|
||||||
|
utf8_czech_test_bad_w2 utf8 372 4
|
||||||
utf32_test_ci utf32 391 8
|
utf32_test_ci utf32 391 8
|
||||||
utf8_maxuserid_ci utf8 2047 8
|
utf8_maxuserid_ci utf8 2047 8
|
||||||
show collation like '%test%';
|
show collation like '%test%';
|
||||||
@@ -427,6 +430,9 @@ latin1_test latin1 99 Yes 1
|
|||||||
latin1_test2 latin1 332 1
|
latin1_test2 latin1 332 1
|
||||||
latin1_test2_cs latin1 333 1
|
latin1_test2_cs latin1 333 1
|
||||||
utf8_test_ci utf8 353 8
|
utf8_test_ci utf8 353 8
|
||||||
|
utf8_czech_test_w2 utf8 370 4
|
||||||
|
utf8_czech_test_nopad_w2 utf8 371 4
|
||||||
|
utf8_czech_test_bad_w2 utf8 372 4
|
||||||
ucs2_test_ci ucs2 358 8
|
ucs2_test_ci ucs2 358 8
|
||||||
utf8mb4_test_ci utf8mb4 326 8
|
utf8mb4_test_ci utf8mb4 326 8
|
||||||
utf8mb4_test_400_ci utf8mb4 328 8
|
utf8mb4_test_400_ci utf8mb4 328 8
|
||||||
|
@@ -499,3 +499,106 @@ SELECT HEX(a), REPLACE(a,' ','<SP>') FROM t1 WHERE a='a';
|
|||||||
SELECT HEX(a), REPLACE(a,' ','<SP>') FROM t1 ORDER BY a;
|
SELECT HEX(a), REPLACE(a,' ','<SP>') FROM t1 ORDER BY a;
|
||||||
SELECT HEX(a), REPLACE(a,' ','<SP>') FROM t1 ORDER BY a DESC;
|
SELECT HEX(a), REPLACE(a,' ','<SP>') FROM t1 ORDER BY a DESC;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
|
||||||
|
SET NAMES utf8 COLLATE utf8_czech_test_w2;
|
||||||
|
CREATE TABLE t1 AS SELECT SPACE(10) AS c1 LIMIT 0;
|
||||||
|
--source include/ctype_unicode_latin.inc
|
||||||
|
INSERT INTO t1 VALUES ('a ');
|
||||||
|
SELECT c1, HEX(WEIGHT_STRING(c1 LEVEL 1)), HEX(WEIGHT_STRING(c1 LEVEL 2)) FROM t1 ORDER BY c1, BINARY c1;
|
||||||
|
SELECT c1, HEX(WEIGHT_STRING(c1 AS CHAR(3) LEVEL 1)), HEX(WEIGHT_STRING(c1 AS CHAR(3) LEVEL 2)) FROM t1 WHERE c1 BETWEEN 'a' AND 'aZ' ORDER BY c1, BINARY c1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
SELECT 'a' = 'a ';
|
||||||
|
SELECT 'a' < 'á';
|
||||||
|
SELECT 'áa' < 'ab';
|
||||||
|
SELECT 'á' < 'ä';
|
||||||
|
SELECT 'äa' < 'áb';
|
||||||
|
SELECT 'c' < 'č';
|
||||||
|
SELECT 'cb' < 'ča';
|
||||||
|
SELECT 'd' < 'ď';
|
||||||
|
SELECT 'ďa' < 'db';
|
||||||
|
SELECT 'e' < 'é';
|
||||||
|
SELECT 'éa' < 'eb';
|
||||||
|
SELECT 'é' < 'ě';
|
||||||
|
SELECT 'ěa' < 'éb';
|
||||||
|
SELECT 'i' < 'í';
|
||||||
|
SELECT 'ía' < 'ib';
|
||||||
|
SELECT 'n' < 'ň';
|
||||||
|
SELECT 'ňa' < 'nb';
|
||||||
|
SELECT 'o' < 'ó';
|
||||||
|
SELECT 'óa' < 'ob';
|
||||||
|
SELECT 'ó' < 'ö';
|
||||||
|
SELECT 'öa' < 'ób';
|
||||||
|
SELECT 'r' < 'ř';
|
||||||
|
SELECT 'rb' < 'řa';
|
||||||
|
SELECT 's' < 'š';
|
||||||
|
SELECT 'sb' < 'ša';
|
||||||
|
SELECT 't' < 'ť';
|
||||||
|
SELECT 'ťa' < 'tb';
|
||||||
|
SELECT 'u' < 'ú';
|
||||||
|
SELECT 'úa' < 'ub';
|
||||||
|
SELECT 'ú' < 'ů';
|
||||||
|
SELECT 'ůa' < 'úb';
|
||||||
|
SELECT 'ů' < 'ü';
|
||||||
|
SELECT 'üa' < 'ůb';
|
||||||
|
SELECT 'y' < 'ý';
|
||||||
|
SELECT 'ýa' < 'yb';
|
||||||
|
SELECT 'z' < 'ž';
|
||||||
|
SELECT 'zb' < 'ža';
|
||||||
|
SELECT 'hž' < 'ch';
|
||||||
|
SELECT 'chž'< 'i';
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
SET NAMES utf8 COLLATE utf8_czech_test_nopad_w2;
|
||||||
|
CREATE TABLE t1 AS SELECT SPACE(10) AS c1 LIMIT 0;
|
||||||
|
--source include/ctype_unicode_latin.inc
|
||||||
|
INSERT INTO t1 VALUES ('a ');
|
||||||
|
SELECT c1, HEX(WEIGHT_STRING(c1 LEVEL 1)), HEX(WEIGHT_STRING(c1 LEVEL 2)) FROM t1 ORDER BY c1, BINARY c1;
|
||||||
|
SELECT c1, HEX(WEIGHT_STRING(c1 AS CHAR(3) LEVEL 1)), HEX(WEIGHT_STRING(c1 AS CHAR(3) LEVEL 2)) FROM t1 WHERE c1 BETWEEN 'a' AND 'aZ' ORDER BY c1, BINARY c1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
SELECT 'a' = 'a ';
|
||||||
|
SELECT 'a' < 'á';
|
||||||
|
SELECT 'áa' < 'ab';
|
||||||
|
SELECT 'á' < 'ä';
|
||||||
|
SELECT 'äa' < 'áb';
|
||||||
|
SELECT 'c' < 'č';
|
||||||
|
SELECT 'cb' < 'ča';
|
||||||
|
SELECT 'd' < 'ď';
|
||||||
|
SELECT 'ďa' < 'db';
|
||||||
|
SELECT 'e' < 'é';
|
||||||
|
SELECT 'éa' < 'eb';
|
||||||
|
SELECT 'é' < 'ě';
|
||||||
|
SELECT 'ěa' < 'éb';
|
||||||
|
SELECT 'i' < 'í';
|
||||||
|
SELECT 'ía' < 'ib';
|
||||||
|
SELECT 'n' < 'ň';
|
||||||
|
SELECT 'ňa' < 'nb';
|
||||||
|
SELECT 'o' < 'ó';
|
||||||
|
SELECT 'óa' < 'ob';
|
||||||
|
SELECT 'ó' < 'ö';
|
||||||
|
SELECT 'öa' < 'ób';
|
||||||
|
SELECT 'r' < 'ř';
|
||||||
|
SELECT 'rb' < 'řa';
|
||||||
|
SELECT 's' < 'š';
|
||||||
|
SELECT 'sb' < 'ša';
|
||||||
|
SELECT 't' < 'ť';
|
||||||
|
SELECT 'ťa' < 'tb';
|
||||||
|
SELECT 'u' < 'ú';
|
||||||
|
SELECT 'úa' < 'ub';
|
||||||
|
SELECT 'ú' < 'ů';
|
||||||
|
SELECT 'ůa' < 'úb';
|
||||||
|
SELECT 'ů' < 'ü';
|
||||||
|
SELECT 'üa' < 'ůb';
|
||||||
|
SELECT 'y' < 'ý';
|
||||||
|
SELECT 'ýa' < 'yb';
|
||||||
|
SELECT 'z' < 'ž';
|
||||||
|
SELECT 'zb' < 'ža';
|
||||||
|
SELECT 'hž' < 'ch';
|
||||||
|
SELECT 'chž'< 'i';
|
||||||
|
|
||||||
|
|
||||||
|
--error ER_UNKNOWN_COLLATION
|
||||||
|
SELECT 'a' COLLATE utf8_czech_test_bad_w2;
|
||||||
|
@@ -258,12 +258,38 @@ static my_bool simple_cs_is_full(CHARSET_INFO *cs)
|
|||||||
|
|
||||||
|
|
||||||
#if defined(HAVE_UCA_COLLATIONS) && (defined(HAVE_CHARSET_ucs2) || defined(HAVE_CHARSET_utf8))
|
#if defined(HAVE_UCA_COLLATIONS) && (defined(HAVE_CHARSET_ucs2) || defined(HAVE_CHARSET_utf8))
|
||||||
|
/**
|
||||||
|
Initialize a loaded collation.
|
||||||
|
@param [OUT] to - The new charset_info_st structure to initialize.
|
||||||
|
@param [IN] from - A template collation, to fill the missing data from.
|
||||||
|
@param [IN] loaded - The collation data loaded from the LDML file.
|
||||||
|
some data may be missing in "loaded".
|
||||||
|
*/
|
||||||
static void
|
static void
|
||||||
copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from)
|
copy_uca_collation(struct charset_info_st *to, CHARSET_INFO *from,
|
||||||
|
CHARSET_INFO *loaded)
|
||||||
{
|
{
|
||||||
to->cset= from->cset;
|
to->cset= from->cset;
|
||||||
to->coll= from->coll;
|
to->coll= from->coll;
|
||||||
to->strxfrm_multiply= from->strxfrm_multiply;
|
/*
|
||||||
|
Single-level UCA collations have strxfrm_multiply=8.
|
||||||
|
In case of a multi-level UCA collation we use strxfrm_multiply=4.
|
||||||
|
That means MY_COLLATION_HANDLER::strnxfrmlen() will request the caller
|
||||||
|
to allocate a smaller buffer for each level, for performance purposes,
|
||||||
|
and to fit longer VARCHARs to @@max_sort_length.
|
||||||
|
This makes filesort produce non-precise order for some rare Unicode
|
||||||
|
characters that produce more than 4 weights (long expansions).
|
||||||
|
UCA requires 2 bytes per weight multiplied by the number of levels.
|
||||||
|
In case of a 2-level collation, each character requires 4*2=8 bytes.
|
||||||
|
Therefore, the longest VARCHAR that fits into the default @@max_sort_length
|
||||||
|
is 1024/8=VARCHAR(128). With strxfrm_multiply==8, only VARCHAR(64)
|
||||||
|
would fit.
|
||||||
|
Note, the built-in collation utf8_thai_520_w2 also uses strxfrm_multiply=4,
|
||||||
|
for the same purpose.
|
||||||
|
TODO: we could add a new LDML syntax to choose strxfrm_multiply value.
|
||||||
|
*/
|
||||||
|
to->strxfrm_multiply= loaded->levels_for_order > 1 ?
|
||||||
|
4 : from->strxfrm_multiply;
|
||||||
to->min_sort_char= from->min_sort_char;
|
to->min_sort_char= from->min_sort_char;
|
||||||
to->max_sort_char= from->max_sort_char;
|
to->max_sort_char= from->max_sort_char;
|
||||||
to->mbminlen= from->mbminlen;
|
to->mbminlen= from->mbminlen;
|
||||||
@@ -312,7 +338,8 @@ static int add_collation(struct charset_info_st *cs)
|
|||||||
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
|
#if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
|
||||||
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
||||||
&my_charset_ucs2_unicode_nopad_ci :
|
&my_charset_ucs2_unicode_nopad_ci :
|
||||||
&my_charset_ucs2_unicode_ci);
|
&my_charset_ucs2_unicode_ci,
|
||||||
|
cs);
|
||||||
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -321,7 +348,8 @@ static int add_collation(struct charset_info_st *cs)
|
|||||||
#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
|
#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
|
||||||
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
||||||
&my_charset_utf8_unicode_nopad_ci :
|
&my_charset_utf8_unicode_nopad_ci :
|
||||||
&my_charset_utf8_unicode_ci);
|
&my_charset_utf8_unicode_ci,
|
||||||
|
cs);
|
||||||
newcs->ctype= my_charset_utf8_unicode_ci.ctype;
|
newcs->ctype= my_charset_utf8_unicode_ci.ctype;
|
||||||
if (init_state_maps(newcs))
|
if (init_state_maps(newcs))
|
||||||
return MY_XML_ERROR;
|
return MY_XML_ERROR;
|
||||||
@@ -332,7 +360,8 @@ static int add_collation(struct charset_info_st *cs)
|
|||||||
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
|
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
|
||||||
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
||||||
&my_charset_utf8mb4_unicode_nopad_ci :
|
&my_charset_utf8mb4_unicode_nopad_ci :
|
||||||
&my_charset_utf8mb4_unicode_ci);
|
&my_charset_utf8mb4_unicode_ci,
|
||||||
|
cs);
|
||||||
newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
|
newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
|
||||||
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
|
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
|
||||||
#endif
|
#endif
|
||||||
@@ -342,7 +371,8 @@ static int add_collation(struct charset_info_st *cs)
|
|||||||
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
|
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
|
||||||
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
||||||
&my_charset_utf16_unicode_nopad_ci :
|
&my_charset_utf16_unicode_nopad_ci :
|
||||||
&my_charset_utf16_unicode_ci);
|
&my_charset_utf16_unicode_ci,
|
||||||
|
cs);
|
||||||
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -351,7 +381,8 @@ static int add_collation(struct charset_info_st *cs)
|
|||||||
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
|
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
|
||||||
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
copy_uca_collation(newcs, newcs->state & MY_CS_NOPAD ?
|
||||||
&my_charset_utf32_unicode_nopad_ci :
|
&my_charset_utf32_unicode_nopad_ci :
|
||||||
&my_charset_utf32_unicode_ci);
|
&my_charset_utf32_unicode_ci,
|
||||||
|
cs);
|
||||||
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@@ -6542,6 +6542,17 @@ MY_UCA_INFO my_uca_v400=
|
|||||||
},
|
},
|
||||||
0 /* levelno */
|
0 /* levelno */
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
0,
|
||||||
|
NULL,
|
||||||
|
NULL,
|
||||||
|
{
|
||||||
|
0,
|
||||||
|
NULL,
|
||||||
|
NULL
|
||||||
|
},
|
||||||
|
1 /* levelno */
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
/* Logical positions */
|
/* Logical positions */
|
||||||
@@ -30134,6 +30145,18 @@ MY_UCA_INFO my_uca_v520=
|
|||||||
},
|
},
|
||||||
0 /* levelno */
|
0 /* levelno */
|
||||||
},
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
0x10FFFF, /* maxchar */
|
||||||
|
(uchar *) uca520_length_w2,
|
||||||
|
(uint16 **) uca520_weight_w2,
|
||||||
|
{ /* Contractions: */
|
||||||
|
0, /* nitems */
|
||||||
|
NULL, /* item */
|
||||||
|
NULL /* flags */
|
||||||
|
},
|
||||||
|
1 /* levelno */
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
0x0009, /* first_non_ignorable p != ignore */
|
0x0009, /* first_non_ignorable p != ignore */
|
||||||
@@ -31851,6 +31874,25 @@ static int my_strnncoll_uca_multilevel(CHARSET_INFO *cs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Compare two strings using a multi-level NOPAD UCA collation.
  The levels 0 .. cs->levels_for_order-1 are compared one after another
  with my_strnncoll_uca_onelevel(), always passing FALSE as its last
  argument. The result of the first level that reports a difference is
  returned; 0 is returned when no level reports a difference.
*/
static int
|
||||||
|
my_strnncollsp_generic_uca_nopad_multilevel(CHARSET_INFO *cs,
|
||||||
|
const uchar *s, size_t slen,
|
||||||
|
const uchar *t, size_t tlen)
|
||||||
|
{
|
||||||
|
uint num_level= cs->levels_for_order;
|
||||||
|
uint i;
|
||||||
|
for (i= 0; i != num_level; i++)
|
||||||
|
{
|
||||||
|
int ret= my_strnncoll_uca_onelevel(cs, &my_any_uca_scanner_handler,
|
||||||
|
&cs->uca->level[i],
|
||||||
|
s, slen, t, tlen, FALSE);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
my_space_weight(const MY_UCA_WEIGHT_LEVEL *level)
|
my_space_weight(const MY_UCA_WEIGHT_LEVEL *level)
|
||||||
{
|
{
|
||||||
@@ -32181,6 +32223,16 @@ my_strnxfrm_uca_onelevel(CHARSET_INFO *cs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Return the minimum possible weight on a level.
The result is 0x0200 when level->levelno is 0 and 0x0020 otherwise.
Only levelno 0 and 1 are expected (enforced by the debug assertion).
my_strnxfrm_uca_nopad_onelevel() uses this value as the padding weight.
|
||||||
|
*/
|
||||||
|
static uint min_weight_on_level(MY_UCA_WEIGHT_LEVEL *level)
|
||||||
|
{
|
||||||
|
DBUG_ASSERT(level->levelno < 2); /* No 3-level NOPAD collations yet */
|
||||||
|
return level->levelno == 0 ? 0x0200 : 0x0020;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static uchar *
|
static uchar *
|
||||||
my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
|
my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
|
||||||
my_uca_scanner_handler *scanner_handler,
|
my_uca_scanner_handler *scanner_handler,
|
||||||
@@ -32194,12 +32246,9 @@ my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
|
|||||||
dst, de, &nweights,
|
dst, de, &nweights,
|
||||||
src, srclen);
|
src, srclen);
|
||||||
DBUG_ASSERT(dst <= de);
|
DBUG_ASSERT(dst <= de);
|
||||||
/*
|
/* Pad with the minimum possible weight on this level */
|
||||||
Pad with the minimum possible primary weight 0x0200.
|
|
||||||
*/
|
|
||||||
DBUG_ASSERT(level->levelno == 0); /* No multi-level NOPAD collations yet */
|
|
||||||
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
|
||||||
dst= my_strnxfrm_uca_padn(dst, de, nweights, 0x0200);
|
dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
|
||||||
DBUG_ASSERT(dst <= de);
|
DBUG_ASSERT(dst <= de);
|
||||||
my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
|
my_strxfrm_desc_and_reverse(d0, dst, flags, 0);
|
||||||
return dst;
|
return dst;
|
||||||
@@ -32294,7 +32343,12 @@ my_strnxfrm_uca_multilevel(CHARSET_INFO *cs,
|
|||||||
{
|
{
|
||||||
if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
|
if (!(flags & MY_STRXFRM_LEVEL_ALL) ||
|
||||||
(flags & (MY_STRXFRM_LEVEL1 << current_level)))
|
(flags & (MY_STRXFRM_LEVEL1 << current_level)))
|
||||||
dst= my_strnxfrm_uca_onelevel(cs, scanner_handler,
|
dst= cs->state & MY_CS_NOPAD ?
|
||||||
|
my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler,
|
||||||
|
&cs->uca->level[current_level],
|
||||||
|
dst, de, nweights,
|
||||||
|
src, srclen, flags) :
|
||||||
|
my_strnxfrm_uca_onelevel(cs, scanner_handler,
|
||||||
&cs->uca->level[current_level],
|
&cs->uca->level[current_level],
|
||||||
dst, de, nweights,
|
dst, de, nweights,
|
||||||
src, srclen, flags);
|
src, srclen, flags);
|
||||||
@@ -32970,6 +33024,7 @@ typedef enum
|
|||||||
typedef struct my_coll_rules_st
|
typedef struct my_coll_rules_st
|
||||||
{
|
{
|
||||||
uint version; /* Unicode version, e.g. 400 or 520 */
|
uint version; /* Unicode version, e.g. 400 or 520 */
|
||||||
|
uint strength; /* Number of levels */
|
||||||
MY_UCA_INFO *uca; /* Unicode weight data */
|
MY_UCA_INFO *uca; /* Unicode weight data */
|
||||||
size_t nrules; /* Number of rules in the rule array */
|
size_t nrules; /* Number of rules in the rule array */
|
||||||
size_t mrules; /* Number of allocated rules */
|
size_t mrules; /* Number of allocated rules */
|
||||||
@@ -33251,6 +33306,10 @@ my_coll_parser_scan_setting(MY_COLL_RULE_PARSER *p)
|
|||||||
{
|
{
|
||||||
rules->shift_after_method= my_shift_method_simple;
|
rules->shift_after_method= my_shift_method_simple;
|
||||||
}
|
}
|
||||||
|
else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[strength 1]")))
|
||||||
|
rules->strength= 1;
|
||||||
|
else if (!lex_cmp(lexem, C_STRING_WITH_LEN("[strength 2]")))
|
||||||
|
rules->strength= 2;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
@@ -34189,6 +34248,10 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel;
|
||||||
|
MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
This function copies an UCS2 collation from
|
This function copies an UCS2 collation from
|
||||||
the default Unicode Collation Algorithm (UCA)
|
the default Unicode Collation Algorithm (UCA)
|
||||||
@@ -34213,66 +34276,6 @@ create_tailoring(struct charset_info_st *cs,
|
|||||||
MY_COLL_RULES rules;
|
MY_COLL_RULES rules;
|
||||||
MY_UCA_INFO new_uca, *src_uca= NULL;
|
MY_UCA_INFO new_uca, *src_uca= NULL;
|
||||||
int rc= 0;
|
int rc= 0;
|
||||||
|
|
||||||
*loader->error= '\0';
|
|
||||||
|
|
||||||
if (!cs->tailoring)
|
|
||||||
return 0; /* Ok to add a collation without tailoring */
|
|
||||||
|
|
||||||
memset(&rules, 0, sizeof(rules));
|
|
||||||
rules.loader= loader;
|
|
||||||
rules.uca= cs->uca ? cs->uca : &my_uca_v400; /* For logical positions, etc */
|
|
||||||
memset(&new_uca, 0, sizeof(new_uca));
|
|
||||||
|
|
||||||
/* Parse ICU Collation Customization expression */
|
|
||||||
if ((rc= my_coll_rule_parse(&rules,
|
|
||||||
cs->tailoring,
|
|
||||||
cs->tailoring + strlen(cs->tailoring))))
|
|
||||||
goto ex;
|
|
||||||
|
|
||||||
if (rules.version == 520) /* Unicode-5.2.0 requested */
|
|
||||||
{
|
|
||||||
src_uca= &my_uca_v520;
|
|
||||||
cs->caseinfo= &my_unicase_unicode520;
|
|
||||||
}
|
|
||||||
else if (rules.version == 400) /* Unicode-4.0.0 requested */
|
|
||||||
{
|
|
||||||
src_uca= &my_uca_v400;
|
|
||||||
cs->caseinfo= &my_unicase_default;
|
|
||||||
}
|
|
||||||
else /* No Unicode version specified */
|
|
||||||
{
|
|
||||||
src_uca= cs->uca ? cs->uca : &my_uca_v400;
|
|
||||||
if (!cs->caseinfo)
|
|
||||||
cs->caseinfo= &my_unicase_default;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((rc= init_weight_level(loader, &rules,
|
|
||||||
&new_uca.level[0], &src_uca->level[0])))
|
|
||||||
goto ex;
|
|
||||||
|
|
||||||
if (!(cs->uca= (MY_UCA_INFO *) (loader->once_alloc)(sizeof(MY_UCA_INFO))))
|
|
||||||
{
|
|
||||||
rc= 1;
|
|
||||||
goto ex;
|
|
||||||
}
|
|
||||||
cs->uca[0]= new_uca;
|
|
||||||
|
|
||||||
ex:
|
|
||||||
(loader->free)(rules.rule);
|
|
||||||
if (rc != 0 && loader->error[0])
|
|
||||||
loader->reporter(ERROR_LEVEL, "%s", loader->error);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
|
|
||||||
static my_bool
|
|
||||||
create_tailoring_multilevel(struct charset_info_st *cs,
|
|
||||||
MY_CHARSET_LOADER *loader)
|
|
||||||
{
|
|
||||||
uint num_level= cs->levels_for_order;
|
|
||||||
MY_COLL_RULES rules;
|
|
||||||
MY_UCA_INFO new_uca, *src_uca= NULL;
|
|
||||||
int rc= 0;
|
|
||||||
uint i;
|
uint i;
|
||||||
|
|
||||||
*loader->error= '\0';
|
*loader->error= '\0';
|
||||||
@@ -34307,9 +34310,17 @@ create_tailoring_multilevel(struct charset_info_st *cs,
|
|||||||
if (!cs->caseinfo)
|
if (!cs->caseinfo)
|
||||||
cs->caseinfo= &my_unicase_default;
|
cs->caseinfo= &my_unicase_default;
|
||||||
}
|
}
|
||||||
|
cs->levels_for_order= rules.strength ? rules.strength : 1;
|
||||||
|
|
||||||
for (i= 0; i != num_level; i++)
|
for (i= 0; i != cs->levels_for_order; i++)
|
||||||
{
|
{
|
||||||
|
if ((rc= (src_uca->level[i].maxchar == 0)))
|
||||||
|
{
|
||||||
|
my_snprintf(loader->error, sizeof(loader->error) - 1,
|
||||||
|
"%s: no level #%d data for this Unicode version.",
|
||||||
|
cs->name, i + 1);
|
||||||
|
goto ex;
|
||||||
|
}
|
||||||
if ((rc= init_weight_level(loader, &rules,
|
if ((rc= init_weight_level(loader, &rules,
|
||||||
&new_uca.level[i], &src_uca->level[i])))
|
&new_uca.level[i], &src_uca->level[i])))
|
||||||
goto ex;
|
goto ex;
|
||||||
@@ -34321,6 +34332,10 @@ create_tailoring_multilevel(struct charset_info_st *cs,
|
|||||||
goto ex;
|
goto ex;
|
||||||
}
|
}
|
||||||
cs->uca[0]= new_uca;
|
cs->uca[0]= new_uca;
|
||||||
|
if (cs->levels_for_order > 1)
|
||||||
|
cs->coll= (cs->state & MY_CS_NOPAD) ?
|
||||||
|
&my_collation_generic_uca_nopad_handler_multilevel :
|
||||||
|
&my_collation_any_uca_handler_multilevel;
|
||||||
|
|
||||||
ex:
|
ex:
|
||||||
(loader->free)(rules.rule);
|
(loader->free)(rules.rule);
|
||||||
@@ -34345,16 +34360,6 @@ my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
|
|||||||
return create_tailoring(cs, loader);
|
return create_tailoring(cs, loader);
|
||||||
}
|
}
|
||||||
|
|
||||||
static my_bool
|
|
||||||
my_coll_init_uca_multilevel(struct charset_info_st *cs,
|
|
||||||
MY_CHARSET_LOADER *loader)
|
|
||||||
{
|
|
||||||
cs->pad_char= ' ';
|
|
||||||
cs->ctype= my_charset_utf8_unicode_ci.ctype;
|
|
||||||
if (!cs->caseinfo)
|
|
||||||
cs->caseinfo= &my_unicase_default;
|
|
||||||
return create_tailoring_multilevel(cs, loader);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int my_strnncoll_any_uca(CHARSET_INFO *cs,
|
static int my_strnncoll_any_uca(CHARSET_INFO *cs,
|
||||||
const uchar *s, size_t slen,
|
const uchar *s, size_t slen,
|
||||||
@@ -34489,7 +34494,7 @@ MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler =
|
|||||||
|
|
||||||
MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
|
MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
|
||||||
{
|
{
|
||||||
my_coll_init_uca_multilevel,
|
my_coll_init_uca,
|
||||||
my_strnncoll_any_uca_multilevel,
|
my_strnncoll_any_uca_multilevel,
|
||||||
my_strnncollsp_any_uca_multilevel,
|
my_strnncollsp_any_uca_multilevel,
|
||||||
my_strnxfrm_any_uca_multilevel,
|
my_strnxfrm_any_uca_multilevel,
|
||||||
@@ -34503,6 +34508,22 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
  Collation handler for multi-level NOPAD UCA collations.
  create_tailoring() installs it as cs->coll when cs->levels_for_order > 1
  and MY_CS_NOPAD is set in cs->state. It uses the NOPAD-specific
  my_strnncollsp_generic_uca_nopad_multilevel() and
  my_hash_sort_generic_uca_nopad() together with the generic multi-level
  strnncoll/strnxfrm/strnxfrmlen functions.
*/
MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel =
|
||||||
|
{
|
||||||
|
my_coll_init_uca,
|
||||||
|
my_strnncoll_any_uca_multilevel,
|
||||||
|
my_strnncollsp_generic_uca_nopad_multilevel,
|
||||||
|
my_strnxfrm_any_uca_multilevel,
|
||||||
|
my_strnxfrmlen_any_uca_multilevel,
|
||||||
|
my_like_range_generic,
|
||||||
|
my_wildcmp_uca,
|
||||||
|
NULL,
|
||||||
|
my_instr_mb,
|
||||||
|
my_hash_sort_generic_uca_nopad,
|
||||||
|
my_propagate_complex
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_CHARSET_ucs2
|
#ifdef HAVE_CHARSET_ucs2
|
||||||
/*
|
/*
|
||||||
UCS2 optimized CHARSET_INFO compatible wrappers.
|
UCS2 optimized CHARSET_INFO compatible wrappers.
|
||||||
@@ -35342,7 +35363,7 @@ struct charset_info_st my_charset_ucs2_thai_520_w2=
|
|||||||
"ucs2", /* csname */
|
"ucs2", /* csname */
|
||||||
"ucs2_thai_520_w2", /* name */
|
"ucs2_thai_520_w2", /* name */
|
||||||
"", /* comment */
|
"", /* comment */
|
||||||
"", /* tailoring */
|
"[strength 2]", /* tailoring */
|
||||||
NULL, /* ctype */
|
NULL, /* ctype */
|
||||||
NULL, /* to_lower */
|
NULL, /* to_lower */
|
||||||
NULL, /* to_upper */
|
NULL, /* to_upper */
|
||||||
@@ -36363,7 +36384,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2=
|
|||||||
MY_UTF8MB3, /* csname */
|
MY_UTF8MB3, /* csname */
|
||||||
MY_UTF8MB3 "_thai_520_w2",/* name */
|
MY_UTF8MB3 "_thai_520_w2",/* name */
|
||||||
"", /* comment */
|
"", /* comment */
|
||||||
"", /* tailoring */
|
"[strength 2]", /* tailoring */
|
||||||
ctype_utf8, /* ctype */
|
ctype_utf8, /* ctype */
|
||||||
NULL, /* to_lower */
|
NULL, /* to_lower */
|
||||||
NULL, /* to_upper */
|
NULL, /* to_upper */
|
||||||
@@ -37275,7 +37296,7 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2=
|
|||||||
MY_UTF8MB4, /* csname */
|
MY_UTF8MB4, /* csname */
|
||||||
MY_UTF8MB4 "_thai_520_w2", /* name */
|
MY_UTF8MB4 "_thai_520_w2", /* name */
|
||||||
"", /* comment */
|
"", /* comment */
|
||||||
"", /* tailoring */
|
"[strength 2]", /* tailoring */
|
||||||
ctype_utf8, /* ctype */
|
ctype_utf8, /* ctype */
|
||||||
NULL, /* to_lower */
|
NULL, /* to_lower */
|
||||||
NULL, /* to_upper */
|
NULL, /* to_upper */
|
||||||
@@ -38237,7 +38258,7 @@ struct charset_info_st my_charset_utf32_thai_520_w2=
|
|||||||
"utf32", /* csname */
|
"utf32", /* csname */
|
||||||
"utf32_thai_520_w2",/* name */
|
"utf32_thai_520_w2",/* name */
|
||||||
"", /* comment */
|
"", /* comment */
|
||||||
"", /* tailoring */
|
"[strength 2]", /* tailoring */
|
||||||
NULL, /* ctype */
|
NULL, /* ctype */
|
||||||
NULL, /* to_lower */
|
NULL, /* to_lower */
|
||||||
NULL, /* to_upper */
|
NULL, /* to_upper */
|
||||||
@@ -39204,7 +39225,7 @@ struct charset_info_st my_charset_utf16_thai_520_w2=
|
|||||||
"utf16", /* cs name */
|
"utf16", /* cs name */
|
||||||
"utf16_thai_520_w2",/* name */
|
"utf16_thai_520_w2",/* name */
|
||||||
"", /* comment */
|
"", /* comment */
|
||||||
"", /* tailoring */
|
"[strength 2]", /* tailoring */
|
||||||
NULL, /* ctype */
|
NULL, /* ctype */
|
||||||
NULL, /* to_lower */
|
NULL, /* to_lower */
|
||||||
NULL, /* to_upper */
|
NULL, /* to_upper */
|
||||||
|
@@ -667,6 +667,8 @@ static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
|
|||||||
case _CS_ST_STRENGTH:
|
case _CS_ST_STRENGTH:
|
||||||
/* 1, 2, 3, 4, 5, or primary, secondary, tertiary, quaternary, identical */
|
/* 1, 2, 3, 4, 5, or primary, secondary, tertiary, quaternary, identical */
|
||||||
rc= tailoring_append(st, "[strength %.*s]", len, attr);
|
rc= tailoring_append(st, "[strength %.*s]", len, attr);
|
||||||
|
if (len && attr[0] >= '1' && attr[0] <= '9')
|
||||||
|
i->cs.levels_for_order= attr[0] - '0';
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case _CS_ST_ALTERNATE:
|
case _CS_ST_ALTERNATE:
|
||||||
|
Reference in New Issue
Block a user