mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
(Regression, caused by a patch for Bug#22646). Problem: when the result type of date_format() was changed from binary string to character string, mixing date_format() with an ASCII column in CONCAT() stopped working. Fix: - adding "repertoire" flag into DTCollation class, to mark items which can return only pure ASCII strings. - allow character set conversion from pure ASCII to other character sets. include/m_ctype.h: Defining new flags. Adding new function prototypes. mysql-test/r/ctype_ucs.result: Adding tests. mysql-test/r/ctype_utf8.result: Adding tests. mysql-test/r/func_time.result: Adding tests. mysql-test/t/ctype_ucs.test: Adding tests. mysql-test/t/ctype_utf8.test: Adding tests. mysql-test/t/func_time.test: Adding test. mysys/charset.c: Adding pure ASCII detection when loading a dynamic character set. sql/item.cc: - Moving detection of a Unicode superset into a function. - Adding detection of an ASCII subset. - Adding creation of a to-ASCII character set converter when safe_charset_converter() failed and the argument's repertoire is known to be pure ASCII. sql/item.h: - Adding "repertoire" member into DTCollation class. - Adding "repertoire" argument to constructors. - Adding new methods: set_repertoire_from_charset() set_repertoire_from_value() sql/item_func.cc: Adding "repertoire" argument. sql/item_strfunc.cc: Adding "repertoire" argument. sql/item_timefunc.cc: Initializing the result repertoire taking into account the "is_ascii" flag of the current locale. sql/sql_lex.cc: Detect 7bit strings, return in Lex->text_string_is_7bit. sql/sql_lex.h: Adding new member into LEX structure. Adding new member into Lex_input_stream. sql/sql_string.cc: Allow simple copy from pure ASCII to an ASCII-based character set. sql/sql_yacc.yy: Depending on Lex->text_string_is_7bit and character set features, create Item_string with MY_REPERTOIRE_ASCII when it is possible. 
strings/conf_to_src.c: - Adding printing of the "MY_CS_PUREASCII" flag - Adding printing of copyright strings/ctype-extra.c: Recreating ctype-extra.c: ascii_general_ci and ascii_bin are now marked with MY_CS_PUREASCII flag. strings/ctype.c: Adding new functions.
This commit is contained in:
@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
|
||||
my_xml_parser_free(&p);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Check repertoire: detect pure ascii strings
|
||||
*/
|
||||
uint
|
||||
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
|
||||
{
|
||||
const char *strend= str + length;
|
||||
if (cs->mbminlen == 1)
|
||||
{
|
||||
for ( ; str < strend; str++)
|
||||
{
|
||||
if (((uchar) *str) > 0x7F)
|
||||
return MY_REPERTOIRE_UNICODE30;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
my_wc_t wc;
|
||||
int chlen;
|
||||
for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
|
||||
{
|
||||
if (wc > 0x7F)
|
||||
return MY_REPERTOIRE_UNICODE30;
|
||||
}
|
||||
}
|
||||
return MY_REPERTOIRE_ASCII;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect whether a character set is ASCII compatible.
|
||||
|
||||
Returns TRUE for:
|
||||
|
||||
- all 8bit character sets whose Unicode mapping of 0x7B is '{'
|
||||
(ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
|
||||
|
||||
- all multi-byte character sets having mbminlen == 1
|
||||
(ignores ucs2 whose mbminlen is 2)
|
||||
|
||||
TODO:
|
||||
|
||||
When merging to 5.2, this function should be changed
|
||||
to check a new flag MY_CS_NONASCII,
|
||||
|
||||
return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
|
||||
|
||||
This flag was previously added into 5.2 under terms
|
||||
of WL#3759 "Optimize identifier conversion in client-server protocol"
|
||||
especially to mark character sets not compatible with ASCII.
|
||||
|
||||
We won't backport this flag to 5.0 or 5.1.
|
||||
This function is Ok for 5.0 and 5.1, because we're not going
|
||||
to introduce new tricky character sets between 5.0 and 5.2.
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_ascii_based(CHARSET_INFO *cs)
|
||||
{
|
||||
return
|
||||
(cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
|
||||
(cs->mbminlen == 1 && cs->mbmaxlen > 1);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect if a character set is 8bit,
|
||||
and it is pure ascii, i.e. doesn't have
|
||||
characters outside U+0000..U+007F
|
||||
This functions is shared between "conf_to_src"
|
||||
and dynamic charsets loader in "mysqld".
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
|
||||
{
|
||||
size_t code;
|
||||
if (!cs->tab_to_uni)
|
||||
return 0;
|
||||
for (code= 0; code < 256; code++)
|
||||
{
|
||||
if (cs->tab_to_uni[code] > 0x7F)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
Reference in New Issue
Block a user