1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-30 16:24:05 +03:00

Bug#28875 Conversion between ASCII and LATIN1 charsets does not function

(Regression, caused by a patch for the bug 22646).
Problem: when result type of date_format() was changed from
binary string to character string, mixing date_format()
with a ascii column in CONCAT() stopped to work.
Fix:
- adding "repertoire" flag into DTCollation class,
to mark items which can return only pure ASCII strings.
- allow character set conversion from pure ASCII to other character sets.


include/m_ctype.h:
  Defining new flags.
  Adding new function prototypes.
mysql-test/r/ctype_ucs.result:
  Adding tests.
mysql-test/r/ctype_utf8.result:
  Adding tests.
mysql-test/r/func_time.result:
  Adding tests.
mysql-test/t/ctype_ucs.test:
  Adding tests.
mysql-test/t/ctype_utf8.test:
  Adding tests.
mysql-test/t/func_time.test:
  Adding test.
mysys/charset.c:
  Adding pure ASCII detection when loading a dynamic character set.
sql/item.cc:
  - Moving detection of a Unicode superset into function.
  - Adding detection of a ASCII subset.
  - Adding creation of to-ASCII character set convertor when
    safe_charset_converter() failed and when the argument.
    repertoire is know to be pure ASCII.
sql/item.h:
  - Adding "repertoire" member into DTCollation class.
  - Adding "repertoire" argument to constructors.
  - Adding new methods:
    set_repertoire_from_charset()
    set_repertoire_from_value()
sql/item_func.cc:
  Adding "repertoire" argument.
sql/item_strfunc.cc:
  Adding "repertoire" argument.
sql/item_timefunc.cc:
  Initializing the result repertoire taking into account the "is_ascii"
  flag of the current locale.
sql/sql_lex.cc:
  Detect 7bit strings, return in Lex->text_string_is_7bit.
sql/sql_lex.h:
  Adding new member into LEX structure.
  Adding new member into Lex_input_stream
sql/sql_string.cc:
  Allow simple copy from pure ASCII to a ASCII-based character set.
sql/sql_yacc.yy:
  Depening on Lex->text_string_is_7bit and character set features,
  create Item_string with MY_REPERTOIRE_ASCII when it is possible.
strings/conf_to_src.c:
  - Adding printing of the "MY_CS_PUREASCII" flag
  - Adding printing of copyright
strings/ctype-extra.c:
  Recreating ctype-extra.c: ascii_general_ci and ascii_bin
  are now marked with MY_CS_PUREASCII flag.
strings/ctype.c:
  Adding new functions.
This commit is contained in:
unknown
2007-08-03 15:25:23 +05:00
parent b307fc4d8f
commit 53df09a9a6
20 changed files with 457 additions and 57 deletions

View File

@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
my_xml_parser_free(&p);
return rc;
}
/*
Check repertoire: detect pure ascii strings
*/
uint
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
{
const char *strend= str + length;
if (cs->mbminlen == 1)
{
for ( ; str < strend; str++)
{
if (((uchar) *str) > 0x7F)
return MY_REPERTOIRE_UNICODE30;
}
}
else
{
my_wc_t wc;
int chlen;
for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
{
if (wc > 0x7F)
return MY_REPERTOIRE_UNICODE30;
}
}
return MY_REPERTOIRE_ASCII;
}
/*
Detect whether a character set is ASCII compatible.
Returns TRUE for:
- all 8bit character sets whose Unicode mapping of 0x7B is '{'
(ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
- all multi-byte character sets having mbminlen == 1
(ignores ucs2 whose mbminlen is 2)
TODO:
When merging to 5.2, this function should be changed
to check a new flag MY_CS_NONASCII,
return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
This flag was previously added into 5.2 under terms
of WL#3759 "Optimize identifier conversion in client-server protocol"
especially to mark character sets not compatible with ASCII.
We won't backport this flag to 5.0 or 5.1.
This function is Ok for 5.0 and 5.1, because we're not going
to introduce new tricky character sets between 5.0 and 5.2.
*/
my_bool
my_charset_is_ascii_based(CHARSET_INFO *cs)
{
return
(cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
(cs->mbminlen == 1 && cs->mbmaxlen > 1);
}
/*
Detect if a character set is 8bit,
and it is pure ascii, i.e. doesn't have
characters outside U+0000..U+007F
This functions is shared between "conf_to_src"
and dynamic charsets loader in "mysqld".
*/
my_bool
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
{
size_t code;
if (!cs->tab_to_uni)
return 0;
for (code= 0; code < 256; code++)
{
if (cs->tab_to_uni[code] > 0x7F)
return 0;
}
return 1;
}