1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-27 18:02:13 +03:00

Backporting WL#1213

config/ac-macros/character_sets.m4:
  - Adding configure definitions for utf8mb4, utf16, utf32
include/config-win.h:
  - Enabling utf8mb4, utf16, utf32 in Windows build
include/m_ctype.h:
  - Adding new flags
  - Adding new shared functions prototypes
mysql-test/include/ctype_datetime.inc:
  - Adding test to check that datetime functions
    work with "real" multibyte character sets.
mysql-test/include/ctype_like.inc:
  - Adding LIKE tests
mysql-test/include/have_utf16.inc:
  New file
mysql-test/include/have_utf32.inc:
  New file
mysql-test/include/have_utf8mb4.inc:
  New file
mysql-test/r/ctype_ldml.result:
  - Adding tests for utf8mb4, utf16, utf32
mysql-test/r/ctype_many.result:
  - Adding tests to check superset/subset relations
    between all Unicode character sets.
mysql-test/r/ctype_utf16.result:
  New file
mysql-test/r/ctype_utf16_uca.result:
  New file
mysql-test/r/ctype_utf32.result:
  New file
mysql-test/r/ctype_utf32_uca.result:
  New file
mysql-test/r/ctype_utf8.result:
  - Adding tests for utf8mn3 alias
mysql-test/r/ctype_utf8mb4.result:
  - Adding tests for utf8mb4
mysql-test/r/have_utf16.require:
  New file
mysql-test/r/have_utf32.require:
  New file
mysql-test/r/have_utf8mb4.require:
  New file
mysql-test/std_data/Index.xml:
  - Adding tests for loadable utf8m4, utf16, utf32 collations
mysql-test/suite/sys_vars/r/character_set_client_basic.result:
  - Adding tests for utf16, utf32.
  - Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_connection_basic.result:
  - Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_database_basic.result:
  - Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result:
  - Fixing new number of character sets
mysql-test/suite/sys_vars/r/character_set_results_basic.result:
  - Fixing new number of character sets
mysql-test/suite/sys_vars/t/character_set_client_basic.test:
  - Adding tests for new character sets
mysql-test/suite/sys_vars/t/character_set_connection_basic.test:
  - Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_database_basic.test:
  - Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test:
  - Adding dependency on utf8mb4, utf16, utf32
mysql-test/suite/sys_vars/t/character_set_results_basic.test:
  - Adding dependency on utf8mb4, utf16, utf32
mysql-test/t/ctype_ldml.test:
  - Adding tests for dynamic utf8mb4, utf16, utf32 collations
mysql-test/t/ctype_many.test:
  - Adding tests to check superset/subset relations
    between all Unicode character sets
mysql-test/t/ctype_utf16.test:
  New file
mysql-test/t/ctype_utf16_uca.test:
  New file
mysql-test/t/ctype_utf32.test:
  New file
mysql-test/t/ctype_utf32_uca.test:
  New file
mysql-test/t/ctype_utf8.test:
  - Adding tests for utf8mb4 alias
mysql-test/t/ctype_utf8mb4.test:
  New file
mysys/charset-def.c:
  - Adding initialization of utf8mb4, utf16, utf32 built-int collations
mysys/charset.c:
  - Adding initialization of utf8mb4, utf16, utf32 dynamic collations
sql/field.cc:
  - Fixing "truncated" error with datetime functions:
    Force conversion in case of non-ascii character sets.
sql/item.cc:
  - Adding superset/subset relation check for utf8mb4/utf8
sql/item_strfunc.cc:
  - Fixing a problem with CHAR(x USING utf32)
sql/sql_string.cc:
  - Fixing problems with zero padding for UTF32
sql/sql_table.cc:
  - Fixing buffer size, to make utf32 comma fit.
strings/ctype-mb.c:
  - Making handlers for multi-byte binary collations public
strings/ctype-uca.c:
  - Adding definitions for utf8mb4, utf16, utf32 UCA collations
strings/ctype-ucs2.c:
  - Adding functions which are shared between ucs2, utf16, utf32
  - Ading utf16 implementation
  - Adding utf32 implementation
strings/ctype-utf8.c:
  - Adding functions shared between utf8 and utf8mb4
  - Adding implementation of utf8mb4
This commit is contained in:
Alexander Barkov
2010-02-24 13:15:34 +04:00
parent d2af6c43c0
commit 8994fad85d
49 changed files with 19441 additions and 1208 deletions

View File

@ -412,11 +412,25 @@ bool String::append(const char *s)
bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
{
uint32 dummy_offset;
uint32 offset;
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
if (needs_conversion(arg_length, cs, str_charset, &offset))
{
uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
uint32 add_length;
if ((cs == &my_charset_bin) && offset)
{
DBUG_ASSERT(str_charset->mbminlen > offset);
offset= str_charset->mbminlen - offset; // How many characters to pad
add_length= arg_length + offset;
if (realloc(str_length + add_length))
return TRUE;
bzero((char*) Ptr + str_length, offset);
memcpy(Ptr + str_length + offset, s, arg_length);
str_length+= add_length;
return FALSE;
}
add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
uint dummy_errors;
if (realloc(str_length + add_length))
return TRUE;
@ -966,6 +980,24 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
uint pad_length= to_cs->mbminlen - from_offset;
bzero(to, pad_length);
memmove(to + pad_length, from, from_offset);
/*
In some cases left zero-padding can create an incorrect character.
For example:
INSERT INTO t1 (utf32_column) VALUES (0x110000);
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
The valid characters range is limited to 0x00000000..0x0010FFFF.
Make sure we didn't pad to an incorrect character.
*/
if (to_cs->cset->well_formed_len(to_cs,
to, to + to_cs->mbminlen, 1,
&well_formed_error) !=
to_cs->mbminlen)
{
*from_end_pos= *well_formed_error_pos= from;
*cannot_convert_error_pos= NULL;
return 0;
}
nchars--;
from+= from_offset;
from_length-= from_offset;