mirror of
https://github.com/MariaDB/server.git
synced 2025-07-18 23:03:28 +03:00
A joint patch for MDEV-19284 and MDEV-19285 (INSTANT ALTER)
This patch fixes:
- MDEV-19284 INSTANT ALTER with ucs2-to-utf16 conversion produces bad data
- MDEV-19285 INSTANT ALTER from ascii_general_ci to latin1_general_ci produces corrupt data

These regressions were introduced in 10.4.3 by:
- MDEV-15564 Avoid table rebuild in ALTER TABLE on collation or charset changes

Changes:
1. Cleanup: Adding a helper method Field_longstr::csinfo_change_allows_instant_alter(), to remove some duplicate code in field.cc.
2. Cleanup: Removing Type_handler::Charsets_are_compatible() and the static function charsets_are_compatible(), and introducing new methods in the recently added class Charset instead:
   - encoding_allows_reinterpret_as()
   - encoding_and_order_allow_reinterpret_as()
3. Bug fix: Removing the code that allowed instant conversion for ascii-to-8bit and ucs2-to-utf16. This actually fixes MDEV-19284 and MDEV-19285.
4. Bug fix: Adding a helper method Charset::collation_specific_name(). The old corresponding code in Type_handler::Charsets_are_compatible() was not safe against (badly named) user-defined collations whose character set name can be longer than the collation name.
This commit is contained in:
@ -8219,48 +8219,51 @@ Type_handler_timestamp_common::Item_param_val_native(THD *thd,
|
||||
TIME_to_native(thd, &ltime, to, item->datetime_precision(thd));
|
||||
}
|
||||
|
||||
static bool charsets_are_compatible(const char *old_cs_name,
|
||||
const CHARSET_INFO *new_ci)
|
||||
|
||||
LEX_CSTRING Charset::collation_specific_name() const
|
||||
{
|
||||
const char *new_cs_name= new_ci->csname;
|
||||
/*
|
||||
User defined collations can provide arbitrary names
|
||||
for character sets and collations, so a collation
|
||||
name does not necessarily start with the character set name.
|
||||
*/
|
||||
size_t csname_length= strlen(m_charset->csname);
|
||||
if (strncmp(m_charset->name, m_charset->csname, csname_length))
|
||||
return {NULL, 0};
|
||||
const char *ptr= m_charset->name + csname_length;
|
||||
return {ptr, strlen(ptr) };
|
||||
}
|
||||
|
||||
if (!strcmp(old_cs_name, new_cs_name))
|
||||
|
||||
bool
|
||||
Charset::encoding_allows_reinterpret_as(const CHARSET_INFO *cs) const
|
||||
{
|
||||
if (!strcmp(m_charset->csname, cs->csname))
|
||||
return true;
|
||||
|
||||
if (!strcmp(old_cs_name, MY_UTF8MB3) && !strcmp(new_cs_name, MY_UTF8MB4))
|
||||
return true;
|
||||
|
||||
if (!strcmp(old_cs_name, "ascii") && !(new_ci->state & MY_CS_NONASCII))
|
||||
return true;
|
||||
|
||||
if (!strcmp(old_cs_name, "ucs2") && !strcmp(new_cs_name, "utf16"))
|
||||
if (!strcmp(m_charset->csname, MY_UTF8MB3) &&
|
||||
!strcmp(cs->csname, MY_UTF8MB4))
|
||||
return true;
|
||||
|
||||
/*
|
||||
Originally we allowed here instant ALTER for ASCII-to-LATIN1
|
||||
and UCS2-to-UTF16, but this was wrong:
|
||||
- MariaDB's ascii is not a subset of 8-bit character sets
|
||||
like latin1, because it allows storing bytes 0x80..0xFF as
|
||||
"unassigned" characters (see MDEV-19285).
|
||||
- MariaDB's ucs2 (as in Unicode-1.1) is not a subset of UTF16,
|
||||
because they treat surrogate codes differently (MDEV-19284).
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Type_handler::Charsets_are_compatible(const CHARSET_INFO *old_ci,
|
||||
const CHARSET_INFO *new_ci,
|
||||
bool part_of_a_key)
|
||||
|
||||
bool
|
||||
Charset::encoding_and_order_allow_reinterpret_as(CHARSET_INFO *cs) const
|
||||
{
|
||||
const char *old_cs_name= old_ci->csname;
|
||||
const char *new_cs_name= new_ci->csname;
|
||||
|
||||
if (!charsets_are_compatible(old_cs_name, new_ci))
|
||||
{
|
||||
if (!encoding_allows_reinterpret_as(cs))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!part_of_a_key)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (strcmp(old_ci->name + strlen(old_cs_name),
|
||||
new_ci->name + strlen(new_cs_name)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
LEX_CSTRING name0= collation_specific_name();
|
||||
LEX_CSTRING name1= Charset(cs).collation_specific_name();
|
||||
return name0.length && !cmp(&name0, &name1);
|
||||
}
|
||||
|
Reference in New Issue
Block a user