mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
Problem: Parse-time conversion from binary to tricky character sets like utf32 produced ill-formed strings. So, later a crash happened in debug builds, or a wrong SHOW CREATE TABLE was returned in release builds. Fix: 1. Backporting a few methods from 10.3: - THD::check_string_for_wellformedness() - THD::convert_string() overloads - THD::make_text_string_connection() 2. Adding a new method THD::reinterpret_string_from_binary(), which makes sure to either return a well-formed string (optionally prepending it with zero bytes), or return an error.
This commit is contained in:
@ -2890,5 +2890,28 @@ HEX(c1)
|
||||
0000006100000063
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
|
||||
#
|
||||
CREATE TABLE t1 (a CHAR(1));
|
||||
SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
|
||||
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
|
||||
ERROR HY000: Column 'a' has duplicated value 'a' in ENUM
|
||||
ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32;
|
||||
ERROR HY000: Invalid utf32 character string: '\x00aaa'
|
||||
ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32;
|
||||
SHOW CREATE TABLE t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`a` enum('慡') CHARACTER SET utf32 DEFAULT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
|
||||
SHOW CREATE TABLE t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
DROP TABLE t1;
|
||||
SET NAMES utf8;
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
|
@ -7941,5 +7941,20 @@ EXECUTE s;
|
||||
DEALLOCATE PREPARE s;
|
||||
SET NAMES utf8;
|
||||
#
|
||||
# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
|
||||
#
|
||||
CREATE TABLE t1 (a CHAR(1));
|
||||
SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary;
|
||||
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
|
||||
ERROR HY000: Column 'a' has duplicated value 'a' in ENUM
|
||||
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
|
||||
SHOW CREATE TABLE t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
DROP TABLE t1;
|
||||
SET NAMES utf8;
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
|
@ -1048,6 +1048,25 @@ INSERT INTO t1 (c1) VALUES (1),(2),(3);
|
||||
SELECT HEX(c1) FROM t1 ORDER BY c1;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
|
||||
--echo #
|
||||
|
||||
CREATE TABLE t1 (a CHAR(1));
|
||||
SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
|
||||
--error ER_DUPLICATED_VALUE_IN_TYPE
|
||||
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
|
||||
--error ER_INVALID_CHARACTER_STRING
|
||||
ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32;
|
||||
ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32;
|
||||
SHOW CREATE TABLE t1;
|
||||
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
|
||||
SHOW CREATE TABLE t1;
|
||||
DROP TABLE t1;
|
||||
SET NAMES utf8;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
--echo #
|
||||
|
@ -290,6 +290,19 @@ EXECUTE s;
|
||||
DEALLOCATE PREPARE s;
|
||||
SET NAMES utf8;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
|
||||
--echo #
|
||||
|
||||
CREATE TABLE t1 (a CHAR(1));
|
||||
SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary;
|
||||
--error ER_DUPLICATED_VALUE_IN_TYPE
|
||||
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
|
||||
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
|
||||
SHOW CREATE TABLE t1;
|
||||
DROP TABLE t1;
|
||||
SET NAMES utf8;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
|
@ -2148,7 +2148,7 @@ void THD::cleanup_after_query()
|
||||
*/
|
||||
|
||||
bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||
const char *from, uint from_length,
|
||||
const char *from, size_t from_length,
|
||||
CHARSET_INFO *from_cs)
|
||||
{
|
||||
DBUG_ENTER("THD::convert_string");
|
||||
@ -2170,6 +2170,58 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Reinterpret a binary string to a character string
|
||||
|
||||
@param[OUT] to The result will be written here,
|
||||
either the original string as is,
|
||||
or a newly alloced fixed string with
|
||||
some zero bytes prepended.
|
||||
@param cs The destination character set
|
||||
@param str The binary string
|
||||
@param length The length of the binary string
|
||||
|
||||
@return false on success
|
||||
@return true on error
|
||||
*/
|
||||
|
||||
bool THD::reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *cs,
|
||||
const char *str, size_t length)
|
||||
{
|
||||
/*
|
||||
When reinterpreting from binary to tricky character sets like
|
||||
UCS2, UTF16, UTF32, we may need to prepend some zero bytes.
|
||||
This is possible in scenarios like this:
|
||||
SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
|
||||
This code is similar to String::copy_aligned().
|
||||
*/
|
||||
size_t incomplete= length % cs->mbminlen; // Bytes in an incomplete character
|
||||
if (incomplete)
|
||||
{
|
||||
size_t zeros= cs->mbminlen - incomplete;
|
||||
size_t aligned_length= zeros + length;
|
||||
char *dst= (char*) alloc(aligned_length + 1);
|
||||
if (!dst)
|
||||
{
|
||||
to->str= NULL; // Safety
|
||||
to->length= 0;
|
||||
return true;
|
||||
}
|
||||
bzero(dst, zeros);
|
||||
memcpy(dst + zeros, str, length);
|
||||
dst[aligned_length]= '\0';
|
||||
to->str= dst;
|
||||
to->length= aligned_length;
|
||||
}
|
||||
else
|
||||
{
|
||||
to->str= str;
|
||||
to->length= length;
|
||||
}
|
||||
return check_string_for_wellformedness(to->str, to->length, cs);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Convert a string between two character sets.
|
||||
dstcs and srccs cannot be &my_charset_bin.
|
||||
@ -2274,6 +2326,21 @@ bool THD::convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
|
||||
}
|
||||
|
||||
|
||||
bool THD::check_string_for_wellformedness(const char *str,
|
||||
size_t length,
|
||||
CHARSET_INFO *cs) const
|
||||
{
|
||||
size_t wlen= Well_formed_prefix(cs, str, length).length();
|
||||
if (wlen < length)
|
||||
{
|
||||
ErrConvString err(str, length, &my_charset_bin);
|
||||
my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, err.ptr());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Update some cache variables when character set changes
|
||||
*/
|
||||
|
@ -3503,8 +3503,31 @@ public:
|
||||
return true; // EOM
|
||||
}
|
||||
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||
const char *from, uint from_length,
|
||||
const char *from, size_t from_length,
|
||||
CHARSET_INFO *from_cs);
|
||||
bool reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *to_cs,
|
||||
const char *from, size_t from_length);
|
||||
/*
  LEX_CSTRING flavour of convert_string(): delegates to the LEX_STRING
  overload and copies the resulting pointer and length into *to.
  Returns whatever the LEX_STRING overload returns.
*/
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
                    const char *from, size_t from_length,
                    CHARSET_INFO *from_cs)
{
  LEX_STRING converted;
  const bool failed= convert_string(&converted, to_cs,
                                    from, from_length, from_cs);
  to->str= converted.str;
  to->length= converted.length;
  return failed;
}
|
||||
/*
  Produce in *to the string *from expressed in character set tocs.

  - If simple_copy_is_possible is false, a real conversion from fromcs
    to tocs is performed.
  - Otherwise, if fromcs is binary, the bytes are reinterpreted via
    reinterpret_string_from_binary().
  - Otherwise *to is set to *from (the pointer and length are copied,
    not the bytes).

  Returns false on success, true on error.
*/
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
                    const LEX_CSTRING *from, CHARSET_INFO *fromcs,
                    bool simple_copy_is_possible)
{
  if (simple_copy_is_possible)
  {
    if (fromcs != &my_charset_bin)
    {
      *to= *from;
      return false;
    }
    return reinterpret_string_from_binary(to, tocs, from->str, from->length);
  }
  return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
}
|
||||
/*
|
||||
Convert a string between character sets.
|
||||
Uses my_convert_fix(), which uses an mb_wc .. wc_mb loop internally.
|
||||
@ -3540,6 +3563,44 @@ public:
|
||||
|
||||
bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs);
|
||||
|
||||
/*
|
||||
Check if the string is wellformed, raise an error if not wellformed.
|
||||
@param str - The string to check.
|
||||
@param length - the string length.
|
||||
*/
|
||||
bool check_string_for_wellformedness(const char *str,
|
||||
size_t length,
|
||||
CHARSET_INFO *cs) const;
|
||||
|
||||
/*
  Make a text string in variables.collation_connection out of *from,
  which is treated as being in charset(). charset_is_collation_connection
  is passed on as the "simple copy is possible" flag of convert_string().
  Returns the result of convert_string().
*/
bool make_text_string_connection(LEX_CSTRING *to,
                                 const LEX_CSTRING *from)
{
  return convert_string(to,
                        variables.collation_connection,
                        from,
                        charset(),
                        charset_is_collation_connection);
}
|
||||
#if MYSQL_VERSION_ID < 100300
|
||||
/*
|
||||
A wrapper method for 10.2. It fixes the problem
|
||||
that various fields in bison %union use LEX_STRING.
|
||||
In 10.3 those fields are fixed to use LEX_CSTRING.
|
||||
Please remove this wrapper when merging to 10.3.
|
||||
*/
|
||||
/*
  LEX_STRING adapter around the LEX_CSTRING version of
  make_text_string_connection(): copies *from into a LEX_CSTRING, calls
  the LEX_CSTRING version, then copies the result back into *to
  (casting away const on the resulting pointer).
*/
bool make_text_string_connection(LEX_STRING *to,
                                 const LEX_STRING *from)
{
  LEX_CSTRING src;
  src.str= from->str;
  src.length= from->length;

  LEX_CSTRING dst;
  const bool rc= make_text_string_connection(&dst, &src);

  to->str= (char*) dst.str;
  to->length= dst.length;
  return rc;
}
|
||||
#else
|
||||
#error Remove the above wrapper
|
||||
#endif
|
||||
void add_changed_table(TABLE *table);
|
||||
void add_changed_table(const char *key, long key_length);
|
||||
CHANGED_TABLE_LIST * changed_table_dup(const char *key, long key_length);
|
||||
|
@ -14571,15 +14571,9 @@ TEXT_STRING_sys:
|
||||
TEXT_STRING_literal:
|
||||
TEXT_STRING
|
||||
{
|
||||
if (thd->charset_is_collation_connection)
|
||||
$$= $1;
|
||||
else
|
||||
{
|
||||
if (thd->convert_string(&$$, thd->variables.collation_connection,
|
||||
$1.str, $1.length, thd->charset()))
|
||||
if (thd->make_text_string_connection(&$$, &$1))
|
||||
MYSQL_YYABORT;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
TEXT_STRING_filesystem:
|
||||
|
Reference in New Issue
Block a user