mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
Problem: Parse-time conversion from binary to tricky character sets like utf32 produced ill-formed strings. So, later a crash happened in debug builds, or a wrong SHOW CREATE TABLE was returned in release builds. Fix: 1. Backporting a few methods from 10.3: - THD::check_string_for_wellformedness() - THD::convert_string() overloads - THD::make_text_string_connection() 2. Adding a new method THD::reinterpret_string_from_binary(), which makes sure to either return a well-formed string (optionally prepended with zero bytes), or return an error.
This commit is contained in:
@ -2890,5 +2890,28 @@ HEX(c1)
|
|||||||
0000006100000063
|
0000006100000063
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
#
|
#
|
||||||
|
# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (a CHAR(1));
|
||||||
|
SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
|
||||||
|
ERROR HY000: Column 'a' has duplicated value 'a' in ENUM
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32;
|
||||||
|
ERROR HY000: Invalid utf32 character string: '\x00aaa'
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32;
|
||||||
|
SHOW CREATE TABLE t1;
|
||||||
|
Table Create Table
|
||||||
|
t1 CREATE TABLE `t1` (
|
||||||
|
`a` enum('慡') CHARACTER SET utf32 DEFAULT NULL
|
||||||
|
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
|
||||||
|
SHOW CREATE TABLE t1;
|
||||||
|
Table Create Table
|
||||||
|
t1 CREATE TABLE `t1` (
|
||||||
|
`a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL
|
||||||
|
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||||
|
DROP TABLE t1;
|
||||||
|
SET NAMES utf8;
|
||||||
|
#
|
||||||
# End of 10.2 tests
|
# End of 10.2 tests
|
||||||
#
|
#
|
||||||
|
@ -7941,5 +7941,20 @@ EXECUTE s;
|
|||||||
DEALLOCATE PREPARE s;
|
DEALLOCATE PREPARE s;
|
||||||
SET NAMES utf8;
|
SET NAMES utf8;
|
||||||
#
|
#
|
||||||
|
# MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (a CHAR(1));
|
||||||
|
SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary;
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
|
||||||
|
ERROR HY000: Column 'a' has duplicated value 'a' in ENUM
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
|
||||||
|
SHOW CREATE TABLE t1;
|
||||||
|
Table Create Table
|
||||||
|
t1 CREATE TABLE `t1` (
|
||||||
|
`a` enum('a','b') CHARACTER SET utf32 DEFAULT NULL
|
||||||
|
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||||
|
DROP TABLE t1;
|
||||||
|
SET NAMES utf8;
|
||||||
|
#
|
||||||
# End of 10.2 tests
|
# End of 10.2 tests
|
||||||
#
|
#
|
||||||
|
@ -1048,6 +1048,25 @@ INSERT INTO t1 (c1) VALUES (1),(2),(3);
|
|||||||
SELECT HEX(c1) FROM t1 ORDER BY c1;
|
SELECT HEX(c1) FROM t1 ORDER BY c1;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a CHAR(1));
|
||||||
|
SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
|
||||||
|
--error ER_DUPLICATED_VALUE_IN_TYPE
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
|
||||||
|
--error ER_INVALID_CHARACTER_STRING
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('aaa') CHARACTER SET utf32;
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('aa') CHARACTER SET utf32;
|
||||||
|
SHOW CREATE TABLE t1;
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
|
||||||
|
SHOW CREATE TABLE t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
SET NAMES utf8;
|
||||||
|
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.2 tests
|
--echo # End of 10.2 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
@ -290,6 +290,19 @@ EXECUTE s;
|
|||||||
DEALLOCATE PREPARE s;
|
DEALLOCATE PREPARE s;
|
||||||
SET NAMES utf8;
|
SET NAMES utf8;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a CHAR(1));
|
||||||
|
SET COLLATION_CONNECTION=utf32_myanmar_ci, CHARACTER_SET_CLIENT=binary;
|
||||||
|
--error ER_DUPLICATED_VALUE_IN_TYPE
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('a','a') CHARACTER SET utf32;
|
||||||
|
ALTER TABLE t1 CHANGE a a ENUM('a','b') CHARACTER SET utf32;
|
||||||
|
SHOW CREATE TABLE t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
SET NAMES utf8;
|
||||||
|
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.2 tests
|
--echo # End of 10.2 tests
|
||||||
|
@ -2148,7 +2148,7 @@ void THD::cleanup_after_query()
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||||
const char *from, uint from_length,
|
const char *from, size_t from_length,
|
||||||
CHARSET_INFO *from_cs)
|
CHARSET_INFO *from_cs)
|
||||||
{
|
{
|
||||||
DBUG_ENTER("THD::convert_string");
|
DBUG_ENTER("THD::convert_string");
|
||||||
@ -2170,6 +2170,58 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Reinterpret a binary string to a character string
|
||||||
|
|
||||||
|
@param[OUT] to The result will be written here,
|
||||||
|
either the original string as is,
|
||||||
|
or a newly alloced fixed string with
|
||||||
|
some zero bytes prepended.
|
||||||
|
@param cs The destination character set
|
||||||
|
@param str The binary string
|
||||||
|
@param length The length of the binary string
|
||||||
|
|
||||||
|
@return false on success
|
||||||
|
@return true on error
|
||||||
|
*/
|
||||||
|
|
||||||
|
bool THD::reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *cs,
|
||||||
|
const char *str, size_t length)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
When reinterpreting from binary to tricky character sets like
|
||||||
|
UCS2, UTF16, UTF32, we may need to prepend some zero bytes.
|
||||||
|
This is possible in scenarios like this:
|
||||||
|
SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
|
||||||
|
This code is similar to String::copy_aligned().
|
||||||
|
*/
|
||||||
|
size_t incomplete= length % cs->mbminlen; // Bytes in an incomplete character
|
||||||
|
if (incomplete)
|
||||||
|
{
|
||||||
|
size_t zeros= cs->mbminlen - incomplete;
|
||||||
|
size_t aligned_length= zeros + length;
|
||||||
|
char *dst= (char*) alloc(aligned_length + 1);
|
||||||
|
if (!dst)
|
||||||
|
{
|
||||||
|
to->str= NULL; // Safety
|
||||||
|
to->length= 0;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
bzero(dst, zeros);
|
||||||
|
memcpy(dst + zeros, str, length);
|
||||||
|
dst[aligned_length]= '\0';
|
||||||
|
to->str= dst;
|
||||||
|
to->length= aligned_length;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
to->str= str;
|
||||||
|
to->length= length;
|
||||||
|
}
|
||||||
|
return check_string_for_wellformedness(to->str, to->length, cs);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Convert a string between two character sets.
|
Convert a string between two character sets.
|
||||||
dstcs and srccs cannot be &my_charset_bin.
|
dstcs and srccs cannot be &my_charset_bin.
|
||||||
@ -2274,6 +2326,21 @@ bool THD::convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool THD::check_string_for_wellformedness(const char *str,
|
||||||
|
size_t length,
|
||||||
|
CHARSET_INFO *cs) const
|
||||||
|
{
|
||||||
|
size_t wlen= Well_formed_prefix(cs, str, length).length();
|
||||||
|
if (wlen < length)
|
||||||
|
{
|
||||||
|
ErrConvString err(str, length, &my_charset_bin);
|
||||||
|
my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, err.ptr());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Update some cache variables when character set changes
|
Update some cache variables when character set changes
|
||||||
*/
|
*/
|
||||||
|
@ -3503,8 +3503,31 @@ public:
|
|||||||
return true; // EOM
|
return true; // EOM
|
||||||
}
|
}
|
||||||
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
|
||||||
const char *from, uint from_length,
|
const char *from, size_t from_length,
|
||||||
CHARSET_INFO *from_cs);
|
CHARSET_INFO *from_cs);
|
||||||
|
bool reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *to_cs,
|
||||||
|
const char *from, size_t from_length);
|
||||||
|
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
|
||||||
|
const char *from, size_t from_length,
|
||||||
|
CHARSET_INFO *from_cs)
|
||||||
|
{
|
||||||
|
LEX_STRING tmp;
|
||||||
|
bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs);
|
||||||
|
to->str= tmp.str;
|
||||||
|
to->length= tmp.length;
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
|
||||||
|
const LEX_CSTRING *from, CHARSET_INFO *fromcs,
|
||||||
|
bool simple_copy_is_possible)
|
||||||
|
{
|
||||||
|
if (!simple_copy_is_possible)
|
||||||
|
return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
|
||||||
|
if (fromcs == &my_charset_bin)
|
||||||
|
return reinterpret_string_from_binary(to, tocs, from->str, from->length);
|
||||||
|
*to= *from;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
Convert a strings between character sets.
|
Convert a strings between character sets.
|
||||||
Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally.
|
Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally.
|
||||||
@ -3540,6 +3563,44 @@ public:
|
|||||||
|
|
||||||
bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs);
|
bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Check if the string is wellformed, raise an error if not wellformed.
|
||||||
|
@param str - The string to check.
|
||||||
|
@param length - the string length.
|
||||||
|
*/
|
||||||
|
bool check_string_for_wellformedness(const char *str,
|
||||||
|
size_t length,
|
||||||
|
CHARSET_INFO *cs) const;
|
||||||
|
|
||||||
|
bool make_text_string_connection(LEX_CSTRING *to,
|
||||||
|
const LEX_CSTRING *from)
|
||||||
|
{
|
||||||
|
return convert_string(to, variables.collation_connection,
|
||||||
|
from, charset(), charset_is_collation_connection);
|
||||||
|
}
|
||||||
|
#if MYSQL_VERSION_ID < 100300
|
||||||
|
/*
|
||||||
|
A wrapper method for 10.2. It fixes the problem
|
||||||
|
that various fields in bison %union use LEX_STRING.
|
||||||
|
In 10.3 those fields are fixed to use LEX_CSTRING.
|
||||||
|
Please remove this wrapper when merging to 10.3.
|
||||||
|
*/
|
||||||
|
bool make_text_string_connection(LEX_STRING *to,
|
||||||
|
const LEX_STRING *from)
|
||||||
|
{
|
||||||
|
LEX_CSTRING cto;
|
||||||
|
LEX_CSTRING cfrom;
|
||||||
|
bool rc;
|
||||||
|
cfrom.str= from->str;
|
||||||
|
cfrom.length= from->length;
|
||||||
|
rc= make_text_string_connection(&cto, &cfrom);
|
||||||
|
to->str= (char*) cto.str;
|
||||||
|
to->length= cto.length;
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#error Remove the above wrapper
|
||||||
|
#endif
|
||||||
void add_changed_table(TABLE *table);
|
void add_changed_table(TABLE *table);
|
||||||
void add_changed_table(const char *key, long key_length);
|
void add_changed_table(const char *key, long key_length);
|
||||||
CHANGED_TABLE_LIST * changed_table_dup(const char *key, long key_length);
|
CHANGED_TABLE_LIST * changed_table_dup(const char *key, long key_length);
|
||||||
|
@ -14571,14 +14571,8 @@ TEXT_STRING_sys:
|
|||||||
TEXT_STRING_literal:
|
TEXT_STRING_literal:
|
||||||
TEXT_STRING
|
TEXT_STRING
|
||||||
{
|
{
|
||||||
if (thd->charset_is_collation_connection)
|
if (thd->make_text_string_connection(&$$, &$1))
|
||||||
$$= $1;
|
MYSQL_YYABORT;
|
||||||
else
|
|
||||||
{
|
|
||||||
if (thd->convert_string(&$$, thd->variables.collation_connection,
|
|
||||||
$1.str, $1.length, thd->charset()))
|
|
||||||
MYSQL_YYABORT;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user