1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-23210 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on ALTER TABLE, SELECT and INSERT

Problem:
Parse-time conversion from binary to tricky character sets like utf32
produced ill-formed strings. So, later a crash happened in debug builds,
or a wrong SHOW CREATE TABLE was returned in release builds.

Fix:

1. Backporting a few methods from 10.3:
  - THD::check_string_for_wellformedness()
  - THD::convert_string() overloads
  - THD::make_text_string_connection()

2. Adding a new method THD::reinterpret_string_from_binary(),
   which makes sure to either return a well-formed string
   (optionally prepending it with zero bytes), or return an error.
This commit is contained in:
Alexander Barkov
2022-03-12 15:38:44 +04:00
parent ed6e271f78
commit 03c3dc6365
7 changed files with 202 additions and 10 deletions

View File

@@ -2148,7 +2148,7 @@ void THD::cleanup_after_query()
*/
bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
const char *from, uint from_length,
const char *from, size_t from_length,
CHARSET_INFO *from_cs)
{
DBUG_ENTER("THD::convert_string");
@@ -2170,6 +2170,58 @@ bool THD::convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
}
/*
  Treat a binary string as a character string in the character set "cs".

  If "length" is a multiple of cs->mbminlen, the input pointer and length
  are handed back unchanged. Otherwise a new buffer is requested from
  alloc(), the missing leading bytes are filled with zeros so that the
  total length becomes a multiple of cs->mbminlen, the input is copied
  after them, and a terminating '\0' is appended (not counted in the
  resulting length).

  @param[OUT] to      Receives the resulting pointer and length.
                      Set to {NULL, 0} when the allocation fails.
  @param      cs      The destination character set
  @param      str     The binary string
  @param      length  The length of the binary string, in bytes

  @return false  on success
  @return true   if the allocation failed, or if
                 check_string_for_wellformedness() rejected the result
*/
bool THD::reinterpret_string_from_binary(LEX_CSTRING *to, CHARSET_INFO *cs,
                                         const char *str, size_t length)
{
  /*
    Character sets with mbminlen > 1 (UCS2, UTF16, UTF32) cannot hold
    a string whose byte length is not a multiple of mbminlen, e.g. after:
      SET COLLATION_CONNECTION=utf32_general_ci, CHARACTER_SET_CLIENT=binary;
    The left zero padding below follows the approach of
    String::copy_aligned().
  */
  const size_t tail_bytes= length % cs->mbminlen; // Size of the partial character

  if (!tail_bytes)
  {
    // Already aligned: reuse the caller's buffer as is.
    to->str= str;
    to->length= length;
  }
  else
  {
    const size_t pad= cs->mbminlen - tail_bytes;
    const size_t padded_length= pad + length;
    char *buf= (char*) alloc(padded_length + 1); // +1 for the trailing '\0'
    if (!buf)
    {
      to->str= NULL;                             // Safety
      to->length= 0;
      return true;
    }
    bzero(buf, pad);                             // Leading zero bytes
    memcpy(buf + pad, str, length);
    buf[padded_length]= '\0';
    to->str= buf;
    to->length= padded_length;
  }

  return check_string_for_wellformedness(to->str, to->length, cs);
}
/*
Convert a string between two character sets.
dstcs and srccs cannot be &my_charset_bin.
@@ -2274,6 +2326,21 @@ bool THD::convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
}
/*
  Verify that the whole string is well-formed in the character set "cs".

  The length of the well-formed prefix is computed with Well_formed_prefix.
  If it is shorter than "length", ER_INVALID_CHARACTER_STRING is raised
  through my_error() with the character set name and the string.

  @param str     The string to check
  @param length  The length of the string, in bytes
  @param cs      The character set to check against

  @return false  the well-formed prefix covers the whole string
  @return true   the string is not well-formed; an error has been raised
*/
bool THD::check_string_for_wellformedness(const char *str,
                                          size_t length,
                                          CHARSET_INFO *cs) const
{
  if (Well_formed_prefix(cs, str, length).length() >= length)
    return false;                               // Entire string is valid

  // Report the offending bytes; they are passed to ErrConvString as binary.
  ErrConvString err(str, length, &my_charset_bin);
  my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, err.ptr());
  return true;
}
/*
Update some cache variables when character set changes
*/