mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
InnoDB: Fixed bugs in the padding and trimming of trailing spaces
that affected the UCS2 character set. (Bug #7350)
innobase/data/data0type.c: Added dtype_get_charset_coll_noninline().
innobase/include/data0type.h: Added dtype_get_charset_coll_noninline().
innobase/include/row0mysql.h: Added charset field to mysql_row_templ_struct.
innobase/include/row0mysql.ic: row_mysql_store_col_in_innobase_format(): When removing trailing spaces, treat the UCS2 character set properly.
innobase/rem/rem0cmp.c: cmp_whole_field(): Do not remove trailing 0x20 bytes, as innobase_mysql_cmp() implicitly pads the strings with trailing spaces as necessary.
innobase/row/row0sel.c: row_sel_field_store_in_mysql_format(): Do not pad with 0x20 bytes. row_sel_store_mysql_rec(): Pad VARCHARs with trailing spaces (0x20, or 0x0020 in UCS2).
sql/ha_innodb.cc: build_template(): Initialize templ->charset.
This commit is contained in:
@ -165,6 +165,17 @@ dtype_is_non_binary_string_type(
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
Gets the MySQL charset-collation code for MySQL string types. */
|
||||
|
||||
ulint
|
||||
dtype_get_charset_coll_noninline(
|
||||
/*=============================*/
/* out: the value returned by
dtype_get_charset_coll(prtype), i.e. the
MySQL charset-collation code */
|
||||
ulint prtype) /* in: precise data type */
|
||||
{
/* Plain forwarding call with no extra logic. The wrapped
dtype_get_charset_coll() is declared UNIV_INLINE in the header;
this ordinary function is what build_template() in
sql/ha_innodb.cc calls. NOTE(review): presumably the inline
version is not usable from that file -- confirm. */
|
||||
return(dtype_get_charset_coll(prtype));
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
Forms a precise type from the < 4.1.2 format precise type plus the
|
||||
charset-collation code. */
|
||||
|
@ -234,6 +234,13 @@ dtype_get_prtype(
|
||||
dtype_t* type);
|
||||
/*************************************************************************
|
||||
Gets the MySQL charset-collation code for MySQL string types. */
|
||||
|
||||
ulint
|
||||
dtype_get_charset_coll_noninline(
|
||||
/*=============================*/
/* out: MySQL charset-collation code
for the given precise type */
|
||||
ulint prtype);/* in: precise data type */
|
||||
/*************************************************************************
|
||||
Gets the MySQL charset-collation code for MySQL string types. */
|
||||
UNIV_INLINE
|
||||
ulint
|
||||
dtype_get_charset_coll(
|
||||
|
@ -454,6 +454,8 @@ struct mysql_row_templ_struct {
|
||||
zero if column cannot be NULL */
|
||||
ulint type; /* column type in Innobase mtype
|
||||
numbers DATA_CHAR... */
|
||||
ulint charset; /* MySQL charset-collation code
|
||||
of the column, or zero */
|
||||
ulint is_unsigned; /* if a column type is an integer
|
||||
type and this field is != 0, then
|
||||
it is an unsigned integer type */
|
||||
|
@ -91,11 +91,32 @@ row_mysql_store_col_in_innobase_format(
|
||||
}
|
||||
} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
|
||||
|| type == DATA_BINARY) {
|
||||
ptr = row_mysql_read_var_ref(&col_len, mysql_data);
|
||||
/* Remove trailing spaces. */
|
||||
|
||||
/* Remove trailing spaces */
|
||||
while (col_len > 0 && ptr[col_len - 1] == ' ') {
|
||||
col_len--;
|
||||
/* Handle UCS2 strings differently. As no new
|
||||
collations will be introduced in 4.1, we hardcode the
|
||||
charset-collation codes here. In 5.0, the logic will
|
||||
be based on mbminlen. */
|
||||
ulint cset = dtype_get_charset_coll(
|
||||
dtype_get_prtype(dfield_get_type(dfield)));
|
||||
ptr = row_mysql_read_var_ref(&col_len, mysql_data);
|
||||
if (cset == 35/*ucs2_general_ci*/
|
||||
|| cset == 90/*ucs2_bin*/
|
||||
|| (cset >= 128/*ucs2_unicode_ci*/
|
||||
&& cset <= 144/*ucs2_persian_ci*/)) {
|
||||
/* space=0x0020 */
|
||||
/* Trim "half-chars", just in case. */
|
||||
col_len &= ~1;
|
||||
|
||||
while (col_len >= 2 && ptr[col_len - 2] == 0x00
|
||||
&& ptr[col_len - 1] == 0x20) {
|
||||
col_len -= 2;
|
||||
}
|
||||
} else {
|
||||
/* space=0x20 */
|
||||
while (col_len > 0 && ptr[col_len - 1] == 0x20) {
|
||||
col_len--;
|
||||
}
|
||||
}
|
||||
} else if (type == DATA_BLOB) {
|
||||
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
|
||||
|
@ -261,22 +261,6 @@ cmp_whole_field(
|
||||
"InnoDB: comparison!\n");
|
||||
}
|
||||
|
||||
/* MySQL does not pad the ends of strings with spaces in a
|
||||
comparison. That would cause a foreign key check to fail for
|
||||
non-latin1 character sets if we have different length columns.
|
||||
To prevent that we remove trailing spaces here before doing
|
||||
the comparison. NOTE that if we in the future map more MySQL
|
||||
types to DATA_MYSQL or DATA_VARMYSQL, we have to change this
|
||||
code. */
|
||||
|
||||
while (a_length > 0 && a[a_length - 1] == ' ') {
|
||||
a_length--;
|
||||
}
|
||||
|
||||
while (b_length > 0 && b[b_length - 1] == ' ') {
|
||||
b_length--;
|
||||
}
|
||||
|
||||
return(innobase_mysql_cmp(
|
||||
(int)(type->prtype & DATA_MYSQL_TYPE_MASK),
|
||||
(uint)dtype_get_charset_coll(type->prtype),
|
||||
|
@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format(
|
||||
dest = row_mysql_store_var_len(dest, len);
|
||||
ut_memcpy(dest, data, len);
|
||||
|
||||
/* Pad with trailing spaces */
|
||||
memset(dest + len, ' ', col_len - len);
|
||||
|
||||
/* ut_ad(col_len >= len + 2); No real var implemented in
|
||||
MySQL yet! */
|
||||
|
||||
@ -2335,6 +2332,44 @@ row_sel_store_mysql_rec(
|
||||
templ->mysql_col_len, data, len,
|
||||
templ->type, templ->is_unsigned);
|
||||
|
||||
if (templ->type == DATA_VARCHAR
|
||||
|| templ->type == DATA_VARMYSQL
|
||||
|| templ->type == DATA_BINARY) {
|
||||
/* Pad with trailing spaces */
|
||||
data = mysql_rec + templ->mysql_col_offset;
|
||||
|
||||
/* Handle UCS2 strings differently. As no new
|
||||
collations will be introduced in 4.1, we
|
||||
hardcode the charset-collation codes here.
|
||||
5.0 will use a different approach. */
|
||||
if (templ->charset == 35
|
||||
|| templ->charset == 90
|
||||
|| (templ->charset >= 128
|
||||
&& templ->charset <= 144)) {
|
||||
/* space=0x0020 */
|
||||
ulint col_len = templ->mysql_col_len;
|
||||
|
||||
ut_a(!(col_len & 1));
|
||||
if (len & 1) {
|
||||
/* A 0x20 has been stripped
|
||||
from the column.
|
||||
Pad it back. */
|
||||
goto pad_0x20;
|
||||
}
|
||||
/* Pad the rest of the string
|
||||
with 0x0020 */
|
||||
while (len < col_len) {
|
||||
data[len++] = 0x00;
|
||||
pad_0x20:
|
||||
data[len++] = 0x20;
|
||||
}
|
||||
} else {
|
||||
/* space=0x20 */
|
||||
memset(data + len, 0x20,
|
||||
templ->mysql_col_len - len);
|
||||
}
|
||||
}
|
||||
|
||||
/* Cleanup */
|
||||
if (extern_field_heap) {
|
||||
mem_heap_free(extern_field_heap);
|
||||
@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec(
|
||||
pad_char = '\0';
|
||||
}
|
||||
|
||||
memset(mysql_rec + templ->mysql_col_offset, pad_char,
|
||||
templ->mysql_col_len);
|
||||
/* Handle UCS2 strings differently. As no new
|
||||
collations will be introduced in 4.1,
|
||||
we hardcode the charset-collation codes here.
|
||||
5.0 will use a different approach. */
|
||||
if (templ->charset == 35
|
||||
|| templ->charset == 90
|
||||
|| (templ->charset >= 128
|
||||
&& templ->charset <= 144)) {
|
||||
/* There are two bytes per char, so the length
|
||||
has to be an even number. */
|
||||
ut_a(!(templ->mysql_col_len & 1));
|
||||
data = mysql_rec + templ->mysql_col_offset;
|
||||
len = templ->mysql_col_len;
|
||||
/* Pad with 0x0020. */
|
||||
while (len >= 2) {
|
||||
*data++ = 0x00;
|
||||
*data++ = 0x20;
|
||||
len -= 2;
|
||||
}
|
||||
} else {
|
||||
memset(mysql_rec + templ->mysql_col_offset,
|
||||
pad_char, templ->mysql_col_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2248,6 +2248,8 @@ build_template(
|
||||
|
||||
templ->mysql_col_len = (ulint) field->pack_length();
|
||||
templ->type = get_innobase_type_from_mysql_type(field);
|
||||
templ->charset = dtype_get_charset_coll_noninline(
|
||||
index->table->cols[i].type.prtype);
|
||||
templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
|
||||
|
||||
if (templ->type == DATA_BLOB) {
|
||||
|
Reference in New Issue
Block a user