mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
InnoDB: Fixed bugs in the padding and trimming of trailing spaces
that affected the UCS2 character set. (Bug #7350)

innobase/data/data0type.c:
  Added dtype_get_charset_coll_noninline()
innobase/include/data0type.h:
  Added dtype_get_charset_coll_noninline()
innobase/include/row0mysql.h:
  Added charset field to mysql_row_templ_struct.
innobase/include/row0mysql.ic:
  row_mysql_store_col_in_innobase_format(): When removing trailing spaces, treat the UCS2 character set properly.
innobase/rem/rem0cmp.c:
  cmp_whole_field(): Do not remove trailing 0x20 bytes, as innobase_mysql_cmp() implicitly pads the strings with trailing spaces as necessary.
innobase/row/row0sel.c:
  row_sel_field_store_in_mysql_format(): Do not pad with 0x20 bytes.
  row_sel_store_mysql_rec(): Pad VARCHARs with trailing spaces (0x20, or 0x0020 in UCS2).
sql/ha_innodb.cc:
  build_template(): Initialize templ->charset
This commit is contained in:
@ -165,6 +165,17 @@ dtype_is_non_binary_string_type(
|
|||||||
return(FALSE);
|
return(FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*************************************************************************
|
||||||
|
Gets the MySQL charset-collation code for MySQL string types.
This is an ordinary (non-inline) function that simply forwards its
argument to dtype_get_charset_coll() and returns the result unchanged.
NOTE(review): presumably provided for callers that cannot use the
UNIV_INLINE version directly; confirm against the call sites. */
|
||||||
|
|
||||||
|
ulint
|
||||||
|
dtype_get_charset_coll_noninline(
|
||||||
|
/*=============================*/
/* out: the value returned by dtype_get_charset_coll() for prtype */
|
||||||
|
ulint prtype) /* in: precise data type */
|
||||||
|
{
|
||||||
|
return(dtype_get_charset_coll(prtype));
|
||||||
|
}
|
||||||
|
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Forms a precise type from the < 4.1.2 format precise type plus the
|
Forms a precise type from the < 4.1.2 format precise type plus the
|
||||||
charset-collation code. */
|
charset-collation code. */
|
||||||
|
@ -234,6 +234,13 @@ dtype_get_prtype(
|
|||||||
dtype_t* type);
|
dtype_t* type);
|
||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
Gets the MySQL charset-collation code for MySQL string types. */
|
Gets the MySQL charset-collation code for MySQL string types. */
|
||||||
|
|
||||||
|
ulint
|
||||||
|
dtype_get_charset_coll_noninline(
|
||||||
|
/*=============================*/
|
||||||
|
ulint prtype);/* in: precise data type */
|
||||||
|
/*************************************************************************
|
||||||
|
Gets the MySQL charset-collation code for MySQL string types. */
|
||||||
UNIV_INLINE
|
UNIV_INLINE
|
||||||
ulint
|
ulint
|
||||||
dtype_get_charset_coll(
|
dtype_get_charset_coll(
|
||||||
|
@ -454,6 +454,8 @@ struct mysql_row_templ_struct {
|
|||||||
zero if column cannot be NULL */
|
zero if column cannot be NULL */
|
||||||
ulint type; /* column type in Innobase mtype
|
ulint type; /* column type in Innobase mtype
|
||||||
numbers DATA_CHAR... */
|
numbers DATA_CHAR... */
|
||||||
|
ulint charset; /* MySQL charset-collation code
|
||||||
|
of the column, or zero */
|
||||||
ulint is_unsigned; /* if a column type is an integer
|
ulint is_unsigned; /* if a column type is an integer
|
||||||
type and this field is != 0, then
|
type and this field is != 0, then
|
||||||
it is an unsigned integer type */
|
it is an unsigned integer type */
|
||||||
|
@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format(
|
|||||||
}
|
}
|
||||||
} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
|
} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
|
||||||
|| type == DATA_BINARY) {
|
|| type == DATA_BINARY) {
|
||||||
|
/* Remove trailing spaces. */
|
||||||
|
|
||||||
|
/* Handle UCS2 strings differently. As no new
|
||||||
|
collations will be introduced in 4.1, we hardcode the
|
||||||
|
charset-collation codes here. In 5.0, the logic will
|
||||||
|
be based on mbminlen. */
|
||||||
|
ulint cset = dtype_get_charset_coll(
|
||||||
|
dtype_get_prtype(dfield_get_type(dfield)));
|
||||||
ptr = row_mysql_read_var_ref(&col_len, mysql_data);
|
ptr = row_mysql_read_var_ref(&col_len, mysql_data);
|
||||||
|
if (cset == 35/*ucs2_general_ci*/
|
||||||
/* Remove trailing spaces */
|
|| cset == 90/*ucs2_bin*/
|
||||||
while (col_len > 0 && ptr[col_len - 1] == ' ') {
|
|| (cset >= 128/*ucs2_unicode_ci*/
|
||||||
col_len--;
|
&& cset <= 144/*ucs2_persian_ci*/)) {
|
||||||
}
|
/* space=0x0020 */
|
||||||
|
/* Trim "half-chars", just in case. */
|
||||||
|
col_len &= ~1;
|
||||||
|
|
||||||
|
while (col_len >= 2 && ptr[col_len - 2] == 0x00
|
||||||
|
&& ptr[col_len - 1] == 0x20) {
|
||||||
|
col_len -= 2;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* space=0x20 */
|
||||||
|
while (col_len > 0 && ptr[col_len - 1] == 0x20) {
|
||||||
|
col_len--;
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if (type == DATA_BLOB) {
|
} else if (type == DATA_BLOB) {
|
||||||
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
|
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
|
||||||
}
|
}
|
||||||
|
@ -261,22 +261,6 @@ cmp_whole_field(
|
|||||||
"InnoDB: comparison!\n");
|
"InnoDB: comparison!\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* MySQL does not pad the ends of strings with spaces in a
|
|
||||||
comparison. That would cause a foreign key check to fail for
|
|
||||||
non-latin1 character sets if we have different length columns.
|
|
||||||
To prevent that we remove trailing spaces here before doing
|
|
||||||
the comparison. NOTE that if we in the future map more MySQL
|
|
||||||
types to DATA_MYSQL or DATA_VARMYSQL, we have to change this
|
|
||||||
code. */
|
|
||||||
|
|
||||||
while (a_length > 0 && a[a_length - 1] == ' ') {
|
|
||||||
a_length--;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (b_length > 0 && b[b_length - 1] == ' ') {
|
|
||||||
b_length--;
|
|
||||||
}
|
|
||||||
|
|
||||||
return(innobase_mysql_cmp(
|
return(innobase_mysql_cmp(
|
||||||
(int)(type->prtype & DATA_MYSQL_TYPE_MASK),
|
(int)(type->prtype & DATA_MYSQL_TYPE_MASK),
|
||||||
(uint)dtype_get_charset_coll(type->prtype),
|
(uint)dtype_get_charset_coll(type->prtype),
|
||||||
|
@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format(
|
|||||||
dest = row_mysql_store_var_len(dest, len);
|
dest = row_mysql_store_var_len(dest, len);
|
||||||
ut_memcpy(dest, data, len);
|
ut_memcpy(dest, data, len);
|
||||||
|
|
||||||
/* Pad with trailing spaces */
|
|
||||||
memset(dest + len, ' ', col_len - len);
|
|
||||||
|
|
||||||
/* ut_ad(col_len >= len + 2); No real var implemented in
|
/* ut_ad(col_len >= len + 2); No real var implemented in
|
||||||
MySQL yet! */
|
MySQL yet! */
|
||||||
|
|
||||||
@ -2334,7 +2331,45 @@ row_sel_store_mysql_rec(
|
|||||||
mysql_rec + templ->mysql_col_offset,
|
mysql_rec + templ->mysql_col_offset,
|
||||||
templ->mysql_col_len, data, len,
|
templ->mysql_col_len, data, len,
|
||||||
templ->type, templ->is_unsigned);
|
templ->type, templ->is_unsigned);
|
||||||
|
|
||||||
|
if (templ->type == DATA_VARCHAR
|
||||||
|
|| templ->type == DATA_VARMYSQL
|
||||||
|
|| templ->type == DATA_BINARY) {
|
||||||
|
/* Pad with trailing spaces */
|
||||||
|
data = mysql_rec + templ->mysql_col_offset;
|
||||||
|
|
||||||
|
/* Handle UCS2 strings differently. As no new
|
||||||
|
collations will be introduced in 4.1, we
|
||||||
|
hardcode the charset-collation codes here.
|
||||||
|
5.0 will use a different approach. */
|
||||||
|
if (templ->charset == 35
|
||||||
|
|| templ->charset == 90
|
||||||
|
|| (templ->charset >= 128
|
||||||
|
&& templ->charset <= 144)) {
|
||||||
|
/* space=0x0020 */
|
||||||
|
ulint col_len = templ->mysql_col_len;
|
||||||
|
|
||||||
|
ut_a(!(col_len & 1));
|
||||||
|
if (len & 1) {
|
||||||
|
/* A 0x20 has been stripped
|
||||||
|
from the column.
|
||||||
|
Pad it back. */
|
||||||
|
goto pad_0x20;
|
||||||
|
}
|
||||||
|
/* Pad the rest of the string
|
||||||
|
with 0x0020 */
|
||||||
|
while (len < col_len) {
|
||||||
|
data[len++] = 0x00;
|
||||||
|
pad_0x20:
|
||||||
|
data[len++] = 0x20;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* space=0x20 */
|
||||||
|
memset(data + len, 0x20,
|
||||||
|
templ->mysql_col_len - len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Cleanup */
|
/* Cleanup */
|
||||||
if (extern_field_heap) {
|
if (extern_field_heap) {
|
||||||
mem_heap_free(extern_field_heap);
|
mem_heap_free(extern_field_heap);
|
||||||
@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec(
|
|||||||
pad_char = '\0';
|
pad_char = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
memset(mysql_rec + templ->mysql_col_offset, pad_char,
|
/* Handle UCS2 strings differently. As no new
|
||||||
templ->mysql_col_len);
|
collations will be introduced in 4.1,
|
||||||
|
we hardcode the charset-collation codes here.
|
||||||
|
5.0 will use a different approach. */
|
||||||
|
if (templ->charset == 35
|
||||||
|
|| templ->charset == 90
|
||||||
|
|| (templ->charset >= 128
|
||||||
|
&& templ->charset <= 144)) {
|
||||||
|
/* There are two bytes per char, so the length
|
||||||
|
has to be an even number. */
|
||||||
|
ut_a(!(templ->mysql_col_len & 1));
|
||||||
|
data = mysql_rec + templ->mysql_col_offset;
|
||||||
|
len = templ->mysql_col_len;
|
||||||
|
/* Pad with 0x0020. */
|
||||||
|
while (len >= 2) {
|
||||||
|
*data++ = 0x00;
|
||||||
|
*data++ = 0x20;
|
||||||
|
len -= 2;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
memset(mysql_rec + templ->mysql_col_offset,
|
||||||
|
pad_char, templ->mysql_col_len);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2248,6 +2248,8 @@ build_template(
|
|||||||
|
|
||||||
templ->mysql_col_len = (ulint) field->pack_length();
|
templ->mysql_col_len = (ulint) field->pack_length();
|
||||||
templ->type = get_innobase_type_from_mysql_type(field);
|
templ->type = get_innobase_type_from_mysql_type(field);
|
||||||
|
templ->charset = dtype_get_charset_coll_noninline(
|
||||||
|
index->table->cols[i].type.prtype);
|
||||||
templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
|
templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
|
||||||
|
|
||||||
if (templ->type == DATA_BLOB) {
|
if (templ->type == DATA_BLOB) {
|
||||||
|
Reference in New Issue
Block a user