1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

InnoDB: Fixed bugs in the padding and trimming of trailing spaces

that affected the UCS2 character set. (Bug #7350)


innobase/data/data0type.c:
  Added dtype_get_charset_coll_noninline()
innobase/include/data0type.h:
  Added dtype_get_charset_coll_noninline()
innobase/include/row0mysql.h:
  Added charset field to mysql_row_templ_struct.
innobase/include/row0mysql.ic:
  row_mysql_store_col_in_innobase_format():
  When removing trailing spaces, treat the UCS2 character set properly.
innobase/rem/rem0cmp.c:
  cmp_whole_field(): Do not remove trailing 0x20 bytes, as
  innobase_mysql_cmp() implicitly pads the strings with trailing
  spaces as necessary.
innobase/row/row0sel.c:
  row_sel_field_store_in_mysql_format(): Do not pad with 0x20 bytes.
  row_sel_store_mysql_rec(): Pad VARCHARs with trailing spaces
  (0x20, or 0x0020 in UCS2).
sql/ha_innodb.cc:
  build_template(): Initialize templ->charset
This commit is contained in:
unknown
2004-12-17 18:35:11 +02:00
parent 7b592c9e24
commit 7b06313785
7 changed files with 110 additions and 27 deletions

View File

@ -165,6 +165,17 @@ dtype_is_non_binary_string_type(
return(FALSE);
}
/*************************************************************************
Out-of-line entry point for dtype_get_charset_coll(): forwards the precise
type to it and hands back the MySQL charset-collation code it yields. */
ulint
dtype_get_charset_coll_noninline(
/*=============================*/
			/* out: value returned by
			dtype_get_charset_coll() for prtype */
	ulint	prtype)	/* in: precise data type */
{
	ulint	charset_coll;

	charset_coll = dtype_get_charset_coll(prtype);

	return(charset_coll);
}
/*************************************************************************
Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code. */

View File

@ -234,6 +234,13 @@ dtype_get_prtype(
dtype_t* type);
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. This is the
non-inline counterpart of dtype_get_charset_coll(), which is declared
UNIV_INLINE in this header; it takes the same argument and returns the
charset-collation code as a ulint. */
ulint
dtype_get_charset_coll_noninline(
/*=============================*/
ulint prtype);/* in: precise data type */
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */
UNIV_INLINE
ulint
dtype_get_charset_coll(

View File

@ -454,6 +454,8 @@ struct mysql_row_templ_struct {
zero if column cannot be NULL */
ulint type; /* column type in Innobase mtype
numbers DATA_CHAR... */
ulint charset; /* MySQL charset-collation code
of the column, or zero */
ulint is_unsigned; /* if a column type is an integer
type and this field is != 0, then
it is an unsigned integer type */

View File

@ -91,12 +91,33 @@ row_mysql_store_col_in_innobase_format(
}
} else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
|| type == DATA_BINARY) {
ptr = row_mysql_read_var_ref(&col_len, mysql_data);
/* Remove trailing spaces. */
/* Remove trailing spaces */
while (col_len > 0 && ptr[col_len - 1] == ' ') {
/* Handle UCS2 strings differently. As no new
collations will be introduced in 4.1, we hardcode the
charset-collation codes here. In 5.0, the logic will
be based on mbminlen. */
ulint cset = dtype_get_charset_coll(
dtype_get_prtype(dfield_get_type(dfield)));
ptr = row_mysql_read_var_ref(&col_len, mysql_data);
if (cset == 35/*ucs2_general_ci*/
|| cset == 90/*ucs2_bin*/
|| (cset >= 128/*ucs2_unicode_ci*/
&& cset <= 144/*ucs2_persian_ci*/)) {
/* space=0x0020 */
/* Trim "half-chars", just in case. */
col_len &= ~1;
while (col_len >= 2 && ptr[col_len - 2] == 0x00
&& ptr[col_len - 1] == 0x20) {
col_len -= 2;
}
} else {
/* space=0x20 */
while (col_len > 0 && ptr[col_len - 1] == 0x20) {
col_len--;
}
}
} else if (type == DATA_BLOB) {
ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
}

View File

@ -261,22 +261,6 @@ cmp_whole_field(
"InnoDB: comparison!\n");
}
/* MySQL does not pad the ends of strings with spaces in a
comparison. That would cause a foreign key check to fail for
non-latin1 character sets if we have different length columns.
To prevent that we remove trailing spaces here before doing
the comparison. NOTE that if we in the future map more MySQL
types to DATA_MYSQL or DATA_VARMYSQL, we have to change this
code. */
while (a_length > 0 && a[a_length - 1] == ' ') {
a_length--;
}
while (b_length > 0 && b[b_length - 1] == ' ') {
b_length--;
}
return(innobase_mysql_cmp(
(int)(type->prtype & DATA_MYSQL_TYPE_MASK),
(uint)dtype_get_charset_coll(type->prtype),

View File

@ -2204,9 +2204,6 @@ row_sel_field_store_in_mysql_format(
dest = row_mysql_store_var_len(dest, len);
ut_memcpy(dest, data, len);
/* Pad with trailing spaces */
memset(dest + len, ' ', col_len - len);
/* ut_ad(col_len >= len + 2); No real var implemented in
MySQL yet! */
@ -2335,6 +2332,44 @@ row_sel_store_mysql_rec(
templ->mysql_col_len, data, len,
templ->type, templ->is_unsigned);
if (templ->type == DATA_VARCHAR
|| templ->type == DATA_VARMYSQL
|| templ->type == DATA_BINARY) {
/* Pad with trailing spaces */
data = mysql_rec + templ->mysql_col_offset;
/* Handle UCS2 strings differently. As no new
collations will be introduced in 4.1, we
hardcode the charset-collation codes here.
5.0 will use a different approach. */
if (templ->charset == 35
|| templ->charset == 90
|| (templ->charset >= 128
&& templ->charset <= 144)) {
/* space=0x0020 */
ulint col_len = templ->mysql_col_len;
ut_a(!(col_len & 1));
if (len & 1) {
/* A 0x20 has been stripped
from the column.
Pad it back. */
goto pad_0x20;
}
/* Pad the rest of the string
with 0x0020 */
while (len < col_len) {
data[len++] = 0x00;
pad_0x20:
data[len++] = 0x20;
}
} else {
/* space=0x20 */
memset(data + len, 0x20,
templ->mysql_col_len - len);
}
}
/* Cleanup */
if (extern_field_heap) {
mem_heap_free(extern_field_heap);
@ -2368,8 +2403,29 @@ row_sel_store_mysql_rec(
pad_char = '\0';
}
memset(mysql_rec + templ->mysql_col_offset, pad_char,
templ->mysql_col_len);
/* Handle UCS2 strings differently. As no new
collations will be introduced in 4.1,
we hardcode the charset-collation codes here.
5.0 will use a different approach. */
if (templ->charset == 35
|| templ->charset == 90
|| (templ->charset >= 128
&& templ->charset <= 144)) {
/* There are two bytes per char, so the length
has to be an even number. */
ut_a(!(templ->mysql_col_len & 1));
data = mysql_rec + templ->mysql_col_offset;
len = templ->mysql_col_len;
/* Pad with 0x0020. */
while (len >= 2) {
*data++ = 0x00;
*data++ = 0x20;
len -= 2;
}
} else {
memset(mysql_rec + templ->mysql_col_offset,
pad_char, templ->mysql_col_len);
}
}
}

View File

@ -2248,6 +2248,8 @@ build_template(
templ->mysql_col_len = (ulint) field->pack_length();
templ->type = get_innobase_type_from_mysql_type(field);
templ->charset = dtype_get_charset_coll_noninline(
index->table->cols[i].type.prtype);
templ->is_unsigned = (ulint) (field->flags & UNSIGNED_FLAG);
if (templ->type == DATA_BLOB) {