mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
This commit is contained in:
@@ -33913,3 +33913,24 @@ DROP TABLE t1;
|
||||
#
|
||||
# End of 10.1 tests
|
||||
#
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
#
|
||||
# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||
#
|
||||
CREATE TABLE t1 (a TEXT CHARACTER SET eucjpms);
|
||||
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET eucjpms IGNORE 4 LINES;
|
||||
SELECT HEX(a) FROM t1;
|
||||
HEX(a)
|
||||
3F
|
||||
78787831
|
||||
3F3F
|
||||
78787832
|
||||
8FA1A1
|
||||
78787833
|
||||
3F3F
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
|
@@ -26218,3 +26218,24 @@ DROP TABLE t1;
|
||||
#
|
||||
# End of 10.1 tests
|
||||
#
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
#
|
||||
# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||
#
|
||||
CREATE TABLE t1 (a TEXT CHARACTER SET ujis);
|
||||
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET ujis IGNORE 4 LINES;
|
||||
SELECT HEX(a) FROM t1;
|
||||
HEX(a)
|
||||
3F
|
||||
78787831
|
||||
3F3F
|
||||
78787832
|
||||
8FA1A1
|
||||
78787833
|
||||
3F3F
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
|
@@ -10426,5 +10426,27 @@ b
|
||||
c
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||
#
|
||||
CREATE TABLE t1 (a TEXT CHARACTER SET utf8);
|
||||
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8 IGNORE 4 LINES;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xD0' for column 'a' at row 1
|
||||
Warning 1366 Incorrect string value: '\xE1\x80' for column 'a' at row 3
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 5
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x8E' for column 'a' at row 7
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 8
|
||||
SELECT HEX(a) FROM t1;
|
||||
HEX(a)
|
||||
3F
|
||||
78787831
|
||||
3F3F
|
||||
78787832
|
||||
3F3F3F
|
||||
78787833
|
||||
3F3F3F3F
|
||||
3F3F3F
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
|
@@ -3398,3 +3398,30 @@ DROP FUNCTION f1;
|
||||
#
|
||||
# End of 10.1 tests
|
||||
#
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
#
|
||||
# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||
#
|
||||
CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4);
|
||||
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8mb4 IGNORE 4 LINES;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xD0' for column 'a' at row 1
|
||||
Warning 1366 Incorrect string value: '\xE1\x80' for column 'a' at row 3
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 5
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 8
|
||||
SELECT HEX(a) FROM t1;
|
||||
HEX(a)
|
||||
3F
|
||||
78787831
|
||||
3F3F
|
||||
78787832
|
||||
3F3F3F
|
||||
78787833
|
||||
F09F988E
|
||||
3F3F3F
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 10.2 tests
|
||||
#
|
||||
|
11
mysql-test/std_data/loaddata/mdev9823.ujis.txt
Normal file
11
mysql-test/std_data/loaddata/mdev9823.ujis.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
# This file has incomplete UJIS sequences {8F}, {8FA1},
|
||||
# has a valid UJIS sequence {8FA1A1},
|
||||
# and has no NL at the end:
|
||||
# {8F} \n xxx1 \n {8FA1} \n xxx2 \n {8FA1A1} \n xxx3 \n {8FA1} EOF
|
||||
<EFBFBD>
|
||||
xxx1
|
||||
<EFBFBD><EFBFBD>
|
||||
xxx2
|
||||
<EFBFBD><EFBFBD><EFBFBD>
|
||||
xxx3
|
||||
<EFBFBD><EFBFBD>
|
12
mysql-test/std_data/loaddata/mdev9823.utf8mb4.txt
Normal file
12
mysql-test/std_data/loaddata/mdev9823.utf8mb4.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
# This file has incomplete utf8mb4 sequences {D0}, {E180}, {F09F98},
|
||||
# has a valid utf8mb4 sequence {F09F988E}
|
||||
# and has no NL at the end:
|
||||
# {D0} \n xxx1 \n {E180} \n xxx2 \n {F09F98} \n xxx3 \n {F09F988E} \n {F09F98} EOF
|
||||
<EFBFBD>
|
||||
xxx1
|
||||
<EFBFBD><EFBFBD>
|
||||
xxx2
|
||||
<EFBFBD><EFBFBD><EFBFBD>
|
||||
xxx3
|
||||
😎
|
||||
<EFBFBD><EFBFBD><EFBFBD>
|
@@ -566,3 +566,19 @@ DROP TABLE t1;
|
||||
--echo #
|
||||
--echo # End of 10.1 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||
--echo #
|
||||
CREATE TABLE t1 (a TEXT CHARACTER SET eucjpms);
|
||||
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET eucjpms IGNORE 4 LINES;
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
--echo #
|
||||
|
@@ -1396,3 +1396,20 @@ SELECT HEX(a) FROM t1 ORDER BY a;DROP TABLE t1;
|
||||
--echo #
|
||||
--echo # End of 10.1 tests
|
||||
--echo #
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||
--echo #
|
||||
CREATE TABLE t1 (a TEXT CHARACTER SET ujis);
|
||||
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET ujis IGNORE 4 LINES;
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
--echo #
|
||||
|
@@ -1966,6 +1966,14 @@ LOAD DATA INFILE '../../std_data/loaddata/mdev9824.txt' INTO TABLE t1 CHARACTER
|
||||
SELECT c1 FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||
--echo #
|
||||
CREATE TABLE t1 (a TEXT CHARACTER SET utf8);
|
||||
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8 IGNORE 4 LINES;
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
--echo #
|
||||
|
@@ -1919,3 +1919,20 @@ DROP FUNCTION f1;
|
||||
--echo #
|
||||
--echo # End of 10.1 tests
|
||||
--echo #
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||
--echo #
|
||||
CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4);
|
||||
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8mb4 IGNORE 4 LINES;
|
||||
SELECT HEX(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.2 tests
|
||||
--echo #
|
||||
|
@@ -1589,38 +1589,34 @@ int READ_INFO::read_field()
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
#ifdef USE_MB
|
||||
if (my_mbcharlen(read_charset, chr) > 1)
|
||||
{
|
||||
uint32 length0= data.length();
|
||||
int ml= my_mbcharlen(read_charset, chr);
|
||||
data.append(chr);
|
||||
|
||||
for (int i= 1; i < ml; i++)
|
||||
{
|
||||
chr= GET;
|
||||
if (chr == my_b_EOF)
|
||||
{
|
||||
/*
|
||||
Need to back up the bytes already read from the ill-formed
|
||||
multi-byte char
|
||||
*/
|
||||
data.length(length0);
|
||||
goto found_eof;
|
||||
}
|
||||
data.append(chr);
|
||||
}
|
||||
if (my_ismbchar(read_charset,
|
||||
(const char *) data.ptr() + length0,
|
||||
(const char *) data.end()))
|
||||
continue;
|
||||
for (int i= 0; i < ml; i++)
|
||||
PUSH(data.end()[-1 - i]);
|
||||
data.length(length0);
|
||||
chr= GET;
|
||||
}
|
||||
#endif
|
||||
data.append(chr);
|
||||
if (use_mb(read_charset))
|
||||
{
|
||||
int chlen;
|
||||
if ((chlen= my_charlen(read_charset, data.end() - 1,
|
||||
data.end())) != 1)
|
||||
{
|
||||
for (uint32 length0= data.length() - 1 ; MY_CS_IS_TOOSMALL(chlen); )
|
||||
{
|
||||
chr= GET;
|
||||
if (chr == my_b_EOF)
|
||||
goto found_eof;
|
||||
data.append(chr);
|
||||
chlen= my_charlen(read_charset, data.ptr() + length0, data.end());
|
||||
if (chlen == MY_CS_ILSEQ)
|
||||
{
|
||||
/**
|
||||
It has been an incomplete (but a valid) sequence so far,
|
||||
but the last byte turned it into a bad byte sequence.
|
||||
Unget the very last byte.
|
||||
*/
|
||||
data.length(data.length() - 1);
|
||||
PUSH(chr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
** We come here if buffer is too small. Enlarge it and continue
|
||||
|
@@ -199,6 +199,7 @@ static const uchar sort_order_eucjpms[]=
|
||||
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
|
||||
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
||||
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
|
||||
#define IS_MB_PREFIX2(x,y) (iseucjpms_ss3(x) && iseucjpms(y))
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
@@ -75,7 +75,13 @@ MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
|
||||
|
||||
#ifdef IS_MB3_CHAR
|
||||
if (b + 3 > e)
|
||||
{
|
||||
#ifdef IS_MB_PREFIX2
|
||||
if (!IS_MB_PREFIX2(b[0], b[1]))
|
||||
return MY_CS_ILSEQ;
|
||||
#endif
|
||||
return MY_CS_TOOSMALLN(3);
|
||||
}
|
||||
if (IS_MB3_CHAR(b[0], b[1], b[2]))
|
||||
return 3; /* Three-byte character */
|
||||
#endif
|
||||
|
@@ -198,6 +198,7 @@ static const uchar sort_order_ujis[]=
|
||||
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
|
||||
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
||||
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
|
||||
#define IS_MB_PREFIX2(x,y) (isujis_ss3(x) && isujis(y))
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
Reference in New Issue
Block a user