mirror of
https://github.com/MariaDB/server.git
synced 2025-08-08 11:22:35 +03:00
MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
This commit is contained in:
@@ -33913,3 +33913,24 @@ DROP TABLE t1;
|
|||||||
#
|
#
|
||||||
# End of 10.1 tests
|
# End of 10.1 tests
|
||||||
#
|
#
|
||||||
|
#
|
||||||
|
# End of 10.2 tests
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (a TEXT CHARACTER SET eucjpms);
|
||||||
|
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET eucjpms IGNORE 4 LINES;
|
||||||
|
SELECT HEX(a) FROM t1;
|
||||||
|
HEX(a)
|
||||||
|
3F
|
||||||
|
78787831
|
||||||
|
3F3F
|
||||||
|
78787832
|
||||||
|
8FA1A1
|
||||||
|
78787833
|
||||||
|
3F3F
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
|
# End of 10.2 tests
|
||||||
|
#
|
||||||
|
@@ -26218,3 +26218,24 @@ DROP TABLE t1;
|
|||||||
#
|
#
|
||||||
# End of 10.1 tests
|
# End of 10.1 tests
|
||||||
#
|
#
|
||||||
|
#
|
||||||
|
# End of 10.2 tests
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (a TEXT CHARACTER SET ujis);
|
||||||
|
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET ujis IGNORE 4 LINES;
|
||||||
|
SELECT HEX(a) FROM t1;
|
||||||
|
HEX(a)
|
||||||
|
3F
|
||||||
|
78787831
|
||||||
|
3F3F
|
||||||
|
78787832
|
||||||
|
8FA1A1
|
||||||
|
78787833
|
||||||
|
3F3F
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
|
# End of 10.2 tests
|
||||||
|
#
|
||||||
|
@@ -10426,5 +10426,27 @@ b
|
|||||||
c
|
c
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
#
|
#
|
||||||
|
# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (a TEXT CHARACTER SET utf8);
|
||||||
|
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8 IGNORE 4 LINES;
|
||||||
|
Warnings:
|
||||||
|
Warning 1366 Incorrect string value: '\xD0' for column 'a' at row 1
|
||||||
|
Warning 1366 Incorrect string value: '\xE1\x80' for column 'a' at row 3
|
||||||
|
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 5
|
||||||
|
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x8E' for column 'a' at row 7
|
||||||
|
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 8
|
||||||
|
SELECT HEX(a) FROM t1;
|
||||||
|
HEX(a)
|
||||||
|
3F
|
||||||
|
78787831
|
||||||
|
3F3F
|
||||||
|
78787832
|
||||||
|
3F3F3F
|
||||||
|
78787833
|
||||||
|
3F3F3F3F
|
||||||
|
3F3F3F
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
# End of 10.2 tests
|
# End of 10.2 tests
|
||||||
#
|
#
|
||||||
|
@@ -3398,3 +3398,30 @@ DROP FUNCTION f1;
|
|||||||
#
|
#
|
||||||
# End of 10.1 tests
|
# End of 10.1 tests
|
||||||
#
|
#
|
||||||
|
#
|
||||||
|
# End of 10.2 tests
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||||
|
#
|
||||||
|
CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4);
|
||||||
|
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8mb4 IGNORE 4 LINES;
|
||||||
|
Warnings:
|
||||||
|
Warning 1366 Incorrect string value: '\xD0' for column 'a' at row 1
|
||||||
|
Warning 1366 Incorrect string value: '\xE1\x80' for column 'a' at row 3
|
||||||
|
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 5
|
||||||
|
Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 8
|
||||||
|
SELECT HEX(a) FROM t1;
|
||||||
|
HEX(a)
|
||||||
|
3F
|
||||||
|
78787831
|
||||||
|
3F3F
|
||||||
|
78787832
|
||||||
|
3F3F3F
|
||||||
|
78787833
|
||||||
|
F09F988E
|
||||||
|
3F3F3F
|
||||||
|
DROP TABLE t1;
|
||||||
|
#
|
||||||
|
# End of 10.2 tests
|
||||||
|
#
|
||||||
|
11
mysql-test/std_data/loaddata/mdev9823.ujis.txt
Normal file
11
mysql-test/std_data/loaddata/mdev9823.ujis.txt
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# This file has incomplete UJIS sequences {8F}, {8FA1},
|
||||||
|
# has a valid UJIS sequence {8FA1A1},
|
||||||
|
# and has no NL at the end:
|
||||||
|
# {8F} \n xxx1 {8FA1} \n xxx2 {8FA1A1} \n xxx3 \n {8FA1} EOF
|
||||||
|
<EFBFBD>
|
||||||
|
xxx1
|
||||||
|
<EFBFBD><EFBFBD>
|
||||||
|
xxx2
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD>
|
||||||
|
xxx3
|
||||||
|
<EFBFBD><EFBFBD>
|
12
mysql-test/std_data/loaddata/mdev9823.utf8mb4.txt
Normal file
12
mysql-test/std_data/loaddata/mdev9823.utf8mb4.txt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# This file has incomplete utf8mb4 sequences {D0}, {E180}, {F09F98},
|
||||||
|
# has a valid utf8mb4 sequence {F09F988E}
|
||||||
|
# and has no NL at the end:
|
||||||
|
# {D0} \n xxx1 {E180} xxx2 \n {F09F98} \n xxx3 {F09F988E} {F09F98} EOF
|
||||||
|
<EFBFBD>
|
||||||
|
xxx1
|
||||||
|
<EFBFBD><EFBFBD>
|
||||||
|
xxx2
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD>
|
||||||
|
xxx3
|
||||||
|
😎
|
||||||
|
<EFBFBD><EFBFBD><EFBFBD>
|
@@ -566,3 +566,19 @@ DROP TABLE t1;
|
|||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.1 tests
|
--echo # End of 10.1 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.2 tests
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||||
|
--echo #
|
||||||
|
CREATE TABLE t1 (a TEXT CHARACTER SET eucjpms);
|
||||||
|
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET eucjpms IGNORE 4 LINES;
|
||||||
|
SELECT HEX(a) FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.2 tests
|
||||||
|
--echo #
|
||||||
|
@@ -1396,3 +1396,20 @@ SELECT HEX(a) FROM t1 ORDER BY a;DROP TABLE t1;
|
|||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.1 tests
|
--echo # End of 10.1 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.2 tests
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||||
|
--echo #
|
||||||
|
CREATE TABLE t1 (a TEXT CHARACTER SET ujis);
|
||||||
|
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET ujis IGNORE 4 LINES;
|
||||||
|
SELECT HEX(a) FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.2 tests
|
||||||
|
--echo #
|
||||||
|
@@ -1966,6 +1966,14 @@ LOAD DATA INFILE '../../std_data/loaddata/mdev9824.txt' INTO TABLE t1 CHARACTER
|
|||||||
SELECT c1 FROM t1;
|
SELECT c1 FROM t1;
|
||||||
DROP TABLE t1;
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||||
|
--echo #
|
||||||
|
CREATE TABLE t1 (a TEXT CHARACTER SET utf8);
|
||||||
|
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8 IGNORE 4 LINES;
|
||||||
|
SELECT HEX(a) FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.2 tests
|
--echo # End of 10.2 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
@@ -1919,3 +1919,20 @@ DROP FUNCTION f1;
|
|||||||
--echo #
|
--echo #
|
||||||
--echo # End of 10.1 tests
|
--echo # End of 10.1 tests
|
||||||
--echo #
|
--echo #
|
||||||
|
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.2 tests
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
|
||||||
|
--echo #
|
||||||
|
CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4);
|
||||||
|
LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8mb4 IGNORE 4 LINES;
|
||||||
|
SELECT HEX(a) FROM t1;
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # End of 10.2 tests
|
||||||
|
--echo #
|
||||||
|
@@ -1589,38 +1589,34 @@ int READ_INFO::read_field()
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef USE_MB
|
|
||||||
if (my_mbcharlen(read_charset, chr) > 1)
|
|
||||||
{
|
|
||||||
uint32 length0= data.length();
|
|
||||||
int ml= my_mbcharlen(read_charset, chr);
|
|
||||||
data.append(chr);
|
|
||||||
|
|
||||||
for (int i= 1; i < ml; i++)
|
|
||||||
{
|
|
||||||
chr= GET;
|
|
||||||
if (chr == my_b_EOF)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
Need to back up the bytes already ready from illformed
|
|
||||||
multi-byte char
|
|
||||||
*/
|
|
||||||
data.length(length0);
|
|
||||||
goto found_eof;
|
|
||||||
}
|
|
||||||
data.append(chr);
|
|
||||||
}
|
|
||||||
if (my_ismbchar(read_charset,
|
|
||||||
(const char *) data.ptr() + length0,
|
|
||||||
(const char *) data.end()))
|
|
||||||
continue;
|
|
||||||
for (int i= 0; i < ml; i++)
|
|
||||||
PUSH(data.end()[-1 - i]);
|
|
||||||
data.length(length0);
|
|
||||||
chr= GET;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
data.append(chr);
|
data.append(chr);
|
||||||
|
if (use_mb(read_charset))
|
||||||
|
{
|
||||||
|
int chlen;
|
||||||
|
if ((chlen= my_charlen(read_charset, data.end() - 1,
|
||||||
|
data.end())) != 1)
|
||||||
|
{
|
||||||
|
for (uint32 length0= data.length() - 1 ; MY_CS_IS_TOOSMALL(chlen); )
|
||||||
|
{
|
||||||
|
chr= GET;
|
||||||
|
if (chr == my_b_EOF)
|
||||||
|
goto found_eof;
|
||||||
|
data.append(chr);
|
||||||
|
chlen= my_charlen(read_charset, data.ptr() + length0, data.end());
|
||||||
|
if (chlen == MY_CS_ILSEQ)
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
It has been an incomplete (but a valid) sequence so far,
|
||||||
|
but the last byte turned it into a bad byte sequence.
|
||||||
|
Unget the very last byte.
|
||||||
|
*/
|
||||||
|
data.length(data.length() - 1);
|
||||||
|
PUSH(chr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
** We come here if buffer is too small. Enlarge it and continue
|
** We come here if buffer is too small. Enlarge it and continue
|
||||||
|
@@ -199,6 +199,7 @@ static const uchar sort_order_eucjpms[]=
|
|||||||
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
|
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
|
||||||
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
||||||
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
|
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
|
||||||
|
#define IS_MB_PREFIX2(x,y) (iseucjpms_ss3(x) && iseucjpms(y))
|
||||||
#define DEFINE_ASIAN_ROUTINES
|
#define DEFINE_ASIAN_ROUTINES
|
||||||
#include "ctype-mb.ic"
|
#include "ctype-mb.ic"
|
||||||
|
|
||||||
|
@@ -75,7 +75,13 @@ MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
|
|||||||
|
|
||||||
#ifdef IS_MB3_CHAR
|
#ifdef IS_MB3_CHAR
|
||||||
if (b + 3 > e)
|
if (b + 3 > e)
|
||||||
|
{
|
||||||
|
#ifdef IS_MB_PREFIX2
|
||||||
|
if (!IS_MB_PREFIX2(b[0], b[1]))
|
||||||
|
return MY_CS_ILSEQ;
|
||||||
|
#endif
|
||||||
return MY_CS_TOOSMALLN(3);
|
return MY_CS_TOOSMALLN(3);
|
||||||
|
}
|
||||||
if (IS_MB3_CHAR(b[0], b[1], b[2]))
|
if (IS_MB3_CHAR(b[0], b[1], b[2]))
|
||||||
return 3; /* Three-byte character */
|
return 3; /* Three-byte character */
|
||||||
#endif
|
#endif
|
||||||
|
@@ -198,6 +198,7 @@ static const uchar sort_order_ujis[]=
|
|||||||
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
|
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
|
||||||
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
||||||
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
|
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
|
||||||
|
#define IS_MB_PREFIX2(x,y) (isujis_ss3(x) && isujis(y))
|
||||||
#define DEFINE_ASIAN_ROUTINES
|
#define DEFINE_ASIAN_ROUTINES
|
||||||
#include "ctype-mb.ic"
|
#include "ctype-mb.ic"
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user