diff --git a/mysql-test/r/loaddata.result b/mysql-test/r/loaddata.result index 2d67d24bedd..2f2a3579eec 100644 --- a/mysql-test/r/loaddata.result +++ b/mysql-test/r/loaddata.result @@ -507,7 +507,7 @@ DROP TABLE t1; # Bug#11765139 58069: LOAD DATA INFILE: VALGRIND REPORTS INVALID MEMORY READS AND WRITES WITH U # CREATE TABLE t1(f1 INT); -SELECT 0xE1BB30 INTO OUTFILE 't1.dat'; +SELECT 0xE1C330 INTO OUTFILE 't1.dat'; LOAD DATA INFILE 't1.dat' IGNORE INTO TABLE t1 CHARACTER SET utf8; DROP TABLE t1; # @@ -532,3 +532,27 @@ FIELDS TERMINATED BY 't' LINES TERMINATED BY ''; Got one of the listed errors SET @@sql_mode= @old_mode; DROP TABLE t1; + +# +# Bug#23080148 - Backport of Bug#20683959. +# Bug#20683959 LOAD DATA INFILE IGNORES A SPECIFIC ROW SILENTLY +# UNDER DB CHARSET IS UTF8. +# +CREATE DATABASE d1 CHARSET latin1; +USE d1; +CREATE TABLE t1 (val TEXT); +LOAD DATA INFILE '../../std_data/bug20683959loaddata.txt' INTO TABLE t1; +SELECT COUNT(*) FROM t1; +COUNT(*) +1 +SELECT HEX(val) FROM t1; +HEX(val) +C38322525420406E696F757A656368756E3A20E98198E2889AF58081AEE7B99DE4B88AE383A3E7B99DE69690F58087B3E7B9A7EFBDA8E7B99DEFBDB3E7B99DE78999E880B3E7B8BAEFBDAAE7B9A7E89699E296A1E7B8BAE4BBA3EFBD8CE7B8BAEFBDA9E7B8B2E2889AE38184E7B99DEFBDB3E7B99DE4B88AE383A3E7B99DE69690F58087B3E7B9A7EFBDA8E7B99DEFBDB3E7B99DE5B3A8EFBD84E8ABA0EFBDA8E89C89F580948EE599AAE7B8BAEFBDAAE7B8BAE9A198EFBDA9EFBDB1E7B9A7E581B5E289A0E7B8BAEFBDBEE7B9A7E9A194EFBDA9E882B4EFBDA5EFBDB5E980A7F5808B96E28693E99EABE38287E58F99E7B8BAE58AB1E28691E7B8BAF5808B9AE7828AE98095EFBDB1E7B8BAEFBDAFE7B8B2E288ABE6A89FE89EB3E6BA98F58081ADE88EA0EFBDBAE98095E6BA98F58081AEE89D93EFBDBAE8AD9BEFBDACE980A7F5808B96E28693E7B8BAF580918EE288AAE7B8BAE4B88AEFBC9EE7B8BAE4B99DE28691E7B8BAF5808B96EFBCA0E88DB3E6A68AEFBDB9EFBDB3E981B2E5B3A8E296A1E7B8BAE7A4BCE7828AE88DB3E6A68AEFBDB0EFBDBDE7B8BAA0E7B8BAE88B93EFBDBEE5B899EFBC9E +CREATE DATABASE d2 CHARSET utf8; +USE d2; +CREATE TABLE t1 (val TEXT); +LOAD DATA INFILE '../../std_data/bug20683959loaddata.txt' INTO TABLE t1; +ERROR HY000: Invalid utf8 character string: '"RT @niouzechun: \9058\221A' +DROP TABLE d1.t1, d2.t1; +DROP DATABASE d1; +DROP DATABASE d2; diff --git a/mysql-test/std_data/bug20683959loaddata.txt b/mysql-test/std_data/bug20683959loaddata.txt new file mode 100644 index 00000000000..1878cc78879 --- /dev/null +++ b/mysql-test/std_data/bug20683959loaddata.txt @@ -0,0 +1 @@ +Ã"RT @niouzechun: 遘√繝上ャ繝斐繧ィ繝ウ繝牙耳縺ェ繧薙□縺代l縺ゥ縲√い繝ウ繝上ャ繝斐繧ィ繝ウ繝峨d諠ィ蜉噪縺ェ縺願ゥア繧偵≠縺セ繧顔ゥ肴・オ逧↓鞫ょ叙縺励↑縺炊逕ア縺ッ縲∫樟螳溘莠コ逕溘蝓コ譛ャ逧↓縺∪縺上>縺九↑縺@荳榊ケウ遲峨□縺礼炊荳榊ース縺縺苓セ帙> diff --git a/mysql-test/t/loaddata.test b/mysql-test/t/loaddata.test index aa7be52484e..9a664b84843 100644 --- a/mysql-test/t/loaddata.test +++ b/mysql-test/t/loaddata.test @@ -610,7 +610,7 @@ disconnect con1; --echo # CREATE TABLE t1(f1 INT); -EVAL SELECT 0xE1BB30 INTO OUTFILE 't1.dat'; +EVAL SELECT 0xE1C330 INTO OUTFILE 't1.dat'; --disable_warnings LOAD DATA INFILE 't1.dat' IGNORE INTO TABLE t1 CHARACTER SET utf8; --enable_warnings @@ -656,3 +656,26 @@ SET @@sql_mode= @old_mode; --remove_file $MYSQLTEST_VARDIR/mysql DROP TABLE t1; +--echo +--echo # +--echo # Bug#23080148 - Backport of Bug#20683959. +--echo # Bug#20683959 LOAD DATA INFILE IGNORES A SPECIFIC ROW SILENTLY +--echo # UNDER DB CHARSET IS UTF8. +--echo # + +CREATE DATABASE d1 CHARSET latin1; +USE d1; +CREATE TABLE t1 (val TEXT); +LOAD DATA INFILE '../../std_data/bug20683959loaddata.txt' INTO TABLE t1; +SELECT COUNT(*) FROM t1; +SELECT HEX(val) FROM t1; + +CREATE DATABASE d2 CHARSET utf8; +USE d2; +CREATE TABLE t1 (val TEXT); +--error ER_INVALID_CHARACTER_STRING +LOAD DATA INFILE '../../std_data/bug20683959loaddata.txt' INTO TABLE t1; + +DROP TABLE d1.t1, d2.t1; +DROP DATABASE d1; +DROP DATABASE d2; diff --git a/sql/sql_load.cc b/sql/sql_load.cc index c084e5e3839..a46967a24a8 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -1363,8 +1363,8 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs, set_if_bigger(length,line_start.length()); stack=stack_pos=(int*) sql_alloc(sizeof(int)*length); - if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(0)))) - error=1; /* purecov: inspected */ + if (!(buffer=(uchar*) my_malloc(buff_length+1,MYF(MY_WME)))) + error= true; /* purecov: inspected */ else { end_of_buff=buffer+buff_length; @@ -1556,37 +1556,50 @@ int READ_INFO::read_field() } } #ifdef USE_MB - if (my_mbcharlen(read_charset, chr) > 1 && - to + my_mbcharlen(read_charset, chr) <= end_of_buff) - { - uchar* p= to; - int ml, i; - *to++ = chr; - - ml= my_mbcharlen(read_charset, chr); - - for (i= 1; i < ml; i++) + uint ml= my_mbcharlen(read_charset, chr); + if (ml == 0) { - chr= GET; - if (chr == my_b_EOF) - { - /* - Need to back up the bytes already ready from illformed - multi-byte char - */ - to-= i; - goto found_eof; - } - *to++ = chr; + *to= '\0'; + my_error(ER_INVALID_CHARACTER_STRING, MYF(0), + read_charset->csname, buffer); + error= true; + return 1; } - if (my_ismbchar(read_charset, + + if (ml > 1 && + to + ml <= end_of_buff) + { + uchar* p= to; + *to++ = chr; + + for (uint i= 1; i < ml; i++) + { + chr= GET; + if (chr == my_b_EOF) + { + /* + Need to back up the bytes already ready from illformed + multi-byte char + */ + to-= i; + goto found_eof; + } + *to++ = chr; + } + if (my_ismbchar(read_charset, (const char *)p, (const char *)to)) - continue; - for (i= 0; i < ml; i++) - PUSH(*--to); - chr= GET; - } + continue; + for (uint i= 0; i < ml; i++) + PUSH(*--to); + chr= GET; + } + else if (ml > 1) + { + // Buffer is too small, exit while loop, and reallocate. + PUSH(chr); + break; + } #endif *to++ = (uchar) chr; } @@ -1830,7 +1843,15 @@ int READ_INFO::read_value(int delim, String *val) for (chr= GET; my_tospace(chr) != delim && chr != my_b_EOF;) { #ifdef USE_MB - if (my_mbcharlen(read_charset, chr) > 1) + uint ml= my_mbcharlen(read_charset, chr); + if (ml == 0) + { + chr= my_b_EOF; + val->length(0); + return chr; + } + + if (ml > 1) { DBUG_PRINT("read_xml",("multi byte")); int i, ml= my_mbcharlen(read_charset, chr);