1
0
mirror of https://github.com/MariaDB/server.git synced 2025-07-29 05:21:33 +03:00

MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion

This commit is contained in:
Alexander Barkov
2014-09-18 12:40:55 +04:00
parent 391fddf660
commit 8286bcd721
5 changed files with 96 additions and 4 deletions

View File

@ -7660,5 +7660,60 @@ DROP FUNCTION iswellformed;
DROP TABLE allbytes; DROP TABLE allbytes;
# End of ctype_backslash.inc # End of ctype_backslash.inc
# #
# MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
#
SET NAMES utf8, character_set_connection=latin1;
SELECT '<27>';
?
?
SELECT HEX('<27>');
HEX('<27>')
3F
SELECT HEX(CAST('<27>' AS CHAR CHARACTER SET utf8));
HEX(CAST('<27>' AS CHAR CHARACTER SET utf8))
3F
SELECT HEX(CAST('<27>' AS CHAR CHARACTER SET latin1));
HEX(CAST('<27>' AS CHAR CHARACTER SET latin1))
3F
SELECT HEX(CONVERT('<27>' USING utf8));
HEX(CONVERT('<27>' USING utf8))
3F
SELECT HEX(CONVERT('<27>' USING latin1));
HEX(CONVERT('<27>' USING latin1))
3F
SELECT '<27>x';
?x
?x
SELECT HEX('<27>x');
HEX('<27>x')
3F78
SELECT HEX(CAST('<27>x' AS CHAR CHARACTER SET utf8));
HEX(CAST('<27>x' AS CHAR CHARACTER SET utf8))
3F78
SELECT HEX(CAST('<27>x' AS CHAR CHARACTER SET latin1));
HEX(CAST('<27>x' AS CHAR CHARACTER SET latin1))
3F78
SELECT HEX(CONVERT('<27>x' USING utf8));
HEX(CONVERT('<27>x' USING utf8))
3F78
SELECT HEX(CONVERT('<27>x' USING latin1));
HEX(CONVERT('<27>x' USING latin1))
3F78
SET NAMES utf8;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES ('<27>'),('<27>#');
Warnings:
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
SHOW WARNINGS;
Level Code Message
Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
SELECT HEX(a),a FROM t1;
HEX(a) a
3F ?
3F23 ?#
DROP TABLE t1;
#
# End of 10.0 tests # End of 10.0 tests
# #

View File

@ -30,7 +30,7 @@ SET @@session.character_set_client = utf8;
INSERT INTO t1 values('<03>'); INSERT INTO t1 values('<03>');
SELECT hex(a),CHAR_LENGTH(a) FROM t1; SELECT hex(a),CHAR_LENGTH(a) FROM t1;
hex(a) CHAR_LENGTH(a) hex(a) CHAR_LENGTH(a)
03 1 033F 2
DELETE FROM t1; DELETE FROM t1;
DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t1;
SET @@global.character_set_client = @global_character_set_client; SET @@global.character_set_client = @global_character_set_client;

View File

@ -210,6 +210,29 @@ set names latin1;
let $ctype_unescape_combinations=selected; let $ctype_unescape_combinations=selected;
--source include/ctype_unescape.inc --source include/ctype_unescape.inc
--echo #
--echo # MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
--echo #
# Regression test: a string whose bytes do not form a complete character
# must come out of conversion as '?' (hex 3F) instead of being dropped.
# NOTE(review): each quoted literal below carries one raw byte; the recorded
# warnings for the INSERT at the end report it as \xC2 -- presumably the same
# byte is used in every literal; confirm against the raw test file.

# Client character set is utf8, connection character set is latin1.
SET NAMES utf8, character_set_connection=latin1;

# Case 1: the literal is only the incomplete sequence.
# The recorded result for every variant below is '?' / 3F.
SELECT '<27>';
SELECT HEX('<27>');
SELECT HEX(CAST('<27>' AS CHAR CHARACTER SET utf8));
SELECT HEX(CAST('<27>' AS CHAR CHARACTER SET latin1));
SELECT HEX(CONVERT('<27>' USING utf8));
SELECT HEX(CONVERT('<27>' USING latin1));

# Case 2: the same byte followed by an ordinary 'x'.
# The recorded result for every variant below is '?x' / 3F78.
SELECT '<27>x';
SELECT HEX('<27>x');
SELECT HEX(CAST('<27>x' AS CHAR CHARACTER SET utf8));
SELECT HEX(CAST('<27>x' AS CHAR CHARACTER SET latin1));
SELECT HEX(CONVERT('<27>x' USING utf8));
SELECT HEX(CONVERT('<27>x' USING latin1));

# Case 3: storing such literals into a latin1 column over a utf8 connection.
# The recorded result has one warning 1366 ("Incorrect string value") per row
# and stored values 3F and 3F23.
SET NAMES utf8;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
INSERT INTO t1 VALUES ('<27>'),('<27>#');
SHOW WARNINGS;
SELECT HEX(a),a FROM t1;
DROP TABLE t1;
--echo # --echo #
--echo # End of 10.0 tests --echo # End of 10.0 tests
--echo # --echo #

View File

@ -1022,8 +1022,15 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
wc= '?'; wc= '?';
} }
else else
break; // Not enough characters {
if ((uchar *) from >= from_end)
break; // End of line
// Incomplete byte sequence
if (!*well_formed_error_pos)
*well_formed_error_pos= from;
from++;
wc= '?';
}
outp: outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
to+= cnvres; to+= cnvres;

View File

@ -1066,7 +1066,14 @@ my_convert_internal(char *to, uint32 to_length,
wc= '?'; wc= '?';
} }
else else
break; // Not enough characters {
if ((uchar *) from >= from_end)
break; /* End of line */
/* Incomplete byte sequence */
error_count++;
from++;
wc= '?';
}
outp: outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)