mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
Adding thorough tests covering what happens with escaped sequences
in the SQL parser. Various backslash escapes and quote-quote escaped sequences are covered in combination with single and multi-byte characters. This is especially important for the character sets that can have 0x5C as the second byte in a multi-byte character (big5, cp932, gbk, sjis). swe7 is also a special character set, because in swe7 0x5C is used for both escape character and for "LATIN CAPITAL LETTER O WITH DIAERESIS".
This commit is contained in:
9
mysql-test/include/bytes.inc
Normal file
9
mysql-test/include/bytes.inc
Normal file
@ -0,0 +1,9 @@
|
||||
#
|
||||
# Build the temporary table "bytes" holding every single-byte value,
# 0x00 through 0xFF, one value per row.
#
# "halfs" is a scratch table with the sixteen half-byte values 0x0..0xF.
# Joining it with itself yields all 256 (high half, low half) pairs, which
# are combined into one byte each. "halfs" is dropped again at the end,
# "bytes" is left for the caller.
#
CREATE TABLE halfs (a INT);
INSERT INTO halfs (a) VALUES
  (0x00),(0x01),(0x02),(0x03),(0x04),(0x05),(0x06),(0x07),
  (0x08),(0x09),(0x0A),(0x0B),(0x0C),(0x0D),(0x0E),(0x0F);

CREATE TEMPORARY TABLE bytes (
  a BINARY(1),
  KEY(a)
) ENGINE=MyISAM;

INSERT INTO bytes (a)
SELECT CHAR((hi_half.a << 4) | lo_half.a USING BINARY)
FROM halfs AS hi_half
CROSS JOIN halfs AS lo_half;

DROP TABLE halfs;
|
21
mysql-test/include/bytes2.inc
Normal file
21
mysql-test/include/bytes2.inc
Normal file
@ -0,0 +1,21 @@
|
||||
#
|
||||
# Build the temporary table "bytes2" holding every two-byte sequence:
#   a  - the two-byte sequence itself
#   hi - the first byte of the sequence
#   lo - the second byte of the sequence
#

--source include/bytes.inc

# "bytes" is a temporary table and therefore cannot be joined with itself.
# Copy it into the non-temporary scratch table "halfs" and join that instead.
CREATE TABLE halfs (a BINARY(1));
INSERT INTO halfs (a)
SELECT a FROM bytes;

CREATE TEMPORARY TABLE bytes2 (
  a  BINARY(2),
  hi BINARY(1),
  lo BINARY(1),
  KEY(a),
  KEY(lo)
) ENGINE=MyISAM;

INSERT INTO bytes2 (a, hi, lo)
SELECT CONCAT(first_byte.a, second_byte.a), first_byte.a, second_byte.a
FROM halfs AS first_byte
CROSS JOIN halfs AS second_byte
ORDER BY first_byte.a, second_byte.a;

# The scratch copy is not needed any more
DROP TABLE halfs;
|
327
mysql-test/include/ctype_unescape.inc
Normal file
327
mysql-test/include/ctype_unescape.inc
Normal file
@ -0,0 +1,327 @@
|
||||
--echo # Start of ctype_unescape.inc

#
# Test how string literals that contain backslash escapes and quote-quote
# sequences are unescaped by the SQL parser.
# The tests assume that the single quote (') is used as the string delimiter.
#

#
# Make sure that the parser really works in the character set we need.
# Statements are composed as binary strings, because this makes it possible
# to test malformed sequences, which can appear as a result of
# mysql_real_escape_string().
# What matters for this test is that the parser unescapes using the client
# character set rather than binary. Currently this is how it works by
# default, so CHARSET('test') in the query below should return
# @@character_set_client.
#
SET @query=_binary'SELECT CHARSET(\'test\'),@@character_set_client,@@character_set_connection';
PREPARE stmt FROM @query;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;

# Remember the connection character set; it is substituted into
# statements later on in this file.
--let $CHARSET= `SELECT @@character_set_connection`

# The byte sequences to be tested, one sequence per row
CREATE TABLE allbytes (a VARBINARY(10));
|
||||
|
||||
#
|
||||
# Create various byte sequences to test. Testing the full bunch of
|
||||
# possible combinations takes about 2 minutes. So this test provides
|
||||
# variants to run with:
|
||||
# - the full set of possible combinations
|
||||
# - a reduced test of combinations for selected bytes only
|
||||
#
|
||||
|
||||
# Reduced variant: combinations of selected bytes only
if ($ctype_unescape_combinations == 'selected')
{
  --echo # Using selected bytes combinations
  --source include/bytes.inc

  #
  # "selected_bytes" collects the bytes that have a special meaning.
  # Byte sequences are generated from these bytes only, instead of from
  # all possible byte combinations, to reduce the test time.
  #
  CREATE TABLE selected_bytes (a VARBINARY(10));

  # Bytes that have a special meaning in all character sets:
  #   0x00 - mysql_real_escape_string() quotes this to '\0'
  #   0x0D - mysql_real_escape_string() quotes this to '\r'
  #   0x0A - mysql_real_escape_string() quotes this to '\n'
  #   0x1A - mysql_real_escape_string() quotes this to '\Z'
  #   0x08 - mysql_real_escape_string() does not quote this,
  #          but '\b' is unescaped to 0x08.
  #   0x09 - mysql_real_escape_string() does not quote this,
  #          but '\t' is unescaped to 0x09.
  #   0x30 - '0', as in '\0'
  #   0x5A - 'Z', as in '\Z'
  #   0x62 - 'b', as in '\b'
  #   0x6E - 'n', as in '\n'
  #   0x72 - 'r', as in '\r'
  #   0x74 - 't', as in '\t'
  INSERT INTO selected_bytes (a) VALUES
    ('\0'),('\b'),('\t'),('\r'),('\n'),('\Z');
  INSERT INTO selected_bytes (a) VALUES
    ('0'),('b'),('t'),('r'),('n'),('Z');

  #   0x22 - double quote
  #   0x25 - percent sign, '\%' is preserved as is for LIKE.
  #   0x27 - single quote
  #   0x5C - backslash
  #   0x5F - underscore, '\_' is preserved as is for LIKE.
  INSERT INTO selected_bytes (a) VALUES
    ('\\'),('_'),('%'),(0x22),(0x27);

  # Some bytes do not have any special meaning, for example basic Latin
  # letters. One of them should be enough for a good enough coverage.
  INSERT INTO selected_bytes (a) VALUES
    ('a');

  #
  # This map summarizes the bytes that have a special meaning
  # in various character sets:
  #
  #         MBHEAD           MBTAIL            NONASCII-8BIT    BAD
  #         ------           ------            -------------    ---
  # big5:   [A1..F9]         [40..7E,A1..FE]   N/A              [80..A0,FA..FF]
  # cp932:  [81..9F,E0..FC]  [40..7E,80..FC]   [A1..DF]         [FD..FF]
  # gbk:    [81..FE]         [40..7E,80..FE]   N/A              [FF]
  # sjis:   [81..9F,E0..FC]  [40..7E,80..FC]   [A1..DF]         [FD..FF]
  # swe7:   N/A              N/A               [5B..5E,7B..7E]  [80..FF]
  #
  INSERT INTO selected_bytes (a) VALUES
    (0x3F), # 7bit
    (0x40), # 7bit mbtail
    (0x7E), # 7bit mbtail nonascii-8bit
    (0x7F), # 7bit nonascii-8bit
    (0x80), # mbtail bad-mb
    (0x81), # mbhead mbtail
    (0x9F), # mbhead mbtail bad-mb
    (0xA0), # mbhead mbtail bad-mb
    (0xA1), # mbhead mbtail nonascii-8bit
    (0xE0), # mbhead mbtail
    (0xEF), # mbhead mbtail
    (0xF9), # mbhead mbtail
    (0xFA), # mbhead mbtail bad-mb
    (0xFC), # mbhead mbtail bad-mb
    (0xFD), # mbhead mbtail bad-mb
    (0xFE), # mbhead mbtail bad-mb
    (0xFF); # bad-mb

  #
  # Now populate the test table
  #

  # All single bytes: this is cheap, there are only 256 values.
  INSERT INTO allbytes (a)
  SELECT a FROM bytes;

  # Combinations of two selected bytes X and Y, in three shapes:
  # XY, \XY and \X\Y (0x5C is the backslash)
  INSERT INTO allbytes (a)
  SELECT CONCAT(first_byte.a,second_byte.a)
  FROM selected_bytes AS first_byte
  CROSS JOIN selected_bytes AS second_byte;

  INSERT INTO allbytes (a)
  SELECT CONCAT(0x5C,first_byte.a,second_byte.a)
  FROM selected_bytes AS first_byte
  CROSS JOIN selected_bytes AS second_byte;

  INSERT INTO allbytes (a)
  SELECT CONCAT(0x5C,first_byte.a,0x5C,second_byte.a)
  FROM selected_bytes AS first_byte
  CROSS JOIN selected_bytes AS second_byte;

  DROP TABLE selected_bytes;

  # Delete all sequences longer than one byte that contain neither
  # a backslash nor a quote. There is nothing special about these strings.
  DELETE FROM allbytes
  WHERE OCTET_LENGTH(a)>1
    AND LOCATE(0x5C,a)=0
    AND a NOT LIKE '%\'%'
    AND a NOT LIKE '%"%';
}
|
||||
|
||||
# Full variant (the default): all single bytes plus every two-byte
# combination XY taken from "bytes2", in three shapes: XY, \XY and \X\Y
# (0x5C is the backslash).
if ($ctype_unescape_combinations == '')
{
  --echo # Using full byte combinations
  --source include/bytes2.inc
  INSERT INTO allbytes (a) SELECT a FROM bytes;
  INSERT INTO allbytes (a) SELECT CONCAT(hi,lo) FROM bytes2;
  INSERT INTO allbytes (a) SELECT CONCAT(0x5C,hi,lo) FROM bytes2;
  INSERT INTO allbytes (a) SELECT CONCAT(0x5C,hi,0x5C,lo) FROM bytes2;
}

# Only the empty value and the value "selected" are handled in this file.
# With any other value "allbytes" would stay empty and the test would
# silently check nothing, so abort with an explicit message instead.
if ($ctype_unescape_combinations != '')
{
  if ($ctype_unescape_combinations != 'selected')
  {
    --die ctype_unescape_combinations must be either empty or set to selected
  }
}
|
||||
|
||||
|
||||
DELIMITER //;

#
# p1(val): build an INSERT statement that uses the bytes of "val" as a
# string literal, and run it as a prepared statement.
#
# The statement stores two values in the table t1:
#   a - "val" itself, passed as a hex literal
#   b - "val" enclosed in single quotes, i.e. as the SQL parser unescaped it
#
# If the statement fails with SQLSTATE 42000, the row (val, NULL) is stored
# instead. So NULL in t1.b means that the query failed due to a syntax error,
# typically because of a mis-interpreted closing quote delimiter.
#
CREATE PROCEDURE p1(val VARBINARY(10))
BEGIN
  DECLARE EXIT HANDLER FOR SQLSTATE '42000'
  BEGIN
    INSERT INTO t1 (a,b) VALUES(val,NULL);
  END;

  SET @query=CONCAT(_binary"INSERT INTO t1 (a,b) VALUES (0x",HEX(val),",'",val,"')");
  PREPARE stmt FROM @query;
  EXECUTE stmt;
  DEALLOCATE PREPARE stmt;
END//
|
||||
|
||||
#
|
||||
# p2(): walk through all records of the table "allbytes"
# and call p1() for every value found.
#
CREATE PROCEDURE p2()
BEGIN
  DECLARE seq VARBINARY(10);
  DECLARE at_end INT DEFAULT FALSE;
  DECLARE all_seqs CURSOR FOR SELECT a FROM allbytes;
  DECLARE CONTINUE HANDLER FOR NOT FOUND SET at_end=TRUE;

  OPEN all_seqs;
  FETCH all_seqs INTO seq;
  WHILE NOT at_end DO
    CALL p1(seq);
    FETCH all_seqs INTO seq;
  END WHILE;
  CLOSE all_seqs;
END//
|
||||
|
||||
|
||||
# iswellformed(a): convert the value from binary to $CHARSET and check
# whether it is still the same afterwards. CONVERT() fixes malformed strings,
# so a value that changes in CONVERT() was not wellformed.
# Returns a true value if "a" survived the conversion unchanged.
eval CREATE FUNCTION iswellformed(a VARBINARY(256)) RETURNS INT
  RETURN a=BINARY CONVERT(a USING $CHARSET)//
|
||||
|
||||
#
|
||||
# unescape(a): approximately reproduce how the SQL parser would unescape
# the binary string "a", and return the unescaped value.
#
# The work is done in steps, to avoid producing new escape sequences
# while replacing the old ones:
#
# 1. Every known escape sequence is enclosed into '{{xx}}'. These are:
#    - a backslash followed by any byte other than the LIKE pattern
#      characters _ and %
#    - two single quotes in a row (quote-quote)
#    The pattern uses PCRE Branch Reset Groups: (?|(alt1)|(alt2)|(alt3)),
#    so '\\1' in the replacement always means the match, no matter which
#    alternative it came from.
# 2. The enclosed standard escape sequences \0 \b \t \r \n \Z and \' are
#    replaced with the bytes they stand for.
# 3. The enclosed quote-quote is replaced with one single quote.
# 4. All other enclosed \x sequences are replaced with just x.
#
CREATE FUNCTION unescape(a VARBINARY(256)) RETURNS VARBINARY(256)
BEGIN
  DECLARE unescaped VARBINARY(256) DEFAULT a;

  # Step 1: enclose all known escape sequences
  SET unescaped=REGEXP_REPLACE(unescaped,'(?|(\\\\[^_%])|(\\x{27}\\x{27}))','{{\\1}}');

  # Step 2: standard escape sequences
  SET unescaped=REPLACE(unescaped,'{{\\0}}', '\0');
  SET unescaped=REPLACE(unescaped,'{{\\b}}', '\b');
  SET unescaped=REPLACE(unescaped,'{{\\t}}', '\t');
  SET unescaped=REPLACE(unescaped,'{{\\r}}', '\r');
  SET unescaped=REPLACE(unescaped,'{{\\n}}', '\n');
  SET unescaped=REPLACE(unescaped,'{{\\Z}}', '\Z');
  SET unescaped=REPLACE(unescaped,'{{\\\'}}', '\'');

  # Step 3: quote-quote
  SET unescaped=REPLACE(unescaped,'{{\'\'}}', '\'');

  # Step 4: everything else that was enclosed in step 1
  SET unescaped=REGEXP_REPLACE(unescaped, '{{\\\\(.|\\R)}}', '\\1');

  RETURN unescaped;
END//
|
||||
|
||||
|
||||
#
|
||||
# unescape_type(a,b): classify what happened during unescaping.
#
# @param a - the value before unescaping
# @param b - the value after unescaping
#
# The following return values are possible:
#   '[SyntErr]' - b IS NULL, which means a syntax error happened in p1().
#   '[Preserve]', '[Preserve][LIKE]', '[Preserve][ASCII]', '[Preserv][MB]'
#               - the value was not modified during unescaping.
#                 The second tag is absent for one-byte values, [LIKE]
#                 means that the LIKE escape sequences '\_' or '\%' were
#                 found, [ASCII] that the value consists of ASCII bytes only,
#                 and [MB] covers the rest, which is possible if 0x5C was
#                 treated as mbtail.
#   '[Trivial]' - only 0x5C bytes were removed.
#   '[Regular]' - the value was unescaped like a binary string, the way
#                 UNESCAPE() does it. Some standard escape sequences were
#                 found. No special multi-byte handling happened.
#   '[Special]' - something else happened. Should not happen.
#
# NOTE(review): '[Preserv][MB]' is spelled differently from the other
# '[Preserve]' labels. It is left as is, because the labels are part of
# the test output.
#
CREATE FUNCTION unescape_type(a VARBINARY(256),b VARBINARY(256)) RETURNS VARBINARY(256)
BEGIN
  IF b IS NULL THEN
    RETURN '[SyntErr]';
  END IF;

  IF a=b THEN
    RETURN CASE
      WHEN OCTET_LENGTH(a)=1 THEN '[Preserve]'
      WHEN a RLIKE '\\\\[_%]' THEN '[Preserve][LIKE]'
      WHEN a RLIKE '^[[:ascii:]]+$' THEN '[Preserve][ASCII]'
      ELSE '[Preserv][MB]'
    END;
  END IF;

  IF REPLACE(a,0x5C,'')=b THEN
    RETURN '[Trivial]';
  END IF;

  IF UNESCAPE(a)=b THEN
    RETURN '[Regular]';
  END IF;

  RETURN '[Special]';
END//
|
||||
|
||||
|
||||
#
|
||||
# wellformedness(a,b): tell what unescaping did to the wellformedness
# of the value, as reported by iswellformed().
#
# @param a - the value before unescaping
# @param b - the value after unescaping
#
# Returned values:
#   [FIXED] - the value was malformed and became wellformed after unescaping
#   [BROKE] - the value was wellformed and became malformed after unescaping
#   [ILSEQ] - both values (before and after unescaping) are malformed
#   ''      - both values are wellformed, or b IS NULL
#
CREATE FUNCTION wellformedness(a VARBINARY(256), b VARBINARY(256))
RETURNS VARBINARY(256)
BEGIN
  DECLARE a_ok INT;
  DECLARE b_ok INT;

  # Nothing to compare if there is no unescaped value
  IF b IS NULL THEN
    RETURN '';
  END IF;

  SET a_ok=iswellformed(a);
  SET b_ok=iswellformed(b);

  RETURN CASE
    WHEN NOT a_ok AND b_ok THEN '[FIXED]'
    WHEN a_ok AND NOT b_ok THEN '[BROKE]'
    WHEN NOT a_ok AND NOT b_ok THEN '[ILSEQ]'
    ELSE ''
  END;
END//
|
||||
|
||||
|
||||
#
|
||||
# mysql_real_escape_string_generated(a): check if the value could be
# generated by mysql_real_escape_string(), or can only come from
# a direct user input.
#
# @param a - the value before unescaping
#
# Returns:
#   [USER] - if the value could not be generated by
#            mysql_real_escape_string()
#   ''     - if the value was possibly generated by
#            mysql_real_escape_string()
#
# [USER] is returned when all of the following is true:
#   - the first and the third bytes are backslashes (0x5C)
#   - the second byte is greater than 0x7F
#   - the fourth byte is not one of _ % 0 t r n Z
#   - the second and the fourth bytes put together are wellformed,
#     according to iswellformed()
#
CREATE FUNCTION mysql_real_escape_string_generated(a VARBINARY(256))
RETURNS VARBINARY(256)
BEGIN
  DECLARE byte1 BINARY(1) DEFAULT SUBSTR(a,1,1);
  DECLARE byte2 BINARY(1) DEFAULT SUBSTR(a,2,1);
  DECLARE byte3 BINARY(1) DEFAULT SUBSTR(a,3,1);
  DECLARE byte4 BINARY(1) DEFAULT SUBSTR(a,4,1);
  DECLARE escaped_pair BINARY(2) DEFAULT CONCAT(byte2,byte4);

  IF (byte1=0x5C) AND
     (byte3=0x5C) AND
     (byte2>0x7F) AND
     (byte4 NOT IN ('_','%','0','t','r','n','Z')) AND
     iswellformed(escaped_pair) THEN
    RETURN '[USER]';
  END IF;

  RETURN '';
END//

DELIMITER ;//
|
||||
|
||||
|
||||
# Run the test: t1.a gets every tested byte sequence, t1.b gets
# what the SQL parser made out of it (NULL on a syntax error).
CREATE TABLE t1 (a VARBINARY(10),b VARBINARY(10));
CALL p2();

# Avoid "Invalid XXX character string" warnings.
# Malformed strings are marked in the output anyway.
--disable_warnings

# Print every sequence before and after unescaping, with a comment made of
# the tags returned by the helper functions. A record tagged with '[BAD',
# followed by HEX(UNESCAPE(a)) and ']', means that the string was unescaped
# in an unexpected way, which indicates a bug in UNESCAPE() above.
SELECT HEX(a),HEX(b),
  CONCAT(unescape_type(a,b),
         wellformedness(a,b),
         mysql_real_escape_string_generated(a),
         CASE
           WHEN UNESCAPE(a)<>b THEN CONCAT('[BAD',HEX(UNESCAPE(a)),']')
           ELSE ''
         END) AS comment
FROM t1
ORDER BY LENGTH(a),a;

--enable_warnings

# Clean up everything created in this file.
# NOTE(review): the temporary tables created by include/bytes.inc and
# include/bytes2.inc ("bytes", "bytes2") are not dropped here - confirm
# that no caller sources this file twice within the same connection.
DROP TABLE t1, allbytes;
DROP PROCEDURE p1;
DROP PROCEDURE p2;
DROP FUNCTION unescape;
DROP FUNCTION unescape_type;
DROP FUNCTION wellformedness;
DROP FUNCTION mysql_real_escape_string_generated;
DROP FUNCTION iswellformed;

# NOTE(review): the end marker says ctype_backslash.inc, while the start
# marker says ctype_unescape.inc. It is kept as is, because the text is
# part of the test output.
--echo # End of ctype_backslash.inc
|
Reference in New Issue
Block a user