mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
A cleanup for MDEV-17088 Provide tools to encode/decode mysql-encoded file system names
- Load and convert the entire input file at once, rather than reading string-by-string using fgets(). This change makes it possible to convert from UCS2, UTF16, UTF32 data.
- Adding the --delimiter option, to treat the specified characters as delimiters rather than data to convert. Useful in combination with `-f filename` or `-t filename`. The delimiter characters are not converted; they are copied from the input to the output as is.
- Adding diagnostics with line number and position if:
  * an illegal input byte sequence was found
  * a character cannot be converted to the target character set
This commit is contained in:
@ -21,6 +21,8 @@ BINARY CONVERT(a USING filename)
|
||||
測試資料2
|
||||
test/測試資料.frm
|
||||
test/測試資料2.frm
|
||||
test/測試資料.frm
|
||||
test/測試資料2.frm
|
||||
DROP TABLE t1;
|
||||
# bulk convert with file
|
||||
# --- Start of mariadb-conv for mysql-conv-test-cp932.txt ---
|
||||
|
@ -10,26 +10,27 @@ SET NAMES cp932;
|
||||
--let $MYSQLD_DATADIR= `select @@datadir`
|
||||
|
||||
# simple I/O
|
||||
--exec echo "測試資料" | $MARIADB_CONV -f cp932 -t filename
|
||||
--exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t cp932
|
||||
--exec echo "測試資料" | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n"
|
||||
--exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n"
|
||||
|
||||
# undo query result
|
||||
--let $query_result=`SELECT CONVERT(CONVERT('測試資料' USING filename) USING binary);`
|
||||
--echo $query_result
|
||||
--exec echo $query_result | $MARIADB_CONV -f filename -t cp932
|
||||
--exec echo $query_result | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n"
|
||||
|
||||
--let $reverse_query_result=`SELECT CONVERT(_filename '@6e2c@8a66@8cc7@6599@5eab' USING cp932);`
|
||||
--echo $reverse_query_result
|
||||
--exec echo $reverse_query_result | $MARIADB_CONV -f cp932 -t filename
|
||||
--exec echo $reverse_query_result | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n"
|
||||
|
||||
--echo # bulk convert with pipe
|
||||
|
||||
CREATE TABLE t1 (id SERIAL, a VARCHAR(64) CHARACTER SET cp932);
|
||||
INSERT INTO t1 (a) VALUES ('測試資料'), ('測試資料2');
|
||||
--exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f cp932 -t filename
|
||||
--exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n"
|
||||
--exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id"
|
||||
--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932
|
||||
--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t cp932
|
||||
--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n"
|
||||
--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n"
|
||||
--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932 --delimiter="/.\r\n"
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
@ -44,12 +45,12 @@ DROP TABLE t1;
|
||||
EOF
|
||||
|
||||
--echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt ---
|
||||
--exec $MARIADB_CONV -f cp932 -t filename $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt
|
||||
--exec $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt
|
||||
--echo # --- End of mariadb-conv for mysql-conv-test-cp932.txt ---
|
||||
|
||||
--copy_file $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt
|
||||
--echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt and mysql-conv-test-cp932-2.txt ---
|
||||
--exec $MARIADB_CONV -f cp932 -t filename $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt
|
||||
--exec $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt
|
||||
--echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt and mysql-conv-test-cp932-2.txt ---
|
||||
|
||||
--remove_file $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt
|
||||
|
13
mysql-test/suite/client/mariadb-conv-utf16.result
Normal file
13
mysql-test/suite/client/mariadb-conv-utf16.result
Normal file
@ -0,0 +1,13 @@
|
||||
#
|
||||
# MDEV-17088 Provide tools to encode/decode mysql-encoded file system names
|
||||
#
|
||||
SET NAMES utf8;
|
||||
# Bad delimiter
|
||||
--delimiter cannot be used with utf16 to utf8 conversion
|
||||
# Bad delimiter
|
||||
--delimiter cannot be used with utf8 to utf16 conversion
|
||||
# Start of file01.utf16.txt
|
||||
aaa
|
||||
xxxяяяxxx
|
||||
bbb
|
||||
# End of file01.utf16.txt
|
21
mysql-test/suite/client/mariadb-conv-utf16.test
Normal file
21
mysql-test/suite/client/mariadb-conv-utf16.test
Normal file
@ -0,0 +1,21 @@
|
||||
-- source include/have_utf16.inc
|
||||
-- source include/not_embedded.inc
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-17088 Provide tools to encode/decode mysql-encoded file system names
|
||||
--echo #
|
||||
|
||||
--character_set utf8
|
||||
SET NAMES utf8;
|
||||
|
||||
--echo # Bad delimiter
|
||||
--error 1
|
||||
--exec $MARIADB_CONV -f utf16 -t utf8 --delimiter="\r\n" $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf16.txt 2>&1
|
||||
|
||||
--echo # Bad delimiter
|
||||
--error 1
|
||||
--exec $MARIADB_CONV -f utf8 -t utf16 --delimiter="\r\n" $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1
|
||||
|
||||
--echo # Start of file01.utf16.txt
|
||||
--exec $MARIADB_CONV -f utf16 -t utf8 $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf16.txt 2>&1
|
||||
--echo # End of file01.utf16.txt
|
@ -21,6 +21,8 @@ BINARY CONVERT(a USING filename)
|
||||
測試資料2
|
||||
test/測試資料.frm
|
||||
test/測試資料2.frm
|
||||
test/測試資料.frm
|
||||
test/測試資料2.frm
|
||||
DROP TABLE t1;
|
||||
# bulk convert with file
|
||||
# --- Start of mariadb-conv for mysql-conv-test-utf8.txt ---
|
||||
|
@ -10,26 +10,27 @@ SET NAMES utf8;
|
||||
--let $MYSQLD_DATADIR= `select @@datadir`
|
||||
|
||||
# simple I/O
|
||||
--exec echo "測試資料" | $MARIADB_CONV -f utf8 -t filename
|
||||
--exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t utf8
|
||||
--exec echo "測試資料" | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n"
|
||||
--exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n"
|
||||
|
||||
# undo query result
|
||||
--let $query_result=`SELECT CONVERT(CONVERT('測試資料' USING filename) USING binary);`
|
||||
--echo $query_result
|
||||
--exec echo $query_result | $MARIADB_CONV -f filename -t utf8
|
||||
--exec echo $query_result | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n"
|
||||
|
||||
--let $reverse_query_result=`SELECT CONVERT(_filename '@6e2c@8a66@8cc7@6599@5eab' USING utf8);`
|
||||
--echo $reverse_query_result
|
||||
--exec echo $reverse_query_result | $MARIADB_CONV -f utf8 -t filename
|
||||
--exec echo $reverse_query_result | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n"
|
||||
|
||||
--echo # bulk convert with pipe
|
||||
|
||||
CREATE TABLE t1 (id SERIAL, a VARCHAR(64) CHARACTER SET utf8);
|
||||
INSERT INTO t1 (a) VALUES ('測試資料'), ('測試資料2');
|
||||
--exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f utf8 -t filename
|
||||
--exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n"
|
||||
--exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id"
|
||||
--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8
|
||||
--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t utf8
|
||||
--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n"
|
||||
--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n"
|
||||
--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8 --delimiter="/.\r\n"
|
||||
DROP TABLE t1;
|
||||
|
||||
|
||||
@ -44,12 +45,12 @@ DROP TABLE t1;
|
||||
EOF
|
||||
|
||||
--echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt ---
|
||||
--exec $MARIADB_CONV -f utf8 -t filename $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt
|
||||
--exec $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt
|
||||
--echo # --- End of mariadb-conv for mysql-conv-test-utf8.txt ---
|
||||
|
||||
--copy_file $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt
|
||||
--echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt and mysql-conv-test-utf8-2.txt ---
|
||||
--exec $MARIADB_CONV -f utf8 -t filename $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt
|
||||
--exec $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt
|
||||
--echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt and mysql-conv-test-utf8-2.txt ---
|
||||
|
||||
--remove_file $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt
|
||||
|
@ -11,3 +11,15 @@ mariadb-conv: unknown option '-r'
|
||||
Character set unknown-cs is not supported
|
||||
# unknown "from" character set
|
||||
Character set unknown-cs is not supported
|
||||
# Bad delimiter
|
||||
Bad --delimiter value
|
||||
# Conversion error
|
||||
Conversion from utf8 to latin1 failed at position 7
|
||||
aaa
|
||||
xxx???xxx
|
||||
bbb
|
||||
# Bad input character
|
||||
Illegal utf8 byte sequence at position 7
|
||||
aaa
|
||||
xxx???xxx
|
||||
bbb
|
||||
|
@ -6,8 +6,8 @@
|
||||
|
||||
--echo # default encoding
|
||||
--exec echo "t1" | $MARIADB_CONV
|
||||
--exec echo "t1" | $MARIADB_CONV -f filename
|
||||
--exec echo "t1" | $MARIADB_CONV -t filename
|
||||
--exec echo "t1" | $MARIADB_CONV -f filename --delimiter="\r\n"
|
||||
--exec echo "t1" | $MARIADB_CONV -t filename --delimiter="\r\n"
|
||||
|
||||
--echo # invalid option
|
||||
--replace_regex /.*mariadb-conv.*: unknown/mariadb-conv: unknown/
|
||||
@ -23,3 +23,17 @@
|
||||
--replace_regex /.*mariadb-conv.*: unknown/mariadb-conv: unknown/
|
||||
--error 1
|
||||
--exec echo "t1" | $MARIADB_CONV -f unknown-cs -t latin1 2>&1 > /dev/null
|
||||
|
||||
--echo # Bad delimiter
|
||||
--error 1
|
||||
--exec echo "t1" | $MARIADB_CONV --delimiter="\x" 2>&1 > /dev/null
|
||||
|
||||
--echo # Conversion error
|
||||
--error 1
|
||||
--exec $MARIADB_CONV -f utf8 -t latin1 < $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1
|
||||
--exec $MARIADB_CONV -f utf8 -t latin1 -c < $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1
|
||||
|
||||
--echo # Bad input character
|
||||
--error 1
|
||||
--exec $MARIADB_CONV -f utf8 -t latin1 < $MYSQL_TEST_DIR/std_data/mariadb-conv/file02.latin1.txt 2>&1
|
||||
--exec $MARIADB_CONV -f utf8 -t latin1 -c < $MYSQL_TEST_DIR/std_data/mariadb-conv/file02.latin1.txt 2>&1
|
||||
|
Reference in New Issue
Block a user