1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-4931 Make cpimport charset-aware. (#2938)

1. Extend the following CalpontSystemCatalog member functions to
   set CalpontSystemCatalog::ColType::charsetNumber, after the
   system catalog update to add charset number to calpontsys.syscolumn
   in MCOL-5005:
     CalpontSystemCatalog::lookupOID
     CalpontSystemCatalog::colType
     CalpontSystemCatalog::columnRIDs
     CalpontSystemCatalog::getSchemaInfo

2. Update cpimport to use the CHARSET_INFO object associated with the
   charset number retrieved from the system catalog, for a
   dictionary/non-dictionary CHAR/VARCHAR/TEXT column, to truncate
   long strings that exceed the target column character length.

3. Add MTR test cases.
This commit is contained in:
Gagan Goel
2023-09-05 10:17:20 -04:00
committed by GitHub
parent 5b4f06bf0d
commit 931f2b36a1
12 changed files with 211 additions and 72 deletions

View File

@ -0,0 +1,44 @@
DROP DATABASE IF EXISTS mcol_4931;
CREATE DATABASE mcol_4931;
USE mcol_4931;
CREATE TABLE t1 (
a VARCHAR(15) collate 'utf8mb4_croatian_ci',
b CHAR(15) collate 'utf8mb4_croatian_ci',
c VARCHAR(2),
d CHAR(2)
)ENGINE=columnstore default charset=utf8mb4;
CREATE TABLE t2 (
a VARCHAR(15) collate 'latin2_hungarian_ci',
b CHAR(15) collate 'latin2_hungarian_ci',
c VARCHAR(2),
d CHAR(2)
)ENGINE=columnstore default charset=latin2;
LOAD DATA INFILE "DATADIR/mcol4931_1.txt" IGNORE INTO TABLE t1 charset utf8mb4 FIELDS TERMINATED BY "," ENCLOSED BY '"';;
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
Warning 1265 Data truncated for column 'b' at row 1
Warning 1265 Data truncated for column 'c' at row 1
Warning 1265 Data truncated for column 'd' at row 1
LOAD DATA INFILE "DATADIR/mcol4931_2.txt" IGNORE INTO TABLE t2 charset latin2 FIELDS TERMINATED BY "," ENCLOSED BY '"';;
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
Warning 1265 Data truncated for column 'b' at row 1
Warning 1265 Data truncated for column 'c' at row 1
Warning 1265 Data truncated for column 'd' at row 1
SELECT * FROM t1;
a b c d
König-abcd-Stra König-abcd-Stra Kö Kö
König-abcd-Stra König-abcd-Stra Kö Kö
SELECT CHAR_LENGTH(a), LENGTHB(a), CHAR_LENGTH(b), LENGTHB(b), CHAR_LENGTH(c), LENGTHB(c), CHAR_LENGTH(d), LENGTHB(d) FROM t1;
CHAR_LENGTH(a) LENGTHB(a) CHAR_LENGTH(b) LENGTHB(b) CHAR_LENGTH(c) LENGTHB(c) CHAR_LENGTH(d) LENGTHB(d)
15 16 15 16 2 3 2 3
15 16 15 16 2 3 2 3
SELECT * FROM t2;
a b c d
abcdefghijklmno abcdefghijklmno ab ab
abcdefghijklmno abcdefghijklmno ab ab
SELECT CHAR_LENGTH(a), LENGTHB(a), CHAR_LENGTH(b), LENGTHB(b), CHAR_LENGTH(c), LENGTHB(c), CHAR_LENGTH(d), LENGTHB(d) FROM t2;
CHAR_LENGTH(a) LENGTHB(a) CHAR_LENGTH(b) LENGTHB(b) CHAR_LENGTH(c) LENGTHB(c) CHAR_LENGTH(d) LENGTHB(d)
15 15 15 15 2 2 2 2
15 15 15 15 2 2 2 2
DROP DATABASE mcol_4931;

View File

@ -0,0 +1,57 @@
#
# MCOL-4931 Make cpimport charset aware
#
# Loads the same over-long strings into a utf8mb4 table and a latin2 table,
# once through LOAD DATA INFILE and once through cpimport, then prints the
# stored values together with their character and byte lengths.
#
# cpimport is invoked through --exec further down, so the test is skipped
# unless $MYSQL_TEST_ROOT is set.
if (!$MYSQL_TEST_ROOT){
skip Should be run by root to execute cpimport;
}
--source ../include/have_columnstore.inc
# Server data directory. The input files are created there and it is masked
# as DATADIR in the recorded output.
let $DATADIR=`SELECT @@datadir`;
# Start from a clean database. Warnings are suppressed so that a missing
# database does not add a note to the recorded output.
--disable_warnings
DROP DATABASE IF EXISTS mcol_4931;
--enable_warnings
CREATE DATABASE mcol_4931;
USE mcol_4931;
# t1: utf8mb4 table. Columns a and b carry an explicit utf8mb4 collation,
# columns c and d fall back to the table default charset.
CREATE TABLE t1 (
a VARCHAR(15) collate 'utf8mb4_croatian_ci',
b CHAR(15) collate 'utf8mb4_croatian_ci',
c VARCHAR(2),
d CHAR(2)
)ENGINE=columnstore default charset=utf8mb4;
# t2: same column layout as t1, using latin2 instead of utf8mb4.
CREATE TABLE t2 (
a VARCHAR(15) collate 'latin2_hungarian_ci',
b CHAR(15) collate 'latin2_hungarian_ci',
c VARCHAR(2),
d CHAR(2)
)ENGINE=columnstore default charset=latin2;
# Build the input files. Each file holds a single row of four identical
# quoted fields that are 17 characters long, which is longer than every
# target column (15 or 2 characters). The first file contains non-ASCII
# characters, the second is plain ASCII.
--exec rm -f $DATADIR/mcol4931_1.txt
--exec rm -f $DATADIR/mcol4931_2.txt
--exec echo "\"König-abcd-Straße\",\"König-abcd-Straße\",\"König-abcd-Straße\",\"König-abcd-Straße\"" > $DATADIR/mcol4931_1.txt
--exec echo "\"abcdefghijklmnopq\",\"abcdefghijklmnopq\",\"abcdefghijklmnopq\",\"abcdefghijklmnopq\"" > $DATADIR/mcol4931_2.txt
# First load: through the server with LOAD DATA INFILE. This gives one row
# per table to compare the cpimport row against.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol4931_1.txt" IGNORE INTO TABLE t1 charset utf8mb4 FIELDS TERMINATED BY "," ENCLOSED BY '"';
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol4931_2.txt" IGNORE INTO TABLE t2 charset latin2 FIELDS TERMINATED BY "," ENCLOSED BY '"';
# Second load: the same files through cpimport. The result log is switched
# off around these calls so that the tool output is not recorded.
# NOTE(review): -s and -E presumably set the field separator and the
# enclosing character, mirroring the LOAD DATA options above. Confirm
# against the cpimport usage text.
--disable_result_log
--exec $MCS_CPIMPORT -s',' -E'"' mcol_4931 t1 '$DATADIR/mcol4931_1.txt';
--exec $MCS_CPIMPORT -s',' -E'"' mcol_4931 t2 '$DATADIR/mcol4931_2.txt';
--enable_result_log
# Show the stored values from both load paths. CHAR_LENGTH reports the
# length in characters and LENGTHB the length in bytes, so the output
# exposes both the truncation point and the stored byte size per column.
SELECT * FROM t1;
SELECT CHAR_LENGTH(a), LENGTHB(a), CHAR_LENGTH(b), LENGTHB(b), CHAR_LENGTH(c), LENGTHB(c), CHAR_LENGTH(d), LENGTHB(d) FROM t1;
SELECT * FROM t2;
SELECT CHAR_LENGTH(a), LENGTHB(a), CHAR_LENGTH(b), LENGTHB(b), CHAR_LENGTH(c), LENGTHB(c), CHAR_LENGTH(d), LENGTHB(d) FROM t2;
# Clean up: remove the generated input files and drop the test database.
--exec rm -f $DATADIR/mcol4931_1.txt
--exec rm -f $DATADIR/mcol4931_2.txt
DROP DATABASE mcol_4931;