1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-5480 LOAD DATA INFILE incorrectly loads values for MEDIUMINT datatype.

The internal memory representation of the MEDIUMINT datatype uses 24 bits. This is
true for both the MariaDB server and ColumnStore. The MCS plugin code uses the
TypeHandlerSInt24 and TypeHandlerUInt24 classes to convert the binary
representation of the signed and unsigned MEDIUMINT values, respectively, passed by
the server to the plugin. The plugin then writes the text representation
of these values to an open file descriptor, which is piped to cpimport
for the final load into the MCS db files.

The TypeHandlerXInt24 classes were previously, and incorrectly, using the
WriteBatchField::ColWriteBatchXInt32() functions, which operate on a 4-byte
buffer. This resulted in incorrect parsing of MEDIUMINT values. As a fix,
we implement WriteBatchField::ColWriteBatchXInt24() functions, which
correctly handle the 24-bit input buffer used for the MEDIUMINT datatype.
This commit is contained in:
Gagan Goel
2023-05-19 18:30:52 -04:00
parent 611087fe7c
commit 10f1a7abbc
4 changed files with 316 additions and 2 deletions

View File

@ -0,0 +1,118 @@
DROP DATABASE IF EXISTS mcol_5480;
CREATE DATABASE mcol_5480;
USE mcol_5480;
SHOW VARIABLES LIKE 'columnstore_use_import_for_batchinsert';
Variable_name Value
columnstore_use_import_for_batchinsert ON
CREATE TABLE t1 (cmediumint MEDIUMINT, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
cmediumint ctimestamp ctime
-8388608 2020-08-13 03:14:07 11:58:28
-8388607 2020-08-14 03:14:07 11:58:38
-1000 2020-08-15 03:14:07 11:58:48
-1 2020-08-16 03:14:07 11:58:58
1 2020-08-13 03:14:08 12:58:28
1000 2020-08-13 03:14:09 13:58:28
8388607 2020-08-13 03:14:10 14:58:28
NULL 2020-08-13 03:14:11 15:58:28
DROP TABLE t1;
CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT, ctime TIME) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
ctimestamp cmediumint ctime
2020-08-13 03:14:07 -8388608 11:58:28
2020-08-14 03:14:07 -8388607 11:58:38
2020-08-15 03:14:07 -1000 11:58:48
2020-08-16 03:14:07 -1 11:58:58
2020-08-13 03:14:08 1 12:58:28
2020-08-13 03:14:09 1000 13:58:28
2020-08-13 03:14:10 8388607 14:58:28
2020-08-13 03:14:11 NULL 15:58:28
DROP TABLE t1;
CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
cmediumint ctimestamp ctime
0 2020-08-12 03:14:08 12:58:18
1 2020-08-13 03:14:08 12:58:28
1000 2020-08-13 03:14:09 13:58:28
8388607 2020-08-13 03:14:10 14:58:28
NULL 2020-08-13 03:14:11 15:58:28
16777215 2020-08-13 03:14:11 15:58:28
DROP TABLE t1;
CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT UNSIGNED, ctime TIME) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
ctimestamp cmediumint ctime
2020-08-12 03:14:08 0 12:58:18
2020-08-13 03:14:08 1 12:58:28
2020-08-13 03:14:09 1000 13:58:28
2020-08-13 03:14:10 8388607 14:58:28
2020-08-13 03:14:11 NULL 15:58:28
2020-08-13 03:14:11 16777215 15:58:28
DROP TABLE t1;
CREATE TABLE t1 (cmediumint MEDIUMINT) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
cmediumint
-8388608
-8388607
-1000
-1
1
1000
8388607
NULL
DROP TABLE t1;
CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
cmediumint
0
1
1000
8388607
NULL
16777215
DROP TABLE t1;
CREATE TABLE t1 (ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
ctimestamp ctime
2020-08-13 03:14:07 11:58:28
2020-08-14 03:14:07 11:58:38
2020-08-15 03:14:07 11:58:48
2020-08-16 03:14:07 11:58:58
2020-08-13 03:14:08 12:58:28
2020-08-13 03:14:09 13:58:28
2020-08-13 03:14:10 14:58:28
2020-08-13 03:14:11 15:58:28
DROP TABLE t1;
CREATE TABLE t1 (ctimestamp TIMESTAMP) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
ctimestamp
2020-08-13 03:14:07
2020-08-14 03:14:07
2020-08-15 03:14:07
2020-08-16 03:14:07
2020-08-13 03:14:08
2020-08-13 03:14:09
2020-08-13 03:14:10
2020-08-13 03:14:11
DROP TABLE t1;
CREATE TABLE t1 (ctime TIME) engine=columnstore;
LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";;
SELECT * FROM t1;
ctime
11:58:28
11:58:38
11:58:48
11:58:58
12:58:28
13:58:28
14:58:28
15:58:28
DROP TABLE t1;
DROP DATABASE mcol_5480;

View File

@ -0,0 +1,160 @@
#
# MCOL-5480 LDI (LOAD DATA INFILE) loads values incorrectly for MEDIUMINT,
# TIME and TIMESTAMP when cpimport is used for batch insert
#
# Every case in this file follows the same pattern: create a ColumnStore
# table, write a small '|'-terminated data file into the server datadir,
# load it with LOAD DATA INFILE, SELECT the rows back and drop the table.
#
# NOTE(review): presumably this include skips the test when the ColumnStore
# engine is not available -- confirm against the include file.
--source ../include/have_columnstore.inc
# Capture the server's data directory; the --exec and LOAD DATA commands in
# this file build the data file path from $DATADIR.
let $DATADIR=`SELECT @@datadir`;
# Start from a clean database. Warnings are switched off around the DROP so
# the recorded output does not depend on whether the database already existed.
--disable_warnings
DROP DATABASE IF EXISTS mcol_5480;
--enable_warnings
CREATE DATABASE mcol_5480;
USE mcol_5480;
# Record the batch-insert setting in the output; the expected result has it ON.
SHOW VARIABLES LIKE 'columnstore_use_import_for_batchinsert';
# Test for signed medium int with timestamp and time data types.
## Signed medium int as the first column
CREATE TABLE t1 (cmediumint MEDIUMINT, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
# Rebuild the input file from scratch: one echo per row, every field
# (including the last one) followed by the '|' terminator.
--exec rm -f $DATADIR/mcol5480.txt
# -8388608 and 8388607 are the signed 24-bit limits (-2^23 and 2^23 - 1);
# the rows in between cover values next to the minimum and on both sides of zero.
--exec echo "-8388608|2020-08-13 03:14:07|11:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "-8388607|2020-08-14 03:14:07|11:58:38|" >> $DATADIR/mcol5480.txt
--exec echo "-1000|2020-08-15 03:14:07|11:58:48|" >> $DATADIR/mcol5480.txt
--exec echo "-1|2020-08-16 03:14:07|11:58:58|" >> $DATADIR/mcol5480.txt
--exec echo "1|2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "1000|2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "8388607|2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt
# \N is the LOAD DATA marker for NULL; the recorded result shows NULL for this row.
--exec echo "\N|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt
# Mask the machine-specific datadir in the echoed statement so the recorded
# output reads "DATADIR/mcol5480.txt" on every host.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the recorded result lists the rows in file order.
SELECT * FROM t1;
DROP TABLE t1;
## Signed medium int as the middle column
# Same values as a signed MEDIUMINT would take in a first-column layout, but
# here the MEDIUMINT field sits between the TIMESTAMP and TIME fields.
CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT, ctime TIME) engine=columnstore;
# Recreate the data file; each echo appends one '|'-terminated row.
--exec rm -f $DATADIR/mcol5480.txt
# Second field runs from the signed 24-bit minimum (-8388608) to the maximum
# (8388607), followed by a NULL row.
--exec echo "2020-08-13 03:14:07|-8388608|11:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-14 03:14:07|-8388607|11:58:38|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-15 03:14:07|-1000|11:58:48|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-16 03:14:07|-1|11:58:58|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:08|1|12:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:09|1000|13:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:10|8388607|14:58:28|" >> $DATADIR/mcol5480.txt
# \N in the middle field is expected to load as NULL.
--exec echo "2020-08-13 03:14:11|\N|15:58:28|" >> $DATADIR/mcol5480.txt
# Replace the real datadir path with the literal DATADIR in the recorded output.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the expected output assumes rows come back in file order.
SELECT * FROM t1;
DROP TABLE t1;
# Test for unsigned medium int with timestamp and time data types.
## Unsigned medium int as the first column
CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
# Recreate the data file; each echo appends one '|'-terminated row.
--exec rm -f $DATADIR/mcol5480.txt
# 0 is the unsigned minimum; 8388607 (2^23 - 1) is the largest value that
# would also fit a signed MEDIUMINT.
--exec echo "0|2020-08-12 03:14:08|12:58:18|" >> $DATADIR/mcol5480.txt
--exec echo "1|2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "1000|2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "8388607|2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt
# \N is expected to load as NULL.
--exec echo "\N|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt
# 16777215 is the unsigned 24-bit maximum (2^24 - 1), i.e. all 24 bits set.
--exec echo "16777215|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt
# Replace the real datadir path with the literal DATADIR in the recorded output.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the expected output keeps file order (NULL row
# before the 16777215 row).
SELECT * FROM t1;
DROP TABLE t1;
## Unsigned medium int as the middle column
# The unsigned MEDIUMINT field sits between the TIMESTAMP and TIME fields.
CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT UNSIGNED, ctime TIME) engine=columnstore;
# Recreate the data file; each echo appends one '|'-terminated row.
--exec rm -f $DATADIR/mcol5480.txt
# Second field covers 0 (unsigned minimum), small values, 8388607 (2^23 - 1),
# NULL, and 16777215 (unsigned 24-bit maximum, 2^24 - 1).
--exec echo "2020-08-12 03:14:08|0|12:58:18|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:08|1|12:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:09|1000|13:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:10|8388607|14:58:28|" >> $DATADIR/mcol5480.txt
# \N in the middle field is expected to load as NULL.
--exec echo "2020-08-13 03:14:11|\N|15:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:11|16777215|15:58:28|" >> $DATADIR/mcol5480.txt
# Replace the real datadir path with the literal DATADIR in the recorded output.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the expected output assumes rows come back in file order.
SELECT * FROM t1;
DROP TABLE t1;
# Test for only signed medium int data type
# Single-column table: the MEDIUMINT value is the only field in each row.
CREATE TABLE t1 (cmediumint MEDIUMINT) engine=columnstore;
# Recreate the data file; each echo appends one row ending in the '|' terminator.
--exec rm -f $DATADIR/mcol5480.txt
# Values run from the signed 24-bit minimum (-2^23) to the maximum (2^23 - 1).
--exec echo "-8388608|" >> $DATADIR/mcol5480.txt
--exec echo "-8388607|" >> $DATADIR/mcol5480.txt
--exec echo "-1000|" >> $DATADIR/mcol5480.txt
--exec echo "-1|" >> $DATADIR/mcol5480.txt
--exec echo "1|" >> $DATADIR/mcol5480.txt
--exec echo "1000|" >> $DATADIR/mcol5480.txt
--exec echo "8388607|" >> $DATADIR/mcol5480.txt
# \N is expected to load as NULL.
--exec echo "\N|" >> $DATADIR/mcol5480.txt
# Replace the real datadir path with the literal DATADIR in the recorded output.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the expected output assumes rows come back in file order.
SELECT * FROM t1;
DROP TABLE t1;
# Test for only unsigned medium int data type
# Single-column table: the unsigned MEDIUMINT value is the only field in each row.
CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED) engine=columnstore;
# Recreate the data file; each echo appends one row ending in the '|' terminator.
--exec rm -f $DATADIR/mcol5480.txt
# 0 is the unsigned minimum; 16777215 (2^24 - 1) below is the unsigned maximum.
--exec echo "0|" >> $DATADIR/mcol5480.txt
--exec echo "1|" >> $DATADIR/mcol5480.txt
--exec echo "1000|" >> $DATADIR/mcol5480.txt
--exec echo "8388607|" >> $DATADIR/mcol5480.txt
# \N is expected to load as NULL.
--exec echo "\N|" >> $DATADIR/mcol5480.txt
--exec echo "16777215|" >> $DATADIR/mcol5480.txt
# Replace the real datadir path with the literal DATADIR in the recorded output.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the expected output keeps file order (NULL row
# before the 16777215 row).
SELECT * FROM t1;
DROP TABLE t1;
# Test for timestamp and time data types
# No MEDIUMINT column here: only a TIMESTAMP and a TIME field are loaded.
CREATE TABLE t1 (ctimestamp TIMESTAMP, ctime TIME) engine=columnstore;
# Recreate the data file; each echo appends one '|'-terminated row.
--exec rm -f $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:07|11:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-14 03:14:07|11:58:38|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-15 03:14:07|11:58:48|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-16 03:14:07|11:58:58|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt
# Replace the real datadir path with the literal DATADIR in the recorded output.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the expected output assumes rows come back in file order.
SELECT * FROM t1;
DROP TABLE t1;
# Test for only timestamp data type
# Single-column table: the TIMESTAMP value is the only field in each row.
CREATE TABLE t1 (ctimestamp TIMESTAMP) engine=columnstore;
# Recreate the data file; each echo appends one row ending in the '|' terminator.
--exec rm -f $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:07|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-14 03:14:07|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-15 03:14:07|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-16 03:14:07|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:08|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:09|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:10|" >> $DATADIR/mcol5480.txt
--exec echo "2020-08-13 03:14:11|" >> $DATADIR/mcol5480.txt
# Replace the real datadir path with the literal DATADIR in the recorded output.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the expected output assumes rows come back in file order.
SELECT * FROM t1;
DROP TABLE t1;
# Test for only time data type
# Single-column table: the TIME value is the only field in each row.
CREATE TABLE t1 (ctime TIME) engine=columnstore;
# Recreate the data file; each echo appends one row ending in the '|' terminator.
--exec rm -f $DATADIR/mcol5480.txt
--exec echo "11:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "11:58:38|" >> $DATADIR/mcol5480.txt
--exec echo "11:58:48|" >> $DATADIR/mcol5480.txt
--exec echo "11:58:58|" >> $DATADIR/mcol5480.txt
--exec echo "12:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "13:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "14:58:28|" >> $DATADIR/mcol5480.txt
--exec echo "15:58:28|" >> $DATADIR/mcol5480.txt
# Replace the real datadir path with the literal DATADIR in the recorded output.
--replace_result $DATADIR DATADIR
--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";
# NOTE(review): no ORDER BY -- the expected output assumes rows come back in file order.
SELECT * FROM t1;
DROP TABLE t1;
# Cleanup: remove the temporary data file from the datadir and drop the
# test database, with warnings suppressed around the DROP.
--exec rm -f $DATADIR/mcol5480.txt
--disable_warnings
DROP DATABASE mcol_5480;
--enable_warnings