From 10f1a7abbca680b934f9568a16652ea29526add0 Mon Sep 17 00:00:00 2001 From: Gagan Goel Date: Fri, 19 May 2023 18:30:52 -0400 Subject: [PATCH] MCOL-5480 LOAD DATA INFILE incorrectly loads values for MEDIUMINT datatype. Internal memory representation of MEDIUMINT datatype uses 24 bits. This is true for both MariaDB server as well as ColumnStore. MCS plugin code uses TypeHandlerSInt24 and TypeHandlerUInt24 classes to respectively convert the binary representation of the signed and unsigned MEDIUMINT values passed by the server to the plugin. The plugin then outputs the text representation of these values into an open file descriptor which is piped to cpimport for the final load into the MCS db files. The TypeHandlerXInt24 classes were earlier incorrectly using WriteBatchField::ColWriteBatchXInt32() functions which operate on a 4 byte buffer. This resulted in incorrect parsing of MEDIUMINT values. As a fix, we implement WriteBatchField::ColWriteBatchXInt24() functions which correctly handle the 24 bit input buffer used for MEDIUMINT datatype. 
--- datatypes/mcs_datatype.h | 6 +- dbcon/mysql/ha_mcs_datatype.h | 34 ++++ .../columnstore/bugfixes/mcol-5480.result | 118 +++++++++++++ .../columnstore/bugfixes/mcol-5480.test | 160 ++++++++++++++++++ 4 files changed, 316 insertions(+), 2 deletions(-) create mode 100644 mysql-test/columnstore/bugfixes/mcol-5480.result create mode 100644 mysql-test/columnstore/bugfixes/mcol-5480.test diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index fd4050505..6fac1b649 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -1015,6 +1015,8 @@ class WriteBatchField virtual size_t ColWriteBatchUInt64(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0; virtual size_t ColWriteBatchSInt32(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0; virtual size_t ColWriteBatchUInt32(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0; + virtual size_t ColWriteBatchSInt24(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0; + virtual size_t ColWriteBatchUInt24(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0; virtual size_t ColWriteBatchSInt16(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0; virtual size_t ColWriteBatchUInt16(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0; virtual size_t ColWriteBatchSInt8(const unsigned char* buf, bool nullVal, ColBatchWriter& ci) = 0; @@ -1294,7 +1296,7 @@ class TypeHandlerSInt24 : public TypeHandlerInt size_t ColWriteBatch(WriteBatchField* field, const unsigned char* buf, bool nullVal, ColBatchWriter& writer) const override { - return field->ColWriteBatchSInt32(buf, nullVal, writer); + return field->ColWriteBatchSInt24(buf, nullVal, writer); } int storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const override { @@ -1567,7 +1569,7 @@ class TypeHandlerUInt24 : public TypeHandlerInt size_t ColWriteBatch(WriteBatchField* field, const unsigned char* buf, bool nullVal, ColBatchWriter& writer) const override { - return 
field->ColWriteBatchUInt32(buf, nullVal, writer); + return field->ColWriteBatchUInt24(buf, nullVal, writer); } int storeValueToField(rowgroup::Row& row, int pos, StoreField* f) const override { diff --git a/dbcon/mysql/ha_mcs_datatype.h b/dbcon/mysql/ha_mcs_datatype.h index dbbbbe1d5..845956604 100644 --- a/dbcon/mysql/ha_mcs_datatype.h +++ b/dbcon/mysql/ha_mcs_datatype.h @@ -448,6 +448,40 @@ class WriteBatchFieldMariaDB : public WriteBatchField return 4; } + size_t ColWriteBatchSInt24(const uchar* buf, bool nullVal, ColBatchWriter& ci) override + { + if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT)) + { + fprintf(ci.filePtr(), "%c", ci.delimiter()); + } + else + { + int32_t tmp = ( + (*const_cast<uint8_t*>(buf) << 8) | + (*const_cast<uint8_t*>(buf+1) << 16) | + (*const_cast<uint8_t*>(buf+2) << 24) + ) >> 8; + fprintf(ci.filePtr(), "%d%c", tmp, ci.delimiter()); + } + return 3; + } + + size_t ColWriteBatchUInt24(const uchar* buf, bool nullVal, ColBatchWriter& ci) override + { + if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT)) + fprintf(ci.filePtr(), "%c", ci.delimiter()); + else + { + uint32_t tmp = ( + (*const_cast<uint8_t*>(buf)) | + (*const_cast<uint8_t*>(buf+1) << 8) | + (*const_cast<uint8_t*>(buf+2) << 16) + ); + fprintf(ci.filePtr(), "%u%c", tmp, ci.delimiter()); + } + return 3; + } + size_t ColWriteBatchSInt16(const uchar* buf, bool nullVal, ColBatchWriter& ci) override { if (nullVal && (m_type.constraintType != CalpontSystemCatalog::NOTNULL_CONSTRAINT)) diff --git a/mysql-test/columnstore/bugfixes/mcol-5480.result b/mysql-test/columnstore/bugfixes/mcol-5480.result new file mode 100644 index 000000000..80e6799ff --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5480.result @@ -0,0 +1,118 @@ +DROP DATABASE IF EXISTS mcol_5480; +CREATE DATABASE mcol_5480; +USE mcol_5480; +SHOW VARIABLES LIKE 'columnstore_use_import_for_batchinsert'; +Variable_name Value +columnstore_use_import_for_batchinsert ON +CREATE TABLE t1 (cmediumint MEDIUMINT, ctimestamp 
TIMESTAMP, ctime TIME) engine=columnstore; +LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +cmediumint ctimestamp ctime +-8388608 2020-08-13 03:14:07 11:58:28 +-8388607 2020-08-14 03:14:07 11:58:38 +-1000 2020-08-15 03:14:07 11:58:48 +-1 2020-08-16 03:14:07 11:58:58 +1 2020-08-13 03:14:08 12:58:28 +1000 2020-08-13 03:14:09 13:58:28 +8388607 2020-08-13 03:14:10 14:58:28 +NULL 2020-08-13 03:14:11 15:58:28 +DROP TABLE t1; +CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT, ctime TIME) engine=columnstore; +LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +ctimestamp cmediumint ctime +2020-08-13 03:14:07 -8388608 11:58:28 +2020-08-14 03:14:07 -8388607 11:58:38 +2020-08-15 03:14:07 -1000 11:58:48 +2020-08-16 03:14:07 -1 11:58:58 +2020-08-13 03:14:08 1 12:58:28 +2020-08-13 03:14:09 1000 13:58:28 +2020-08-13 03:14:10 8388607 14:58:28 +2020-08-13 03:14:11 NULL 15:58:28 +DROP TABLE t1; +CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore; +LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +cmediumint ctimestamp ctime +0 2020-08-12 03:14:08 12:58:18 +1 2020-08-13 03:14:08 12:58:28 +1000 2020-08-13 03:14:09 13:58:28 +8388607 2020-08-13 03:14:10 14:58:28 +NULL 2020-08-13 03:14:11 15:58:28 +16777215 2020-08-13 03:14:11 15:58:28 +DROP TABLE t1; +CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT UNSIGNED, ctime TIME) engine=columnstore; +LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +ctimestamp cmediumint ctime +2020-08-12 03:14:08 0 12:58:18 +2020-08-13 03:14:08 1 12:58:28 +2020-08-13 03:14:09 1000 13:58:28 +2020-08-13 03:14:10 8388607 14:58:28 +2020-08-13 03:14:11 NULL 15:58:28 +2020-08-13 03:14:11 16777215 15:58:28 +DROP TABLE t1; +CREATE TABLE t1 (cmediumint MEDIUMINT) engine=columnstore; +LOAD DATA 
INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +cmediumint +-8388608 +-8388607 +-1000 +-1 +1 +1000 +8388607 +NULL +DROP TABLE t1; +CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED) engine=columnstore; +LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +cmediumint +0 +1 +1000 +8388607 +NULL +16777215 +DROP TABLE t1; +CREATE TABLE t1 (ctimestamp TIMESTAMP, ctime TIME) engine=columnstore; +LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +ctimestamp ctime +2020-08-13 03:14:07 11:58:28 +2020-08-14 03:14:07 11:58:38 +2020-08-15 03:14:07 11:58:48 +2020-08-16 03:14:07 11:58:58 +2020-08-13 03:14:08 12:58:28 +2020-08-13 03:14:09 13:58:28 +2020-08-13 03:14:10 14:58:28 +2020-08-13 03:14:11 15:58:28 +DROP TABLE t1; +CREATE TABLE t1 (ctimestamp TIMESTAMP) engine=columnstore; +LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +ctimestamp +2020-08-13 03:14:07 +2020-08-14 03:14:07 +2020-08-15 03:14:07 +2020-08-16 03:14:07 +2020-08-13 03:14:08 +2020-08-13 03:14:09 +2020-08-13 03:14:10 +2020-08-13 03:14:11 +DROP TABLE t1; +CREATE TABLE t1 (ctime TIME) engine=columnstore; +LOAD DATA INFILE "DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|";; +SELECT * FROM t1; +ctime +11:58:28 +11:58:38 +11:58:48 +11:58:58 +12:58:28 +13:58:28 +14:58:28 +15:58:28 +DROP TABLE t1; +DROP DATABASE mcol_5480; diff --git a/mysql-test/columnstore/bugfixes/mcol-5480.test b/mysql-test/columnstore/bugfixes/mcol-5480.test new file mode 100644 index 000000000..48fa79464 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-5480.test @@ -0,0 +1,160 @@ +# +# MCOL-5480 LDI loads values incorrectly for MEDIUMINT, TIME and TIMESTAMP +# when cpimport is used for batch insert +# + +--source ../include/have_columnstore.inc + +let $DATADIR=`SELECT @@datadir`; + +--disable_warnings +DROP DATABASE IF EXISTS mcol_5480; 
+--enable_warnings +CREATE DATABASE mcol_5480; +USE mcol_5480; +SHOW VARIABLES LIKE 'columnstore_use_import_for_batchinsert'; + +# Test for signed medium int with timestamp and time data types. +## Signed medium int as the first column +CREATE TABLE t1 (cmediumint MEDIUMINT, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore; +--exec rm -f $DATADIR/mcol5480.txt +--exec echo "-8388608|2020-08-13 03:14:07|11:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "-8388607|2020-08-14 03:14:07|11:58:38|" >> $DATADIR/mcol5480.txt +--exec echo "-1000|2020-08-15 03:14:07|11:58:48|" >> $DATADIR/mcol5480.txt +--exec echo "-1|2020-08-16 03:14:07|11:58:58|" >> $DATADIR/mcol5480.txt +--exec echo "1|2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "1000|2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "8388607|2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "\N|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +## Signed medium int as the middle column +CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT, ctime TIME) engine=columnstore; +--exec rm -f $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:07|-8388608|11:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-14 03:14:07|-8388607|11:58:38|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-15 03:14:07|-1000|11:58:48|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-16 03:14:07|-1|11:58:58|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:08|1|12:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:09|1000|13:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:10|8388607|14:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:11|\N|15:58:28|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO 
TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +# Test for unsigned medium int with timestamp and time data types. +## Unsigned medium int as the first column +CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED, ctimestamp TIMESTAMP, ctime TIME) engine=columnstore; +--exec rm -f $DATADIR/mcol5480.txt +--exec echo "0|2020-08-12 03:14:08|12:58:18|" >> $DATADIR/mcol5480.txt +--exec echo "1|2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "1000|2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "8388607|2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "\N|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "16777215|2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +## Unsigned medium int as the middle column +CREATE TABLE t1 (ctimestamp TIMESTAMP, cmediumint MEDIUMINT UNSIGNED, ctime TIME) engine=columnstore; +--exec rm -f $DATADIR/mcol5480.txt +--exec echo "2020-08-12 03:14:08|0|12:58:18|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:08|1|12:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:09|1000|13:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:10|8388607|14:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:11|\N|15:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:11|16777215|15:58:28|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +# Test for only signed medium int data type +CREATE TABLE t1 (cmediumint MEDIUMINT) engine=columnstore; +--exec rm -f $DATADIR/mcol5480.txt +--exec echo "-8388608|" >> $DATADIR/mcol5480.txt +--exec echo "-8388607|" >> $DATADIR/mcol5480.txt +--exec echo "-1000|" >> 
$DATADIR/mcol5480.txt +--exec echo "-1|" >> $DATADIR/mcol5480.txt +--exec echo "1|" >> $DATADIR/mcol5480.txt +--exec echo "1000|" >> $DATADIR/mcol5480.txt +--exec echo "8388607|" >> $DATADIR/mcol5480.txt +--exec echo "\N|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +# Test for only unsigned medium int data type +CREATE TABLE t1 (cmediumint MEDIUMINT UNSIGNED) engine=columnstore; +--exec rm -f $DATADIR/mcol5480.txt +--exec echo "0|" >> $DATADIR/mcol5480.txt +--exec echo "1|" >> $DATADIR/mcol5480.txt +--exec echo "1000|" >> $DATADIR/mcol5480.txt +--exec echo "8388607|" >> $DATADIR/mcol5480.txt +--exec echo "\N|" >> $DATADIR/mcol5480.txt +--exec echo "16777215|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +# Test for timestamp and time data types +CREATE TABLE t1 (ctimestamp TIMESTAMP, ctime TIME) engine=columnstore; +--exec rm -f $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:07|11:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-14 03:14:07|11:58:38|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-15 03:14:07|11:58:48|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-16 03:14:07|11:58:58|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:08|12:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:09|13:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:10|14:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:11|15:58:28|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +# Test for only timestamp data type +CREATE TABLE t1 (ctimestamp TIMESTAMP) engine=columnstore; +--exec rm -f 
$DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:07|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-14 03:14:07|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-15 03:14:07|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-16 03:14:07|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:08|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:09|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:10|" >> $DATADIR/mcol5480.txt +--exec echo "2020-08-13 03:14:11|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +# Test for only time data type +CREATE TABLE t1 (ctime TIME) engine=columnstore; +--exec rm -f $DATADIR/mcol5480.txt +--exec echo "11:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "11:58:38|" >> $DATADIR/mcol5480.txt +--exec echo "11:58:48|" >> $DATADIR/mcol5480.txt +--exec echo "11:58:58|" >> $DATADIR/mcol5480.txt +--exec echo "12:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "13:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "14:58:28|" >> $DATADIR/mcol5480.txt +--exec echo "15:58:28|" >> $DATADIR/mcol5480.txt +--replace_result $DATADIR DATADIR +--eval LOAD DATA INFILE "$DATADIR/mcol5480.txt" INTO TABLE t1 FIELDS TERMINATED BY "|"; +SELECT * FROM t1; +DROP TABLE t1; + +--exec rm -f $DATADIR/mcol5480.txt +--disable_warnings +DROP DATABASE mcol_5480; +--enable_warnings