1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-5505 add parquet support for cpimport and add mcs_parquet_ddl and mcs_parquet_gen tools

This commit is contained in:
HanpyBin
2023-08-20 16:01:58 +08:00
committed by Leonid Fedorov
parent 94a680ea60
commit fe597ec78c
25 changed files with 4677 additions and 251 deletions

View File

@ -0,0 +1,82 @@
#
# Check cpimport parquet support for large-volume data files
# Author: Bin Ruan, binruan0227@gmail.com
#
# The test drives cpimport, so it is skipped unless MYSQL_TEST_ROOT is set.
if (!$MYSQL_TEST_ROOT){
skip Should be run by root to execute cpimport;
}
-- source ../include/have_columnstore.inc
# Start from a clean database; warnings are suppressed around the DROP
# because the database may not exist yet.
--disable_warnings
DROP DATABASE IF EXISTS mcol_5505_parquet_large_volume;
--enable_warnings
CREATE DATABASE mcol_5505_parquet_large_volume;
USE mcol_5505_parquet_large_volume;
# Fixed session time zone.
# NOTE(review): presumably keeps the TIMESTAMP(3) output independent of the
# host time zone - confirm.
SET time_zone = '+8:00';
# Create four identically structured tables, one per imported data file
# (t1..t4 receive the 1M, 10M, 50M and 100M row files respectively).
Create TABLE t1(
col1 INT,
col2 TIMESTAMP(3),
col3 CHAR(6),
col4 DECIMAL(38,10),
col5 DOUBLE,
col6 VARCHAR(20)
) ENGINE=Columnstore;
Create TABLE t2(
col1 INT,
col2 TIMESTAMP(3),
col3 CHAR(6),
col4 DECIMAL(38,10),
col5 DOUBLE,
col6 VARCHAR(20)
) ENGINE=Columnstore;
Create TABLE t3(
col1 INT,
col2 TIMESTAMP(3),
col3 CHAR(6),
col4 DECIMAL(38,10),
col5 DOUBLE,
col6 VARCHAR(20)
) ENGINE=Columnstore;
Create TABLE t4(
col1 INT,
col2 TIMESTAMP(3),
col3 CHAR(6),
col4 DECIMAL(38,10),
col5 DOUBLE,
col6 VARCHAR(20)
) ENGINE=Columnstore;
# Generate the parquet input files into the suite's std_data directory.
# NOTE(review): "-l" is expected to produce 1MRows/10MRows/50MRows/100MRows.parquet
# (the files imported and removed below) - confirm against mcs_parquet_gen.
--exec mcs_parquet_gen -l -f $MTR_SUITE_DIR/../std_data
# Import valid data into the matching tables; cpimport's stdout is discarded
# so it does not end up in the recorded result.
--exec $MCS_CPIMPORT mcol_5505_parquet_large_volume t1 $MTR_SUITE_DIR/../std_data/1MRows.parquet >/dev/null
--exec $MCS_CPIMPORT mcol_5505_parquet_large_volume t2 $MTR_SUITE_DIR/../std_data/10MRows.parquet >/dev/null
--exec $MCS_CPIMPORT mcol_5505_parquet_large_volume t3 $MTR_SUITE_DIR/../std_data/50MRows.parquet >/dev/null
--exec $MCS_CPIMPORT mcol_5505_parquet_large_volume t4 $MTR_SUITE_DIR/../std_data/100MRows.parquet >/dev/null
# Verify each table: the first five rows ordered by col1, then the total row count.
SELECT * FROM t1 ORDER BY col1 LIMIT 5;
SELECT COUNT(*) FROM t1;
SELECT * FROM t2 ORDER BY col1 LIMIT 5;
SELECT COUNT(*) FROM t2;
SELECT * FROM t3 ORDER BY col1 LIMIT 5;
SELECT COUNT(*) FROM t3;
SELECT * FROM t4 ORDER BY col1 LIMIT 5;
SELECT COUNT(*) FROM t4;
# Clean up: remove the generated data files and drop the test database.
--exec rm $MTR_SUITE_DIR/../std_data/1MRows.parquet
--exec rm $MTR_SUITE_DIR/../std_data/10MRows.parquet
--exec rm $MTR_SUITE_DIR/../std_data/50MRows.parquet
--exec rm $MTR_SUITE_DIR/../std_data/100MRows.parquet
DROP DATABASE mcol_5505_parquet_large_volume;

View File

@ -0,0 +1,64 @@
#
# Check the parquet support for different data types
# Author: Bin Ruan, binruan0227@gmail.com
#
# The test drives cpimport, so it is skipped unless MYSQL_TEST_ROOT is set.
if (!$MYSQL_TEST_ROOT){
skip Should be run by root to execute cpimport;
}
-- source ../include/have_columnstore.inc
# Start from a clean database; warnings are suppressed around the DROP
# because the database may not exist yet.
--disable_warnings
DROP DATABASE IF EXISTS mcol_5505_cpimport_parquet;
--enable_warnings
CREATE DATABASE mcol_5505_cpimport_parquet;
USE mcol_5505_cpimport_parquet;
# Fixed session time zone.
# NOTE(review): presumably keeps the TIMESTAMP output independent of the
# host time zone - confirm.
SET time_zone = '+8:00';
# Create one table covering the data types under test
Create TABLE t1(
col1 INT,
col2 BIGINT,
col3 FLOAT,
col4 DOUBLE,
col5 TIME(3),
col6 VARCHAR(2),
col7 VARCHAR(5),
col8 VARCHAR(20),
col9 CHAR(2),
col10 CHAR(5),
col11 CHAR(20),
col12 TIMESTAMP(3),
col13 DATE,
col14 DATETIME(3),
col15 SMALLINT,
col16 TINYINT,
col17 DECIMAL(9,3),
col18 INT UNSIGNED,
col19 SMALLINT UNSIGNED,
col20 TINYINT UNSIGNED,
col21 BIGINT UNSIGNED,
col22 BOOLEAN,
col23 DECIMAL(38,10),
col24 TIME(6),
col25 TIMESTAMP(6),
col26 DATETIME(6),
col27 CHAR(4),
col28 CHAR(4)
) ENGINE=Columnstore;
# Generate the parquet input files into the suite's std_data directory.
# NOTE(review): "-a" is expected to produce tests.parquet and nulls.parquet
# (the files imported and removed below) - confirm against mcs_parquet_gen.
--exec mcs_parquet_gen -a -f $MTR_SUITE_DIR/../std_data
# Import both files into the same table; cpimport's stdout is discarded
# so it does not end up in the recorded result.
--exec $MCS_CPIMPORT mcol_5505_cpimport_parquet t1 $MTR_SUITE_DIR/../std_data/tests.parquet >/dev/null
--exec $MCS_CPIMPORT mcol_5505_cpimport_parquet t1 $MTR_SUITE_DIR/../std_data/nulls.parquet >/dev/null
# The query below has no ORDER BY, so the row order is not guaranteed.
# Sort the output lines to keep the recorded result deterministic.
# The .result file must list these rows in sorted order (re-record if needed).
--sorted_result
SELECT * FROM t1;
SELECT COUNT(*) FROM t1;
# Clean up: remove the generated data files and drop the test database.
--exec rm $MTR_SUITE_DIR/../std_data/tests.parquet
--exec rm $MTR_SUITE_DIR/../std_data/nulls.parquet
DROP DATABASE mcol_5505_cpimport_parquet;

View File

@ -0,0 +1,33 @@
#
# Check the mcs_parquet_ddl tool
# Author: Bin Ruan, binruan0227@gmail.com
#
-- source ../include/have_columnstore.inc
# Start from a clean state; warnings are suppressed around the DROP
# because the database may not exist yet.
--disable_warnings
DROP DATABASE IF EXISTS mcol_5505_parquet_ddl;
--enable_warnings
# Output of the tool invocations below is kept out of the recorded result.
--disable_result_log
# Generate the parquet input files, then derive a DDL file from tests.parquet.
# NOTE(review): "-a" is expected to produce tests.parquet and nulls.parquet
# (both are removed during clean up) - confirm against mcs_parquet_gen.
--exec mcs_parquet_gen -a -f $MTR_SUITE_DIR/../std_data
--exec mcs_parquet_ddl $MTR_SUITE_DIR/../std_data/tests.parquet $MTR_SUITE_DIR/../std_data/parquet_test_table.ddl
# Wrong source file type: the next command is expected to exit with status 3
--error 3
--exec mcs_parquet_ddl $MTR_SUITE_DIR/../std_data/int8.par $MTR_SUITE_DIR/../std_data/int8table.ddl
# Wrong number of argument files: the next command is expected to exit with status 4
--error 4
--exec mcs_parquet_ddl $MTR_SUITE_DIR/../std_data/tests.parquet
--enable_result_log
# Create the table by feeding the generated DDL file to the client, then
# record the resulting table definition.
CREATE DATABASE mcol_5505_parquet_ddl;
--exec $MYSQL mcol_5505_parquet_ddl < $MTR_SUITE_DIR/../std_data/parquet_test_table.ddl >/dev/null
USE mcol_5505_parquet_ddl;
SHOW CREATE TABLE parquet_test_table;
# Clean up: remove the generated files and drop the test database.
--exec rm $MTR_SUITE_DIR/../std_data/tests.parquet
--exec rm $MTR_SUITE_DIR/../std_data/nulls.parquet
--exec rm $MTR_SUITE_DIR/../std_data/parquet_test_table.ddl
DROP DATABASE mcol_5505_parquet_ddl;