1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-5013: Load Data from S3 into Columnstore

Introduced a UDF and a stored procedure.
usage:

set columnstore_s3_key='<s3_key>';
set columnstore_s3_secret='<s3_secret>';
set columnstore_s3_region='region';

and then use UDF
select columnstore_dataload("<tablename>", "<filename>", "<bucket>", "<db_name>");
For the UDF, db_name can be omitted; the current connection's database will then be used.

or the stored procedure
call calpontsys.columnstore_load_from_s3("<tablename>", "<filename>", "<bucket>", "<db_name>");
This commit is contained in:
Leonid Fedorov
2022-03-28 14:26:02 +00:00
parent 7d955a0f85
commit f5b2a6885f
9 changed files with 356 additions and 29 deletions

View File

@ -40,7 +40,7 @@ set(cpimport.bin_SRCS cpimport.cpp)
add_executable(cpimport.bin ${cpimport.bin_SRCS})
add_dependencies(cpimport.bin marias3)
target_link_libraries(cpimport.bin ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS} ${S3API_DEPS} we_bulk we_xml)
target_link_libraries(cpimport.bin boost_program_options ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${MARIADB_CLIENT_LIBS} ${ENGINE_WRITE_LIBS} ${S3API_DEPS} we_bulk we_xml)
install(TARGETS cpimport.bin DESTINATION ${ENGINE_BINDIR} COMPONENT columnstore-engine)

View File

@ -172,11 +172,12 @@ void printUsage()
<< " -T Timezone used for TIMESTAMP datatype" << endl
<< " Possible values: \"SYSTEM\" (default)" << endl
<< " : Offset in the form +/-HH:MM" << endl
// << " -y S3 Authentication Key (for S3 imports)" << endl
// << " -K S3 Authentication Secret (for S3 imports)" << endl
// << " -t S3 Bucket (for S3 imports)" << endl
// << " -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl
// << " -g S3 Regions (for S3 imports)" << endl
<< endl
<< " -y S3 Authentication Key (for S3 imports)" << endl
<< " -K S3 Authentication Secret (for S3 imports)" << endl
<< " -t S3 Bucket (for S3 imports)" << endl
<< " -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl
<< " -g S3 Regions (for S3 imports)" << endl
<< " -U username of new data files owner. Default is mysql" << endl;
cout << " Example1:" << endl
@ -309,7 +310,7 @@ void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJob
BulkModeType bulkMode = BULK_MODE_LOCAL;
std::string jobUUID;
while ((option = getopt(argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:U:")) !=
while ((option = getopt(argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:y:K:t:H:g:U:")) !=
EOF)
{
switch (option)
@ -675,7 +676,7 @@ void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJob
BulkLoad::disableConsoleOutput(true);
break;
}
/*
case 'y':
{
curJob.setS3Key(optarg);
@ -705,7 +706,7 @@ void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJob
curJob.setS3Region(optarg);
break;
}
*/
case 'U':
{
curJob.setUsername(optarg);

View File

@ -561,11 +561,11 @@ void WECmdArgs::usage()
<< "\t-T\tTimezone used for TIMESTAMP datatype.\n"
<< "\t\tPossible values: \"SYSTEM\" (default)\n"
<< "\t\t : Offset in the form +/-HH:MM\n"
// << "\t-y\tS3 Authentication Key (for S3 imports)\n"
// << "\t-K\tS3 Authentication Secret (for S3 imports)\n"
// << "\t-t\tS3 Bucket (for S3 imports)\n"
// << "\t-H\tS3 Hostname (for S3 imports, Amazon's S3 default)\n"
// << "\t-g\tS3 Region (for S3 imports)\n"
<< "\t-y\tS3 Authentication Key (for S3 imports)\n"
<< "\t-K\tS3 Authentication Secret (for S3 imports)\n"
<< "\t-t\tS3 Bucket (for S3 imports)\n"
<< "\t-H\tS3 Hostname (for S3 imports, Amazon's S3 default)\n"
<< "\t-g\tS3 Region (for S3 imports)\n"
<< "\t-L\tDirectory for the output .err and .bad files.\n"
<< "\t\tDefault is " << string(MCSLOGDIR);
@ -598,7 +598,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
if (argc > 0)
fPrgmName = string(MCSBINDIR) + "/" + "cpimport.bin"; // argv[0] is splitter but we need cpimport
while ((aCh = getopt(argc, argv, "d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:NU:L:")) != EOF)
while ((aCh = getopt(argc, argv, "d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:Ny:K:t:H:g:U:L:")) != EOF)
{
switch (aCh)
{
@ -906,7 +906,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
fConsoleOutput = false;
break;
}
/*
case 'y': //-y S3 Key
{
fS3Key = optarg;
@ -936,7 +936,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
fS3Region = optarg;
break;
}
*/
case 'U': //-U username of the files owner
{
fUsername = optarg;