From 2b4946f53a9242e0df54166a4b46deb237f88d24 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Wed, 23 Mar 2022 12:33:23 +0000 Subject: [PATCH] Revert "MCOL-4576: remove S3 options from cpimport. (#2307)" This reverts commit 14c4840d53097a77cb7e5ce67d8c399f217fe32c. --- writeengine/bulk/cpimport.cpp | 17 +++++++++-------- writeengine/splitter/we_cmdargs.cpp | 11 ++++++++--- writeengine/splitter/we_filereadthread.cpp | 9 --------- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/writeengine/bulk/cpimport.cpp b/writeengine/bulk/cpimport.cpp index 5e934c1a3..4b1fbac8d 100644 --- a/writeengine/bulk/cpimport.cpp +++ b/writeengine/bulk/cpimport.cpp @@ -172,11 +172,12 @@ void printUsage() << " -T Timezone used for TIMESTAMP datatype" << endl << " Possible values: \"SYSTEM\" (default)" << endl << " : Offset in the form +/-HH:MM" << endl -// << " -y S3 Authentication Key (for S3 imports)" << endl -// << " -K S3 Authentication Secret (for S3 imports)" << endl -// << " -t S3 Bucket (for S3 imports)" << endl -// << " -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl -// << " -g S3 Regions (for S3 imports)" << endl + << endl + << " -y S3 Authentication Key (for S3 imports)" << endl + << " -K S3 Authentication Secret (for S3 imports)" << endl + << " -t S3 Bucket (for S3 imports)" << endl + << " -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl + << " -g S3 Regions (for S3 imports)" << endl << " -U username of new data files owner. Default is mysql" << endl; cout << " Example1:" << endl @@ -309,7 +310,7 @@ void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJob BulkModeType bulkMode = BULK_MODE_LOCAL; std::string jobUUID; - while ((option = getopt(argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:U:")) != + while ((option = getopt(argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:y:K:t:H:g:U:")) != EOF) { switch (option) @@ -676,7 +677,7 @@ void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJob break; } -/* case 'y': + case 'y': { curJob.setS3Key(optarg); break; @@ -705,7 +706,7 @@ void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJob curJob.setS3Region(optarg); break; } -*/ + case 'U': { curJob.setUsername(optarg); diff --git a/writeengine/splitter/we_cmdargs.cpp b/writeengine/splitter/we_cmdargs.cpp index 5d6adac51..27efb1d37 100644 --- a/writeengine/splitter/we_cmdargs.cpp +++ b/writeengine/splitter/we_cmdargs.cpp @@ -561,6 +561,11 @@ void WECmdArgs::usage() << "\t-T\tTimezone used for TIMESTAMP datatype.\n" << "\t\tPossible values: \"SYSTEM\" (default)\n" << "\t\t : Offset in the form +/-HH:MM\n" + << "\t-y\tS3 Authentication Key (for S3 imports)\n" + << "\t-K\tS3 Authentication Secret (for S3 imports)\n" + << "\t-t\tS3 Bucket (for S3 imports)\n" + << "\t-H\tS3 Hostname (for S3 imports, Amazon's S3 default)\n" + << "\t-g\tS3 Region (for S3 imports)\n" << "\t-L\tDirectory for the output .err and .bad files.\n" << "\t\tDefault is " << string(MCSLOGDIR); @@ -593,7 +598,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) if (argc > 0) fPrgmName = string(MCSBINDIR) + "/" + "cpimport.bin"; // argv[0] is splitter but we need cpimport - while ((aCh = getopt(argc, argv, "d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:N:U:L:")) != EOF) + while ((aCh = getopt(argc, argv, "d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:Ny:K:t:H:g:U:L:")) != EOF) { switch (aCh) { @@ -901,7 +906,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) fConsoleOutput = false; break; } -/* + case 'y': //-y S3 Key { fS3Key = optarg; @@ -931,7 +936,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) fS3Region = optarg; break; } -*/ + case 'U': //-U username of the files owner { fUsername = optarg; diff --git a/writeengine/splitter/we_filereadthread.cpp b/writeengine/splitter/we_filereadthread.cpp index eb975cd26..a21d5ef1f 100644 --- a/writeengine/splitter/we_filereadthread.cpp +++ b/writeengine/splitter/we_filereadthread.cpp @@ -481,15 +481,6 @@ void WEFileReadThread::openInFile() use ms3 lib to d/l data into mem use boost::iostreams to wrap the mem in a stream interface point infile's stream buffer to it. - MCOL-4576: The options to setup S3 with cpimport have been removed and this - code is unreachable. However we may need to resurrect it at some point in some form. - Performance issues with extremely large data files as well as the fact files larger - than system memory will cause an OOM error. Multipart downloads/uploads need to be - implemented or more likely a different streaming solution developed with external API tools - - MCOL-4576 work around is to use 3rd party CLI tools and pipe data file from S3 bucket - into cpimport stdin. 3rd party tooling for large object downloads will be more efficient. - */ if (fSdh.getDebugLvl())