You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
Feature/mcol 4882 cpimport skip rows (#3594)
* feat(cpimport): MCOL-4882 add a parameter to skip header rows * chore(cpimport): MCOL-4882 Use boost::program_options to arguments parsing * feat(cpimport.bin): MCOL-4882 Add missing changes * add test * fix clang * add missing cmdline argument * fix bug * Fix double lines skipping * Fix incorrect --silent (-N) parsing * fix default --max-errors processing * fix overwriting default username * move initialization to members declaration
This commit is contained in:
committed by
GitHub
parent
1c8d5ec04e
commit
78c1b5034d
@ -49,6 +49,7 @@
|
||||
#include "dataconvert.h"
|
||||
#include "mcsconfig.h"
|
||||
#include "mariadb_my_sys.h"
|
||||
#include "we_cmdargs.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace WriteEngine;
|
||||
@ -56,8 +57,8 @@ using namespace execplan;
|
||||
|
||||
namespace
|
||||
{
|
||||
char* pgmName = 0;
|
||||
const std::string IMPORT_PATH_CWD(".");
|
||||
unique_ptr<WECmdArgs> cmdArgs;
|
||||
bool bDebug = false;
|
||||
uint32_t cpimportJobId = 0;
|
||||
|
||||
@ -88,103 +89,6 @@ const char* taskLabels[] = {"",
|
||||
"processing data"};
|
||||
} // namespace
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Print command line usage
|
||||
//------------------------------------------------------------------------------
|
||||
void printUsage()
|
||||
{
|
||||
cout << endl
|
||||
<< "Simple usage using positional parameters "
|
||||
"(no XML job file):"
|
||||
<< endl
|
||||
<< " cpimport.bin dbName tblName [loadFile] [-j jobID] " << endl
|
||||
<< " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl
|
||||
<< " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl
|
||||
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
|
||||
"[-d debugLevel] [-i] "
|
||||
<< endl
|
||||
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
|
||||
<< " [-U username]" << endl
|
||||
<< endl;
|
||||
|
||||
cout << endl
|
||||
<< "Traditional usage without positional parameters "
|
||||
"(XML job file required):"
|
||||
<< endl
|
||||
<< " cpimport.bin -j jobID " << endl
|
||||
<< " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl
|
||||
<< " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl
|
||||
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
|
||||
"[-d debugLevel] [-i] "
|
||||
<< endl
|
||||
<< " [-p path] [-l loadFile]" << endl
|
||||
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
|
||||
<< " [-U username]" << endl
|
||||
<< endl;
|
||||
|
||||
cout << " Positional parameters:" << endl
|
||||
<< " dbName Name of database to load" << endl
|
||||
<< " tblName Name of table to load" << endl
|
||||
<< " loadFile Optional input file name in current directory, " << "unless a fully" << endl
|
||||
<< " qualified name is given. If not given, " << "input read from stdin." << endl
|
||||
<< endl;
|
||||
|
||||
cout << " Options:" << endl
|
||||
<< " -b Number of read buffers" << endl
|
||||
<< " -c Application read buffer size (in bytes)" << endl
|
||||
<< " -d Print different level (1-3) debug message " << endl
|
||||
<< " -e Maximum number of allowable errors per table" << endl
|
||||
<< " -f Data file directory path; " << endl
|
||||
<< " In simple usage:" << endl
|
||||
<< " Default is current working directory." << endl
|
||||
<< " -f option only applies if loadFile is specified." << endl
|
||||
<< " In traditional usage: " << endl
|
||||
<< " Default is <BulkRoot>/data/import." << endl
|
||||
<< " 'STDIN' (all caps) redirects input from stdin." << endl
|
||||
<< " -h Print this message" << endl
|
||||
<< " -i Print extended info to console, else this info only goes "
|
||||
"to log file."
|
||||
<< endl
|
||||
<< " -j Job id. In simple usage, default is the table OID." << endl
|
||||
<< " -l Name of input file to be loaded, relative to -f path," << endl
|
||||
<< " unless a fully qualified input file name is given." << endl
|
||||
<< " -n NullOption (0-treat the string NULL as data (default);" << endl
|
||||
<< " 1-treat the string NULL as a NULL value)" << endl
|
||||
<< " -p Path for XML job description file" << endl
|
||||
<< " -r Number of readers" << endl
|
||||
<< " -s 'c' is the delimiter between column values" << endl
|
||||
<< " -w Number of parsers" << endl
|
||||
<< " -B I/O library read buffer size (in bytes)" << endl
|
||||
<< " -E Enclosed by character if field values are enclosed" << endl
|
||||
<< " -C Escape character used in conjunction with 'enclosed by' " << "character," << endl
|
||||
<< " or as part of NULL escape sequence ('\\N'); default is '\\'" << endl
|
||||
<< " -I Binary import; binaryOpt 1-import NULL values" << endl
|
||||
<< " 2-saturate NULL values" << endl
|
||||
<< " -S Treat string truncations as errors" << endl
|
||||
<< " -D Disable timeout when waiting for table lock" << endl
|
||||
<< " -N Disable console output" << endl
|
||||
<< " -L send *.err and *.bad (reject) files here" << endl
|
||||
<< " -T Timezone used for TIMESTAMP datatype" << endl
|
||||
<< " Possible values: \"SYSTEM\" (default)" << endl
|
||||
<< " : Offset in the form +/-HH:MM" << endl
|
||||
<< endl
|
||||
<< " -y S3 Authentication Key (for S3 imports)" << endl
|
||||
<< " -K S3 Authentication Secret (for S3 imports)" << endl
|
||||
<< " -t S3 Bucket (for S3 imports)" << endl
|
||||
<< " -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl
|
||||
<< " -g S3 Regions (for S3 imports)" << endl
|
||||
<< " -U username of new data files owner. Default is mysql" << endl;
|
||||
|
||||
cout << " Example1:" << endl
|
||||
<< " cpimport.bin -j 1234" << endl
|
||||
<< " Example2: Some column values are enclosed within double quotes." << endl
|
||||
<< " cpimport.bin -j 3000 -E '\"'" << endl
|
||||
<< " Example3: Import a nation table without a Job XML file" << endl
|
||||
<< " cpimport.bin -j 301 tpch nation nation.tbl" << endl;
|
||||
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Signal handler to catch SIGTERM signal to terminate the process
|
||||
//------------------------------------------------------------------------------
|
||||
@ -227,40 +131,6 @@ void handleSigAbrt(int /*i*/)
|
||||
BulkStatus::setJobStatus(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// If error occurs during startup, this function is called to log the specified
|
||||
// message and terminate the process.
|
||||
//------------------------------------------------------------------------------
|
||||
void startupError(const std::string& errMsg, bool showHint)
|
||||
{
|
||||
BRMWrapper::getInstance()->finishCpimportJob(cpimportJobId);
|
||||
// Log to console
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cerr << errMsg << endl;
|
||||
|
||||
if (showHint)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Try '" << pgmName << " -h' for more information.";
|
||||
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cerr << oss.str() << endl;
|
||||
}
|
||||
|
||||
// Log to syslog
|
||||
logging::Message::Args errMsgArgs;
|
||||
errMsgArgs.add(errMsg);
|
||||
SimpleSysLog::instance()->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0087);
|
||||
|
||||
std::string jobIdStr("0");
|
||||
logging::Message::Args endMsgArgs;
|
||||
endMsgArgs.add(jobIdStr);
|
||||
endMsgArgs.add("FAILED");
|
||||
SimpleSysLog::instance()->logMsg(endMsgArgs, logging::LOG_TYPE_INFO, logging::M0082);
|
||||
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Initialize signal handling
|
||||
//------------------------------------------------------------------------------
|
||||
@ -307,540 +177,6 @@ void setupSignalHandlers()
|
||||
sigaction(SIGABRT, &act, 0);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Parse the command line arguments
|
||||
//------------------------------------------------------------------------------
|
||||
void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJobIdStr,
|
||||
std::string& sXMLJobDir, std::string& sModuleIDandPID, bool& bLogInfo2ToConsole,
|
||||
std::string& xmlGenSchema, std::string& xmlGenTable, bool& bValidateColumnList)
|
||||
{
|
||||
std::string importPath;
|
||||
std::string rptFileName;
|
||||
int option;
|
||||
bool bImportFileArg = false;
|
||||
BulkModeType bulkMode = BULK_MODE_LOCAL;
|
||||
std::string jobUUID;
|
||||
|
||||
while ((option = getopt(argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:y:K:t:H:g:U:")) !=
|
||||
EOF)
|
||||
{
|
||||
switch (option)
|
||||
{
|
||||
case 'b': // -b: no. of read buffers
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg, 0, 10);
|
||||
|
||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
||||
{
|
||||
startupError(std::string("Option -b is invalid or out of range."), true);
|
||||
}
|
||||
|
||||
int noOfReadBuffers = lValue;
|
||||
curJob.setReadBufferCount(noOfReadBuffers);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'c': // -c: read buffer size
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg, 0, 10);
|
||||
|
||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
||||
{
|
||||
startupError(std::string("Option -c is invalid or out of range."), true);
|
||||
}
|
||||
|
||||
int readBufferSize = lValue;
|
||||
curJob.setReadBufferSize(readBufferSize);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'd': // -d: debug level
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg, 0, 10);
|
||||
|
||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
||||
{
|
||||
startupError(std::string("Option -d is invalid or out of range."), true);
|
||||
}
|
||||
|
||||
int debugLevel = lValue;
|
||||
|
||||
if (debugLevel > 0 && debugLevel <= 3)
|
||||
{
|
||||
bDebug = true;
|
||||
curJob.setAllDebug((DebugLevel)debugLevel);
|
||||
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cout << "\nDebug level is set to " << debugLevel << endl;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 'e': // -e: max allowed errors
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg, 0, 10);
|
||||
|
||||
if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
|
||||
{
|
||||
startupError(std::string("Option -e is invalid or out of range."), true);
|
||||
}
|
||||
|
||||
int maxErrors = lValue;
|
||||
curJob.setMaxErrorCount(maxErrors);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'f': // -f: import path
|
||||
{
|
||||
importPath = optarg;
|
||||
std::string setAltErrMsg;
|
||||
|
||||
if (curJob.setAlternateImportDir(importPath, setAltErrMsg) != NO_ERROR)
|
||||
startupError(setAltErrMsg, false);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 'h': // -h: help
|
||||
{
|
||||
printUsage();
|
||||
break;
|
||||
}
|
||||
|
||||
case 'i': // -i: log info to console
|
||||
{
|
||||
bLogInfo2ToConsole = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'j': // -j: jobID
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg, 0, 10);
|
||||
|
||||
if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
|
||||
{
|
||||
startupError(std::string("Option -j is invalid or out of range."), true);
|
||||
}
|
||||
|
||||
sJobIdStr = optarg;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'k': // -k: hidden option to keep (not delete)
|
||||
{
|
||||
// bulk rollback meta-data files
|
||||
curJob.setKeepRbMetaFiles(true);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'l': // -l: import load file(s)
|
||||
{
|
||||
bImportFileArg = true;
|
||||
curJob.addToCmdLineImportFileList(std::string(optarg));
|
||||
break;
|
||||
}
|
||||
|
||||
case 'm': // -m: bulk load mode
|
||||
{
|
||||
bulkMode = (BulkModeType)atoi(optarg);
|
||||
|
||||
if ((bulkMode != BULK_MODE_REMOTE_SINGLE_SRC) && (bulkMode != BULK_MODE_REMOTE_MULTIPLE_SRC) &&
|
||||
(bulkMode != BULK_MODE_LOCAL))
|
||||
{
|
||||
startupError(std::string("Invalid bulk mode; can be 1,2, or 3"), true);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 'n': // -n: treat "NULL" as null
|
||||
{
|
||||
int nullStringMode = atoi(optarg);
|
||||
|
||||
if ((nullStringMode != 0) && (nullStringMode != 1))
|
||||
{
|
||||
startupError(std::string("Invalid NULL option; value can be 0 or 1"), true);
|
||||
}
|
||||
|
||||
if (nullStringMode)
|
||||
curJob.setNullStringMode(true);
|
||||
else
|
||||
curJob.setNullStringMode(false);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 'p': // -p: Job XML path
|
||||
{
|
||||
sXMLJobDir = optarg;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'r': // -r: num read threads
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg, 0, 10);
|
||||
|
||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
||||
{
|
||||
startupError(std::string("Option -r is invalid or out of range."), true);
|
||||
}
|
||||
|
||||
int numOfReaders = lValue;
|
||||
#if !defined(__LP64__) && !defined(_MSC_VER)
|
||||
|
||||
if (numOfReaders > 1)
|
||||
{
|
||||
cerr << "Note: resetting number of read threads to maximum" << endl;
|
||||
numOfReaders = 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
curJob.setNoOfReadThreads(numOfReaders);
|
||||
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cout << "number of read threads : " << numOfReaders << endl;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 's': // -s: column delimiter
|
||||
{
|
||||
char delim;
|
||||
|
||||
if (!strcmp(optarg, "\\t"))
|
||||
{
|
||||
delim = '\t';
|
||||
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cout << "Column delimiter : " << "\\t" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
delim = optarg[0];
|
||||
|
||||
if (delim == '\t') // special case to print a <TAB>
|
||||
{
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cout << "Column delimiter : '\\t'" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cout << "Column delimiter : " << delim << endl;
|
||||
}
|
||||
}
|
||||
|
||||
curJob.setColDelimiter(delim);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'u': // -u: import job UUID
|
||||
{
|
||||
jobUUID = optarg;
|
||||
curJob.setJobUUID(jobUUID);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'w': // -w: num parse threads
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg, 0, 10);
|
||||
|
||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
||||
{
|
||||
startupError(std::string("Option -w is invalid or out of range."), true);
|
||||
}
|
||||
|
||||
int numOfParser = lValue;
|
||||
#if !defined(__LP64__) && !defined(_MSC_VER)
|
||||
|
||||
if (numOfParser > 3)
|
||||
{
|
||||
cerr << "Note: resetting number of parse threads to maximum" << endl;
|
||||
numOfParser = 3;
|
||||
}
|
||||
|
||||
#endif
|
||||
curJob.setNoOfParseThreads(numOfParser);
|
||||
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cout << "number of parse threads : " << numOfParser << endl;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 'B': // -B: setvbuf read size
|
||||
{
|
||||
errno = 0;
|
||||
long lValue = strtol(optarg, 0, 10);
|
||||
|
||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
||||
{
|
||||
startupError(std::string("Option -B is invalid or out of range."), true);
|
||||
}
|
||||
|
||||
int vbufReadSize = lValue;
|
||||
curJob.setVbufReadSize(vbufReadSize);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'C': // -C: enclosed escape char
|
||||
{
|
||||
curJob.setEscapeChar(optarg[0]);
|
||||
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cout << "Escape Character : " << optarg[0] << endl;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 'E': // -E: enclosed by char
|
||||
{
|
||||
curJob.setEnclosedByChar(optarg[0]);
|
||||
|
||||
if (!BulkLoad::disableConsoleOutput())
|
||||
cout << "Enclosed by Character : " << optarg[0] << endl;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 'I': // -I: Binary import mode
|
||||
{
|
||||
ImportDataMode importMode = (ImportDataMode)atoi(optarg);
|
||||
|
||||
if ((importMode != IMPORT_DATA_BIN_ACCEPT_NULL) && (importMode != IMPORT_DATA_BIN_SAT_NULL))
|
||||
{
|
||||
startupError(std::string("Invalid binary import option; value can be 1"
|
||||
"(accept NULL values) or 2(saturate NULL values)"),
|
||||
true);
|
||||
}
|
||||
|
||||
curJob.setImportDataMode(importMode);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'L': // -L: Error log directory
|
||||
{
|
||||
curJob.setErrorDir(optarg);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'P': // -P: Calling moduleid
|
||||
{
|
||||
// and PID
|
||||
sModuleIDandPID = optarg;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'R': // -R: distributed mode
|
||||
{
|
||||
// report file
|
||||
rptFileName = optarg;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'S': // -S: Char & VarChar data
|
||||
{
|
||||
// greater than col def
|
||||
curJob.setTruncationAsError(true); // are reported as err
|
||||
break;
|
||||
}
|
||||
|
||||
case 'T':
|
||||
{
|
||||
std::string timeZone = optarg;
|
||||
long offset;
|
||||
|
||||
if (dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
|
||||
{
|
||||
startupError(std::string("Value for option -T is invalid"), true);
|
||||
}
|
||||
|
||||
curJob.setTimeZone(offset);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'X': // Hidden extra options
|
||||
{
|
||||
if (!strcmp(optarg, "AllowMissingColumn"))
|
||||
bValidateColumnList = false;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case 'D': // disable table lock waiting timeout
|
||||
{
|
||||
curJob.disableTimeOut(true);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'N': // silent the output to console
|
||||
{
|
||||
BulkLoad::disableConsoleOutput(true);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'y':
|
||||
{
|
||||
curJob.setS3Key(optarg);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'K':
|
||||
{
|
||||
curJob.setS3Secret(optarg);
|
||||
break;
|
||||
}
|
||||
|
||||
case 't':
|
||||
{
|
||||
curJob.setS3Bucket(optarg);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'H':
|
||||
{
|
||||
curJob.setS3Host(optarg);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'g':
|
||||
{
|
||||
curJob.setS3Region(optarg);
|
||||
break;
|
||||
}
|
||||
|
||||
case 'U':
|
||||
{
|
||||
curJob.setUsername(optarg);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
ostringstream oss;
|
||||
oss << "Unrecognized command line option (" << option << ")";
|
||||
startupError(oss.str(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
curJob.setDefaultJobUUID();
|
||||
|
||||
// Inconsistent to specify -f STDIN with -l importFile
|
||||
if ((bImportFileArg) && (importPath == "STDIN"))
|
||||
{
|
||||
startupError(std::string("-f STDIN is invalid with -l importFile."), true);
|
||||
}
|
||||
|
||||
// If distributed mode, make sure report filename is specified and that we
|
||||
// can create the file using the specified path.
|
||||
if ((bulkMode == BULK_MODE_REMOTE_SINGLE_SRC) || (bulkMode == BULK_MODE_REMOTE_MULTIPLE_SRC))
|
||||
{
|
||||
if (rptFileName.empty())
|
||||
{
|
||||
startupError(std::string("Bulk modes 1 and 2 require -R rptFileName."), true);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ofstream rptFile(rptFileName.c_str());
|
||||
|
||||
if (rptFile.fail())
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Unable to open report file " << rptFileName;
|
||||
startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
rptFile.close();
|
||||
}
|
||||
|
||||
curJob.setBulkLoadMode(bulkMode, rptFileName);
|
||||
}
|
||||
|
||||
// Get positional arguments, User can provide:
|
||||
// 1. no positional parameters
|
||||
// 2. Two positional parameters (schema and table names)
|
||||
// 3. Three positional parameters (schema, table, and import file name)
|
||||
if (optind < argc) // see if db schema name is given
|
||||
{
|
||||
xmlGenSchema = argv[optind]; // 1st pos parm
|
||||
optind++;
|
||||
|
||||
if (optind < argc) // see if table name is given
|
||||
{
|
||||
// Validate invalid options in conjunction with 2-3 positional
|
||||
// parameter mode, which means we are using temp Job XML file.
|
||||
if (bImportFileArg)
|
||||
{
|
||||
startupError(std::string("-l importFile is invalid with positional parameters"), true);
|
||||
}
|
||||
|
||||
if (!sXMLJobDir.empty())
|
||||
{
|
||||
startupError(std::string("-p path is invalid with positional parameters."), true);
|
||||
}
|
||||
|
||||
if (importPath == "STDIN")
|
||||
{
|
||||
startupError(std::string("-f STDIN is invalid with positional parameters."), true);
|
||||
}
|
||||
|
||||
xmlGenTable = argv[optind]; // 2nd pos parm
|
||||
optind++;
|
||||
|
||||
if (optind < argc) // see if input file name is given
|
||||
{
|
||||
// 3rd pos parm
|
||||
curJob.addToCmdLineImportFileList(std::string(argv[optind]));
|
||||
|
||||
// Default to CWD if loadfile name given w/o -f path
|
||||
if (importPath.empty())
|
||||
{
|
||||
std::string setAltErrMsg;
|
||||
|
||||
if (curJob.setAlternateImportDir(std::string("."), setAltErrMsg) != NO_ERROR)
|
||||
startupError(setAltErrMsg, false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Invalid to specify -f if no load file name given
|
||||
if (!importPath.empty())
|
||||
{
|
||||
startupError(std::string("-f requires 3rd positional parameter (load file name)."), true);
|
||||
}
|
||||
|
||||
// Default to STDIN if no import file name given
|
||||
std::string setAltErrMsg;
|
||||
|
||||
if (curJob.setAlternateImportDir(std::string("STDIN"), setAltErrMsg) != NO_ERROR)
|
||||
startupError(setAltErrMsg, false);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
startupError(std::string("No table name specified with schema."), true);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// JobID is a required parameter with no positional parm mode,
|
||||
// because we need the jobid to identify the input job xml file.
|
||||
if (sJobIdStr.empty())
|
||||
{
|
||||
startupError(std::string("No JobID specified."), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Print the path of the input load file(s), and the name of the job xml file.
|
||||
//------------------------------------------------------------------------------
|
||||
@ -857,8 +193,7 @@ void printInputSource(const std::string& alternateImportDir, const std::string&
|
||||
if (alternateImportDir == IMPORT_PATH_CWD)
|
||||
{
|
||||
char cwdBuf[4096];
|
||||
char* bufPtr = &cwdBuf[0];
|
||||
bufPtr = ::getcwd(cwdBuf, sizeof(cwdBuf));
|
||||
char* bufPtr = ::getcwd(cwdBuf, sizeof(cwdBuf));
|
||||
|
||||
if (!(BulkLoad::disableConsoleOutput()))
|
||||
cout << "Input file(s) will be read from : " << bufPtr << endl;
|
||||
@ -900,14 +235,14 @@ void getTableOID(const std::string& xmlGenSchema, const std::string& xmlGenTable
|
||||
std::ostringstream oss;
|
||||
oss << "Unable to set default JobID; " << "Error getting OID for table " << tbl.schema << '.' << tbl.table
|
||||
<< ": " << ex.what();
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Unable to set default JobID; " << "Unknown error getting OID for table " << tbl.schema << '.'
|
||||
<< tbl.table;
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
std::ostringstream oss;
|
||||
@ -950,7 +285,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "cpimport.bin error creating temporary Job XML file name: " << xmlErrMsg;
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
printInputSource(alternateImportDir, sFileName.string(), S3Bucket);
|
||||
@ -970,7 +305,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "No columns for " << xmlGenSchema << '.' << xmlGenTable;
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
}
|
||||
catch (runtime_error& ex)
|
||||
@ -979,7 +314,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
|
||||
oss << "cpimport.bin runtime exception constructing temporary "
|
||||
"Job XML file: "
|
||||
<< ex.what();
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
catch (exception& ex)
|
||||
{
|
||||
@ -987,13 +322,13 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
|
||||
oss << "cpimport.bin exception constructing temporary "
|
||||
"Job XML file: "
|
||||
<< ex.what();
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
startupError(std::string("cpimport.bin "
|
||||
"unknown exception constructing temporary Job XML file"),
|
||||
false);
|
||||
cmdArgs->startupError(std::string("cpimport.bin "
|
||||
"unknown exception constructing temporary Job XML file"),
|
||||
false);
|
||||
}
|
||||
|
||||
genProc.writeXMLFile(sFileName.string());
|
||||
@ -1009,9 +344,9 @@ void verifyNode()
|
||||
// Validate running on a PM
|
||||
if (localModuleType != "pm")
|
||||
{
|
||||
startupError(std::string("Exiting, "
|
||||
"cpimport.bin can only be run on a PM node"),
|
||||
true);
|
||||
cmdArgs->startupError(std::string("Exiting, "
|
||||
"cpimport.bin can only be run on a PM node"),
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1049,34 +384,22 @@ int main(int argc, char** argv)
|
||||
setlocale(LC_NUMERIC, "C");
|
||||
|
||||
// Initialize singleton instance of syslogging
|
||||
if (argc > 0)
|
||||
pgmName = argv[0];
|
||||
|
||||
logging::IDBErrorInfo::instance();
|
||||
SimpleSysLog::instance()->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_BULK));
|
||||
|
||||
// Log job initiation unless user is asking for help
|
||||
cmdArgs = make_unique<WECmdArgs>(argc, argv);
|
||||
std::ostringstream ossArgList;
|
||||
bool bHelpFlag = false;
|
||||
|
||||
for (int m = 1; m < argc; m++)
|
||||
{
|
||||
if (strcmp(argv[m], "-h") == 0)
|
||||
{
|
||||
bHelpFlag = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[m], "\t")) // special case to print a <TAB>
|
||||
ossArgList << "'\\t'" << ' ';
|
||||
else
|
||||
ossArgList << argv[m] << ' ';
|
||||
}
|
||||
|
||||
if (!bHelpFlag)
|
||||
{
|
||||
logInitiateMsg(ossArgList.str().c_str());
|
||||
}
|
||||
logInitiateMsg(ossArgList.str().c_str());
|
||||
|
||||
BulkLoad curJob;
|
||||
string sJobIdStr;
|
||||
@ -1099,8 +422,8 @@ int main(int argc, char** argv)
|
||||
task = TASK_CMD_LINE_PARSING;
|
||||
string xmlGenSchema;
|
||||
string xmlGenTable;
|
||||
parseCmdLineArgs(argc, argv, curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole,
|
||||
xmlGenSchema, xmlGenTable, bValidateColumnList);
|
||||
cmdArgs->fillParams(curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole, xmlGenSchema,
|
||||
xmlGenTable, bValidateColumnList);
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// Save basename portion of program path from argv[0]
|
||||
@ -1154,9 +477,9 @@ int main(int argc, char** argv)
|
||||
|
||||
if (!BRMWrapper::getInstance()->isSystemReady())
|
||||
{
|
||||
startupError(std::string("System is not ready. Verify that ColumnStore is up and ready "
|
||||
"before running cpimport."),
|
||||
false);
|
||||
cmdArgs->startupError(std::string("System is not ready. Verify that ColumnStore is up and ready "
|
||||
"before running cpimport."),
|
||||
false);
|
||||
}
|
||||
|
||||
if (bDebug)
|
||||
@ -1173,7 +496,7 @@ int main(int argc, char** argv)
|
||||
WErrorCodes ec;
|
||||
std::ostringstream oss;
|
||||
oss << ec.errorString(brmReadWriteStatus) << " cpimport.bin is terminating.";
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
if (bDebug)
|
||||
@ -1190,7 +513,7 @@ int main(int argc, char** argv)
|
||||
WErrorCodes ec;
|
||||
std::ostringstream oss;
|
||||
oss << ec.errorString(brmShutdownPending) << " cpimport.bin is terminating.";
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
if (bDebug)
|
||||
@ -1207,7 +530,7 @@ int main(int argc, char** argv)
|
||||
WErrorCodes ec;
|
||||
std::ostringstream oss;
|
||||
oss << ec.errorString(brmSuspendPending) << " cpimport.bin is terminating.";
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
if (bDebug)
|
||||
@ -1268,7 +591,7 @@ int main(int argc, char** argv)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "cpimport.bin error creating Job XML file name: " << xmlErrMsg;
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
printInputSource(curJob.getAlternateImportDir(), sFileName.string(), curJob.getS3Bucket());
|
||||
@ -1300,13 +623,14 @@ int main(int argc, char** argv)
|
||||
}
|
||||
|
||||
rc = BRMWrapper::getInstance()->newCpimportJob(cpimportJobId);
|
||||
// TODO kemm: pass cpimportJobId to WECmdArgs
|
||||
if (rc != NO_ERROR)
|
||||
{
|
||||
WErrorCodes ec;
|
||||
std::ostringstream oss;
|
||||
oss << "Error in creating new cpimport job on Controller node; " << ec.errorString(rc)
|
||||
<< "; cpimport is terminating.";
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
@ -1321,7 +645,7 @@ int main(int argc, char** argv)
|
||||
WErrorCodes ec;
|
||||
std::ostringstream oss;
|
||||
oss << "Error in loading job information; " << ec.errorString(rc) << "; cpimport.bin is terminating.";
|
||||
startupError(oss.str(), false);
|
||||
cmdArgs->startupError(oss.str(), false);
|
||||
}
|
||||
|
||||
if (bDebug)
|
||||
@ -1353,7 +677,7 @@ int main(int argc, char** argv)
|
||||
|
||||
if (task != TASK_PROCESS_DATA)
|
||||
{
|
||||
startupError(exceptionMsg, false);
|
||||
cmdArgs->startupError(exceptionMsg, false);
|
||||
}
|
||||
|
||||
rc = ERR_UNKNOWN;
|
||||
@ -1379,7 +703,7 @@ int main(int argc, char** argv)
|
||||
failMsg += exceptionMsg;
|
||||
}
|
||||
|
||||
endMsgArgs.add(failMsg.c_str());
|
||||
endMsgArgs.add(failMsg);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Reference in New Issue
Block a user