You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-29 08:21:15 +03:00
Feature/mcol 4882 cpimport skip rows (#3594)
* feat(cpimport): MCOL-4882 add a parameter to skip header rows * chore(cpimport): MCOL-4882 Use boost::program_options to arguments parsing * feat(cpimport.bin): MCOL-4882 Add missing changes * add test * fix clang * add missing cmdline argument * fix bug * Fix double lines skipping * Fix incorrect --silent (-N) parsing * fix default --max-errors processing * fix overwriting default username * move initialization to members declaration
This commit is contained in:
committed by
GitHub
parent
1c8d5ec04e
commit
78c1b5034d
@ -0,0 +1,22 @@
|
|||||||
|
DROP DATABASE IF EXISTS mcol4882;
|
||||||
|
CREATE DATABASE mcol4882;
|
||||||
|
USE mcol4882;
|
||||||
|
CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore;
|
||||||
|
SELECT * FROM t1;
|
||||||
|
col1 col2
|
||||||
|
1 test1
|
||||||
|
2 test2
|
||||||
|
3 test3
|
||||||
|
TRUNCATE t1;
|
||||||
|
SELECT * FROM t1;
|
||||||
|
col1 col2
|
||||||
|
2 test2
|
||||||
|
3 test3
|
||||||
|
TRUNCATE t1;
|
||||||
|
SELECT * FROM t1;
|
||||||
|
col1 col2
|
||||||
|
3 test3
|
||||||
|
TRUNCATE t1;
|
||||||
|
SELECT * FROM t1;
|
||||||
|
col1 col2
|
||||||
|
DROP DATABASE mcol4882;
|
@ -0,0 +1,43 @@
|
|||||||
|
if (!$MYSQL_TEST_ROOT){
|
||||||
|
skip Should be run by root to execute cpimport;
|
||||||
|
}
|
||||||
|
|
||||||
|
-- source ../include/have_columnstore.inc
|
||||||
|
|
||||||
|
--disable_warnings
|
||||||
|
DROP DATABASE IF EXISTS mcol4882;
|
||||||
|
--enable_warnings
|
||||||
|
|
||||||
|
CREATE DATABASE mcol4882;
|
||||||
|
USE mcol4882;
|
||||||
|
|
||||||
|
CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore;
|
||||||
|
|
||||||
|
--exec printf '1,test1\n2,test2\n3,test3\n' > /tmp/mcol4882.csv
|
||||||
|
|
||||||
|
--disable_result_log
|
||||||
|
--exec $MCS_CPIMPORT -s , mcol4882 t1 /tmp/mcol4882.csv
|
||||||
|
--enable_result_log
|
||||||
|
SELECT * FROM t1;
|
||||||
|
TRUNCATE t1;
|
||||||
|
|
||||||
|
--disable_result_log
|
||||||
|
--exec $MCS_CPIMPORT -s , --headers -- mcol4882 t1 /tmp/mcol4882.csv
|
||||||
|
--enable_result_log
|
||||||
|
SELECT * FROM t1;
|
||||||
|
TRUNCATE t1;
|
||||||
|
|
||||||
|
--disable_result_log
|
||||||
|
--exec $MCS_CPIMPORT -s , --headers 2 mcol4882 t1 /tmp/mcol4882.csv
|
||||||
|
--enable_result_log
|
||||||
|
SELECT * FROM t1;
|
||||||
|
TRUNCATE t1;
|
||||||
|
|
||||||
|
--disable_result_log
|
||||||
|
--exec $MCS_CPIMPORT -s , --headers 5 mcol4882 t1 /tmp/mcol4882.csv
|
||||||
|
--enable_result_log
|
||||||
|
SELECT * FROM t1;
|
||||||
|
|
||||||
|
# Clean UP
|
||||||
|
--exec rm -f /tmp/mcol4882.csv
|
||||||
|
DROP DATABASE mcol4882;
|
@ -9,6 +9,7 @@ set(we_bulk_STAT_SRCS
|
|||||||
we_bulkload.cpp
|
we_bulkload.cpp
|
||||||
we_bulkloadbuffer.cpp
|
we_bulkloadbuffer.cpp
|
||||||
we_bulkstatus.cpp
|
we_bulkstatus.cpp
|
||||||
|
we_cmdargs.cpp
|
||||||
we_colopbulk.cpp
|
we_colopbulk.cpp
|
||||||
we_colbuf.cpp
|
we_colbuf.cpp
|
||||||
we_colbufcompressed.cpp
|
we_colbufcompressed.cpp
|
||||||
@ -28,7 +29,7 @@ set(we_bulk_STAT_SRCS
|
|||||||
add_definitions(-D_FILE_OFFSET_BITS=64)
|
add_definitions(-D_FILE_OFFSET_BITS=64)
|
||||||
|
|
||||||
columnstore_static_library(we_bulk ${we_bulk_STAT_SRCS})
|
columnstore_static_library(we_bulk ${we_bulk_STAT_SRCS})
|
||||||
columnstore_link(we_bulk ${NETSNMP_LIBRARIES} loggingcpp)
|
columnstore_link(we_bulk ${NETSNMP_LIBRARIES} loggingcpp boost_program_options)
|
||||||
|
|
||||||
remove_definitions(-D_FILE_OFFSET_BITS=64)
|
remove_definitions(-D_FILE_OFFSET_BITS=64)
|
||||||
|
|
||||||
|
@ -49,6 +49,7 @@
|
|||||||
#include "dataconvert.h"
|
#include "dataconvert.h"
|
||||||
#include "mcsconfig.h"
|
#include "mcsconfig.h"
|
||||||
#include "mariadb_my_sys.h"
|
#include "mariadb_my_sys.h"
|
||||||
|
#include "we_cmdargs.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace WriteEngine;
|
using namespace WriteEngine;
|
||||||
@ -56,8 +57,8 @@ using namespace execplan;
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
char* pgmName = 0;
|
|
||||||
const std::string IMPORT_PATH_CWD(".");
|
const std::string IMPORT_PATH_CWD(".");
|
||||||
|
unique_ptr<WECmdArgs> cmdArgs;
|
||||||
bool bDebug = false;
|
bool bDebug = false;
|
||||||
uint32_t cpimportJobId = 0;
|
uint32_t cpimportJobId = 0;
|
||||||
|
|
||||||
@ -88,103 +89,6 @@ const char* taskLabels[] = {"",
|
|||||||
"processing data"};
|
"processing data"};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Print command line usage
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
void printUsage()
|
|
||||||
{
|
|
||||||
cout << endl
|
|
||||||
<< "Simple usage using positional parameters "
|
|
||||||
"(no XML job file):"
|
|
||||||
<< endl
|
|
||||||
<< " cpimport.bin dbName tblName [loadFile] [-j jobID] " << endl
|
|
||||||
<< " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl
|
|
||||||
<< " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl
|
|
||||||
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
|
|
||||||
"[-d debugLevel] [-i] "
|
|
||||||
<< endl
|
|
||||||
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
|
|
||||||
<< " [-U username]" << endl
|
|
||||||
<< endl;
|
|
||||||
|
|
||||||
cout << endl
|
|
||||||
<< "Traditional usage without positional parameters "
|
|
||||||
"(XML job file required):"
|
|
||||||
<< endl
|
|
||||||
<< " cpimport.bin -j jobID " << endl
|
|
||||||
<< " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl
|
|
||||||
<< " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl
|
|
||||||
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
|
|
||||||
"[-d debugLevel] [-i] "
|
|
||||||
<< endl
|
|
||||||
<< " [-p path] [-l loadFile]" << endl
|
|
||||||
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
|
|
||||||
<< " [-U username]" << endl
|
|
||||||
<< endl;
|
|
||||||
|
|
||||||
cout << " Positional parameters:" << endl
|
|
||||||
<< " dbName Name of database to load" << endl
|
|
||||||
<< " tblName Name of table to load" << endl
|
|
||||||
<< " loadFile Optional input file name in current directory, " << "unless a fully" << endl
|
|
||||||
<< " qualified name is given. If not given, " << "input read from stdin." << endl
|
|
||||||
<< endl;
|
|
||||||
|
|
||||||
cout << " Options:" << endl
|
|
||||||
<< " -b Number of read buffers" << endl
|
|
||||||
<< " -c Application read buffer size (in bytes)" << endl
|
|
||||||
<< " -d Print different level (1-3) debug message " << endl
|
|
||||||
<< " -e Maximum number of allowable errors per table" << endl
|
|
||||||
<< " -f Data file directory path; " << endl
|
|
||||||
<< " In simple usage:" << endl
|
|
||||||
<< " Default is current working directory." << endl
|
|
||||||
<< " -f option only applies if loadFile is specified." << endl
|
|
||||||
<< " In traditional usage: " << endl
|
|
||||||
<< " Default is <BulkRoot>/data/import." << endl
|
|
||||||
<< " 'STDIN' (all caps) redirects input from stdin." << endl
|
|
||||||
<< " -h Print this message" << endl
|
|
||||||
<< " -i Print extended info to console, else this info only goes "
|
|
||||||
"to log file."
|
|
||||||
<< endl
|
|
||||||
<< " -j Job id. In simple usage, default is the table OID." << endl
|
|
||||||
<< " -l Name of input file to be loaded, relative to -f path," << endl
|
|
||||||
<< " unless a fully qualified input file name is given." << endl
|
|
||||||
<< " -n NullOption (0-treat the string NULL as data (default);" << endl
|
|
||||||
<< " 1-treat the string NULL as a NULL value)" << endl
|
|
||||||
<< " -p Path for XML job description file" << endl
|
|
||||||
<< " -r Number of readers" << endl
|
|
||||||
<< " -s 'c' is the delimiter between column values" << endl
|
|
||||||
<< " -w Number of parsers" << endl
|
|
||||||
<< " -B I/O library read buffer size (in bytes)" << endl
|
|
||||||
<< " -E Enclosed by character if field values are enclosed" << endl
|
|
||||||
<< " -C Escape character used in conjunction with 'enclosed by' " << "character," << endl
|
|
||||||
<< " or as part of NULL escape sequence ('\\N'); default is '\\'" << endl
|
|
||||||
<< " -I Binary import; binaryOpt 1-import NULL values" << endl
|
|
||||||
<< " 2-saturate NULL values" << endl
|
|
||||||
<< " -S Treat string truncations as errors" << endl
|
|
||||||
<< " -D Disable timeout when waiting for table lock" << endl
|
|
||||||
<< " -N Disable console output" << endl
|
|
||||||
<< " -L send *.err and *.bad (reject) files here" << endl
|
|
||||||
<< " -T Timezone used for TIMESTAMP datatype" << endl
|
|
||||||
<< " Possible values: \"SYSTEM\" (default)" << endl
|
|
||||||
<< " : Offset in the form +/-HH:MM" << endl
|
|
||||||
<< endl
|
|
||||||
<< " -y S3 Authentication Key (for S3 imports)" << endl
|
|
||||||
<< " -K S3 Authentication Secret (for S3 imports)" << endl
|
|
||||||
<< " -t S3 Bucket (for S3 imports)" << endl
|
|
||||||
<< " -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl
|
|
||||||
<< " -g S3 Regions (for S3 imports)" << endl
|
|
||||||
<< " -U username of new data files owner. Default is mysql" << endl;
|
|
||||||
|
|
||||||
cout << " Example1:" << endl
|
|
||||||
<< " cpimport.bin -j 1234" << endl
|
|
||||||
<< " Example2: Some column values are enclosed within double quotes." << endl
|
|
||||||
<< " cpimport.bin -j 3000 -E '\"'" << endl
|
|
||||||
<< " Example3: Import a nation table without a Job XML file" << endl
|
|
||||||
<< " cpimport.bin -j 301 tpch nation nation.tbl" << endl;
|
|
||||||
|
|
||||||
exit(EXIT_SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Signal handler to catch SIGTERM signal to terminate the process
|
// Signal handler to catch SIGTERM signal to terminate the process
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -227,40 +131,6 @@ void handleSigAbrt(int /*i*/)
|
|||||||
BulkStatus::setJobStatus(EXIT_FAILURE);
|
BulkStatus::setJobStatus(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// If error occurs during startup, this function is called to log the specified
|
|
||||||
// message and terminate the process.
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
void startupError(const std::string& errMsg, bool showHint)
|
|
||||||
{
|
|
||||||
BRMWrapper::getInstance()->finishCpimportJob(cpimportJobId);
|
|
||||||
// Log to console
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cerr << errMsg << endl;
|
|
||||||
|
|
||||||
if (showHint)
|
|
||||||
{
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << "Try '" << pgmName << " -h' for more information.";
|
|
||||||
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cerr << oss.str() << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Log to syslog
|
|
||||||
logging::Message::Args errMsgArgs;
|
|
||||||
errMsgArgs.add(errMsg);
|
|
||||||
SimpleSysLog::instance()->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0087);
|
|
||||||
|
|
||||||
std::string jobIdStr("0");
|
|
||||||
logging::Message::Args endMsgArgs;
|
|
||||||
endMsgArgs.add(jobIdStr);
|
|
||||||
endMsgArgs.add("FAILED");
|
|
||||||
SimpleSysLog::instance()->logMsg(endMsgArgs, logging::LOG_TYPE_INFO, logging::M0082);
|
|
||||||
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Initialize signal handling
|
// Initialize signal handling
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -307,540 +177,6 @@ void setupSignalHandlers()
|
|||||||
sigaction(SIGABRT, &act, 0);
|
sigaction(SIGABRT, &act, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Parse the command line arguments
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJobIdStr,
|
|
||||||
std::string& sXMLJobDir, std::string& sModuleIDandPID, bool& bLogInfo2ToConsole,
|
|
||||||
std::string& xmlGenSchema, std::string& xmlGenTable, bool& bValidateColumnList)
|
|
||||||
{
|
|
||||||
std::string importPath;
|
|
||||||
std::string rptFileName;
|
|
||||||
int option;
|
|
||||||
bool bImportFileArg = false;
|
|
||||||
BulkModeType bulkMode = BULK_MODE_LOCAL;
|
|
||||||
std::string jobUUID;
|
|
||||||
|
|
||||||
while ((option = getopt(argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:y:K:t:H:g:U:")) !=
|
|
||||||
EOF)
|
|
||||||
{
|
|
||||||
switch (option)
|
|
||||||
{
|
|
||||||
case 'b': // -b: no. of read buffers
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
{
|
|
||||||
startupError(std::string("Option -b is invalid or out of range."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
int noOfReadBuffers = lValue;
|
|
||||||
curJob.setReadBufferCount(noOfReadBuffers);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'c': // -c: read buffer size
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
{
|
|
||||||
startupError(std::string("Option -c is invalid or out of range."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
int readBufferSize = lValue;
|
|
||||||
curJob.setReadBufferSize(readBufferSize);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'd': // -d: debug level
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
{
|
|
||||||
startupError(std::string("Option -d is invalid or out of range."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
int debugLevel = lValue;
|
|
||||||
|
|
||||||
if (debugLevel > 0 && debugLevel <= 3)
|
|
||||||
{
|
|
||||||
bDebug = true;
|
|
||||||
curJob.setAllDebug((DebugLevel)debugLevel);
|
|
||||||
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cout << "\nDebug level is set to " << debugLevel << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'e': // -e: max allowed errors
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
|
|
||||||
{
|
|
||||||
startupError(std::string("Option -e is invalid or out of range."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
int maxErrors = lValue;
|
|
||||||
curJob.setMaxErrorCount(maxErrors);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'f': // -f: import path
|
|
||||||
{
|
|
||||||
importPath = optarg;
|
|
||||||
std::string setAltErrMsg;
|
|
||||||
|
|
||||||
if (curJob.setAlternateImportDir(importPath, setAltErrMsg) != NO_ERROR)
|
|
||||||
startupError(setAltErrMsg, false);
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'h': // -h: help
|
|
||||||
{
|
|
||||||
printUsage();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'i': // -i: log info to console
|
|
||||||
{
|
|
||||||
bLogInfo2ToConsole = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'j': // -j: jobID
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
|
|
||||||
{
|
|
||||||
startupError(std::string("Option -j is invalid or out of range."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
sJobIdStr = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'k': // -k: hidden option to keep (not delete)
|
|
||||||
{
|
|
||||||
// bulk rollback meta-data files
|
|
||||||
curJob.setKeepRbMetaFiles(true);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'l': // -l: import load file(s)
|
|
||||||
{
|
|
||||||
bImportFileArg = true;
|
|
||||||
curJob.addToCmdLineImportFileList(std::string(optarg));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'm': // -m: bulk load mode
|
|
||||||
{
|
|
||||||
bulkMode = (BulkModeType)atoi(optarg);
|
|
||||||
|
|
||||||
if ((bulkMode != BULK_MODE_REMOTE_SINGLE_SRC) && (bulkMode != BULK_MODE_REMOTE_MULTIPLE_SRC) &&
|
|
||||||
(bulkMode != BULK_MODE_LOCAL))
|
|
||||||
{
|
|
||||||
startupError(std::string("Invalid bulk mode; can be 1,2, or 3"), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'n': // -n: treat "NULL" as null
|
|
||||||
{
|
|
||||||
int nullStringMode = atoi(optarg);
|
|
||||||
|
|
||||||
if ((nullStringMode != 0) && (nullStringMode != 1))
|
|
||||||
{
|
|
||||||
startupError(std::string("Invalid NULL option; value can be 0 or 1"), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nullStringMode)
|
|
||||||
curJob.setNullStringMode(true);
|
|
||||||
else
|
|
||||||
curJob.setNullStringMode(false);
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'p': // -p: Job XML path
|
|
||||||
{
|
|
||||||
sXMLJobDir = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'r': // -r: num read threads
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
{
|
|
||||||
startupError(std::string("Option -r is invalid or out of range."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
int numOfReaders = lValue;
|
|
||||||
#if !defined(__LP64__) && !defined(_MSC_VER)
|
|
||||||
|
|
||||||
if (numOfReaders > 1)
|
|
||||||
{
|
|
||||||
cerr << "Note: resetting number of read threads to maximum" << endl;
|
|
||||||
numOfReaders = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
curJob.setNoOfReadThreads(numOfReaders);
|
|
||||||
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cout << "number of read threads : " << numOfReaders << endl;
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 's': // -s: column delimiter
|
|
||||||
{
|
|
||||||
char delim;
|
|
||||||
|
|
||||||
if (!strcmp(optarg, "\\t"))
|
|
||||||
{
|
|
||||||
delim = '\t';
|
|
||||||
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cout << "Column delimiter : " << "\\t" << endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
delim = optarg[0];
|
|
||||||
|
|
||||||
if (delim == '\t') // special case to print a <TAB>
|
|
||||||
{
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cout << "Column delimiter : '\\t'" << endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cout << "Column delimiter : " << delim << endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
curJob.setColDelimiter(delim);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'u': // -u: import job UUID
|
|
||||||
{
|
|
||||||
jobUUID = optarg;
|
|
||||||
curJob.setJobUUID(jobUUID);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'w': // -w: num parse threads
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
{
|
|
||||||
startupError(std::string("Option -w is invalid or out of range."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
int numOfParser = lValue;
|
|
||||||
#if !defined(__LP64__) && !defined(_MSC_VER)
|
|
||||||
|
|
||||||
if (numOfParser > 3)
|
|
||||||
{
|
|
||||||
cerr << "Note: resetting number of parse threads to maximum" << endl;
|
|
||||||
numOfParser = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
curJob.setNoOfParseThreads(numOfParser);
|
|
||||||
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cout << "number of parse threads : " << numOfParser << endl;
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'B': // -B: setvbuf read size
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
{
|
|
||||||
startupError(std::string("Option -B is invalid or out of range."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
int vbufReadSize = lValue;
|
|
||||||
curJob.setVbufReadSize(vbufReadSize);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'C': // -C: enclosed escape char
|
|
||||||
{
|
|
||||||
curJob.setEscapeChar(optarg[0]);
|
|
||||||
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cout << "Escape Character : " << optarg[0] << endl;
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'E': // -E: enclosed by char
|
|
||||||
{
|
|
||||||
curJob.setEnclosedByChar(optarg[0]);
|
|
||||||
|
|
||||||
if (!BulkLoad::disableConsoleOutput())
|
|
||||||
cout << "Enclosed by Character : " << optarg[0] << endl;
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'I': // -I: Binary import mode
|
|
||||||
{
|
|
||||||
ImportDataMode importMode = (ImportDataMode)atoi(optarg);
|
|
||||||
|
|
||||||
if ((importMode != IMPORT_DATA_BIN_ACCEPT_NULL) && (importMode != IMPORT_DATA_BIN_SAT_NULL))
|
|
||||||
{
|
|
||||||
startupError(std::string("Invalid binary import option; value can be 1"
|
|
||||||
"(accept NULL values) or 2(saturate NULL values)"),
|
|
||||||
true);
|
|
||||||
}
|
|
||||||
|
|
||||||
curJob.setImportDataMode(importMode);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'L': // -L: Error log directory
|
|
||||||
{
|
|
||||||
curJob.setErrorDir(optarg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'P': // -P: Calling moduleid
|
|
||||||
{
|
|
||||||
// and PID
|
|
||||||
sModuleIDandPID = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'R': // -R: distributed mode
|
|
||||||
{
|
|
||||||
// report file
|
|
||||||
rptFileName = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'S': // -S: Char & VarChar data
|
|
||||||
{
|
|
||||||
// greater than col def
|
|
||||||
curJob.setTruncationAsError(true); // are reported as err
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'T':
|
|
||||||
{
|
|
||||||
std::string timeZone = optarg;
|
|
||||||
long offset;
|
|
||||||
|
|
||||||
if (dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
|
|
||||||
{
|
|
||||||
startupError(std::string("Value for option -T is invalid"), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
curJob.setTimeZone(offset);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'X': // Hidden extra options
|
|
||||||
{
|
|
||||||
if (!strcmp(optarg, "AllowMissingColumn"))
|
|
||||||
bValidateColumnList = false;
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'D': // disable table lock waiting timeout
|
|
||||||
{
|
|
||||||
curJob.disableTimeOut(true);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'N': // silent the output to console
|
|
||||||
{
|
|
||||||
BulkLoad::disableConsoleOutput(true);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'y':
|
|
||||||
{
|
|
||||||
curJob.setS3Key(optarg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'K':
|
|
||||||
{
|
|
||||||
curJob.setS3Secret(optarg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 't':
|
|
||||||
{
|
|
||||||
curJob.setS3Bucket(optarg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'H':
|
|
||||||
{
|
|
||||||
curJob.setS3Host(optarg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'g':
|
|
||||||
{
|
|
||||||
curJob.setS3Region(optarg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'U':
|
|
||||||
{
|
|
||||||
curJob.setUsername(optarg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
ostringstream oss;
|
|
||||||
oss << "Unrecognized command line option (" << option << ")";
|
|
||||||
startupError(oss.str(), true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
curJob.setDefaultJobUUID();
|
|
||||||
|
|
||||||
// Inconsistent to specify -f STDIN with -l importFile
|
|
||||||
if ((bImportFileArg) && (importPath == "STDIN"))
|
|
||||||
{
|
|
||||||
startupError(std::string("-f STDIN is invalid with -l importFile."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If distributed mode, make sure report filename is specified and that we
|
|
||||||
// can create the file using the specified path.
|
|
||||||
if ((bulkMode == BULK_MODE_REMOTE_SINGLE_SRC) || (bulkMode == BULK_MODE_REMOTE_MULTIPLE_SRC))
|
|
||||||
{
|
|
||||||
if (rptFileName.empty())
|
|
||||||
{
|
|
||||||
startupError(std::string("Bulk modes 1 and 2 require -R rptFileName."), true);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::ofstream rptFile(rptFileName.c_str());
|
|
||||||
|
|
||||||
if (rptFile.fail())
|
|
||||||
{
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << "Unable to open report file " << rptFileName;
|
|
||||||
startupError(oss.str(), false);
|
|
||||||
}
|
|
||||||
|
|
||||||
rptFile.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
curJob.setBulkLoadMode(bulkMode, rptFileName);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get positional arguments, User can provide:
|
|
||||||
// 1. no positional parameters
|
|
||||||
// 2. Two positional parameters (schema and table names)
|
|
||||||
// 3. Three positional parameters (schema, table, and import file name)
|
|
||||||
if (optind < argc) // see if db schema name is given
|
|
||||||
{
|
|
||||||
xmlGenSchema = argv[optind]; // 1st pos parm
|
|
||||||
optind++;
|
|
||||||
|
|
||||||
if (optind < argc) // see if table name is given
|
|
||||||
{
|
|
||||||
// Validate invalid options in conjunction with 2-3 positional
|
|
||||||
// parameter mode, which means we are using temp Job XML file.
|
|
||||||
if (bImportFileArg)
|
|
||||||
{
|
|
||||||
startupError(std::string("-l importFile is invalid with positional parameters"), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!sXMLJobDir.empty())
|
|
||||||
{
|
|
||||||
startupError(std::string("-p path is invalid with positional parameters."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (importPath == "STDIN")
|
|
||||||
{
|
|
||||||
startupError(std::string("-f STDIN is invalid with positional parameters."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
xmlGenTable = argv[optind]; // 2nd pos parm
|
|
||||||
optind++;
|
|
||||||
|
|
||||||
if (optind < argc) // see if input file name is given
|
|
||||||
{
|
|
||||||
// 3rd pos parm
|
|
||||||
curJob.addToCmdLineImportFileList(std::string(argv[optind]));
|
|
||||||
|
|
||||||
// Default to CWD if loadfile name given w/o -f path
|
|
||||||
if (importPath.empty())
|
|
||||||
{
|
|
||||||
std::string setAltErrMsg;
|
|
||||||
|
|
||||||
if (curJob.setAlternateImportDir(std::string("."), setAltErrMsg) != NO_ERROR)
|
|
||||||
startupError(setAltErrMsg, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Invalid to specify -f if no load file name given
|
|
||||||
if (!importPath.empty())
|
|
||||||
{
|
|
||||||
startupError(std::string("-f requires 3rd positional parameter (load file name)."), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Default to STDIN if no import file name given
|
|
||||||
std::string setAltErrMsg;
|
|
||||||
|
|
||||||
if (curJob.setAlternateImportDir(std::string("STDIN"), setAltErrMsg) != NO_ERROR)
|
|
||||||
startupError(setAltErrMsg, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
startupError(std::string("No table name specified with schema."), true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// JobID is a required parameter with no positional parm mode,
|
|
||||||
// because we need the jobid to identify the input job xml file.
|
|
||||||
if (sJobIdStr.empty())
|
|
||||||
{
|
|
||||||
startupError(std::string("No JobID specified."), true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Print the path of the input load file(s), and the name of the job xml file.
|
// Print the path of the input load file(s), and the name of the job xml file.
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -857,8 +193,7 @@ void printInputSource(const std::string& alternateImportDir, const std::string&
|
|||||||
if (alternateImportDir == IMPORT_PATH_CWD)
|
if (alternateImportDir == IMPORT_PATH_CWD)
|
||||||
{
|
{
|
||||||
char cwdBuf[4096];
|
char cwdBuf[4096];
|
||||||
char* bufPtr = &cwdBuf[0];
|
char* bufPtr = ::getcwd(cwdBuf, sizeof(cwdBuf));
|
||||||
bufPtr = ::getcwd(cwdBuf, sizeof(cwdBuf));
|
|
||||||
|
|
||||||
if (!(BulkLoad::disableConsoleOutput()))
|
if (!(BulkLoad::disableConsoleOutput()))
|
||||||
cout << "Input file(s) will be read from : " << bufPtr << endl;
|
cout << "Input file(s) will be read from : " << bufPtr << endl;
|
||||||
@ -900,14 +235,14 @@ void getTableOID(const std::string& xmlGenSchema, const std::string& xmlGenTable
|
|||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "Unable to set default JobID; " << "Error getting OID for table " << tbl.schema << '.' << tbl.table
|
oss << "Unable to set default JobID; " << "Error getting OID for table " << tbl.schema << '.' << tbl.table
|
||||||
<< ": " << ex.what();
|
<< ": " << ex.what();
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
catch (...)
|
catch (...)
|
||||||
{
|
{
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "Unable to set default JobID; " << "Unknown error getting OID for table " << tbl.schema << '.'
|
oss << "Unable to set default JobID; " << "Unknown error getting OID for table " << tbl.schema << '.'
|
||||||
<< tbl.table;
|
<< tbl.table;
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
@ -950,7 +285,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
|
|||||||
{
|
{
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "cpimport.bin error creating temporary Job XML file name: " << xmlErrMsg;
|
oss << "cpimport.bin error creating temporary Job XML file name: " << xmlErrMsg;
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
printInputSource(alternateImportDir, sFileName.string(), S3Bucket);
|
printInputSource(alternateImportDir, sFileName.string(), S3Bucket);
|
||||||
@ -970,7 +305,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
|
|||||||
{
|
{
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "No columns for " << xmlGenSchema << '.' << xmlGenTable;
|
oss << "No columns for " << xmlGenSchema << '.' << xmlGenTable;
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (runtime_error& ex)
|
catch (runtime_error& ex)
|
||||||
@ -979,7 +314,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
|
|||||||
oss << "cpimport.bin runtime exception constructing temporary "
|
oss << "cpimport.bin runtime exception constructing temporary "
|
||||||
"Job XML file: "
|
"Job XML file: "
|
||||||
<< ex.what();
|
<< ex.what();
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
catch (exception& ex)
|
catch (exception& ex)
|
||||||
{
|
{
|
||||||
@ -987,13 +322,13 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
|
|||||||
oss << "cpimport.bin exception constructing temporary "
|
oss << "cpimport.bin exception constructing temporary "
|
||||||
"Job XML file: "
|
"Job XML file: "
|
||||||
<< ex.what();
|
<< ex.what();
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
catch (...)
|
catch (...)
|
||||||
{
|
{
|
||||||
startupError(std::string("cpimport.bin "
|
cmdArgs->startupError(std::string("cpimport.bin "
|
||||||
"unknown exception constructing temporary Job XML file"),
|
"unknown exception constructing temporary Job XML file"),
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
genProc.writeXMLFile(sFileName.string());
|
genProc.writeXMLFile(sFileName.string());
|
||||||
@ -1009,9 +344,9 @@ void verifyNode()
|
|||||||
// Validate running on a PM
|
// Validate running on a PM
|
||||||
if (localModuleType != "pm")
|
if (localModuleType != "pm")
|
||||||
{
|
{
|
||||||
startupError(std::string("Exiting, "
|
cmdArgs->startupError(std::string("Exiting, "
|
||||||
"cpimport.bin can only be run on a PM node"),
|
"cpimport.bin can only be run on a PM node"),
|
||||||
true);
|
true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1049,34 +384,22 @@ int main(int argc, char** argv)
|
|||||||
setlocale(LC_NUMERIC, "C");
|
setlocale(LC_NUMERIC, "C");
|
||||||
|
|
||||||
// Initialize singleton instance of syslogging
|
// Initialize singleton instance of syslogging
|
||||||
if (argc > 0)
|
|
||||||
pgmName = argv[0];
|
|
||||||
|
|
||||||
logging::IDBErrorInfo::instance();
|
logging::IDBErrorInfo::instance();
|
||||||
SimpleSysLog::instance()->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_BULK));
|
SimpleSysLog::instance()->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_BULK));
|
||||||
|
|
||||||
// Log job initiation unless user is asking for help
|
// Log job initiation unless user is asking for help
|
||||||
|
cmdArgs = make_unique<WECmdArgs>(argc, argv);
|
||||||
std::ostringstream ossArgList;
|
std::ostringstream ossArgList;
|
||||||
bool bHelpFlag = false;
|
|
||||||
|
|
||||||
for (int m = 1; m < argc; m++)
|
for (int m = 1; m < argc; m++)
|
||||||
{
|
{
|
||||||
if (strcmp(argv[m], "-h") == 0)
|
|
||||||
{
|
|
||||||
bHelpFlag = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!strcmp(argv[m], "\t")) // special case to print a <TAB>
|
if (!strcmp(argv[m], "\t")) // special case to print a <TAB>
|
||||||
ossArgList << "'\\t'" << ' ';
|
ossArgList << "'\\t'" << ' ';
|
||||||
else
|
else
|
||||||
ossArgList << argv[m] << ' ';
|
ossArgList << argv[m] << ' ';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!bHelpFlag)
|
logInitiateMsg(ossArgList.str().c_str());
|
||||||
{
|
|
||||||
logInitiateMsg(ossArgList.str().c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
BulkLoad curJob;
|
BulkLoad curJob;
|
||||||
string sJobIdStr;
|
string sJobIdStr;
|
||||||
@ -1099,8 +422,8 @@ int main(int argc, char** argv)
|
|||||||
task = TASK_CMD_LINE_PARSING;
|
task = TASK_CMD_LINE_PARSING;
|
||||||
string xmlGenSchema;
|
string xmlGenSchema;
|
||||||
string xmlGenTable;
|
string xmlGenTable;
|
||||||
parseCmdLineArgs(argc, argv, curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole,
|
cmdArgs->fillParams(curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole, xmlGenSchema,
|
||||||
xmlGenSchema, xmlGenTable, bValidateColumnList);
|
xmlGenTable, bValidateColumnList);
|
||||||
|
|
||||||
//--------------------------------------------------------------------------
|
//--------------------------------------------------------------------------
|
||||||
// Save basename portion of program path from argv[0]
|
// Save basename portion of program path from argv[0]
|
||||||
@ -1154,9 +477,9 @@ int main(int argc, char** argv)
|
|||||||
|
|
||||||
if (!BRMWrapper::getInstance()->isSystemReady())
|
if (!BRMWrapper::getInstance()->isSystemReady())
|
||||||
{
|
{
|
||||||
startupError(std::string("System is not ready. Verify that ColumnStore is up and ready "
|
cmdArgs->startupError(std::string("System is not ready. Verify that ColumnStore is up and ready "
|
||||||
"before running cpimport."),
|
"before running cpimport."),
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bDebug)
|
if (bDebug)
|
||||||
@ -1173,7 +496,7 @@ int main(int argc, char** argv)
|
|||||||
WErrorCodes ec;
|
WErrorCodes ec;
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << ec.errorString(brmReadWriteStatus) << " cpimport.bin is terminating.";
|
oss << ec.errorString(brmReadWriteStatus) << " cpimport.bin is terminating.";
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bDebug)
|
if (bDebug)
|
||||||
@ -1190,7 +513,7 @@ int main(int argc, char** argv)
|
|||||||
WErrorCodes ec;
|
WErrorCodes ec;
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << ec.errorString(brmShutdownPending) << " cpimport.bin is terminating.";
|
oss << ec.errorString(brmShutdownPending) << " cpimport.bin is terminating.";
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bDebug)
|
if (bDebug)
|
||||||
@ -1207,7 +530,7 @@ int main(int argc, char** argv)
|
|||||||
WErrorCodes ec;
|
WErrorCodes ec;
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << ec.errorString(brmSuspendPending) << " cpimport.bin is terminating.";
|
oss << ec.errorString(brmSuspendPending) << " cpimport.bin is terminating.";
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bDebug)
|
if (bDebug)
|
||||||
@ -1268,7 +591,7 @@ int main(int argc, char** argv)
|
|||||||
{
|
{
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "cpimport.bin error creating Job XML file name: " << xmlErrMsg;
|
oss << "cpimport.bin error creating Job XML file name: " << xmlErrMsg;
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
printInputSource(curJob.getAlternateImportDir(), sFileName.string(), curJob.getS3Bucket());
|
printInputSource(curJob.getAlternateImportDir(), sFileName.string(), curJob.getS3Bucket());
|
||||||
@ -1300,13 +623,14 @@ int main(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
rc = BRMWrapper::getInstance()->newCpimportJob(cpimportJobId);
|
rc = BRMWrapper::getInstance()->newCpimportJob(cpimportJobId);
|
||||||
|
// TODO kemm: pass cpimportJobId to WECmdArgs
|
||||||
if (rc != NO_ERROR)
|
if (rc != NO_ERROR)
|
||||||
{
|
{
|
||||||
WErrorCodes ec;
|
WErrorCodes ec;
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "Error in creating new cpimport job on Controller node; " << ec.errorString(rc)
|
oss << "Error in creating new cpimport job on Controller node; " << ec.errorString(rc)
|
||||||
<< "; cpimport is terminating.";
|
<< "; cpimport is terminating.";
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
//--------------------------------------------------------------------------
|
//--------------------------------------------------------------------------
|
||||||
@ -1321,7 +645,7 @@ int main(int argc, char** argv)
|
|||||||
WErrorCodes ec;
|
WErrorCodes ec;
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "Error in loading job information; " << ec.errorString(rc) << "; cpimport.bin is terminating.";
|
oss << "Error in loading job information; " << ec.errorString(rc) << "; cpimport.bin is terminating.";
|
||||||
startupError(oss.str(), false);
|
cmdArgs->startupError(oss.str(), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bDebug)
|
if (bDebug)
|
||||||
@ -1353,7 +677,7 @@ int main(int argc, char** argv)
|
|||||||
|
|
||||||
if (task != TASK_PROCESS_DATA)
|
if (task != TASK_PROCESS_DATA)
|
||||||
{
|
{
|
||||||
startupError(exceptionMsg, false);
|
cmdArgs->startupError(exceptionMsg, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = ERR_UNKNOWN;
|
rc = ERR_UNKNOWN;
|
||||||
@ -1379,7 +703,7 @@ int main(int argc, char** argv)
|
|||||||
failMsg += exceptionMsg;
|
failMsg += exceptionMsg;
|
||||||
}
|
}
|
||||||
|
|
||||||
endMsgArgs.add(failMsg.c_str());
|
endMsgArgs.add(failMsg);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -72,7 +72,7 @@ const std::string ERR_LOG_SUFFIX = ".err"; // Job err log file suffix
|
|||||||
namespace WriteEngine
|
namespace WriteEngine
|
||||||
{
|
{
|
||||||
/* static */ std::vector<std::shared_ptr<TableInfo>> BulkLoad::fTableInfo;
|
/* static */ std::vector<std::shared_ptr<TableInfo>> BulkLoad::fTableInfo;
|
||||||
/* static */ boost::mutex* BulkLoad::fDDLMutex = 0;
|
/* static */ boost::mutex* BulkLoad::fDDLMutex = new boost::mutex();
|
||||||
|
|
||||||
/* static */ const std::string BulkLoad::DIR_BULK_JOB("job");
|
/* static */ const std::string BulkLoad::DIR_BULK_JOB("job");
|
||||||
/* static */ const std::string BulkLoad::DIR_BULK_TEMP_JOB("tmpjob");
|
/* static */ const std::string BulkLoad::DIR_BULK_TEMP_JOB("tmpjob");
|
||||||
@ -140,35 +140,8 @@ struct CancellationThread
|
|||||||
// Constructor
|
// Constructor
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
BulkLoad::BulkLoad()
|
BulkLoad::BulkLoad()
|
||||||
: fColOp(new ColumnOpBulk())
|
|
||||||
, fColDelim('\0')
|
|
||||||
, fNoOfBuffers(-1)
|
|
||||||
, fBufferSize(-1)
|
|
||||||
, fFileVbufSize(-1)
|
|
||||||
, fMaxErrors(-1)
|
|
||||||
, fNoOfParseThreads(3)
|
|
||||||
, fNoOfReadThreads(1)
|
|
||||||
, fKeepRbMetaFiles(false)
|
|
||||||
, fNullStringMode(false)
|
|
||||||
, fEnclosedByChar('\0')
|
|
||||||
, // not enabled unless user overrides enclosed by char
|
|
||||||
fEscapeChar('\0')
|
|
||||||
, fTotalTime(0.0)
|
|
||||||
, fBulkMode(BULK_MODE_LOCAL)
|
|
||||||
, fbTruncationAsError(false)
|
|
||||||
, fImportDataMode(IMPORT_DATA_TEXT)
|
|
||||||
, fbContinue(false)
|
|
||||||
, fDisableTimeOut(false)
|
|
||||||
, fUUID(boost::uuids::nil_generator()())
|
|
||||||
, fTimeZone(dataconvert::systemTimeZoneOffset())
|
|
||||||
, fUsername("mysql") // MCOL-4328 default file owner
|
|
||||||
{
|
{
|
||||||
fTableInfo.clear();
|
|
||||||
setDebugLevel(DEBUG_0);
|
setDebugLevel(DEBUG_0);
|
||||||
|
|
||||||
fDDLMutex = new boost::mutex();
|
|
||||||
memset(&fStartTime, 0, sizeof(timeval));
|
|
||||||
memset(&fEndTime, 0, sizeof(timeval));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -540,6 +513,7 @@ int BulkLoad::preProcess(Job& job, int tableNo, std::shared_ptr<TableInfo>& tabl
|
|||||||
tableInfo->setImportDataMode(fImportDataMode);
|
tableInfo->setImportDataMode(fImportDataMode);
|
||||||
tableInfo->setTimeZone(fTimeZone);
|
tableInfo->setTimeZone(fTimeZone);
|
||||||
tableInfo->setJobUUID(fUUID);
|
tableInfo->setJobUUID(fUUID);
|
||||||
|
tableInfo->setSkipRows(fSkipRows);
|
||||||
|
|
||||||
// MCOL-4328 Get username gid and uid if they are set
|
// MCOL-4328 Get username gid and uid if they are set
|
||||||
// We inject uid and gid into TableInfo and All ColumnInfo-s later.
|
// We inject uid and gid into TableInfo and All ColumnInfo-s later.
|
||||||
@ -1002,6 +976,11 @@ int BulkLoad::processJob()
|
|||||||
fEscapeChar = '\\';
|
fEscapeChar = '\\';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (fSkipRows == 0)
|
||||||
|
{
|
||||||
|
fSkipRows = curJob.fSkipRows;
|
||||||
|
}
|
||||||
|
|
||||||
// std::cout << "bulkload::fEnclosedByChar<" << fEnclosedByChar << '>' <<
|
// std::cout << "bulkload::fEnclosedByChar<" << fEnclosedByChar << '>' <<
|
||||||
// std::endl << "bulkload::fEscapeChar<" << fEscapeChar << '>' << std::endl;
|
// std::endl << "bulkload::fEscapeChar<" << fEscapeChar << '>' << std::endl;
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
|
||||||
#include <we_log.h>
|
#include <we_log.h>
|
||||||
#include <we_colop.h>
|
#include <we_colopbulk.h>
|
||||||
#include <we_xmljob.h>
|
#include <we_xmljob.h>
|
||||||
#include <we_convertor.h>
|
#include <we_convertor.h>
|
||||||
#include <writeengine.h>
|
#include <writeengine.h>
|
||||||
@ -48,12 +48,7 @@
|
|||||||
#include <boost/bind.hpp>
|
#include <boost/bind.hpp>
|
||||||
#include <boost/scoped_ptr.hpp>
|
#include <boost/scoped_ptr.hpp>
|
||||||
#include <boost/uuid/uuid.hpp>
|
#include <boost/uuid/uuid.hpp>
|
||||||
|
#include <boost/uuid/nil_generator.hpp>
|
||||||
#if 0 // defined(_MSC_VER) && defined(WE_BULKLOAD_DLLEXPORT)
|
|
||||||
#define EXPORT __declspec(dllexport)
|
|
||||||
#else
|
|
||||||
#define EXPORT
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/** Namespace WriteEngine */
|
/** Namespace WriteEngine */
|
||||||
namespace WriteEngine
|
namespace WriteEngine
|
||||||
@ -65,18 +60,18 @@ class BulkLoad : public FileOp
|
|||||||
/**
|
/**
|
||||||
* @brief BulkLoad constructor
|
* @brief BulkLoad constructor
|
||||||
*/
|
*/
|
||||||
EXPORT BulkLoad();
|
BulkLoad();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief BulkLoad destructor
|
* @brief BulkLoad destructor
|
||||||
*/
|
*/
|
||||||
EXPORT ~BulkLoad() override;
|
~BulkLoad() override;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Load job information
|
* @brief Load job information
|
||||||
*/
|
*/
|
||||||
EXPORT int loadJobInfo(const std::string& fullFileName, bool bUseTempJobFile, int argc, char** argv,
|
int loadJobInfo(const std::string& fullFileName, bool bUseTempJobFile, int argc, char** argv,
|
||||||
bool bLogInfo2ToConsole, bool bValidateColumnList);
|
bool bLogInfo2ToConsole, bool bValidateColumnList);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Pre process jobs to validate and assign values to the job structure
|
* @brief Pre process jobs to validate and assign values to the job structure
|
||||||
@ -91,7 +86,7 @@ class BulkLoad : public FileOp
|
|||||||
/**
|
/**
|
||||||
* @brief Process job
|
* @brief Process job
|
||||||
*/
|
*/
|
||||||
EXPORT int processJob();
|
int processJob();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Set Debug level for this BulkLoad object and any data members
|
* @brief Set Debug level for this BulkLoad object and any data members
|
||||||
@ -126,12 +121,13 @@ class BulkLoad : public FileOp
|
|||||||
return fUUID;
|
return fUUID;
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPORT int setAlternateImportDir(const std::string& loadDir, std::string& errMsg);
|
int setAlternateImportDir(const std::string& loadDir, std::string& errMsg);
|
||||||
void setImportDataMode(ImportDataMode importMode);
|
void setImportDataMode(ImportDataMode importMode);
|
||||||
void setColDelimiter(char delim);
|
void setColDelimiter(char delim);
|
||||||
void setBulkLoadMode(BulkModeType bulkMode, const std::string& rptFileName);
|
void setBulkLoadMode(BulkModeType bulkMode, const std::string& rptFileName);
|
||||||
void setEnclosedByChar(char enChar);
|
void setEnclosedByChar(char enChar);
|
||||||
void setEscapeChar(char esChar);
|
void setEscapeChar(char esChar);
|
||||||
|
void setSkipRows(size_t skipRows);
|
||||||
void setKeepRbMetaFiles(bool keepMeta);
|
void setKeepRbMetaFiles(bool keepMeta);
|
||||||
void setMaxErrorCount(unsigned int maxErrors);
|
void setMaxErrorCount(unsigned int maxErrors);
|
||||||
void setNoOfParseThreads(int parseThreads);
|
void setNoOfParseThreads(int parseThreads);
|
||||||
@ -181,7 +177,7 @@ class BulkLoad : public FileOp
|
|||||||
//--------------------------------------------------------------------------
|
//--------------------------------------------------------------------------
|
||||||
XMLJob fJobInfo; // current job information
|
XMLJob fJobInfo; // current job information
|
||||||
|
|
||||||
boost::scoped_ptr<ColumnOp> fColOp; // column operation
|
boost::scoped_ptr<ColumnOp> fColOp{new ColumnOpBulk()}; // column operation
|
||||||
|
|
||||||
std::string fRootDir; // job process root directory
|
std::string fRootDir; // job process root directory
|
||||||
std::string fJobFileName; // job description file name
|
std::string fJobFileName; // job description file name
|
||||||
@ -189,49 +185,50 @@ class BulkLoad : public FileOp
|
|||||||
Log fLog; // logger
|
Log fLog; // logger
|
||||||
|
|
||||||
int fNumOfParser; // total number of parser
|
int fNumOfParser; // total number of parser
|
||||||
char fColDelim; // delimits col values within a row
|
char fColDelim{0}; // delimits col values within a row
|
||||||
|
|
||||||
int fNoOfBuffers; // Number of read buffers
|
int fNoOfBuffers{-1}; // Number of read buffers
|
||||||
int fBufferSize; // Read buffer size
|
int fBufferSize{-1}; // Read buffer size
|
||||||
int fFileVbufSize; // Internal file system buffer size
|
int fFileVbufSize{-1}; // Internal file system buffer size
|
||||||
long long fMaxErrors; // Max allowable errors per job
|
long long fMaxErrors{-1}; // Max allowable errors per job
|
||||||
std::string fAlternateImportDir; // Alternate bulk import directory
|
std::string fAlternateImportDir; // Alternate bulk import directory
|
||||||
std::string fErrorDir; // Opt. where error records record
|
std::string fErrorDir; // Opt. where error records record
|
||||||
std::string fProcessName; // Application process name
|
std::string fProcessName; // Application process name
|
||||||
static std::vector<std::shared_ptr<TableInfo>> fTableInfo; // Vector of Table information
|
static std::vector<std::shared_ptr<TableInfo>> fTableInfo; // Vector of Table information
|
||||||
int fNoOfParseThreads; // Number of parse threads
|
int fNoOfParseThreads{3}; // Number of parse threads
|
||||||
int fNoOfReadThreads; // Number of read threads
|
int fNoOfReadThreads{1}; // Number of read threads
|
||||||
boost::thread_group fReadThreads; // Read thread group
|
boost::thread_group fReadThreads; // Read thread group
|
||||||
boost::thread_group fParseThreads; // Parse thread group
|
boost::thread_group fParseThreads; // Parse thread group
|
||||||
boost::mutex fReadMutex; // Manages table selection by each
|
boost::mutex fReadMutex; // Manages table selection by each
|
||||||
// read thread
|
// read thread
|
||||||
boost::mutex fParseMutex; // Manages table/buffer/column
|
boost::mutex fParseMutex; // Manages table/buffer/column
|
||||||
// selection by each parsing thread
|
// selection by each parsing thread
|
||||||
BRM::TxnID fTxnID; // TransID acquired from SessionMgr
|
BRM::TxnID fTxnID; // TransID acquired from SessionMgr
|
||||||
bool fKeepRbMetaFiles; // Keep/delete bulkRB metadata files
|
bool fKeepRbMetaFiles{false}; // Keep/delete bulkRB metadata files
|
||||||
bool fNullStringMode; // Treat "NULL" as NULL value
|
bool fNullStringMode{false}; // Treat "NULL" as NULL value
|
||||||
char fEnclosedByChar; // Char used to enclose column value
|
char fEnclosedByChar{0}; // Char used to enclose column value
|
||||||
char fEscapeChar; // Escape char within enclosed value
|
char fEscapeChar{0}; // Escape char within enclosed value
|
||||||
timeval fStartTime; // job start time
|
size_t fSkipRows{0}; // Header rows to skip
|
||||||
timeval fEndTime; // job end time
|
timeval fStartTime{0, 0}; // job start time
|
||||||
double fTotalTime; // elapsed time for current phase
|
timeval fEndTime{0, 0}; // job end time
|
||||||
std::vector<std::string> fCmdLineImportFiles; // Import Files from cmd line
|
double fTotalTime{0.0}; // elapsed time for current phase
|
||||||
BulkModeType fBulkMode; // Distributed bulk mode (1,2, or 3)
|
std::vector<std::string> fCmdLineImportFiles; // Import Files from cmd line
|
||||||
std::string fBRMRptFileName; // Name of distributed mode rpt file
|
BulkModeType fBulkMode{BULK_MODE_LOCAL}; // Distributed bulk mode (1,2, or 3)
|
||||||
bool fbTruncationAsError; // Treat string truncation as error
|
std::string fBRMRptFileName; // Name of distributed mode rpt file
|
||||||
ImportDataMode fImportDataMode; // Importing text or binary data
|
bool fbTruncationAsError{false}; // Treat string truncation as error
|
||||||
bool fbContinue; // true when read and parse r running
|
ImportDataMode fImportDataMode{IMPORT_DATA_TEXT}; // Importing text or binary data
|
||||||
|
bool fbContinue{false}; // true when read and parse r running
|
||||||
//
|
//
|
||||||
static boost::mutex* fDDLMutex; // Insure only 1 DDL op at a time
|
static boost::mutex* fDDLMutex; // Insure only 1 DDL op at a time
|
||||||
|
|
||||||
EXPORT static const std::string DIR_BULK_JOB; // Bulk job directory
|
static const std::string DIR_BULK_JOB; // Bulk job directory
|
||||||
EXPORT static const std::string DIR_BULK_TEMP_JOB; // Dir for tmp job files
|
static const std::string DIR_BULK_TEMP_JOB; // Dir for tmp job files
|
||||||
static const std::string DIR_BULK_IMPORT; // Bulk job import dir
|
static const std::string DIR_BULK_IMPORT; // Bulk job import dir
|
||||||
static const std::string DIR_BULK_LOG; // Bulk job log directory
|
static const std::string DIR_BULK_LOG; // Bulk job log directory
|
||||||
bool fDisableTimeOut; // disable timeout when waiting for table lock
|
bool fDisableTimeOut{false}; // disable timeout when waiting for table lock
|
||||||
boost::uuids::uuid fUUID; // job UUID
|
boost::uuids::uuid fUUID{boost::uuids::nil_generator()()}; // job UUID
|
||||||
static bool fNoConsoleOutput; // disable output to console
|
static bool fNoConsoleOutput; // disable output to console
|
||||||
long fTimeZone; // Timezone offset (in seconds) relative to UTC,
|
long fTimeZone{dataconvert::systemTimeZoneOffset()};// Timezone offset (in seconds) relative to UTC,
|
||||||
// to use for TIMESTAMP data type. For example,
|
// to use for TIMESTAMP data type. For example,
|
||||||
// for EST which is UTC-5:00, offset will be -18000s.
|
// for EST which is UTC-5:00, offset will be -18000s.
|
||||||
std::string fS3Key; // S3 Key
|
std::string fS3Key; // S3 Key
|
||||||
@ -239,7 +236,7 @@ class BulkLoad : public FileOp
|
|||||||
std::string fS3Host; // S3 Host
|
std::string fS3Host; // S3 Host
|
||||||
std::string fS3Bucket; // S3 Bucket
|
std::string fS3Bucket; // S3 Bucket
|
||||||
std::string fS3Region; // S3 Region
|
std::string fS3Region; // S3 Region
|
||||||
std::string fUsername; // data files owner name mysql by default
|
std::string fUsername{"mysql"}; // data files owner name mysql by default
|
||||||
|
|
||||||
//--------------------------------------------------------------------------
|
//--------------------------------------------------------------------------
|
||||||
// Private Functions
|
// Private Functions
|
||||||
@ -417,6 +414,11 @@ inline void BulkLoad::setEscapeChar(char esChar)
|
|||||||
fEscapeChar = esChar;
|
fEscapeChar = esChar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void BulkLoad::setSkipRows(size_t skipRows)
|
||||||
|
{
|
||||||
|
fSkipRows = skipRows;
|
||||||
|
}
|
||||||
|
|
||||||
inline void BulkLoad::setImportDataMode(ImportDataMode importMode)
|
inline void BulkLoad::setImportDataMode(ImportDataMode importMode)
|
||||||
{
|
{
|
||||||
fImportDataMode = importMode;
|
fImportDataMode = importMode;
|
||||||
|
@ -2047,8 +2047,8 @@ int BulkLoadBuffer::parseDictSection(ColumnInfo& columnInfo, int tokenPos, RID s
|
|||||||
}
|
}
|
||||||
|
|
||||||
int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length,
|
int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length,
|
||||||
size_t* parse_length, RID& totalReadRows, RID& correctTotalRows,
|
size_t* parse_length, size_t& skipRows, RID& totalReadRows,
|
||||||
const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
RID& correctTotalRows, const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||||
unsigned int allowedErrCntThisCall)
|
unsigned int allowedErrCntThisCall)
|
||||||
{
|
{
|
||||||
boost::mutex::scoped_lock lock(fSyncUpdatesBLB);
|
boost::mutex::scoped_lock lock(fSyncUpdatesBLB);
|
||||||
@ -2119,7 +2119,7 @@ int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const ch
|
|||||||
|
|
||||||
if (fImportDataMode == IMPORT_DATA_TEXT)
|
if (fImportDataMode == IMPORT_DATA_TEXT)
|
||||||
{
|
{
|
||||||
tokenize(columnsInfo, allowedErrCntThisCall);
|
tokenize(columnsInfo, allowedErrCntThisCall, skipRows);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -2150,8 +2150,9 @@ int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const ch
|
|||||||
// correctTotalRows (input/output) - total valid row count from tokenize()
|
// correctTotalRows (input/output) - total valid row count from tokenize()
|
||||||
// (cumulative)
|
// (cumulative)
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, RID& totalReadRows,
|
int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, size_t& skipRows,
|
||||||
RID& correctTotalRows, const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
RID& totalReadRows, RID& correctTotalRows,
|
||||||
|
const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||||
unsigned int allowedErrCntThisCall)
|
unsigned int allowedErrCntThisCall)
|
||||||
{
|
{
|
||||||
boost::mutex::scoped_lock lock(fSyncUpdatesBLB);
|
boost::mutex::scoped_lock lock(fSyncUpdatesBLB);
|
||||||
@ -2164,10 +2165,10 @@ int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* hand
|
|||||||
{
|
{
|
||||||
memcpy(fData, fOverflowBuf, fOverflowSize);
|
memcpy(fData, fOverflowBuf, fOverflowSize);
|
||||||
|
|
||||||
if (fOverflowBuf != NULL)
|
if (fOverflowBuf != nullptr)
|
||||||
{
|
{
|
||||||
delete[] fOverflowBuf;
|
delete[] fOverflowBuf;
|
||||||
fOverflowBuf = NULL;
|
fOverflowBuf = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2219,7 +2220,7 @@ int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* hand
|
|||||||
|
|
||||||
if (fImportDataMode == IMPORT_DATA_TEXT)
|
if (fImportDataMode == IMPORT_DATA_TEXT)
|
||||||
{
|
{
|
||||||
tokenize(columnsInfo, allowedErrCntThisCall);
|
tokenize(columnsInfo, allowedErrCntThisCall, skipRows);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -2276,7 +2277,7 @@ int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* hand
|
|||||||
// depending on whether the user has enabled the "enclosed by" feature.
|
// depending on whether the user has enabled the "enclosed by" feature.
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
void BulkLoadBuffer::tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
void BulkLoadBuffer::tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||||
unsigned int allowedErrCntThisCall)
|
unsigned int allowedErrCntThisCall, size_t& skipRows)
|
||||||
{
|
{
|
||||||
unsigned offset = 0; // length of field
|
unsigned offset = 0; // length of field
|
||||||
unsigned curCol = 0; // dest db column counter within a row
|
unsigned curCol = 0; // dest db column counter within a row
|
||||||
@ -2334,6 +2335,15 @@ void BulkLoadBuffer::tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
|||||||
while (p < pEndOfData)
|
while (p < pEndOfData)
|
||||||
{
|
{
|
||||||
c = *p;
|
c = *p;
|
||||||
|
if (UNLIKELY(skipRows > 0))
|
||||||
|
{
|
||||||
|
if (c == NEWLINE_CHAR)
|
||||||
|
{
|
||||||
|
--skipRows;
|
||||||
|
}
|
||||||
|
++p;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// If we have stripped "enclosed" characters, then save raw data
|
// If we have stripped "enclosed" characters, then save raw data
|
||||||
if (rawDataRowLength > 0)
|
if (rawDataRowLength > 0)
|
||||||
|
@ -215,7 +215,8 @@ class BulkLoadBuffer
|
|||||||
|
|
||||||
/** @brief tokenize the buffer contents and fill up the token array.
|
/** @brief tokenize the buffer contents and fill up the token array.
|
||||||
*/
|
*/
|
||||||
void tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall);
|
void tokenize(const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall,
|
||||||
|
size_t& skipRows);
|
||||||
|
|
||||||
/** @brief Binary tokenization of the buffer, and fill up the token array.
|
/** @brief Binary tokenization of the buffer, and fill up the token array.
|
||||||
*/
|
*/
|
||||||
@ -273,13 +274,14 @@ class BulkLoadBuffer
|
|||||||
bool tryAndLockColumn(const int& columnId, const int& id);
|
bool tryAndLockColumn(const int& columnId, const int& id);
|
||||||
|
|
||||||
int fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length,
|
int fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length,
|
||||||
size_t* parse_length, RID& totalReadRows, RID& correctTotalRows,
|
size_t* parse_length, size_t& skipRows, RID& totalReadRows, RID& correctTotalRows,
|
||||||
const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall);
|
const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall);
|
||||||
|
|
||||||
/** @brief Read the table data into the buffer
|
/** @brief Read the table data into the buffer
|
||||||
*/
|
*/
|
||||||
int fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, RID& totalRows, RID& correctTotalRows,
|
int fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, size_t& skipRows, RID& totalRows,
|
||||||
const boost::ptr_vector<ColumnInfo>& columnsInfo, unsigned int allowedErrCntThisCall);
|
RID& correctTotalRows, const boost::ptr_vector<ColumnInfo>& columnsInfo,
|
||||||
|
unsigned int allowedErrCntThisCall);
|
||||||
|
|
||||||
/** @brief Get the overflow size
|
/** @brief Get the overflow size
|
||||||
*/
|
*/
|
||||||
|
559
writeengine/bulk/we_cmdargs.cpp
Normal file
559
writeengine/bulk/we_cmdargs.cpp
Normal file
@ -0,0 +1,559 @@
|
|||||||
|
/* Copyright (C) 2014 InfiniDB, Inc.
|
||||||
|
Copyright (C) 2016 MariaDB Corporation
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; version 2 of
|
||||||
|
the License.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
MA 02110-1301, USA. */
|
||||||
|
|
||||||
|
#include "we_simplesyslog.h"
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <sstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <exception>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <cerrno>
|
||||||
|
#include <boost/program_options.hpp>
|
||||||
|
namespace po = boost::program_options;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#include <boost/uuid/uuid.hpp>
|
||||||
|
#include <boost/uuid/uuid_generators.hpp>
|
||||||
|
#include <boost/uuid/uuid_io.hpp>
|
||||||
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
|
#include "dataconvert.h"
|
||||||
|
#include "liboamcpp.h"
|
||||||
|
using namespace oam;
|
||||||
|
|
||||||
|
#include "we_cmdargs.h"
|
||||||
|
|
||||||
|
#include "mcsconfig.h"
|
||||||
|
|
||||||
|
namespace WriteEngine
|
||||||
|
{
|
||||||
|
//----------------------------------------------------------------------
|
||||||
|
//----------------------------------------------------------------------
|
||||||
|
WECmdArgs::WECmdArgs(int argc, char** argv)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
fOptions = std::make_unique<po::options_description>();
|
||||||
|
fVisibleOptions = std::make_unique<po::options_description>();
|
||||||
|
#define DECLARE_INT_ARG(name, stor, min, max, desc) \
|
||||||
|
(name,\
|
||||||
|
po::value<int>(&stor)\
|
||||||
|
->notifier([this](auto&& value) { checkIntArg(name, min, max, value); }),\
|
||||||
|
desc)
|
||||||
|
|
||||||
|
fVisibleOptions->add_options()
|
||||||
|
("help,h", "Print this message.")
|
||||||
|
DECLARE_INT_ARG("read-buffer,b", fIOReadBufSize, 1, INT_MAX, "Number of read buffers.")
|
||||||
|
DECLARE_INT_ARG("read-buffer-size,c", fReadBufSize, 1, INT_MAX,
|
||||||
|
"Application read buffer size (in bytes)")
|
||||||
|
DECLARE_INT_ARG("debug,d", fDebugLvl, 1, 3, "Print different level(1-3) debug message")
|
||||||
|
DECLARE_INT_ARG("max-errors,e", fMaxErrors, 0, INT_MAX,
|
||||||
|
"Maximum number of allowable error per table per PM")
|
||||||
|
("file-path,f", po::value<string>(&fPmFilePath),
|
||||||
|
"Data file directory path. Default is current working directory.\n"
|
||||||
|
"\tIn Mode 1, represents the local input file path.\n"
|
||||||
|
"\tIn Mode 2, represents the PM based input file path.\n"
|
||||||
|
"\tIn Mode 3, represents the local input file path.")
|
||||||
|
DECLARE_INT_ARG("mode,m", fArgMode, 1, 3,
|
||||||
|
"\t1 - rows will be loaded in a distributed manner acress PMs.\n"
|
||||||
|
"\t2 - PM based input files loaded into their respective PM.\n"
|
||||||
|
"\t3 - input files will be loaded on the local PM.")
|
||||||
|
("filename,l", po::value<string>(&fPmFile),
|
||||||
|
"Name of import file to be loaded, relative to 'file-path'")
|
||||||
|
("console-log,i", po::bool_switch(&fConsoleLog),
|
||||||
|
"Print extended info to console in Mode 3.")
|
||||||
|
("job-id,j", po::value<string>(),
|
||||||
|
"Job ID. In simple usage, default is the table OID unless a fully qualified input "
|
||||||
|
"file name is given.")
|
||||||
|
("null-strings,n", po::value(&fNullStrMode)->implicit_value(true),
|
||||||
|
"NullOption (0-treat the string NULL as data (default);\n"
|
||||||
|
"1-treat the string NULL as a NULL value)")
|
||||||
|
("xml-job-path,p", po::value<string>(&fJobPath), "Path for the XML job description file.")
|
||||||
|
DECLARE_INT_ARG("readers,r", fNoOfReadThrds, 1, INT_MAX, "Number of readers.")
|
||||||
|
("separator,s", po::value<string>(), "Delimiter between column values.")
|
||||||
|
DECLARE_INT_ARG("io-buffer-size,B", fSetBufSize, 1, INT_MAX,
|
||||||
|
"I/O library read buffer size (in bytes)")
|
||||||
|
DECLARE_INT_ARG("writers,w", fNoOfWriteThrds, 1, INT_MAX, "Number of parsers.")
|
||||||
|
("enclosed-by,E", po::value<char>(&fEnclosedChar),
|
||||||
|
"Enclosed by character if field values are enclosed.")
|
||||||
|
("escape-char,C", po::value<char>(&fEscChar)->default_value('\\'),
|
||||||
|
"Escape character used in conjunction with 'enclosed-by'"
|
||||||
|
"character, or as a part of NULL escape sequence ('\\N');\n"
|
||||||
|
"default is '\\'")
|
||||||
|
("headers,O",
|
||||||
|
po::value<int>(&fSkipRows)->implicit_value(1)
|
||||||
|
->notifier([this](auto&& value) { checkIntArg("headers,O", 0, INT_MAX, value); }),
|
||||||
|
"Number of header rows to skip.")
|
||||||
|
("binary-mode,I", po::value<int>(),
|
||||||
|
"Import binary data; how to treat NULL values:\n"
|
||||||
|
"\t1 - import NULL values\n"
|
||||||
|
"\t2 - saturate NULL values\n")
|
||||||
|
("calling-module,P", po::value<string>(&fModuleIDandPID), "Calling module ID and PID.")
|
||||||
|
("truncation-as-error,S", po::bool_switch(&fbTruncationAsError),
|
||||||
|
"Treat string truncations as errors.")
|
||||||
|
("tz,T", po::value<string>(),
|
||||||
|
"Timezone used for TIMESTAMP datatype. Possible values:\n"
|
||||||
|
"\t\"SYSTEM\" (default)\n"
|
||||||
|
"\tOffset in the form +/-HH:MM")
|
||||||
|
("disable-tablelock-timeout,D", po::bool_switch(&fDisableTableLockTimeOut),
|
||||||
|
"Disable timeout when waiting for table lock.")
|
||||||
|
("silent,N", po::bool_switch(&fSilent), "Disable console output.")
|
||||||
|
("s3-key,y", po::value<string>(&fS3Key),
|
||||||
|
"S3 Authentication Key (for S3 imports)")
|
||||||
|
("s3-secret,K", po::value<string>(&fS3Secret),
|
||||||
|
"S3 Authentication Secret (for S3 imports)")
|
||||||
|
("s3-bucket,t", po::value<string>(&fS3Bucket),
|
||||||
|
"S3 Bucket (for S3 imports)")
|
||||||
|
("s3-hostname,H", po::value<string>(&fS3Host),
|
||||||
|
"S3 Hostname (for S3 imports, Amazon's S3 default)")
|
||||||
|
("s3-region,g", po::value<string>(&fS3Region),
|
||||||
|
"S3 Region (for S3 imports)")
|
||||||
|
("errors-dir,L", po::value<string>(&fErrorDir)->default_value(MCSLOGDIR),
|
||||||
|
"Directory for the output .err and .bad files")
|
||||||
|
("job-uuid,u", po::value<string>(&fUUID), "import job UUID")
|
||||||
|
("username,U", po::value<string>(&fUsername), "Username of the files owner.")
|
||||||
|
("dbname", po::value<string>(), "Name of the database to load")
|
||||||
|
("table", po::value<string>(), "Name of table to load")
|
||||||
|
("load-file", po::value<string>(),
|
||||||
|
"Optional input file name in current directory, "
|
||||||
|
"unless a fully qualified name is given. If not given, input read from STDIN.");
|
||||||
|
|
||||||
|
po::options_description hidden("Hidden options");
|
||||||
|
hidden.add_options()
|
||||||
|
("keep-rollback-metadata,k", po::bool_switch(&fKeepRollbackMetaData),
|
||||||
|
"Keep rollback metadata.")
|
||||||
|
("report-file,R", po::value<string>(&fReportFilename), "Report file name.")
|
||||||
|
("allow-missing-columns,X", po::value<string>(), "Allow missing columns.");
|
||||||
|
|
||||||
|
fOptions->add(*fVisibleOptions).add(hidden);
|
||||||
|
|
||||||
|
#undef DECLARE_INT_ARG
|
||||||
|
parseCmdLineArgs(argc, argv);
|
||||||
|
}
|
||||||
|
catch (std::exception& exp)
|
||||||
|
{
|
||||||
|
startupError(exp.what(), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
WECmdArgs::~WECmdArgs() = default;
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
|
void WECmdArgs::checkIntArg(const std::string& name, long min, long max, int value) const
|
||||||
|
{
|
||||||
|
if (value < min || value > max)
|
||||||
|
{
|
||||||
|
ostringstream oss;
|
||||||
|
oss << "Argument " << name << " is out of range [" << min << ", " << max << "]";
|
||||||
|
startupError(oss.str(), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
|
void WECmdArgs::usage() const
|
||||||
|
{
|
||||||
|
cout << endl
|
||||||
|
<< "Simple usage using positional parameters "
|
||||||
|
"(no XML job file):"
|
||||||
|
<< endl
|
||||||
|
<< " " << fPrgmName << " dbName tblName [loadFile] [-j jobID] " << endl
|
||||||
|
<< " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl
|
||||||
|
<< " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl
|
||||||
|
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
|
||||||
|
"[-d debugLevel] [-i] "
|
||||||
|
<< endl
|
||||||
|
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
|
||||||
|
<< " [-U username]" << endl
|
||||||
|
<< endl;
|
||||||
|
|
||||||
|
cout << endl
|
||||||
|
<< "Traditional usage without positional parameters "
|
||||||
|
"(XML job file required):"
|
||||||
|
<< endl
|
||||||
|
<< " " << fPrgmName << " -j jobID " << endl
|
||||||
|
<< " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl
|
||||||
|
<< " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl
|
||||||
|
<< " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
|
||||||
|
"[-d debugLevel] [-i] "
|
||||||
|
<< endl
|
||||||
|
<< " [-p path] [-l loadFile]" << endl
|
||||||
|
<< " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
|
||||||
|
<< " [-U username]" << endl
|
||||||
|
<< endl;
|
||||||
|
|
||||||
|
cout << "\n\n" << (*fVisibleOptions) << endl;
|
||||||
|
|
||||||
|
cout << " Example1:" << endl
|
||||||
|
<< " " << fPrgmName << " -j 1234" << endl
|
||||||
|
<< " Example2: Some column values are enclosed within double quotes." << endl
|
||||||
|
<< " " << fPrgmName << " -j 3000 -E '\"'" << endl
|
||||||
|
<< " Example3: Import a nation table without a Job XML file" << endl
|
||||||
|
<< " " << fPrgmName << " -j 301 tpch nation nation.tbl" << endl;
|
||||||
|
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
|
||||||
|
{
|
||||||
|
std::string importPath;
|
||||||
|
|
||||||
|
if (argc > 0)
|
||||||
|
fPrgmName = string(MCSBINDIR) + "/" + "cpimport.bin"; // argv[0] is splitter but we need cpimport
|
||||||
|
|
||||||
|
po::positional_options_description pos_opt;
|
||||||
|
pos_opt.add("dbname", 1)
|
||||||
|
.add("table", 1)
|
||||||
|
.add("load-file", 1);
|
||||||
|
|
||||||
|
po::variables_map vm;
|
||||||
|
po::store(po::command_line_parser(argc, argv).options(*fOptions).positional(pos_opt).run(), vm);
|
||||||
|
po::notify(vm);
|
||||||
|
|
||||||
|
if (vm.contains("help"))
|
||||||
|
{
|
||||||
|
fHelp = true;
|
||||||
|
usage();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (vm.contains("separator"))
|
||||||
|
{
|
||||||
|
auto value = vm["separator"].as<std::string>();
|
||||||
|
if (value == "\\t")
|
||||||
|
{
|
||||||
|
fColDelim = '\t';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fColDelim = value[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (vm.contains("binary-mode"))
|
||||||
|
{
|
||||||
|
int value = vm["binary-mode"].as<int>();
|
||||||
|
if (value == 1)
|
||||||
|
{
|
||||||
|
fImportDataMode = IMPORT_DATA_BIN_ACCEPT_NULL;
|
||||||
|
}
|
||||||
|
else if (value == 2)
|
||||||
|
{
|
||||||
|
fImportDataMode = IMPORT_DATA_BIN_SAT_NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
startupError("Invalid Binary mode; value can be 1 or 2");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (vm.contains("tz"))
|
||||||
|
{
|
||||||
|
auto tz = vm["tz"].as<std::string>();
|
||||||
|
long offset;
|
||||||
|
if (tz != "SYSTEM" && dataconvert::timeZoneToOffset(tz.c_str(), tz.size(), &offset))
|
||||||
|
{
|
||||||
|
startupError("Value for option --tz/-T is invalid");
|
||||||
|
}
|
||||||
|
fTimeZone = tz;
|
||||||
|
}
|
||||||
|
if (vm.contains("job-id"))
|
||||||
|
{
|
||||||
|
errno = 0;
|
||||||
|
string optarg = vm["job-id"].as<std::string>();
|
||||||
|
long lValue = strtol(optarg.c_str(), nullptr, 10);
|
||||||
|
if (errno != 0 || lValue < 0 || lValue > INT_MAX)
|
||||||
|
{
|
||||||
|
startupError("Option --job-id/-j is invalid or outof range");
|
||||||
|
}
|
||||||
|
fJobId = optarg;
|
||||||
|
fOrigJobId = fJobId;
|
||||||
|
|
||||||
|
if (0 == fJobId.length())
|
||||||
|
{
|
||||||
|
startupError("Wrong JobID Value");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (vm.contains("allow-missing-columns"))
|
||||||
|
{
|
||||||
|
if (vm["allow-missing-columns"].as<string>() == "AllowMissingColumn")
|
||||||
|
{
|
||||||
|
fAllowMissingColumn = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fArgMode != -1)
|
||||||
|
fMode = fArgMode; // BUG 4210
|
||||||
|
|
||||||
|
if (2 == fArgMode && fPmFilePath.empty())
|
||||||
|
throw runtime_error("-f option is mandatory with mode 2.");
|
||||||
|
|
||||||
|
if (vm.contains("dbname"))
|
||||||
|
{
|
||||||
|
fSchema = vm["dbname"].as<std::string>();
|
||||||
|
}
|
||||||
|
if (vm.contains("table"))
|
||||||
|
{
|
||||||
|
fTable = vm["table"].as<std::string>();
|
||||||
|
}
|
||||||
|
if (vm.contains("load-file"))
|
||||||
|
{
|
||||||
|
fLocFile = vm["load-file"].as<std::string>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WECmdArgs::fillParams(BulkLoad& curJob, std::string& sJobIdStr, std::string& sXMLJobDir,
|
||||||
|
std::string& sModuleIDandPID, bool& bLogInfo2ToConsole, std::string& xmlGenSchema,
|
||||||
|
std::string& xmlGenTable, bool& bValidateColumnList)
|
||||||
|
{
|
||||||
|
std::string importPath;
|
||||||
|
std::string rptFileName;
|
||||||
|
bool bImportFileArg = false;
|
||||||
|
BulkModeType bulkMode = BULK_MODE_LOCAL;
|
||||||
|
std::string jobUUID;
|
||||||
|
|
||||||
|
curJob.setReadBufferCount(fIOReadBufSize);
|
||||||
|
curJob.setReadBufferSize(fReadBufSize);
|
||||||
|
if (fMaxErrors >= 0)
|
||||||
|
{
|
||||||
|
curJob.setMaxErrorCount(fMaxErrors);
|
||||||
|
}
|
||||||
|
if (!fPmFilePath.empty())
|
||||||
|
{
|
||||||
|
importPath = fPmFilePath;
|
||||||
|
string setAltErrMsg;
|
||||||
|
if (curJob.setAlternateImportDir(importPath, setAltErrMsg) != NO_ERROR)
|
||||||
|
{
|
||||||
|
startupError(setAltErrMsg, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bLogInfo2ToConsole = fConsoleLog;
|
||||||
|
sJobIdStr = fJobId;
|
||||||
|
curJob.setKeepRbMetaFiles(fKeepRollbackMetaData);
|
||||||
|
bulkMode = static_cast<BulkModeType>(fMode);
|
||||||
|
curJob.setNullStringMode(fNullStrMode);
|
||||||
|
sXMLJobDir = fJobPath;
|
||||||
|
curJob.setNoOfReadThreads(fNoOfReadThrds);
|
||||||
|
curJob.setColDelimiter(fColDelim);
|
||||||
|
curJob.setJobUUID(fUUID);
|
||||||
|
curJob.setNoOfParseThreads(fNoOfWriteThrds);
|
||||||
|
curJob.setVbufReadSize(fReadBufSize);
|
||||||
|
if (fEscChar != -1)
|
||||||
|
{
|
||||||
|
curJob.setEscapeChar(fEscChar);
|
||||||
|
}
|
||||||
|
if (fEnclosedChar != -1)
|
||||||
|
{
|
||||||
|
curJob.setEnclosedByChar(fEnclosedChar);
|
||||||
|
}
|
||||||
|
curJob.setImportDataMode(fImportDataMode);
|
||||||
|
curJob.setErrorDir(fErrorDir);
|
||||||
|
sModuleIDandPID = fModuleIDandPID;
|
||||||
|
rptFileName = fReportFilename;
|
||||||
|
curJob.setTruncationAsError(fbTruncationAsError);
|
||||||
|
if (!fTimeZone.empty())
|
||||||
|
{
|
||||||
|
long offset;
|
||||||
|
if (dataconvert::timeZoneToOffset(fTimeZone.c_str(), fTimeZone.size(), &offset))
|
||||||
|
{
|
||||||
|
startupError("Invalid timezone specified");
|
||||||
|
}
|
||||||
|
curJob.setTimeZone(offset);
|
||||||
|
}
|
||||||
|
bValidateColumnList = !fAllowMissingColumn;
|
||||||
|
curJob.disableTimeOut(fDisableTableLockTimeOut);
|
||||||
|
curJob.disableConsoleOutput(fSilent);
|
||||||
|
curJob.setS3Key(fS3Key);
|
||||||
|
curJob.setS3Bucket(fS3Bucket);
|
||||||
|
curJob.setS3Secret(fS3Secret);
|
||||||
|
curJob.setS3Region(fS3Region);
|
||||||
|
curJob.setS3Host(fS3Host);
|
||||||
|
if (!fUsername.empty())
|
||||||
|
{
|
||||||
|
curJob.setUsername(fUsername);
|
||||||
|
}
|
||||||
|
curJob.setSkipRows(fSkipRows);
|
||||||
|
|
||||||
|
curJob.setDefaultJobUUID();
|
||||||
|
|
||||||
|
// Inconsistent to specify -f STDIN with -l importFile
|
||||||
|
if (bImportFileArg && importPath == "STDIN")
|
||||||
|
{
|
||||||
|
startupError(std::string("-f STDIN is invalid with -l importFile."), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If distributed mode, make sure report filename is specified and that we
|
||||||
|
// can create the file using the specified path.
|
||||||
|
if (bulkMode == BULK_MODE_REMOTE_SINGLE_SRC || bulkMode == BULK_MODE_REMOTE_MULTIPLE_SRC)
|
||||||
|
{
|
||||||
|
if (rptFileName.empty())
|
||||||
|
{
|
||||||
|
startupError(std::string("Bulk modes 1 and 2 require -R rptFileName."), true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::ofstream rptFile(rptFileName.c_str());
|
||||||
|
|
||||||
|
if (rptFile.fail())
|
||||||
|
{
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "Unable to open report file " << rptFileName;
|
||||||
|
startupError(oss.str(), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
rptFile.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
curJob.setBulkLoadMode(bulkMode, rptFileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get positional arguments, User can provide:
|
||||||
|
// 1. no positional parameters
|
||||||
|
// 2. Two positional parameters (schema and table names)
|
||||||
|
// 3. Three positional parameters (schema, table, and import file name)
|
||||||
|
if (!fSchema.empty())
|
||||||
|
{
|
||||||
|
xmlGenSchema = fSchema;
|
||||||
|
|
||||||
|
if (!fTable.empty())
|
||||||
|
{
|
||||||
|
// Validate invalid options in conjunction with 2-3 positional
|
||||||
|
// parameter mode, which means we are using temp Job XML file.
|
||||||
|
if (bImportFileArg)
|
||||||
|
{
|
||||||
|
startupError(std::string("-l importFile is invalid with positional parameters"), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!sXMLJobDir.empty())
|
||||||
|
{
|
||||||
|
startupError(std::string("-p path is invalid with positional parameters."), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (importPath == "STDIN")
|
||||||
|
{
|
||||||
|
startupError(std::string("-f STDIN is invalid with positional parameters."), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlGenTable = fTable;
|
||||||
|
|
||||||
|
if (!fLocFile.empty())
|
||||||
|
{
|
||||||
|
// 3rd pos parm
|
||||||
|
curJob.addToCmdLineImportFileList(fLocFile);
|
||||||
|
|
||||||
|
// Default to CWD if loadfile name given w/o -f path
|
||||||
|
if (importPath.empty())
|
||||||
|
{
|
||||||
|
std::string setAltErrMsg;
|
||||||
|
|
||||||
|
if (curJob.setAlternateImportDir(std::string("."), setAltErrMsg) != NO_ERROR)
|
||||||
|
startupError(setAltErrMsg, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Invalid to specify -f if no load file name given
|
||||||
|
if (!importPath.empty())
|
||||||
|
{
|
||||||
|
startupError(std::string("-f requires 3rd positional parameter (load file name)."), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default to STDIN if no import file name given
|
||||||
|
std::string setAltErrMsg;
|
||||||
|
|
||||||
|
if (curJob.setAlternateImportDir(std::string("STDIN"), setAltErrMsg) != NO_ERROR)
|
||||||
|
startupError(setAltErrMsg, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
startupError(std::string("No table name specified with schema."), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// JobID is a required parameter with no positional parm mode,
|
||||||
|
// because we need the jobid to identify the input job xml file.
|
||||||
|
if (sJobIdStr.empty())
|
||||||
|
{
|
||||||
|
startupError(std::string("No JobID specified."), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dump some configuration info
|
||||||
|
if (!fSilent)
|
||||||
|
{
|
||||||
|
if (fDebugLvl != 0)
|
||||||
|
{
|
||||||
|
cout << "Debug level is set to " << fDebugLvl << endl;
|
||||||
|
}
|
||||||
|
if (fNoOfReadThrds != 0)
|
||||||
|
{
|
||||||
|
cout << "number of read threads : " << fNoOfReadThrds << endl;
|
||||||
|
}
|
||||||
|
cout << "Column delimiter : " << (fColDelim == '\t' ? "\\t" : string{fColDelim}) << endl;
|
||||||
|
if (fNoOfWriteThrds != 0)
|
||||||
|
{
|
||||||
|
cout << "number of parse threads : " << fNoOfWriteThrds << endl;
|
||||||
|
}
|
||||||
|
if (fEscChar != 0)
|
||||||
|
{
|
||||||
|
cout << "Escape Character : " << fEscChar << endl;
|
||||||
|
}
|
||||||
|
if (fEnclosedChar != 0)
|
||||||
|
{
|
||||||
|
cout << "Enclosed by Character : " << fEnclosedChar << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void WECmdArgs::startupError(const std::string& errMsg, bool showHint) const
|
||||||
|
{
|
||||||
|
BRMWrapper::getInstance()->finishCpimportJob(fCpimportJobId);
|
||||||
|
// Log to console
|
||||||
|
if (!BulkLoad::disableConsoleOutput())
|
||||||
|
cerr << errMsg << endl;
|
||||||
|
|
||||||
|
if (showHint && !fSilent)
|
||||||
|
{
|
||||||
|
cerr << "Try '" << fPrgmName << " -h' for more information." << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log to syslog
|
||||||
|
logging::Message::Args errMsgArgs;
|
||||||
|
errMsgArgs.add(errMsg);
|
||||||
|
SimpleSysLog::instance()->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0087);
|
||||||
|
|
||||||
|
std::string jobIdStr("0");
|
||||||
|
logging::Message::Args endMsgArgs;
|
||||||
|
endMsgArgs.add(jobIdStr);
|
||||||
|
endMsgArgs.add("FAILED");
|
||||||
|
SimpleSysLog::instance()->logMsg(endMsgArgs, logging::LOG_TYPE_INFO, logging::M0082);
|
||||||
|
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
} /* namespace WriteEngine */
|
130
writeengine/bulk/we_cmdargs.h
Normal file
130
writeengine/bulk/we_cmdargs.h
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
/* Copyright (C) 2014 InfiniDB, Inc.
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU General Public License
|
||||||
|
as published by the Free Software Foundation; version 2 of
|
||||||
|
the License.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||||
|
MA 02110-1301, USA. */
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* $Id$
|
||||||
|
*
|
||||||
|
*******************************************************************************/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <boost/uuid/nil_generator.hpp>
|
||||||
|
#include <boost/uuid/uuid_io.hpp>
|
||||||
|
#include "we_bulkload.h"
|
||||||
|
|
||||||
|
#include "we_type.h"
|
||||||
|
|
||||||
|
namespace boost::program_options
|
||||||
|
{
|
||||||
|
class options_description;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace WriteEngine
|
||||||
|
{
|
||||||
|
class WECmdArgs
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
WECmdArgs(int argc, char** argv);
|
||||||
|
~WECmdArgs();
|
||||||
|
|
||||||
|
using VecInts = std::vector<unsigned int>;
|
||||||
|
using VecArgs = std::vector<std::string>;
|
||||||
|
|
||||||
|
void parseCmdLineArgs(int argc, char** argv);
|
||||||
|
void usage() const;
|
||||||
|
bool checkForCornerCases();
|
||||||
|
|
||||||
|
void startupError(const std::string& errMsg, bool showHint = false) const;
|
||||||
|
void fillParams(BulkLoad& curJob, std::string& sJobIdStr,
|
||||||
|
std::string& sXMLJobDir, std::string& sModuleIDandPID, bool& bLogInfo2ToConsole,
|
||||||
|
std::string& xmlGenSchema, std::string& xmlGenTable, bool& bValidateColumnList);
|
||||||
|
|
||||||
|
void setCpimportJobId(uint32_t cpimportJobId)
|
||||||
|
{
|
||||||
|
fCpimportJobId = cpimportJobId;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void checkIntArg(const std::string& name, long min, long max, int value) const;
|
||||||
|
VecArgs fVecArgs;
|
||||||
|
VecInts fPmVec;
|
||||||
|
|
||||||
|
VecArgs fVecJobFiles; // JobFiles splitter from master JobFile
|
||||||
|
int fMultiTableCount{0}; // MultiTable count
|
||||||
|
VecArgs fColFldsFromJobFile; // List of columns from any job file, that
|
||||||
|
// represent fields in the import data
|
||||||
|
|
||||||
|
std::string fJobId; // JobID
|
||||||
|
std::string fOrigJobId; // Original JobID, in case we have to split it
|
||||||
|
bool fJobLogOnly{false}; // Job number is only for log filename only
|
||||||
|
bool fHelp{false}; // Help mode
|
||||||
|
int fMode{BULK_MODE_LOCAL}; // splitter Mode
|
||||||
|
int fArgMode{-1}; // Argument mode, dep. on this fMode is decided.
|
||||||
|
bool fQuiteMode{true}; // in quite mode or not
|
||||||
|
bool fConsoleLog{false}; // Log everything to console - w.r.t cpimport
|
||||||
|
std::string fPmFile; // FileName at PM
|
||||||
|
std::string fPmFilePath; // Path of input file in PM
|
||||||
|
std::string fLocFile; // Local file name
|
||||||
|
std::string fBrmRptFile; // BRM report file
|
||||||
|
std::string fJobPath; // Path to Job File
|
||||||
|
std::string fTmpFileDir; // Temp file directory.
|
||||||
|
std::string fBulkRoot; // Bulk Root path
|
||||||
|
std::string fJobFile; // Job File Name
|
||||||
|
std::string fS3Key; // S3 key
|
||||||
|
std::string fS3Secret; // S3 Secret
|
||||||
|
std::string fS3Bucket; // S3 Bucket
|
||||||
|
std::string fS3Host; // S3 Host
|
||||||
|
std::string fS3Region; // S3 Region
|
||||||
|
|
||||||
|
int fNoOfReadThrds{1}; // No. of read buffers
|
||||||
|
int fDebugLvl{0}; // Debug level
|
||||||
|
int fMaxErrors{-1}; // Max allowable errors
|
||||||
|
int fReadBufSize{-1}; // Read buffer size
|
||||||
|
int fIOReadBufSize{-1}; // I/O read buffer size
|
||||||
|
int fSetBufSize{0}; // Buff size w/setvbuf
|
||||||
|
char fColDelim{0}; // column delimiter
|
||||||
|
char fEnclosedChar{0}; // enclosed by char
|
||||||
|
char fEscChar{0}; // esc char
|
||||||
|
int fSkipRows{0}; // skip header
|
||||||
|
int fNoOfWriteThrds{3}; // No. of write threads
|
||||||
|
bool fNullStrMode{false}; // set null string mode - treat null as null
|
||||||
|
ImportDataMode fImportDataMode{IMPORT_DATA_TEXT}; // Importing text or binary data
|
||||||
|
std::string fPrgmName; // argv[0]
|
||||||
|
std::string fSchema; // Schema name - positional parmater
|
||||||
|
std::string fTable; // Table name - table name parameter
|
||||||
|
|
||||||
|
bool fBlockMode3{false}; // Do not allow Mode 3
|
||||||
|
bool fbTruncationAsError{false}; // Treat string truncation as error
|
||||||
|
std::string fUUID{boost::uuids::to_string(boost::uuids::nil_generator()())};
|
||||||
|
bool fConsoleOutput{true}; // If false, no output to console.
|
||||||
|
std::string fTimeZone{"SYSTEM"}; // Timezone to use for TIMESTAMP datatype
|
||||||
|
std::string fUsername; // Username of the data files owner
|
||||||
|
std::string fErrorDir{MCSLOGDIR "/cpimport"};
|
||||||
|
bool fDisableTableLockTimeOut{false};
|
||||||
|
bool fSilent{false};
|
||||||
|
std::string fModuleIDandPID;
|
||||||
|
|
||||||
|
std::string fReportFilename;
|
||||||
|
bool fKeepRollbackMetaData{false};
|
||||||
|
bool fAllowMissingColumn{false};
|
||||||
|
|
||||||
|
uint32_t fCpimportJobId{};
|
||||||
|
|
||||||
|
std::unique_ptr<boost::program_options::options_description> fOptions;
|
||||||
|
std::unique_ptr<boost::program_options::options_description> fVisibleOptions;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace WriteEngine
|
@ -145,6 +145,8 @@ TableInfo::TableInfo(Log* logger, const BRM::TxnID txnID, const string& processN
|
|||||||
, fNullStringMode(false)
|
, fNullStringMode(false)
|
||||||
, fEnclosedByChar('\0')
|
, fEnclosedByChar('\0')
|
||||||
, fEscapeChar('\\')
|
, fEscapeChar('\\')
|
||||||
|
, fSkipRows(0)
|
||||||
|
, fSkipRowsCur(0)
|
||||||
, fProcessingBegun(false)
|
, fProcessingBegun(false)
|
||||||
, fBulkMode(BULK_MODE_LOCAL)
|
, fBulkMode(BULK_MODE_LOCAL)
|
||||||
, fBRMReporter(logger, tableName)
|
, fBRMReporter(logger, tableName)
|
||||||
@ -269,7 +271,7 @@ int TableInfo::readTableData()
|
|||||||
int fileCounter = 0;
|
int fileCounter = 0;
|
||||||
unsigned long long qtSentAt = 0;
|
unsigned long long qtSentAt = 0;
|
||||||
|
|
||||||
if (fHandle == NULL)
|
if (fHandle == nullptr)
|
||||||
{
|
{
|
||||||
fFileName = fLoadFileList[fileCounter];
|
fFileName = fLoadFileList[fileCounter];
|
||||||
int rc = openTableFile();
|
int rc = openTableFile();
|
||||||
@ -421,13 +423,14 @@ int TableInfo::readTableData()
|
|||||||
if (fReadFromS3)
|
if (fReadFromS3)
|
||||||
{
|
{
|
||||||
readRc = fBuffers[readBufNo].fillFromMemory(fBuffers[prevReadBuf], fFileBuffer, fS3ReadLength,
|
readRc = fBuffers[readBufNo].fillFromMemory(fBuffers[prevReadBuf], fFileBuffer, fS3ReadLength,
|
||||||
&fS3ParseLength, totalRowsPerInputFile, validTotalRows,
|
&fS3ParseLength, fSkipRowsCur, totalRowsPerInputFile,
|
||||||
fColumns, allowedErrCntThisCall);
|
validTotalRows, fColumns, allowedErrCntThisCall);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
readRc = fBuffers[readBufNo].fillFromFile(fBuffers[prevReadBuf], fHandle, totalRowsPerInputFile,
|
readRc = fBuffers[readBufNo].fillFromFile(fBuffers[prevReadBuf], fHandle, fSkipRowsCur,
|
||||||
validTotalRows, fColumns, allowedErrCntThisCall);
|
totalRowsPerInputFile, validTotalRows, fColumns,
|
||||||
|
allowedErrCntThisCall);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (readRc != NO_ERROR)
|
if (readRc != NO_ERROR)
|
||||||
@ -1208,7 +1211,6 @@ bool TableInfo::bufferReadyForParse(const int& bufferId, bool report) const
|
|||||||
int TableInfo::initializeBuffers(int noOfBuffers, const JobFieldRefList& jobFieldRefList,
|
int TableInfo::initializeBuffers(int noOfBuffers, const JobFieldRefList& jobFieldRefList,
|
||||||
unsigned int fixedBinaryRecLen)
|
unsigned int fixedBinaryRecLen)
|
||||||
{
|
{
|
||||||
|
|
||||||
fReadBufCount = noOfBuffers;
|
fReadBufCount = noOfBuffers;
|
||||||
|
|
||||||
// initialize and populate the buffer vector.
|
// initialize and populate the buffer vector.
|
||||||
@ -1258,7 +1260,7 @@ void TableInfo::addColumn(ColumnInfo* info)
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
int TableInfo::openTableFile()
|
int TableInfo::openTableFile()
|
||||||
{
|
{
|
||||||
if (fHandle != NULL)
|
if (fHandle != nullptr)
|
||||||
return NO_ERROR;
|
return NO_ERROR;
|
||||||
|
|
||||||
if (fReadFromStdin)
|
if (fReadFromStdin)
|
||||||
@ -1322,6 +1324,8 @@ int TableInfo::openTableFile()
|
|||||||
fLog->logMsg(oss.str(), MSGLVL_INFO2);
|
fLog->logMsg(oss.str(), MSGLVL_INFO2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fSkipRowsCur = fSkipRows;
|
||||||
|
|
||||||
return NO_ERROR;
|
return NO_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,8 +148,9 @@ class TableInfo : public WeUIDGID
|
|||||||
size_t fS3ParseLength;
|
size_t fS3ParseLength;
|
||||||
bool fNullStringMode; // Treat "NULL" as a null value
|
bool fNullStringMode; // Treat "NULL" as a null value
|
||||||
char fEnclosedByChar; // Character to enclose col values
|
char fEnclosedByChar; // Character to enclose col values
|
||||||
char fEscapeChar; // Escape character used in conjunc-
|
char fEscapeChar; // Escape character used in conjunction with fEnclosedByChar
|
||||||
// tion with fEnclosedByChar
|
size_t fSkipRows; // Header rows to skip
|
||||||
|
size_t fSkipRowsCur; // Header rows left oto skip in the current file
|
||||||
bool fProcessingBegun; // Has processing begun on this tbl
|
bool fProcessingBegun; // Has processing begun on this tbl
|
||||||
BulkModeType fBulkMode; // Distributed bulk mode (1,2, or 3)
|
BulkModeType fBulkMode; // Distributed bulk mode (1,2, or 3)
|
||||||
std::string fBRMRptFileName; // Name of distributed mode rpt file
|
std::string fBRMRptFileName; // Name of distributed mode rpt file
|
||||||
@ -334,6 +335,10 @@ class TableInfo : public WeUIDGID
|
|||||||
*/
|
*/
|
||||||
void setEscapeChar(char esChar);
|
void setEscapeChar(char esChar);
|
||||||
|
|
||||||
|
/** @brief Set how many header rows should be skipped.
|
||||||
|
*/
|
||||||
|
void setSkipRows(size_t skipRows);
|
||||||
|
|
||||||
/** @brief Has processing begun for this table.
|
/** @brief Has processing begun for this table.
|
||||||
*/
|
*/
|
||||||
bool hasProcessingBegun();
|
bool hasProcessingBegun();
|
||||||
@ -579,6 +584,12 @@ inline void TableInfo::setEscapeChar(char esChar)
|
|||||||
fEscapeChar = esChar;
|
fEscapeChar = esChar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void TableInfo::setSkipRows(size_t skipRows)
|
||||||
|
{
|
||||||
|
fSkipRows = skipRows;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void TableInfo::setFileBufferSize(const int fileBufSize)
|
inline void TableInfo::setFileBufferSize(const int fileBufSize)
|
||||||
{
|
{
|
||||||
fFileBufSize = fileBufSize;
|
fFileBufSize = fileBufSize;
|
||||||
|
@ -239,12 +239,13 @@ bool WEDataLoader::setupCpimport() // fork the cpimport
|
|||||||
std::string aCmdLine = fCmdLineStr;
|
std::string aCmdLine = fCmdLineStr;
|
||||||
std::istringstream ss(aCmdLine);
|
std::istringstream ss(aCmdLine);
|
||||||
std::string arg;
|
std::string arg;
|
||||||
std::vector<std::string> v2(20, "");
|
std::vector<std::string> v2;
|
||||||
unsigned int i = 0;
|
unsigned int i = 0;
|
||||||
|
|
||||||
while (ss >> arg)
|
while (ss >> arg)
|
||||||
{
|
{
|
||||||
v2[i++] = arg;
|
v2.push_back(arg);
|
||||||
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int j = 0; j < i; ++j)
|
for (unsigned int j = 0; j < i; ++j)
|
||||||
|
@ -525,6 +525,7 @@ struct Job /** @brief Job Structure */
|
|||||||
int numberOfReadBuffers;
|
int numberOfReadBuffers;
|
||||||
unsigned readBufferSize;
|
unsigned readBufferSize;
|
||||||
unsigned writeBufferSize;
|
unsigned writeBufferSize;
|
||||||
|
int fSkipRows;
|
||||||
Job()
|
Job()
|
||||||
: id(0)
|
: id(0)
|
||||||
, fDelimiter('|')
|
, fDelimiter('|')
|
||||||
@ -533,6 +534,7 @@ struct Job /** @brief Job Structure */
|
|||||||
, numberOfReadBuffers(0)
|
, numberOfReadBuffers(0)
|
||||||
, readBufferSize(0)
|
, readBufferSize(0)
|
||||||
, writeBufferSize(0)
|
, writeBufferSize(0)
|
||||||
|
, fSkipRows(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -26,4 +26,5 @@ columnstore_link(
|
|||||||
batchloader
|
batchloader
|
||||||
threadpool
|
threadpool
|
||||||
marias3
|
marias3
|
||||||
|
boost_program_options
|
||||||
)
|
)
|
||||||
|
@ -29,6 +29,8 @@
|
|||||||
#include <exception>
|
#include <exception>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <cerrno>
|
#include <cerrno>
|
||||||
|
#include <boost/program_options.hpp>
|
||||||
|
namespace po = boost::program_options;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include <boost/uuid/uuid.hpp>
|
#include <boost/uuid/uuid.hpp>
|
||||||
@ -50,38 +52,96 @@ namespace WriteEngine
|
|||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
WECmdArgs::WECmdArgs(int argc, char** argv)
|
WECmdArgs::WECmdArgs(int argc, char** argv)
|
||||||
: fMultiTableCount(0)
|
|
||||||
, fJobLogOnly(false)
|
|
||||||
, fHelp(false)
|
|
||||||
, fMode(1)
|
|
||||||
, fArgMode(-1)
|
|
||||||
, fQuiteMode(true)
|
|
||||||
, fConsoleLog(false)
|
|
||||||
, fVerbose(0)
|
|
||||||
, fBatchQty(10000)
|
|
||||||
, fNoOfReadThrds(0)
|
|
||||||
, fDebugLvl(0)
|
|
||||||
, fMaxErrors(-1)
|
|
||||||
, fReadBufSize(0)
|
|
||||||
, fIOReadBufSize(0)
|
|
||||||
, fSetBufSize(0)
|
|
||||||
, fColDelim('|')
|
|
||||||
, fEnclosedChar(0)
|
|
||||||
, fEscChar(0)
|
|
||||||
, fNoOfWriteThrds(0)
|
|
||||||
, fNullStrMode(false)
|
|
||||||
, fImportDataMode(IMPORT_DATA_TEXT)
|
|
||||||
, fCpiInvoke(false)
|
|
||||||
, fBlockMode3(false)
|
|
||||||
, fbTruncationAsError(false)
|
|
||||||
, fUUID(boost::uuids::nil_generator()())
|
|
||||||
, fConsoleOutput(true)
|
|
||||||
, fTimeZone("SYSTEM")
|
|
||||||
, fErrorDir(string(MCSLOGDIR) + "/cpimport/")
|
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
appTestFunction();
|
appTestFunction();
|
||||||
|
fOptions = std::make_unique<po::options_description>();
|
||||||
|
#define DECLARE_INT_ARG(name, stor, min, max, desc) \
|
||||||
|
(name,\
|
||||||
|
po::value<int>(&stor)\
|
||||||
|
->notifier([](auto&& value) { checkIntArg(name, min, max, value); }),\
|
||||||
|
desc)
|
||||||
|
|
||||||
|
fOptions->add_options()
|
||||||
|
("help,h", "Print this message.")
|
||||||
|
DECLARE_INT_ARG("read-buffer,b", fIOReadBufSize, 1, INT_MAX, "Number of read buffers.")
|
||||||
|
DECLARE_INT_ARG("read-buffer-size,c", fReadBufSize, 1, INT_MAX,
|
||||||
|
"Application read buffer size (in bytes)")
|
||||||
|
DECLARE_INT_ARG("debug,d", fDebugLvl, 1, 3, "Print different level(1-3) debug message")
|
||||||
|
("verbose,v", po::value<string>())
|
||||||
|
("silent,N", po::bool_switch())
|
||||||
|
DECLARE_INT_ARG("max-errors,e", fMaxErrors, 0, INT_MAX,
|
||||||
|
"Maximum number of allowable error per table per PM")
|
||||||
|
("file-path,f", po::value<string>(&fPmFilePath),
|
||||||
|
"Data file directory path. Default is current working directory.\n"
|
||||||
|
"\tIn Mode 1, represents the local input file path.\n"
|
||||||
|
"\tIn Mode 2, represents the PM based input file path.\n"
|
||||||
|
"\tIn Mode 3, represents the local input file path.")
|
||||||
|
DECLARE_INT_ARG("mode,m", fArgMode, 0, 3,
|
||||||
|
"\t1 - rows will be loaded in a distributed manner acress PMs.\n"
|
||||||
|
"\t2 - PM based input files loaded into their respective PM.\n"
|
||||||
|
"\t3 - input files will be loaded on the local PM.")
|
||||||
|
("filename,l", po::value<string>(&fPmFile),
|
||||||
|
"Name of import file to be loaded, relative to 'file-path'")
|
||||||
|
DECLARE_INT_ARG("batch-quantity,q", fBatchQty, 1, INT_MAX,
|
||||||
|
"Batch quantity, Number of rows distributed per batch in Mode 1")
|
||||||
|
("console-log,i", po::bool_switch(&fConsoleLog),
|
||||||
|
"Print extended info to console in Mode 3.")
|
||||||
|
("job-id,j", po::value<string>(),
|
||||||
|
"Job ID. In simple usage, default is the table OID unless a fully qualified input "
|
||||||
|
"file name is given.")
|
||||||
|
("null-strings,n", po::value(&fNullStrMode)->implicit_value(true),
|
||||||
|
"NullOption (0-treat the string NULL as data (default);\n"
|
||||||
|
"1-treat the string NULL as a NULL value)")
|
||||||
|
("xml-job-path,p", po::value<string>(&fJobPath), "Path for the XML job description file.")
|
||||||
|
DECLARE_INT_ARG("readers,r", fNoOfReadThrds, 1, INT_MAX, "Number of readers.")
|
||||||
|
("separator,s", po::value<string>(), "Delimiter between column values.")
|
||||||
|
DECLARE_INT_ARG("io-buffer-size,B", fSetBufSize, 1, INT_MAX,
|
||||||
|
"I/O library read buffer size (in bytes)")
|
||||||
|
DECLARE_INT_ARG("writers,w", fNoOfWriteThrds, 1, INT_MAX, "Number of parsers.")
|
||||||
|
("enclosed-by,E", po::value<char>(&fEnclosedChar),
|
||||||
|
"Enclosed by character if field values are enclosed.")
|
||||||
|
("escape-char,C", po::value<char>(&fEscChar)->default_value('\\'),
|
||||||
|
"Escape character used in conjunction with 'enclosed-by'"
|
||||||
|
"character, or as a part of NULL escape sequence ('\\N');\n"
|
||||||
|
"default is '\\'")
|
||||||
|
("headers,O",
|
||||||
|
po::value<int>(&fSkipRows)->implicit_value(1)
|
||||||
|
->notifier([](auto&& value) { checkIntArg("headers,O", 0, INT_MAX, value); }),
|
||||||
|
"Number of header rows to skip.")
|
||||||
|
("binary-mode,I", po::value<int>(),
|
||||||
|
"Import binary data; how to treat NULL values:\n"
|
||||||
|
"\t1 - import NULL values\n"
|
||||||
|
"\t2 - saturate NULL values\n")
|
||||||
|
("pm,P", po::value<vector<unsigned int>>(&fPmVec),
|
||||||
|
"List of PMs ex: -P 1,2,3. Default is all PMs.")
|
||||||
|
("truncation-as-error,S", po::bool_switch(&fbTruncationAsError),
|
||||||
|
"Treat string truncations as errors.")
|
||||||
|
("tz,T", po::value<string>(),
|
||||||
|
"Timezone used for TIMESTAMP datatype. Possible values:\n"
|
||||||
|
"\t\"SYSTEM\" (default)\n"
|
||||||
|
"\tOffset in the form +/-HH:MM")
|
||||||
|
("s3-key,y", po::value<string>(&fS3Key),
|
||||||
|
"S3 Authentication Key (for S3 imports)")
|
||||||
|
("s3-secret,K", po::value<string>(&fS3Secret),
|
||||||
|
"S3 Authentication Secret (for S3 imports)")
|
||||||
|
("s3-bucket,t", po::value<string>(&fS3Bucket),
|
||||||
|
"S3 Bucket (for S3 imports)")
|
||||||
|
("s3-hostname,H", po::value<string>(&fS3Host),
|
||||||
|
"S3 Hostname (for S3 imports, Amazon's S3 default)")
|
||||||
|
("s3-region,g", po::value<string>(&fS3Region),
|
||||||
|
"S3 Region (for S3 imports)")
|
||||||
|
("errors-dir,L", po::value<string>(&fErrorDir)->default_value(MCSLOGDIR),
|
||||||
|
"Directory for the output .err and .bad files")
|
||||||
|
("username,U", po::value<string>(&fUsername), "Username of the files owner.")
|
||||||
|
("dbname", po::value<string>(), "Name of the database to load")
|
||||||
|
("table", po::value<string>(), "Name of table to load")
|
||||||
|
("load-file", po::value<string>(),
|
||||||
|
"Optional input file name in current directory, "
|
||||||
|
"unless a fully qualified name is given. If not given, input read from STDIN.");
|
||||||
|
|
||||||
|
#undef DECLARE_INT_ARG
|
||||||
parseCmdLineArgs(argc, argv);
|
parseCmdLineArgs(argc, argv);
|
||||||
}
|
}
|
||||||
catch (std::exception& exp)
|
catch (std::exception& exp)
|
||||||
@ -92,6 +152,8 @@ WECmdArgs::WECmdArgs(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
WECmdArgs::~WECmdArgs() = default;
|
||||||
|
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
void WECmdArgs::appTestFunction()
|
void WECmdArgs::appTestFunction()
|
||||||
@ -107,8 +169,18 @@ void WECmdArgs::appTestFunction()
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void WECmdArgs::checkIntArg(const std::string& name, long min, long max, int value)
|
||||||
|
{
|
||||||
|
if (value < min || value > max)
|
||||||
|
{
|
||||||
|
ostringstream oss;
|
||||||
|
oss << "Argument " << name << " is out of range [" << min << ", " << max << "]";
|
||||||
|
throw runtime_error(oss.str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
std::string WECmdArgs::getCpImportCmdLine()
|
std::string WECmdArgs::getCpImportCmdLine(bool skipRows)
|
||||||
{
|
{
|
||||||
std::ostringstream aSS;
|
std::ostringstream aSS;
|
||||||
std::string aCmdLine;
|
std::string aCmdLine;
|
||||||
@ -185,6 +257,11 @@ std::string WECmdArgs::getCpImportCmdLine()
|
|||||||
if (fEscChar != 0)
|
if (fEscChar != 0)
|
||||||
aSS << " -C " << fEscChar;
|
aSS << " -C " << fEscChar;
|
||||||
|
|
||||||
|
if (skipRows && fSkipRows)
|
||||||
|
{
|
||||||
|
aSS << " -O " << fSkipRows;
|
||||||
|
}
|
||||||
|
|
||||||
if (fNullStrMode)
|
if (fNullStrMode)
|
||||||
aSS << " -n " << '1';
|
aSS << " -n " << '1';
|
||||||
|
|
||||||
@ -321,6 +398,12 @@ bool WECmdArgs::checkForCornerCases()
|
|||||||
// BUG 4210
|
// BUG 4210
|
||||||
this->checkJobIdCase(); // Need to do this before we go further
|
this->checkJobIdCase(); // Need to do this before we go further
|
||||||
|
|
||||||
|
if (fSkipRows && fImportDataMode != IMPORT_DATA_TEXT)
|
||||||
|
{
|
||||||
|
cout << "Invalid option -O with binary file" << endl;
|
||||||
|
throw runtime_error("Invalid option -O with binary file");
|
||||||
|
}
|
||||||
|
|
||||||
if (fMode == 0)
|
if (fMode == 0)
|
||||||
{
|
{
|
||||||
if (!fJobId.empty())
|
if (!fJobId.empty())
|
||||||
@ -522,52 +605,7 @@ void WECmdArgs::usage()
|
|||||||
cout << "\t\t\tunless a fully qualified name is given.\n";
|
cout << "\t\t\tunless a fully qualified name is given.\n";
|
||||||
cout << "\t\t\tIf not given, input read from STDIN.\n";
|
cout << "\t\t\tIf not given, input read from STDIN.\n";
|
||||||
|
|
||||||
cout << "\n\nOptions:\n"
|
cout << "\n\n" << (*fOptions) << endl;
|
||||||
<< "\t-b\tNumber of read buffers\n"
|
|
||||||
<< "\t-c\tApplication read buffer size(in bytes)\n"
|
|
||||||
<< "\t-d\tPrint different level(1-3) debug message\n"
|
|
||||||
<< "\t-e\tMax number of allowable error per table per PM\n"
|
|
||||||
<< "\t-f\tData file directory path.\n"
|
|
||||||
<< "\t\t\tDefault is current working directory.\n"
|
|
||||||
<< "\t\t\tIn Mode 1, -f represents the local input file path.\n"
|
|
||||||
<< "\t\t\tIn Mode 2, -f represents the PM based input file path.\n"
|
|
||||||
<< "\t\t\tIn Mode 3, -f represents the local input file path.\n"
|
|
||||||
<< "\t-l\tName of import file to be loaded, relative to -f path,\n"
|
|
||||||
<< "\t-h\tPrint this message.\n"
|
|
||||||
<< "\t-q\tBatch Quantity, Number of rows distributed per batch in Mode 1\n"
|
|
||||||
<< "\t-i\tPrint extended info to console in Mode 3.\n"
|
|
||||||
<< "\t-j\tJob ID. In simple usage, default is the table OID.\n"
|
|
||||||
<< "\t\t\tunless a fully qualified input file name is given.\n"
|
|
||||||
<< "\t-n\tNullOption (0-treat the string NULL as data (default);\n"
|
|
||||||
<< "\t\t\t1-treat the string NULL as a NULL value)\n"
|
|
||||||
<< "\t-p\tPath for XML job description file.\n"
|
|
||||||
<< "\t-r\tNumber of readers.\n"
|
|
||||||
<< "\t-s\t'c' is the delimiter between column values.\n"
|
|
||||||
<< "\t-B\tI/O library read buffer size (in bytes)\n"
|
|
||||||
<< "\t-w\tNumber of parsers.\n"
|
|
||||||
<< "\t-E\tEnclosed by character if field values are enclosed.\n"
|
|
||||||
<< "\t-C\tEscape character used in conjunction with 'enclosed by'\n"
|
|
||||||
<< "\t\t\tcharacter, or as part of NULL escape sequence ('\\N');\n"
|
|
||||||
<< "\t\t\tdefault is '\\'\n"
|
|
||||||
<< "\t-I\tImport binary data; how to treat NULL values:\n"
|
|
||||||
<< "\t\t\t1 - import NULL values\n"
|
|
||||||
<< "\t\t\t2 - saturate NULL values\n"
|
|
||||||
<< "\t-P\tList of PMs ex: -P 1,2,3. Default is all PMs.\n"
|
|
||||||
<< "\t-S\tTreat string truncations as errors.\n"
|
|
||||||
<< "\t-m\tmode\n"
|
|
||||||
<< "\t\t\t1 - rows will be loaded in a distributed manner across PMs.\n"
|
|
||||||
<< "\t\t\t2 - PM based input files loaded onto their respective PM.\n"
|
|
||||||
<< "\t\t\t3 - input files will be loaded on the local PM.\n"
|
|
||||||
<< "\t-T\tTimezone used for TIMESTAMP datatype.\n"
|
|
||||||
<< "\t\tPossible values: \"SYSTEM\" (default)\n"
|
|
||||||
<< "\t\t : Offset in the form +/-HH:MM\n"
|
|
||||||
<< "\t-y\tS3 Authentication Key (for S3 imports)\n"
|
|
||||||
<< "\t-K\tS3 Authentication Secret (for S3 imports)\n"
|
|
||||||
<< "\t-t\tS3 Bucket (for S3 imports)\n"
|
|
||||||
<< "\t-H\tS3 Hostname (for S3 imports, Amazon's S3 default)\n"
|
|
||||||
<< "\t-g\tS3 Region (for S3 imports)\n"
|
|
||||||
<< "\t-L\tDirectory for the output .err and .bad files.\n"
|
|
||||||
<< "\t\tDefault is " << string(MCSLOGDIR);
|
|
||||||
|
|
||||||
cout << "\nExample1: Traditional usage\n"
|
cout << "\nExample1: Traditional usage\n"
|
||||||
<< "\tcpimport -j 1234";
|
<< "\tcpimport -j 1234";
|
||||||
@ -591,375 +629,112 @@ void WECmdArgs::usage()
|
|||||||
|
|
||||||
void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
|
void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
|
||||||
{
|
{
|
||||||
int aCh;
|
|
||||||
std::string importPath;
|
std::string importPath;
|
||||||
bool aJobType = false;
|
bool aJobType = false;
|
||||||
|
|
||||||
if (argc > 0)
|
if (argc > 0)
|
||||||
fPrgmName = string(MCSBINDIR) + "/" + "cpimport.bin"; // argv[0] is splitter but we need cpimport
|
fPrgmName = string(MCSBINDIR) + "/" + "cpimport.bin"; // argv[0] is splitter but we need cpimport
|
||||||
|
|
||||||
while ((aCh = getopt(argc, argv, "d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:Ny:K:t:H:g:U:L:")) != EOF)
|
po::positional_options_description pos_opt;
|
||||||
|
pos_opt.add("dbname", 1)
|
||||||
|
.add("table", 1)
|
||||||
|
.add("load-file", 1);
|
||||||
|
|
||||||
|
po::variables_map vm;
|
||||||
|
po::store(po::command_line_parser(argc, argv).options(*fOptions).positional(pos_opt).run(), vm);
|
||||||
|
po::notify(vm);
|
||||||
|
|
||||||
|
if (vm.contains("silent"))
|
||||||
{
|
{
|
||||||
switch (aCh)
|
fConsoleOutput = !vm["silent"].as<bool>();
|
||||||
|
}
|
||||||
|
if (vm.contains("help"))
|
||||||
|
{
|
||||||
|
fHelp = true;
|
||||||
|
usage();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (vm.contains("separator"))
|
||||||
|
{
|
||||||
|
auto value = vm["separator"].as<std::string>();
|
||||||
|
if (value == "\\t")
|
||||||
{
|
{
|
||||||
case 'm':
|
fColDelim = '\t';
|
||||||
|
if (fDebugLvl)
|
||||||
{
|
{
|
||||||
fArgMode = atoi(optarg);
|
cout << "Column delimiter : \\t" << endl;
|
||||||
|
|
||||||
// cout << "Mode level set to " << fMode << endl;
|
|
||||||
if ((fArgMode > -1) && (fArgMode <= 3))
|
|
||||||
{
|
|
||||||
}
|
|
||||||
else
|
|
||||||
throw runtime_error("Wrong Mode level");
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
case 'B':
|
else
|
||||||
|
{
|
||||||
|
fColDelim = value[0];
|
||||||
|
if (fDebugLvl)
|
||||||
{
|
{
|
||||||
errno = 0;
|
cout << "Column delimiter : " << fColDelim << endl;
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
throw runtime_error("Option -B is invalid or out of range");
|
|
||||||
|
|
||||||
fSetBufSize = lValue;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'b':
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
throw runtime_error("Option -b is invalid or out of range");
|
|
||||||
|
|
||||||
fIOReadBufSize = lValue;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'e':
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
|
|
||||||
throw runtime_error("Option -e is invalid or out of range");
|
|
||||||
|
|
||||||
fMaxErrors = lValue;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'i':
|
|
||||||
{
|
|
||||||
fConsoleLog = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'c':
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
throw runtime_error("Option -c is invalid or out of range");
|
|
||||||
|
|
||||||
fReadBufSize = lValue;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'j': // -j: jobID
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
|
|
||||||
throw runtime_error("Option -j is invalid or out of range");
|
|
||||||
|
|
||||||
fJobId = optarg;
|
|
||||||
fOrigJobId = fJobId; // in case if we need to split it.
|
|
||||||
|
|
||||||
if (0 == fJobId.length())
|
|
||||||
throw runtime_error("Wrong JobID Value");
|
|
||||||
|
|
||||||
aJobType = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'v': // verbose
|
|
||||||
{
|
|
||||||
string aVerbLen = optarg;
|
|
||||||
fVerbose = aVerbLen.length();
|
|
||||||
fDebugLvl = fVerbose;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'd': // -d debug
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
throw runtime_error("Option -d is invalid or out of range");
|
|
||||||
|
|
||||||
fDebugLvl = lValue;
|
|
||||||
|
|
||||||
if (fDebugLvl > 0 && fDebugLvl <= 3)
|
|
||||||
{
|
|
||||||
cout << "\nDebug level set to " << fDebugLvl << endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw runtime_error("Wrong Debug level");
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'r': // -r: num read threads
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
throw runtime_error("Option -r is invalid or out of range");
|
|
||||||
|
|
||||||
fNoOfReadThrds = lValue;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'w': // -w: num parse threads
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long lValue = strtol(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
|
|
||||||
throw runtime_error("Option -w is invalid or out of range");
|
|
||||||
|
|
||||||
fNoOfWriteThrds = lValue;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 's': // -s: column delimiter
|
|
||||||
{
|
|
||||||
if (!strcmp(optarg, "\\t"))
|
|
||||||
{
|
|
||||||
fColDelim = '\t';
|
|
||||||
|
|
||||||
if (fDebugLvl)
|
|
||||||
cout << "Column delimiter : "
|
|
||||||
<< "\\t" << endl;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
fColDelim = optarg[0];
|
|
||||||
|
|
||||||
if (fDebugLvl)
|
|
||||||
cout << "Column delimiter : " << fColDelim << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'l': // -l: if JobId (-j), it can be input file
|
|
||||||
{
|
|
||||||
fPmFile = optarg;
|
|
||||||
|
|
||||||
if (0 == fPmFile.length())
|
|
||||||
throw runtime_error("Wrong local filename");
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'f': // -f: import file path
|
|
||||||
{
|
|
||||||
fPmFilePath = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'n': // -n: treat "NULL" as null
|
|
||||||
{
|
|
||||||
// default is 0, ie it is equal to not giving this option
|
|
||||||
int nullStringMode = atoi(optarg);
|
|
||||||
|
|
||||||
if ((nullStringMode != 0) && (nullStringMode != 1))
|
|
||||||
{
|
|
||||||
throw(runtime_error("Invalid NULL option; value can be 0 or 1"));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nullStringMode)
|
|
||||||
fNullStrMode = true;
|
|
||||||
else
|
|
||||||
fNullStrMode = false; // This is default
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'P': // -p: list of PM's
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
std::string aPmList = optarg;
|
|
||||||
|
|
||||||
if (!str2PmList(aPmList, fPmVec))
|
|
||||||
throw(runtime_error("PM list is wrong"));
|
|
||||||
}
|
|
||||||
catch (runtime_error& ex)
|
|
||||||
{
|
|
||||||
throw(ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'p':
|
|
||||||
{
|
|
||||||
fJobPath = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'E': // -E: enclosed by char
|
|
||||||
{
|
|
||||||
fEnclosedChar = optarg[0];
|
|
||||||
// cout << "Enclosed by Character : " << optarg[0] << endl;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'C': // -C: enclosed escape char
|
|
||||||
{
|
|
||||||
fEscChar = optarg[0];
|
|
||||||
// cout << "Escape Character : " << optarg[0] << endl;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'h': // -h: help
|
|
||||||
{
|
|
||||||
// usage(); // will exit(1) here
|
|
||||||
fHelp = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'I': // -I: binary mode (null handling)
|
|
||||||
{
|
|
||||||
// default is text mode, unless -I option is specified
|
|
||||||
int binaryMode = atoi(optarg);
|
|
||||||
|
|
||||||
if (binaryMode == 1)
|
|
||||||
{
|
|
||||||
fImportDataMode = IMPORT_DATA_BIN_ACCEPT_NULL;
|
|
||||||
}
|
|
||||||
else if (binaryMode == 2)
|
|
||||||
{
|
|
||||||
fImportDataMode = IMPORT_DATA_BIN_SAT_NULL;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw(runtime_error("Invalid Binary mode; value can be 1 or 2"));
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'S': // -S: Treat string truncations as errors
|
|
||||||
{
|
|
||||||
setTruncationAsError(true);
|
|
||||||
// cout << "TruncationAsError : true" << endl;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'T':
|
|
||||||
{
|
|
||||||
std::string timeZone = optarg;
|
|
||||||
long offset;
|
|
||||||
|
|
||||||
if (timeZone != "SYSTEM" && dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
|
|
||||||
{
|
|
||||||
throw(runtime_error("Value for option -T is invalid"));
|
|
||||||
}
|
|
||||||
|
|
||||||
fTimeZone = timeZone;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'q': // -q: batch quantity - default value is 10000
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
long long lValue = strtoll(optarg, 0, 10);
|
|
||||||
|
|
||||||
if ((errno != 0) || (lValue < 1) || (lValue > UINT_MAX))
|
|
||||||
throw runtime_error("Option -q is invalid or out of range");
|
|
||||||
|
|
||||||
fBatchQty = lValue;
|
|
||||||
|
|
||||||
if (fBatchQty < 10000)
|
|
||||||
fBatchQty = 10000;
|
|
||||||
else if (fBatchQty > 100000)
|
|
||||||
fBatchQty = 10000;
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'N': //-N no console output
|
|
||||||
{
|
|
||||||
fConsoleOutput = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'y': //-y S3 Key
|
|
||||||
{
|
|
||||||
fS3Key = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'K': //-K S3 Secret
|
|
||||||
{
|
|
||||||
fS3Secret = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'H': //-H S3 Host
|
|
||||||
{
|
|
||||||
fS3Host = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 't': //-t S3 bucket
|
|
||||||
{
|
|
||||||
fS3Bucket = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'g': //-g S3 Region
|
|
||||||
{
|
|
||||||
fS3Region = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'U': //-U username of the files owner
|
|
||||||
{
|
|
||||||
fUsername = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case 'L': // -L set the output location of .bad/.err files
|
|
||||||
{
|
|
||||||
fErrorDir = optarg;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
std::string aErr = std::string("Unknown command line option ") + std::to_string(aCh);
|
|
||||||
// cout << "Unknown command line option " << aCh << endl;
|
|
||||||
throw(runtime_error(aErr));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (vm.contains("binary-mode"))
|
||||||
|
{
|
||||||
|
int value = vm["binary-mode"].as<int>();
|
||||||
|
if (value == 1)
|
||||||
|
{
|
||||||
|
fImportDataMode = IMPORT_DATA_BIN_ACCEPT_NULL;
|
||||||
|
}
|
||||||
|
else if (value == 2)
|
||||||
|
{
|
||||||
|
fImportDataMode = IMPORT_DATA_BIN_SAT_NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw runtime_error("Invalid Binary mode; value can be 1 or 2");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (vm.contains("tz"))
|
||||||
|
{
|
||||||
|
auto tz = vm["tz"].as<std::string>();
|
||||||
|
long offset;
|
||||||
|
if (tz != "SYSTEM" && dataconvert::timeZoneToOffset(tz.c_str(), tz.size(), &offset))
|
||||||
|
{
|
||||||
|
throw runtime_error("Value for option --tz/-T is invalid");
|
||||||
|
}
|
||||||
|
fTimeZone = tz;
|
||||||
|
}
|
||||||
|
if (vm.contains("job-id"))
|
||||||
|
{
|
||||||
|
errno = 0;
|
||||||
|
string optarg = vm["job-id"].as<std::string>();
|
||||||
|
long lValue = strtol(optarg.c_str(), nullptr, 10);
|
||||||
|
if (errno != 0 || lValue < 0 || lValue > INT_MAX)
|
||||||
|
{
|
||||||
|
throw runtime_error("Option --job-id/-j is invalid or out of range");
|
||||||
|
}
|
||||||
|
fJobId = optarg;
|
||||||
|
fOrigJobId = fJobId;
|
||||||
|
|
||||||
if (fHelp)
|
if (fJobId.empty())
|
||||||
usage(); // BUG 4210
|
{
|
||||||
|
throw runtime_error("Wrong JobID Value");
|
||||||
|
}
|
||||||
|
|
||||||
|
aJobType = true;
|
||||||
|
}
|
||||||
|
if (vm.contains("verbose"))
|
||||||
|
{
|
||||||
|
string optarg = vm["verbose"].as<std::string>();
|
||||||
|
fVerbose = fDebugLvl = optarg.length();
|
||||||
|
}
|
||||||
|
if (vm.contains("batch-quantity"))
|
||||||
|
{
|
||||||
|
if (fBatchQty < 10000)
|
||||||
|
{
|
||||||
|
fBatchQty = 10000;
|
||||||
|
}
|
||||||
|
else if (fBatchQty > 100000)
|
||||||
|
{
|
||||||
|
fBatchQty = 10000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (fArgMode != -1)
|
if (fArgMode != -1)
|
||||||
fMode = fArgMode; // BUG 4210
|
fMode = fArgMode; // BUG 4210
|
||||||
@ -976,26 +751,23 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
|
|||||||
if (0 == fArgMode)
|
if (0 == fArgMode)
|
||||||
throw runtime_error("Incompatible mode and option types");
|
throw runtime_error("Incompatible mode and option types");
|
||||||
|
|
||||||
if (optind < argc)
|
if (vm.contains("dbname"))
|
||||||
{
|
{
|
||||||
fSchema = argv[optind]; // 1st pos parm
|
fSchema = vm["dbname"].as<string>();
|
||||||
optind++;
|
|
||||||
|
|
||||||
if (optind < argc)
|
|
||||||
{
|
if (!vm.contains("table"))
|
||||||
fTable = argv[optind]; // 2nd pos parm
|
|
||||||
optind++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
// if schema is there, table name should be there
|
// if schema is there, table name should be there
|
||||||
throw runtime_error("No table name specified with schema.");
|
throw runtime_error("No table name specified with schema.");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optind < argc) // see if input file name is given
|
fTable = vm["table"].as<string>(); // 2nd pos parm
|
||||||
|
|
||||||
|
if (vm.contains("load-file")) // see if input file name is given
|
||||||
{
|
{
|
||||||
// 3rd pos parm
|
// 3rd pos parm
|
||||||
fLocFile = argv[optind];
|
fLocFile = vm["load-file"].as<string>();
|
||||||
|
|
||||||
if ((fLocFile.at(0) != '/') && (fLocFile != "STDIN"))
|
if ((fLocFile.at(0) != '/') && (fLocFile != "STDIN"))
|
||||||
{
|
{
|
||||||
@ -1074,7 +846,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
|
|||||||
// 1. no positional parameters - Mode 0 & stdin
|
// 1. no positional parameters - Mode 0 & stdin
|
||||||
// 2. Two positional parameters (schema and table names) - Mode 1/2, stdin
|
// 2. Two positional parameters (schema and table names) - Mode 1/2, stdin
|
||||||
// 3. Three positional parameters (schema, table, and import file name)
|
// 3. Three positional parameters (schema, table, and import file name)
|
||||||
else if (optind < argc) // see if db schema name is given
|
else if (vm.contains("dbname")) // see if db schema name is given
|
||||||
{
|
{
|
||||||
if (fArgMode == 0)
|
if (fArgMode == 0)
|
||||||
{
|
{
|
||||||
@ -1088,13 +860,12 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fLocFile = argv[optind];
|
fLocFile = vm["dbname"].as<string>();
|
||||||
optind++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optind < argc) // dest filename provided
|
if (vm.contains("table")) // dest filename provided
|
||||||
{
|
{
|
||||||
fPmFile = argv[optind];
|
fPmFile = vm["table"].as<string>();
|
||||||
|
|
||||||
if ((fPmFile.at(0) != '/') && (fS3Key.empty()))
|
if ((fPmFile.at(0) != '/') && (fS3Key.empty()))
|
||||||
{
|
{
|
||||||
@ -1144,19 +915,16 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
|
|||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
fSchema = argv[optind]; // 1st pos parm
|
fSchema = vm["dbname"].as<string>(); // 1st pos parm
|
||||||
|
|
||||||
optind++;
|
if (vm.contains("table")) // see if table name is given
|
||||||
|
|
||||||
if (optind < argc) // see if table name is given
|
|
||||||
{
|
{
|
||||||
fTable = argv[optind]; // 2nd pos parm
|
fTable = vm["table"].as<string>(); // 2nd pos parm
|
||||||
optind++;
|
|
||||||
|
|
||||||
if (optind < argc) // see if input file name is given
|
if (vm.contains("load-file")) // see if input file name is given
|
||||||
{
|
{
|
||||||
// 3rd pos parm
|
// 3rd pos parm
|
||||||
fLocFile = argv[optind];
|
fLocFile = vm["load-file"].as<string>();
|
||||||
|
|
||||||
// BUG 4379 if -f option given we need to use that path,
|
// BUG 4379 if -f option given we need to use that path,
|
||||||
// over riding bug 4231. look at the code below
|
// over riding bug 4231. look at the code below
|
||||||
@ -1543,9 +1311,7 @@ void WECmdArgs::setEnclByAndEscCharFromJobFile(std::string& JobName)
|
|||||||
if (fEnclosedChar == 0) // check anything in Jobxml file
|
if (fEnclosedChar == 0) // check anything in Jobxml file
|
||||||
{
|
{
|
||||||
WEXmlgetter aXmlGetter(JobName);
|
WEXmlgetter aXmlGetter(JobName);
|
||||||
vector<string> aSections;
|
const vector<string> aSections{"BulkJob", "EnclosedByChar"};
|
||||||
aSections.push_back("BulkJob");
|
|
||||||
aSections.push_back("EnclosedByChar");
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -1569,9 +1335,7 @@ void WECmdArgs::setEnclByAndEscCharFromJobFile(std::string& JobName)
|
|||||||
if (fEscChar == 0) // check anything in Jobxml file
|
if (fEscChar == 0) // check anything in Jobxml file
|
||||||
{
|
{
|
||||||
WEXmlgetter aXmlGetter(JobName);
|
WEXmlgetter aXmlGetter(JobName);
|
||||||
vector<string> aSections;
|
const vector<string> aSections{"BulkJob", "EscapeChar"};
|
||||||
aSections.push_back("BulkJob");
|
|
||||||
aSections.push_back("EscapeChar");
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
@ -24,28 +24,33 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
#include <boost/uuid/uuid.hpp>
|
#include <boost/uuid/uuid.hpp>
|
||||||
|
#include <boost/uuid/nil_generator.hpp>
|
||||||
|
|
||||||
#include "we_xmlgetter.h"
|
#include "we_xmlgetter.h"
|
||||||
#include "we_type.h"
|
#include "we_type.h"
|
||||||
|
|
||||||
|
namespace boost::program_options
|
||||||
|
{
|
||||||
|
class options_description;
|
||||||
|
}
|
||||||
|
|
||||||
namespace WriteEngine
|
namespace WriteEngine
|
||||||
{
|
{
|
||||||
class WECmdArgs
|
class WECmdArgs
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
WECmdArgs(int argc, char** argv);
|
WECmdArgs(int argc, char** argv);
|
||||||
virtual ~WECmdArgs() = default;
|
virtual ~WECmdArgs();
|
||||||
|
|
||||||
typedef std::vector<unsigned int> VecInts;
|
typedef std::vector<unsigned int> VecInts;
|
||||||
typedef std::vector<std::string> VecArgs;
|
typedef std::vector<std::string> VecArgs;
|
||||||
|
|
||||||
void appTestFunction();
|
void appTestFunction();
|
||||||
void parseCmdLineArgs(int argc, char** argv);
|
void parseCmdLineArgs(int argc, char** argv);
|
||||||
std::string getCpImportCmdLine();
|
std::string getCpImportCmdLine(bool skipRows);
|
||||||
void setSchemaAndTableFromJobFile(std::string& JobName);
|
void setSchemaAndTableFromJobFile(std::string& JobName);
|
||||||
void setEnclByAndEscCharFromJobFile(std::string& JobName);
|
void setEnclByAndEscCharFromJobFile(std::string& JobName);
|
||||||
void usage();
|
void usage();
|
||||||
void usageMode3();
|
|
||||||
bool checkForCornerCases();
|
bool checkForCornerCases();
|
||||||
void checkForBulkLogDir(const std::string& BulkRoot);
|
void checkForBulkLogDir(const std::string& BulkRoot);
|
||||||
|
|
||||||
@ -76,11 +81,11 @@ class WECmdArgs
|
|||||||
{
|
{
|
||||||
return fLocFile;
|
return fLocFile;
|
||||||
}
|
}
|
||||||
int getReadBufSize()
|
int getReadBufSize() const
|
||||||
{
|
{
|
||||||
return fReadBufSize;
|
return fReadBufSize;
|
||||||
}
|
}
|
||||||
int getMode()
|
int getMode() const
|
||||||
{
|
{
|
||||||
return fMode;
|
return fMode;
|
||||||
}
|
}
|
||||||
@ -88,36 +93,40 @@ class WECmdArgs
|
|||||||
{
|
{
|
||||||
return fArgMode;
|
return fArgMode;
|
||||||
}
|
}
|
||||||
bool isHelpMode()
|
bool isHelpMode() const
|
||||||
{
|
{
|
||||||
return fHelp;
|
return fHelp;
|
||||||
}
|
}
|
||||||
int getDebugLvl()
|
int getDebugLvl() const
|
||||||
{
|
{
|
||||||
return fDebugLvl;
|
return fDebugLvl;
|
||||||
}
|
}
|
||||||
char getEnclChar()
|
char getEnclChar() const
|
||||||
{
|
{
|
||||||
return fEnclosedChar;
|
return fEnclosedChar;
|
||||||
}
|
}
|
||||||
char getEscChar()
|
char getEscChar() const
|
||||||
{
|
{
|
||||||
return fEscChar;
|
return fEscChar;
|
||||||
}
|
}
|
||||||
char getDelimChar()
|
char getDelimChar() const
|
||||||
{
|
{
|
||||||
return fColDelim;
|
return fColDelim;
|
||||||
}
|
}
|
||||||
|
int getSkipRows() const
|
||||||
|
{
|
||||||
|
return fSkipRows;
|
||||||
|
}
|
||||||
ImportDataMode getImportDataMode() const
|
ImportDataMode getImportDataMode() const
|
||||||
{
|
{
|
||||||
return fImportDataMode;
|
return fImportDataMode;
|
||||||
}
|
}
|
||||||
bool getConsoleLog()
|
bool getConsoleLog() const
|
||||||
{
|
{
|
||||||
return fConsoleLog;
|
return fConsoleLog;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isCpimportInvokeMode()
|
bool isCpimportInvokeMode() const
|
||||||
{
|
{
|
||||||
return (fBlockMode3) ? false : fCpiInvoke;
|
return (fBlockMode3) ? false : fCpiInvoke;
|
||||||
}
|
}
|
||||||
@ -125,11 +134,15 @@ class WECmdArgs
|
|||||||
{
|
{
|
||||||
return fQuiteMode;
|
return fQuiteMode;
|
||||||
}
|
}
|
||||||
void setJobId(std::string fJobId)
|
void setJobId(const std::string& fJobId)
|
||||||
{
|
{
|
||||||
this->fJobId = fJobId;
|
this->fJobId = fJobId;
|
||||||
}
|
}
|
||||||
void setLocFile(std::string fLocFile)
|
void setOrigJobId()
|
||||||
|
{
|
||||||
|
this->fOrigJobId = fJobId;
|
||||||
|
}
|
||||||
|
void setLocFile(const std::string& fLocFile)
|
||||||
{
|
{
|
||||||
this->fLocFile = fLocFile;
|
this->fLocFile = fLocFile;
|
||||||
}
|
}
|
||||||
@ -141,7 +154,7 @@ class WECmdArgs
|
|||||||
{
|
{
|
||||||
this->fArgMode = ArgMode;
|
this->fArgMode = ArgMode;
|
||||||
}
|
}
|
||||||
void setPmFile(std::string fPmFile)
|
void setPmFile(const std::string& fPmFile)
|
||||||
{
|
{
|
||||||
this->fPmFile = fPmFile;
|
this->fPmFile = fPmFile;
|
||||||
}
|
}
|
||||||
@ -183,7 +196,7 @@ class WECmdArgs
|
|||||||
{
|
{
|
||||||
fUUID = jobUUID;
|
fUUID = jobUUID;
|
||||||
}
|
}
|
||||||
bool getConsoleOutput()
|
bool getConsoleOutput() const
|
||||||
{
|
{
|
||||||
return fConsoleOutput;
|
return fConsoleOutput;
|
||||||
}
|
}
|
||||||
@ -194,7 +207,7 @@ class WECmdArgs
|
|||||||
|
|
||||||
bool getPmStatus(int Id);
|
bool getPmStatus(int Id);
|
||||||
bool str2PmList(std::string& PmList, VecInts& V);
|
bool str2PmList(std::string& PmList, VecInts& V);
|
||||||
int getPmVecSize()
|
size_t getPmVecSize() const
|
||||||
{
|
{
|
||||||
return fPmVec.size();
|
return fPmVec.size();
|
||||||
}
|
}
|
||||||
@ -265,7 +278,7 @@ class WECmdArgs
|
|||||||
{
|
{
|
||||||
return fErrorDir;
|
return fErrorDir;
|
||||||
}
|
}
|
||||||
void setErrorDir(std::string fErrorDir)
|
void setErrorDir(const std::string& fErrorDir)
|
||||||
{
|
{
|
||||||
this->fErrorDir = fErrorDir;
|
this->fErrorDir = fErrorDir;
|
||||||
}
|
}
|
||||||
@ -273,24 +286,26 @@ class WECmdArgs
|
|||||||
std::string PrepMode2ListOfFiles(std::string& FileName); // Bug 4342
|
std::string PrepMode2ListOfFiles(std::string& FileName); // Bug 4342
|
||||||
void getColumnList(std::set<std::string>& columnList) const;
|
void getColumnList(std::set<std::string>& columnList) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static void checkIntArg(const std::string& name, long min, long max, int value);
|
||||||
private: // variables for SplitterApp
|
private: // variables for SplitterApp
|
||||||
VecArgs fVecArgs;
|
VecArgs fVecArgs;
|
||||||
VecInts fPmVec;
|
VecInts fPmVec;
|
||||||
|
|
||||||
VecArgs fVecJobFiles; // JobFiles splitter from master JobFile
|
VecArgs fVecJobFiles; // JobFiles splitter from master JobFile
|
||||||
int fMultiTableCount; // MultiTable count
|
int fMultiTableCount{0}; // MultiTable count
|
||||||
VecArgs fColFldsFromJobFile; // List of columns from any job file, that
|
VecArgs fColFldsFromJobFile; // List of columns from any job file, that
|
||||||
// represent fields in the import data
|
// represent fields in the import data
|
||||||
|
|
||||||
std::string fJobId; // JobID
|
std::string fJobId; // JobID
|
||||||
std::string fOrigJobId; // Original JobID, in case we have to split it
|
std::string fOrigJobId; // Original JobID, in case we have to split it
|
||||||
bool fJobLogOnly; // Job number is only for log filename only
|
bool fJobLogOnly{false}; // Job number is only for log filename only
|
||||||
bool fHelp; // Help mode
|
bool fHelp{false}; // Help mode
|
||||||
int fMode; // splitter Mode
|
int fMode{1}; // splitter Mode
|
||||||
int fArgMode; // Argument mode, dep. on this fMode is decided.
|
int fArgMode{-1}; // Argument mode, dep. on this fMode is decided.
|
||||||
bool fQuiteMode; // in quite mode or not
|
bool fQuiteMode{true}; // in quite mode or not
|
||||||
bool fConsoleLog; // Log everything to console - w.r.t cpimport
|
bool fConsoleLog{false}; // Log everything to console - w.r.t cpimport
|
||||||
int fVerbose; // how many v's
|
int fVerbose{0}; // how many v's
|
||||||
std::string fPmFile; // FileName at PM
|
std::string fPmFile; // FileName at PM
|
||||||
std::string fPmFilePath; // Path of input file in PM
|
std::string fPmFilePath; // Path of input file in PM
|
||||||
std::string fLocFile; // Local file name
|
std::string fLocFile; // Local file name
|
||||||
@ -305,32 +320,33 @@ class WECmdArgs
|
|||||||
std::string fS3Host; // S3 Host
|
std::string fS3Host; // S3 Host
|
||||||
std::string fS3Region; // S3 Region
|
std::string fS3Region; // S3 Region
|
||||||
|
|
||||||
unsigned int fBatchQty; // No. of batch Qty.
|
int fBatchQty{10000}; // No. of batch Qty.
|
||||||
int fNoOfReadThrds; // No. of read buffers
|
int fNoOfReadThrds{0}; // No. of read buffers
|
||||||
// std::string fConfig; // config filename
|
int fDebugLvl{0}; // Debug level
|
||||||
int fDebugLvl; // Debug level
|
int fMaxErrors{-1}; // Max allowable errors
|
||||||
int fMaxErrors; // Max allowable errors
|
int fReadBufSize{0}; // Read buffer size
|
||||||
int fReadBufSize; // Read buffer size
|
int fIOReadBufSize{0}; // I/O read buffer size
|
||||||
int fIOReadBufSize; // I/O read buffer size
|
int fSetBufSize{0}; // Buff size w/setvbuf
|
||||||
int fSetBufSize; // Buff size w/setvbuf
|
char fColDelim{'|'}; // column delimiter
|
||||||
char fColDelim; // column delimiter
|
char fEnclosedChar{0}; // enclosed by char
|
||||||
char fEnclosedChar; // enclosed by char
|
char fEscChar{0}; // esc char
|
||||||
char fEscChar; // esc char
|
int fSkipRows{0}; // skip header
|
||||||
int fNoOfWriteThrds; // No. of write threads
|
int fNoOfWriteThrds{0}; // No. of write threads
|
||||||
bool fNullStrMode; // set null string mode - treat null as null
|
bool fNullStrMode{false}; // set null string mode - treat null as null
|
||||||
ImportDataMode fImportDataMode; // Importing text or binary data
|
ImportDataMode fImportDataMode{IMPORT_DATA_TEXT}; // Importing text or binary data
|
||||||
std::string fPrgmName; // argv[0]
|
std::string fPrgmName; // argv[0]
|
||||||
std::string fSchema; // Schema name - positional parmater
|
std::string fSchema; // Schema name - positional parmater
|
||||||
std::string fTable; // Table name - table name parameter
|
std::string fTable; // Table name - table name parameter
|
||||||
|
|
||||||
bool fCpiInvoke; // invoke cpimport in mode 3
|
bool fCpiInvoke{false}; // invoke cpimport in mode 3
|
||||||
bool fBlockMode3; // Do not allow Mode 3
|
bool fBlockMode3{false}; // Do not allow Mode 3
|
||||||
bool fbTruncationAsError; // Treat string truncation as error
|
bool fbTruncationAsError{false}; // Treat string truncation as error
|
||||||
boost::uuids::uuid fUUID;
|
boost::uuids::uuid fUUID{boost::uuids::nil_generator()()};
|
||||||
bool fConsoleOutput; // If false, no output to console.
|
bool fConsoleOutput{true}; // If false, no output to console.
|
||||||
std::string fTimeZone; // Timezone to use for TIMESTAMP datatype
|
std::string fTimeZone{"SYSTEM"}; // Timezone to use for TIMESTAMP datatype
|
||||||
std::string fUsername; // Username of the data files owner
|
std::string fUsername; // Username of the data files owner
|
||||||
std::string fErrorDir;
|
std::string fErrorDir{MCSLOGDIR "/cpimport/"};
|
||||||
|
std::unique_ptr<boost::program_options::options_description> fOptions;
|
||||||
};
|
};
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -79,6 +79,7 @@ WEFileReadThread::WEFileReadThread(WESDHandler& aSdh)
|
|||||||
, fEncl('\0')
|
, fEncl('\0')
|
||||||
, fEsc('\\')
|
, fEsc('\\')
|
||||||
, fDelim('|')
|
, fDelim('|')
|
||||||
|
, fSkipRows(0)
|
||||||
{
|
{
|
||||||
// TODO batch qty to get from config
|
// TODO batch qty to get from config
|
||||||
fBatchQty = 10000;
|
fBatchQty = 10000;
|
||||||
@ -187,6 +188,8 @@ void WEFileReadThread::setup(std::string FileName)
|
|||||||
if (aEncl != 0)
|
if (aEncl != 0)
|
||||||
fEnclEsc = true;
|
fEnclEsc = true;
|
||||||
|
|
||||||
|
fSkipRows = fSdh.getSkipRows();
|
||||||
|
|
||||||
// BUG 4342 - Need to support "list of infiles"
|
// BUG 4342 - Need to support "list of infiles"
|
||||||
// chkForListOfFiles(FileName); - List prepared in sdhandler.
|
// chkForListOfFiles(FileName); - List prepared in sdhandler.
|
||||||
|
|
||||||
@ -216,12 +219,10 @@ void WEFileReadThread::setup(std::string FileName)
|
|||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
bool WEFileReadThread::chkForListOfFiles(std::string& FileName)
|
bool WEFileReadThread::chkForListOfFiles(const std::string& fileName)
|
||||||
{
|
{
|
||||||
// cout << "Inside chkForListOfFiles("<< FileName << ")" << endl;
|
// cout << "Inside chkForListOfFiles("<< FileName << ")" << endl;
|
||||||
std::string aFileName = FileName;
|
istringstream iss(fileName);
|
||||||
|
|
||||||
istringstream iss(aFileName);
|
|
||||||
ostringstream oss;
|
ostringstream oss;
|
||||||
size_t start = 0, end = 0;
|
size_t start = 0, end = 0;
|
||||||
const char* sep = " ,|";
|
const char* sep = " ,|";
|
||||||
@ -229,8 +230,8 @@ bool WEFileReadThread::chkForListOfFiles(std::string& FileName)
|
|||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
end = aFileName.find_first_of(sep, start);
|
end = fileName.find_first_of(sep, start);
|
||||||
std::string aFile = aFileName.substr(start, end - start);
|
std::string aFile = fileName.substr(start, end - start);
|
||||||
if (aFile == "STDIN" || aFile == "stdin")
|
if (aFile == "STDIN" || aFile == "stdin")
|
||||||
aFile = "/dev/stdin";
|
aFile = "/dev/stdin";
|
||||||
|
|
||||||
@ -270,9 +271,9 @@ std::string WEFileReadThread::getNextInputDataFile()
|
|||||||
}
|
}
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
void WEFileReadThread::add2InputDataFileList(std::string& FileName)
|
void WEFileReadThread::add2InputDataFileList(const std::string& fileName)
|
||||||
{
|
{
|
||||||
fInfileList.push_front(FileName);
|
fInfileList.push_front(fileName);
|
||||||
}
|
}
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
@ -371,17 +372,33 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs)
|
|||||||
|
|
||||||
// For now we are going to send KEEPALIVES
|
// For now we are going to send KEEPALIVES
|
||||||
//*Sbs << (ByteStream::byte)(WE_CLT_SRV_KEEPALIVE);
|
//*Sbs << (ByteStream::byte)(WE_CLT_SRV_KEEPALIVE);
|
||||||
if ((fInFile.good()) && (!fInFile.eof()))
|
if (fInFile.good() && !fInFile.eof())
|
||||||
{
|
{
|
||||||
// cout << "Inside WEFileReadThread::readDataFile" << endl;
|
// cout << "Inside WEFileReadThread::readDataFile" << endl;
|
||||||
// char aBuff[1024*1024]; // TODO May have to change it later
|
// char aBuff[1024*1024]; // TODO May have to change it later
|
||||||
// char*pStart = aBuff;
|
// char*pStart = aBuff;
|
||||||
unsigned int aIdx = 0;
|
unsigned int aIdx = 0;
|
||||||
int aLen = 0;
|
int aLen = 0;
|
||||||
*Sbs << (ByteStream::byte)(WE_CLT_SRV_DATA);
|
*Sbs << static_cast<ByteStream::byte>(WE_CLT_SRV_DATA);
|
||||||
|
|
||||||
while ((!fInFile.eof()) && (aIdx < getBatchQty()))
|
while (!fInFile.eof() && aIdx < getBatchQty())
|
||||||
{
|
{
|
||||||
|
if (fSkipRows > 0)
|
||||||
|
{
|
||||||
|
fSkipRows--;
|
||||||
|
fInFile.getline(fBuff, fBuffSize - 1);
|
||||||
|
if (fSdh.getDebugLvl() > 3)
|
||||||
|
{
|
||||||
|
aLen = fInFile.gcount();
|
||||||
|
if (aLen > 0 && aLen < fBuffSize - 2)
|
||||||
|
{
|
||||||
|
fBuff[aLen - 1] = 0;
|
||||||
|
cout << "Skip header row (" << fSkipRows<< " to go): " << fBuff << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (fEnclEsc)
|
if (fEnclEsc)
|
||||||
{
|
{
|
||||||
// pStart = aBuff;
|
// pStart = aBuff;
|
||||||
@ -551,6 +568,9 @@ void WEFileReadThread::openInFile()
|
|||||||
fInFile.rdbuf(fIfFile.rdbuf()); //@BUG 4326
|
fInFile.rdbuf(fIfFile.rdbuf()); //@BUG 4326
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Got new file, so reset fSkipRows
|
||||||
|
fSkipRows = fSdh.getSkipRows();
|
||||||
|
|
||||||
//@BUG 4326 -below three lines commented out
|
//@BUG 4326 -below three lines commented out
|
||||||
// if (!fInFile.is_open()) fInFile.open(fInFileName.c_str());
|
// if (!fInFile.is_open()) fInFile.open(fInFileName.c_str());
|
||||||
// if (!fInFile.good())
|
// if (!fInFile.good())
|
||||||
@ -657,13 +677,13 @@ void WEFileReadThread::initS3Connection(const WECmdArgs& args)
|
|||||||
s3Host = args.getS3Host();
|
s3Host = args.getS3Host();
|
||||||
ms3_library_init();
|
ms3_library_init();
|
||||||
s3Connection =
|
s3Connection =
|
||||||
ms3_init(s3Key.c_str(), s3Secret.c_str(), s3Region.c_str(), (s3Host.empty() ? NULL : s3Host.c_str()));
|
ms3_init(s3Key.c_str(), s3Secret.c_str(), s3Region.c_str(), (s3Host.empty() ? nullptr : s3Host.c_str()));
|
||||||
if (!s3Connection)
|
if (!s3Connection)
|
||||||
throw runtime_error("failed to get an S3 connection");
|
throw runtime_error("failed to get an S3 connection");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
s3Connection = NULL;
|
s3Connection = nullptr;
|
||||||
buf = NULL;
|
buf = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
@ -42,13 +42,11 @@ class WEFileReadThread;
|
|||||||
class WEReadThreadRunner
|
class WEReadThreadRunner
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
WEReadThreadRunner(WEFileReadThread& Owner) : fRef(Owner)
|
explicit WEReadThreadRunner(WEFileReadThread& Owner) : fRef(Owner)
|
||||||
{
|
{
|
||||||
// ctor
|
// ctor
|
||||||
}
|
}
|
||||||
~WEReadThreadRunner()
|
~WEReadThreadRunner() = default;
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void operator()(); // Thread function
|
void operator()(); // Thread function
|
||||||
|
|
||||||
@ -61,7 +59,7 @@ class WEReadThreadRunner
|
|||||||
class WEFileReadThread
|
class WEFileReadThread
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
WEFileReadThread(WESDHandler& aSdh);
|
explicit WEFileReadThread(WESDHandler& aSdh);
|
||||||
virtual ~WEFileReadThread();
|
virtual ~WEFileReadThread();
|
||||||
|
|
||||||
void reset();
|
void reset();
|
||||||
@ -82,9 +80,9 @@ class WEFileReadThread
|
|||||||
{
|
{
|
||||||
return fContinue;
|
return fContinue;
|
||||||
}
|
}
|
||||||
void setContinue(bool fContinue)
|
void setContinue(bool cont)
|
||||||
{
|
{
|
||||||
this->fContinue = fContinue;
|
fContinue = cont;
|
||||||
}
|
}
|
||||||
std::string getInFileName() const
|
std::string getInFileName() const
|
||||||
{
|
{
|
||||||
@ -98,30 +96,34 @@ class WEFileReadThread
|
|||||||
{
|
{
|
||||||
return fBatchQty;
|
return fBatchQty;
|
||||||
}
|
}
|
||||||
void setFpThread(boost::thread* fpThread)
|
void setFpThread(boost::thread* pThread)
|
||||||
{
|
{
|
||||||
this->fpThread = fpThread;
|
fpThread = pThread;
|
||||||
}
|
}
|
||||||
void setInFileName(std::string fInFileName)
|
void setInFileName(const std::string& inFileName)
|
||||||
{
|
{
|
||||||
if ((0 == fInFileName.compare("STDIN")) || (0 == fInFileName.compare("stdin")))
|
if (0 == inFileName.compare("STDIN") || 0 == inFileName.compare("stdin"))
|
||||||
this->fInFileName = "/dev/stdin";
|
{
|
||||||
|
fInFileName = "/dev/stdin";
|
||||||
|
}
|
||||||
else
|
else
|
||||||
this->fInFileName = fInFileName;
|
{
|
||||||
|
fInFileName = inFileName;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//@BUG 4326
|
//@BUG 4326
|
||||||
const std::istream& getInFile() const
|
const std::istream& getInFile() const
|
||||||
{
|
{
|
||||||
return fInFile;
|
return fInFile;
|
||||||
}
|
}
|
||||||
void setBatchQty(unsigned int BatchQty)
|
void setBatchQty(unsigned int batchQty)
|
||||||
{
|
{
|
||||||
fBatchQty = BatchQty;
|
fBatchQty = batchQty;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool chkForListOfFiles(std::string& FileName);
|
bool chkForListOfFiles(const std::string& fileName);
|
||||||
std::string getNextInputDataFile();
|
std::string getNextInputDataFile();
|
||||||
void add2InputDataFileList(std::string& FileName);
|
void add2InputDataFileList(const std::string& fileName);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum
|
enum
|
||||||
@ -130,9 +132,9 @@ class WEFileReadThread
|
|||||||
};
|
};
|
||||||
|
|
||||||
// don't allow anyone else to set
|
// don't allow anyone else to set
|
||||||
void setTgtPmId(unsigned int fTgtPmId)
|
void setTgtPmId(unsigned int tgtPmId)
|
||||||
{
|
{
|
||||||
this->fTgtPmId = fTgtPmId;
|
fTgtPmId = tgtPmId;
|
||||||
}
|
}
|
||||||
|
|
||||||
WESDHandler& fSdh;
|
WESDHandler& fSdh;
|
||||||
@ -148,11 +150,12 @@ class WEFileReadThread
|
|||||||
|
|
||||||
unsigned int fTgtPmId;
|
unsigned int fTgtPmId;
|
||||||
unsigned int fBatchQty;
|
unsigned int fBatchQty;
|
||||||
bool fEnclEsc; // Encl/Esc char is set
|
bool fEnclEsc; // Encl/Esc char is set
|
||||||
char fEncl; // Encl char
|
char fEncl; // Encl char
|
||||||
char fEsc; // Esc char
|
char fEsc; // Esc char
|
||||||
char fDelim; // Column Delimit char
|
char fDelim; // Column Delimit char
|
||||||
char* fBuff; // main data buffer
|
size_t fSkipRows; // Header rows to skip
|
||||||
|
char* fBuff; // main data buffer
|
||||||
int fBuffSize;
|
int fBuffSize;
|
||||||
|
|
||||||
/* To support mode 1 imports from objects on S3 */
|
/* To support mode 1 imports from objects on S3 */
|
||||||
|
@ -767,7 +767,7 @@ void WESDHandler::setup()
|
|||||||
oss << "Running distributed import (mode ";
|
oss << "Running distributed import (mode ";
|
||||||
oss << fRef.fCmdArgs.getMode() << ") on ";
|
oss << fRef.fCmdArgs.getMode() << ") on ";
|
||||||
|
|
||||||
if (fRef.fCmdArgs.getPmVecSize() == fPmCount)
|
if (fRef.fCmdArgs.getPmVecSize() == static_cast<size_t>(fPmCount))
|
||||||
oss << "all PMs...";
|
oss << "all PMs...";
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -2548,20 +2548,20 @@ void WESDHandler::exportJobFile(std::string& JobId, std::string& JobFileName)
|
|||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
bool WESDHandler::getConsoleLog()
|
bool WESDHandler::getConsoleLog() const
|
||||||
{
|
{
|
||||||
return fRef.fCmdArgs.getConsoleLog();
|
return fRef.fCmdArgs.getConsoleLog();
|
||||||
}
|
}
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
char WESDHandler::getEnclChar()
|
char WESDHandler::getEnclChar() const
|
||||||
{
|
{
|
||||||
return fRef.fCmdArgs.getEnclChar();
|
return fRef.fCmdArgs.getEnclChar();
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
char WESDHandler::getEscChar()
|
char WESDHandler::getEscChar() const
|
||||||
{
|
{
|
||||||
return fRef.fCmdArgs.getEscChar();
|
return fRef.fCmdArgs.getEscChar();
|
||||||
}
|
}
|
||||||
@ -2575,11 +2575,16 @@ int WESDHandler::getReadBufSize()
|
|||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
char WESDHandler::getDelimChar()
|
char WESDHandler::getDelimChar() const
|
||||||
{
|
{
|
||||||
return fRef.fCmdArgs.getDelimChar();
|
return fRef.fCmdArgs.getDelimChar();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t WESDHandler::getSkipRows() const
|
||||||
|
{
|
||||||
|
return fRef.fCmdArgs.getSkipRows();
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
std::string WESDHandler::getTableName() const
|
std::string WESDHandler::getTableName() const
|
||||||
|
@ -143,10 +143,11 @@ class WESDHandler
|
|||||||
void sendHeartbeats();
|
void sendHeartbeats();
|
||||||
std::string getTableName() const;
|
std::string getTableName() const;
|
||||||
std::string getSchemaName() const;
|
std::string getSchemaName() const;
|
||||||
char getEnclChar();
|
char getEnclChar() const;
|
||||||
char getEscChar();
|
char getEscChar() const;
|
||||||
char getDelimChar();
|
char getDelimChar() const;
|
||||||
bool getConsoleLog();
|
size_t getSkipRows() const;
|
||||||
|
bool getConsoleLog() const;
|
||||||
int getReadBufSize();
|
int getReadBufSize();
|
||||||
ImportDataMode getImportDataMode() const;
|
ImportDataMode getImportDataMode() const;
|
||||||
void sysLog(const logging::Message::Args& msgArgs, logging::LOG_TYPE logType,
|
void sysLog(const logging::Message::Args& msgArgs, logging::LOG_TYPE logType,
|
||||||
|
@ -46,9 +46,7 @@ class WEColOORInfo // Column Out-Of-Range Info
|
|||||||
WEColOORInfo() : fColNum(0), fColType(execplan::CalpontSystemCatalog::INT), fNoOfOORs(0)
|
WEColOORInfo() : fColNum(0), fColType(execplan::CalpontSystemCatalog::INT), fNoOfOORs(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
~WEColOORInfo()
|
~WEColOORInfo() = default;
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
int fColNum;
|
int fColNum;
|
||||||
@ -63,14 +61,12 @@ class WESdHandlerException : public std::exception
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
std::string fWhat;
|
std::string fWhat;
|
||||||
WESdHandlerException(std::string& What) throw()
|
explicit WESdHandlerException(const std::string& What) noexcept
|
||||||
{
|
{
|
||||||
fWhat = What;
|
fWhat = What;
|
||||||
}
|
}
|
||||||
virtual ~WESdHandlerException() throw()
|
~WESdHandlerException() noexcept override = default;
|
||||||
{
|
const char* what() const noexcept override
|
||||||
}
|
|
||||||
virtual const char* what() const throw()
|
|
||||||
{
|
{
|
||||||
return fWhat.c_str();
|
return fWhat.c_str();
|
||||||
}
|
}
|
||||||
@ -82,12 +78,10 @@ class WESdHandlerException : public std::exception
|
|||||||
class WESplClientRunner
|
class WESplClientRunner
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
WESplClientRunner(WESplClient& Sc) : fOwner(Sc)
|
explicit WESplClientRunner(WESplClient& Sc) : fOwner(Sc)
|
||||||
{ /* ctor */
|
{ /* ctor */
|
||||||
}
|
}
|
||||||
virtual ~WESplClientRunner()
|
virtual ~WESplClientRunner() = default;
|
||||||
{ /* dtor */
|
|
||||||
}
|
|
||||||
void operator()();
|
void operator()();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -389,9 +383,7 @@ class WESplClient
|
|||||||
WERowsUploadInfo() : fRowsRead(0), fRowsInserted(0)
|
WERowsUploadInfo() : fRowsRead(0), fRowsInserted(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
~WERowsUploadInfo()
|
~WERowsUploadInfo() = default;
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
int64_t fRowsRead;
|
int64_t fRowsRead;
|
||||||
|
@ -64,7 +64,6 @@ WESplitterApp::WESplitterApp(WECmdArgs& CmdArgs) : fCmdArgs(CmdArgs), fDh(*this)
|
|||||||
fpSysLog = SimpleSysLog::instance();
|
fpSysLog = SimpleSysLog::instance();
|
||||||
fpSysLog->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_SPLIT));
|
fpSysLog->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_SPLIT));
|
||||||
setupSignalHandlers();
|
setupSignalHandlers();
|
||||||
std::string err;
|
|
||||||
fDh.setDebugLvl(fCmdArgs.getDebugLvl());
|
fDh.setDebugLvl(fCmdArgs.getDebugLvl());
|
||||||
|
|
||||||
fDh.check4CpiInvokeMode();
|
fDh.check4CpiInvokeMode();
|
||||||
@ -100,6 +99,7 @@ WESplitterApp::WESplitterApp(WECmdArgs& CmdArgs) : fCmdArgs(CmdArgs), fDh(*this)
|
|||||||
}
|
}
|
||||||
catch (std::exception& ex)
|
catch (std::exception& ex)
|
||||||
{
|
{
|
||||||
|
std::string err;
|
||||||
// err = string("Error in constructing WESplitterApp") + ex.what();
|
// err = string("Error in constructing WESplitterApp") + ex.what();
|
||||||
err = ex.what(); // cleaning up for BUG 4298
|
err = ex.what(); // cleaning up for BUG 4298
|
||||||
logging::Message::Args errMsgArgs;
|
logging::Message::Args errMsgArgs;
|
||||||
@ -139,10 +139,10 @@ WESplitterApp::~WESplitterApp()
|
|||||||
// fDh.shutdown();
|
// fDh.shutdown();
|
||||||
usleep(1000); // 1 millisec just checking
|
usleep(1000); // 1 millisec just checking
|
||||||
|
|
||||||
std::string aStr = "Calling WESplitterApp Destructor\n";
|
|
||||||
|
|
||||||
if (fDh.getDebugLvl())
|
if (fDh.getDebugLvl())
|
||||||
cout << aStr << endl;
|
{
|
||||||
|
cout << "Calling WESplitterApp Destructor" << endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -151,18 +151,18 @@ WESplitterApp::~WESplitterApp()
|
|||||||
|
|
||||||
void WESplitterApp::setupSignalHandlers()
|
void WESplitterApp::setupSignalHandlers()
|
||||||
{
|
{
|
||||||
struct sigaction sa;
|
struct sigaction sa{};
|
||||||
memset(&sa, 0, sizeof(sa));
|
memset(&sa, 0, sizeof(sa));
|
||||||
sa.sa_handler = WESplitterApp::onSigInterrupt;
|
sa.sa_handler = WESplitterApp::onSigInterrupt;
|
||||||
sigaction(SIGINT, &sa, 0);
|
sigaction(SIGINT, &sa, nullptr);
|
||||||
sa.sa_handler = WESplitterApp::onSigTerminate;
|
sa.sa_handler = WESplitterApp::onSigTerminate;
|
||||||
sigaction(SIGTERM, &sa, 0);
|
sigaction(SIGTERM, &sa, nullptr);
|
||||||
sa.sa_handler = SIG_IGN;
|
sa.sa_handler = SIG_IGN;
|
||||||
sigaction(SIGPIPE, &sa, 0);
|
sigaction(SIGPIPE, &sa, nullptr);
|
||||||
sa.sa_handler = WESplitterApp::onSigHup;
|
sa.sa_handler = WESplitterApp::onSigHup;
|
||||||
sigaction(SIGHUP, &sa, 0);
|
sigaction(SIGHUP, &sa, nullptr);
|
||||||
sa.sa_handler = WESplitterApp::onSigInterrupt;
|
sa.sa_handler = WESplitterApp::onSigInterrupt;
|
||||||
sigaction(SIGUSR1, &sa, 0);
|
sigaction(SIGUSR1, &sa, nullptr);
|
||||||
/*
|
/*
|
||||||
signal(SIGPIPE, SIG_IGN);
|
signal(SIGPIPE, SIG_IGN);
|
||||||
signal(SIGINT, WESplitterApp::onSigInterrupt);
|
signal(SIGINT, WESplitterApp::onSigInterrupt);
|
||||||
@ -258,7 +258,7 @@ void WESplitterApp::processMessages()
|
|||||||
}
|
}
|
||||||
|
|
||||||
aBs.restart();
|
aBs.restart();
|
||||||
std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine();
|
std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine(false);
|
||||||
fDh.fLog.logMsg(aCpImpCmd, MSGLVL_INFO2);
|
fDh.fLog.logMsg(aCpImpCmd, MSGLVL_INFO2);
|
||||||
|
|
||||||
if (fDh.getDebugLvl())
|
if (fDh.getDebugLvl())
|
||||||
@ -315,7 +315,7 @@ void WESplitterApp::processMessages()
|
|||||||
}
|
}
|
||||||
|
|
||||||
aBs.restart();
|
aBs.restart();
|
||||||
std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine();
|
std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine(false);
|
||||||
fDh.fLog.logMsg(aCpImpCmd, MSGLVL_INFO2);
|
fDh.fLog.logMsg(aCpImpCmd, MSGLVL_INFO2);
|
||||||
|
|
||||||
if (fDh.getDebugLvl())
|
if (fDh.getDebugLvl())
|
||||||
@ -467,7 +467,7 @@ void WESplitterApp::invokeCpimport()
|
|||||||
fCmdArgs.setJobUUID(u);
|
fCmdArgs.setJobUUID(u);
|
||||||
|
|
||||||
fCmdArgs.setMode(3);
|
fCmdArgs.setMode(3);
|
||||||
std::string aCmdLineStr = fCmdArgs.getCpImportCmdLine();
|
std::string aCmdLineStr = fCmdArgs.getCpImportCmdLine(true);
|
||||||
|
|
||||||
if (fDh.getDebugLvl())
|
if (fDh.getDebugLvl())
|
||||||
cout << "CPI CmdLineArgs : " << aCmdLineStr << endl;
|
cout << "CPI CmdLineArgs : " << aCmdLineStr << endl;
|
||||||
@ -477,7 +477,6 @@ void WESplitterApp::invokeCpimport()
|
|||||||
std::istringstream ss(aCmdLineStr);
|
std::istringstream ss(aCmdLineStr);
|
||||||
std::string arg;
|
std::string arg;
|
||||||
std::vector<std::string> v2;
|
std::vector<std::string> v2;
|
||||||
v2.reserve(50);
|
|
||||||
|
|
||||||
while (ss >> arg)
|
while (ss >> arg)
|
||||||
{
|
{
|
||||||
@ -490,7 +489,7 @@ void WESplitterApp::invokeCpimport()
|
|||||||
Cmds.push_back(const_cast<char*>(v2[j].c_str()));
|
Cmds.push_back(const_cast<char*>(v2[j].c_str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Cmds.push_back(0); // null terminate
|
Cmds.push_back(nullptr); // null terminate
|
||||||
|
|
||||||
int aRet = execvp(Cmds[0], &Cmds[0]); // NOTE - works with full Path
|
int aRet = execvp(Cmds[0], &Cmds[0]); // NOTE - works with full Path
|
||||||
|
|
||||||
@ -515,7 +514,7 @@ void WESplitterApp::updateWithJobFile(int aIdx)
|
|||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
std::string err;
|
std::string err;
|
||||||
std::cin.sync_with_stdio(false);
|
std::istream::sync_with_stdio(false);
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -528,7 +527,7 @@ int main(int argc, char** argv)
|
|||||||
for (int idx = 0; idx < aTblCnt; idx++)
|
for (int idx = 0; idx < aTblCnt; idx++)
|
||||||
{
|
{
|
||||||
aWESplitterApp.fDh.reset();
|
aWESplitterApp.fDh.reset();
|
||||||
aWESplitterApp.fContinue = true;
|
WriteEngine::WESplitterApp::fContinue = true;
|
||||||
aWESplitterApp.updateWithJobFile(idx);
|
aWESplitterApp.updateWithJobFile(idx);
|
||||||
|
|
||||||
try
|
try
|
||||||
@ -541,10 +540,10 @@ int main(int argc, char** argv)
|
|||||||
err = ex.what(); // cleaning up for BUG 4298
|
err = ex.what(); // cleaning up for BUG 4298
|
||||||
logging::Message::Args errMsgArgs;
|
logging::Message::Args errMsgArgs;
|
||||||
errMsgArgs.add(err);
|
errMsgArgs.add(err);
|
||||||
aWESplitterApp.fpSysLog->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0000);
|
WriteEngine::WESplitterApp::fpSysLog->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0000);
|
||||||
SPLTR_EXIT_STATUS = 1;
|
SPLTR_EXIT_STATUS = 1;
|
||||||
aWESplitterApp.fDh.fLog.logMsg(err, WriteEngine::MSGLVL_ERROR);
|
aWESplitterApp.fDh.fLog.logMsg(err, WriteEngine::MSGLVL_ERROR);
|
||||||
aWESplitterApp.fContinue = false;
|
WriteEngine::WESplitterApp::fContinue = false;
|
||||||
// throw runtime_error(err); BUG 4298
|
// throw runtime_error(err); BUG 4298
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -46,20 +46,23 @@ namespace WriteEngine
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// WEXmlgetter constructor
|
// WEXmlgetter constructor
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
WEXmlgetter::WEXmlgetter(std::string& ConfigName) : fConfigName(ConfigName), fDoc(NULL), fpRoot(NULL)
|
WEXmlgetter::WEXmlgetter(const std::string& ConfigName)
|
||||||
|
: fConfigName(ConfigName)
|
||||||
|
, fDoc(nullptr)
|
||||||
|
, fpRoot(nullptr)
|
||||||
{
|
{
|
||||||
// xmlNodePtr curPtr;
|
// xmlNodePtr curPtr;
|
||||||
fDoc = xmlParseFile(ConfigName.c_str());
|
fDoc = xmlParseFile(ConfigName.c_str());
|
||||||
|
|
||||||
if (fDoc == NULL)
|
if (fDoc == nullptr)
|
||||||
throw runtime_error("WEXmlgetter::getConfig(): no XML document!");
|
throw runtime_error("WEXmlgetter::getConfig(): no XML document!");
|
||||||
|
|
||||||
fpRoot = xmlDocGetRootElement(fDoc);
|
fpRoot = xmlDocGetRootElement(fDoc);
|
||||||
|
|
||||||
if (fpRoot == NULL)
|
if (fpRoot == nullptr)
|
||||||
{
|
{
|
||||||
xmlFreeDoc(fDoc);
|
xmlFreeDoc(fDoc);
|
||||||
fDoc = NULL;
|
fDoc = nullptr;
|
||||||
throw runtime_error("WEXmlgetter::getConfig(): no XML Root Tag!");
|
throw runtime_error("WEXmlgetter::getConfig(): no XML Root Tag!");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -70,24 +73,24 @@ WEXmlgetter::WEXmlgetter(std::string& ConfigName) : fConfigName(ConfigName), fDo
|
|||||||
WEXmlgetter::~WEXmlgetter()
|
WEXmlgetter::~WEXmlgetter()
|
||||||
{
|
{
|
||||||
xmlFreeDoc(fDoc);
|
xmlFreeDoc(fDoc);
|
||||||
fDoc = NULL;
|
fDoc = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Get/return the property or attribute value (strVal) for the specified xml tag
|
// Get/return the property or attribute value (strVal) for the specified xml tag
|
||||||
// (pNode) and property/attribute (pTag)
|
// (pNode) and property/attribute (pTag)
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal) const
|
bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal)
|
||||||
{
|
{
|
||||||
xmlChar* pTmp = NULL;
|
xmlChar* pTmp = nullptr;
|
||||||
bool bFound = false;
|
bool bFound = false;
|
||||||
|
|
||||||
pTmp = xmlGetProp(const_cast<xmlNode*>(pNode), (xmlChar*)pTag);
|
pTmp = xmlGetProp(pNode, reinterpret_cast<const xmlChar*>(pTag));
|
||||||
|
|
||||||
if (pTmp)
|
if (pTmp)
|
||||||
{
|
{
|
||||||
bFound = true;
|
bFound = true;
|
||||||
strVal = (char*)pTmp;
|
strVal = reinterpret_cast<char*>(pTmp);
|
||||||
xmlFree(pTmp);
|
xmlFree(pTmp);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -101,19 +104,19 @@ bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std::
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Get/return the node content (strVal) for the specified xml tag (pNode)
|
// Get/return the node content (strVal) for the specified xml tag (pNode)
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
bool WEXmlgetter::getNodeContent(const xmlNode* pNode, std::string& strVal) const
|
bool WEXmlgetter::getNodeContent(const xmlNode* pNode, std::string& strVal)
|
||||||
{
|
{
|
||||||
xmlChar* pTmp = NULL;
|
xmlChar* pTmp = nullptr;
|
||||||
bool bFound = false;
|
bool bFound = false;
|
||||||
|
|
||||||
if (pNode->children != NULL)
|
if (pNode->children != nullptr)
|
||||||
{
|
{
|
||||||
pTmp = xmlNodeGetContent(pNode->children);
|
pTmp = xmlNodeGetContent(pNode->children);
|
||||||
|
|
||||||
if (pTmp)
|
if (pTmp)
|
||||||
{
|
{
|
||||||
bFound = true;
|
bFound = true;
|
||||||
strVal = (char*)pTmp;
|
strVal = reinterpret_cast<char*>(pTmp);
|
||||||
xmlFree(pTmp);
|
xmlFree(pTmp);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -152,29 +155,29 @@ void WEXmlgetter::getConfig(const string& section, const string& name, vector<st
|
|||||||
{
|
{
|
||||||
string res;
|
string res;
|
||||||
|
|
||||||
if (section.length() == 0)
|
if (section.empty())
|
||||||
throw invalid_argument("Config::getConfig: section must have a length");
|
throw invalid_argument("Config::getConfig: section must have a length");
|
||||||
|
|
||||||
xmlNode* pPtr = fpRoot->xmlChildrenNode;
|
const xmlNode* pPtr = fpRoot->xmlChildrenNode;
|
||||||
|
|
||||||
while (pPtr != NULL)
|
while (pPtr != nullptr)
|
||||||
{
|
{
|
||||||
// cout << "pPtr->name: " <<
|
// cout << "pPtr->name: " <<
|
||||||
// (const xmlChar*)pPtr->name << std::endl;
|
// (const xmlChar*)pPtr->name << std::endl;
|
||||||
|
|
||||||
if ((!xmlStrcmp(pPtr->name, (const xmlChar*)section.c_str())))
|
if ((!xmlStrcmp(pPtr->name, reinterpret_cast<const xmlChar*>(section.c_str()))))
|
||||||
{
|
{
|
||||||
xmlNodePtr pPtr2 = pPtr->xmlChildrenNode;
|
xmlNodePtr pPtr2 = pPtr->xmlChildrenNode;
|
||||||
|
|
||||||
while (pPtr2 != NULL)
|
while (pPtr2 != nullptr)
|
||||||
{
|
{
|
||||||
// cout << " pPtr2->name: " <<
|
// cout << " pPtr2->name: " <<
|
||||||
// (const xmlChar*)pPtr2->name << std::endl;
|
// (const xmlChar*)pPtr2->name << std::endl;
|
||||||
|
|
||||||
if ((!xmlStrcmp(pPtr2->name, (const xmlChar*)name.c_str())))
|
if ((!xmlStrcmp(pPtr2->name, reinterpret_cast<const xmlChar*>(name.c_str()))))
|
||||||
{
|
{
|
||||||
xmlNodePtr pPtr3 = pPtr2->xmlChildrenNode;
|
xmlNodePtr pPtr3 = pPtr2->xmlChildrenNode;
|
||||||
values.push_back((const char*)pPtr3->content);
|
values.emplace_back(reinterpret_cast<const char*>(pPtr3->content));
|
||||||
|
|
||||||
// cout << " pPtr3->name: " <<
|
// cout << " pPtr3->name: " <<
|
||||||
// (const xmlChar*)pPtr3->name <<
|
// (const xmlChar*)pPtr3->name <<
|
||||||
@ -204,8 +207,8 @@ std::string WEXmlgetter::getValue(const vector<string>& sections) const
|
|||||||
{
|
{
|
||||||
std::string aRet;
|
std::string aRet;
|
||||||
const xmlNode* pPtr = fpRoot;
|
const xmlNode* pPtr = fpRoot;
|
||||||
int aSize = sections.size();
|
auto aSize = sections.size();
|
||||||
int aIdx = 0;
|
size_t aIdx = 0;
|
||||||
|
|
||||||
// cout << aSize << endl;
|
// cout << aSize << endl;
|
||||||
while (aIdx < aSize)
|
while (aIdx < aSize)
|
||||||
@ -213,7 +216,7 @@ std::string WEXmlgetter::getValue(const vector<string>& sections) const
|
|||||||
// cout << aIdx <<" "<< sections[aIdx] << endl;
|
// cout << aIdx <<" "<< sections[aIdx] << endl;
|
||||||
pPtr = getNode(pPtr, sections[aIdx]);
|
pPtr = getNode(pPtr, sections[aIdx]);
|
||||||
|
|
||||||
if ((pPtr == NULL) || (aIdx == aSize - 1))
|
if ((pPtr == nullptr) || (aIdx == aSize - 1))
|
||||||
break;
|
break;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -223,7 +226,7 @@ std::string WEXmlgetter::getValue(const vector<string>& sections) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pPtr != NULL)
|
if (pPtr != nullptr)
|
||||||
{
|
{
|
||||||
// aRet = (const char*)pPtr->content;
|
// aRet = (const char*)pPtr->content;
|
||||||
std::string aBuff;
|
std::string aBuff;
|
||||||
@ -240,17 +243,17 @@ std::string WEXmlgetter::getValue(const vector<string>& sections) const
|
|||||||
// a node with the specified name (section). The xmlNode (if found) is
|
// a node with the specified name (section). The xmlNode (if found) is
|
||||||
// returned.
|
// returned.
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
const xmlNode* WEXmlgetter::getNode(const xmlNode* pParent, const string& section) const
|
const xmlNode* WEXmlgetter::getNode(const xmlNode* pParent, const string& section)
|
||||||
{
|
{
|
||||||
if (pParent == NULL)
|
if (pParent == nullptr)
|
||||||
return NULL;
|
return nullptr;
|
||||||
|
|
||||||
const xmlNode* pPtr = pParent;
|
const xmlNode* pPtr = pParent;
|
||||||
|
|
||||||
while (pPtr != NULL)
|
while (pPtr != nullptr)
|
||||||
{
|
{
|
||||||
// cout << "getNode Name " << (const char*)pPtr->name << endl;
|
// cout << "getNode Name " << (const char*)pPtr->name << endl;
|
||||||
if (!xmlStrcmp(pPtr->name, (const xmlChar*)section.c_str()))
|
if (!xmlStrcmp(pPtr->name, reinterpret_cast<const xmlChar*>(section.c_str())))
|
||||||
return pPtr;
|
return pPtr;
|
||||||
else
|
else
|
||||||
pPtr = pPtr->next;
|
pPtr = pPtr->next;
|
||||||
@ -268,12 +271,12 @@ std::string WEXmlgetter::getAttribute(const vector<string>& sections, const stri
|
|||||||
{
|
{
|
||||||
std::string aRet;
|
std::string aRet;
|
||||||
const xmlNode* pPtr = fpRoot;
|
const xmlNode* pPtr = fpRoot;
|
||||||
int aSize = sections.size();
|
auto aSize = sections.size();
|
||||||
|
|
||||||
if (aSize == 0)
|
if (aSize == 0)
|
||||||
throw invalid_argument("WEXmlgetter::getAttribute(): section must be valid");
|
throw invalid_argument("WEXmlgetter::getAttribute(): section must be valid");
|
||||||
|
|
||||||
int aIdx = 0;
|
size_t aIdx = 0;
|
||||||
|
|
||||||
// cout << aSize << endl;
|
// cout << aSize << endl;
|
||||||
while (aIdx < aSize)
|
while (aIdx < aSize)
|
||||||
@ -281,7 +284,7 @@ std::string WEXmlgetter::getAttribute(const vector<string>& sections, const stri
|
|||||||
// cout << aIdx <<" "<< sections[aIdx] << endl;
|
// cout << aIdx <<" "<< sections[aIdx] << endl;
|
||||||
pPtr = getNode(pPtr, sections[aIdx]);
|
pPtr = getNode(pPtr, sections[aIdx]);
|
||||||
|
|
||||||
if ((pPtr == NULL) || (aIdx == aSize - 1))
|
if ((pPtr == nullptr) || (aIdx == aSize - 1))
|
||||||
break;
|
break;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -291,7 +294,7 @@ std::string WEXmlgetter::getAttribute(const vector<string>& sections, const stri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pPtr != NULL)
|
if (pPtr != nullptr)
|
||||||
{
|
{
|
||||||
std::string aBuff;
|
std::string aBuff;
|
||||||
|
|
||||||
@ -315,10 +318,10 @@ std::string WEXmlgetter::getAttribute(const vector<string>& sections, const stri
|
|||||||
// is returned.
|
// is returned.
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
void WEXmlgetter::getAttributeListForAllChildren(const vector<string>& sections, const string& attributeTag,
|
void WEXmlgetter::getAttributeListForAllChildren(const vector<string>& sections, const string& attributeTag,
|
||||||
vector<string>& attributeValues)
|
vector<string>& attributeValues) const
|
||||||
{
|
{
|
||||||
const xmlNode* pPtr = fpRoot;
|
const xmlNode* pPtr = fpRoot;
|
||||||
int aSize = sections.size();
|
auto aSize = sections.size();
|
||||||
|
|
||||||
if (aSize == 0)
|
if (aSize == 0)
|
||||||
{
|
{
|
||||||
@ -328,13 +331,13 @@ void WEXmlgetter::getAttributeListForAllChildren(const vector<string>& sections,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Step down the branch that has the nodes of interest
|
// Step down the branch that has the nodes of interest
|
||||||
int aIdx = 0;
|
size_t aIdx = 0;
|
||||||
|
|
||||||
while (aIdx < aSize)
|
while (aIdx < aSize)
|
||||||
{
|
{
|
||||||
pPtr = getNode(pPtr, sections[aIdx]);
|
pPtr = getNode(pPtr, sections[aIdx]);
|
||||||
|
|
||||||
if ((pPtr == NULL) || (aIdx == aSize - 1))
|
if ((pPtr == nullptr) || (aIdx == aSize - 1))
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -347,9 +350,9 @@ void WEXmlgetter::getAttributeListForAllChildren(const vector<string>& sections,
|
|||||||
|
|
||||||
// Look for all the "matching" nodes at the end of the branch, and
|
// Look for all the "matching" nodes at the end of the branch, and
|
||||||
// get the requested attribute value for each matching node.
|
// get the requested attribute value for each matching node.
|
||||||
if (pPtr != NULL)
|
if (pPtr != nullptr)
|
||||||
{
|
{
|
||||||
while (pPtr != NULL)
|
while (pPtr != nullptr)
|
||||||
{
|
{
|
||||||
std::string attrib;
|
std::string attrib;
|
||||||
|
|
||||||
|
@ -36,23 +36,23 @@ namespace WriteEngine
|
|||||||
class WEXmlgetter
|
class WEXmlgetter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
WEXmlgetter(std::string& ConfigName);
|
explicit WEXmlgetter(const std::string& ConfigName);
|
||||||
virtual ~WEXmlgetter();
|
~WEXmlgetter();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
//..Public methods
|
//..Public methods
|
||||||
std::string getValue(const std::vector<std::string>& section) const;
|
std::string getValue(const std::vector<std::string>& sections) const;
|
||||||
std::string getAttribute(const std::vector<std::string>& sections, const std::string& Tag) const;
|
std::string getAttribute(const std::vector<std::string>& sections, const std::string& Tag) const;
|
||||||
void getConfig(const std::string& section, const std::string& name, std::vector<std::string>& values) const;
|
void getConfig(const std::string& section, const std::string& name, std::vector<std::string>& values) const;
|
||||||
void getAttributeListForAllChildren(const std::vector<std::string>& sections,
|
void getAttributeListForAllChildren(const std::vector<std::string>& sections,
|
||||||
const std::string& attributeTag,
|
const std::string& attributeTag,
|
||||||
std::vector<std::string>& attributeValues);
|
std::vector<std::string>& attributeValues) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
//..Private methods
|
//..Private methods
|
||||||
const xmlNode* getNode(const xmlNode* pParent, const std::string& section) const;
|
static const xmlNode* getNode(const xmlNode* pParent, const std::string& section);
|
||||||
bool getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal) const;
|
static bool getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal);
|
||||||
bool getNodeContent(const xmlNode* pNode, std::string& strVal) const;
|
static bool getNodeContent(const xmlNode* pNode, std::string& strVal);
|
||||||
|
|
||||||
//..Private data members
|
//..Private data members
|
||||||
std::string fConfigName; // xml filename
|
std::string fConfigName; // xml filename
|
||||||
|
@ -35,18 +35,19 @@ namespace WriteEngine
|
|||||||
{
|
{
|
||||||
/* static */ const std::string XMLGenData::DELIMITER("-d");
|
/* static */ const std::string XMLGenData::DELIMITER("-d");
|
||||||
/* static */ const std::string XMLGenData::DESCRIPTION("-s");
|
/* static */ const std::string XMLGenData::DESCRIPTION("-s");
|
||||||
/* static */ const std::string XMLGenData::ENCLOSED_BY_CHAR("-E");
|
/* static */ const std::string XMLGenData::ENCLOSED_BY_CHAR("-E");
|
||||||
/* static */ const std::string XMLGenData::ESCAPE_CHAR("-C");
|
/* static */ const std::string XMLGenData::ESCAPE_CHAR("-C");
|
||||||
/* static */ const std::string XMLGenData::JOBID("-j");
|
/* static */ const std::string XMLGenData::JOBID("-j");
|
||||||
/* static */ const std::string XMLGenData::MAXERROR("-e");
|
/* static */ const std::string XMLGenData::MAXERROR("-e");
|
||||||
/* static */ const std::string XMLGenData::NAME("-n");
|
/* static */ const std::string XMLGenData::NAME("-n");
|
||||||
/* static */ const std::string XMLGenData::PATH("-p");
|
/* static */ const std::string XMLGenData::PATH("-p");
|
||||||
/* static */ const std::string XMLGenData::RPT_DEBUG("-b");
|
/* static */ const std::string XMLGenData::RPT_DEBUG("-b");
|
||||||
/* static */ const std::string XMLGenData::USER("-u");
|
/* static */ const std::string XMLGenData::USER("-u");
|
||||||
/* static */ const std::string XMLGenData::NO_OF_READ_BUFFER("-r");
|
/* static */ const std::string XMLGenData::NO_OF_READ_BUFFER("-r");
|
||||||
/* static */ const std::string XMLGenData::READ_BUFFER_CAPACITY("-c");
|
/* static */ const std::string XMLGenData::READ_BUFFER_CAPACITY("-c");
|
||||||
/* static */ const std::string XMLGenData::WRITE_BUFFER_SIZE("-w");
|
/* static */ const std::string XMLGenData::WRITE_BUFFER_SIZE("-w");
|
||||||
/* static */ const std::string XMLGenData::EXT("-x");
|
/* static */ const std::string XMLGenData::EXT("-x");
|
||||||
|
/* static */ const std::string XMLGenData::SKIP_ROWS("-O");
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// XMLGenData constructor
|
// XMLGenData constructor
|
||||||
@ -54,39 +55,38 @@ namespace WriteEngine
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
XMLGenData::XMLGenData()
|
XMLGenData::XMLGenData()
|
||||||
{
|
{
|
||||||
fParms.insert(ParmList::value_type(DELIMITER, std::string("|")));
|
fParms.emplace(DELIMITER, "|");
|
||||||
fParms.insert(ParmList::value_type(DESCRIPTION, std::string()));
|
fParms.emplace(DESCRIPTION, "");
|
||||||
fParms.insert(ParmList::value_type(ENCLOSED_BY_CHAR, std::string("")));
|
fParms.emplace(ENCLOSED_BY_CHAR, "");
|
||||||
fParms.insert(ParmList::value_type(ESCAPE_CHAR, std::string("\\")));
|
fParms.emplace(ESCAPE_CHAR, "\\");
|
||||||
fParms.insert(ParmList::value_type(JOBID, std::string("299")));
|
fParms.emplace(JOBID, "299");
|
||||||
fParms.insert(ParmList::value_type(MAXERROR, std::string("10")));
|
fParms.emplace(MAXERROR, "10");
|
||||||
fParms.insert(ParmList::value_type(NAME, std::string()));
|
fParms.emplace(NAME, "");
|
||||||
boost::filesystem::path p{std::string(Config::getBulkRoot())};
|
boost::filesystem::path p{std::string(Config::getBulkRoot())};
|
||||||
p /= JOBDIR;
|
p /= JOBDIR;
|
||||||
fParms.insert(ParmList::value_type(PATH, p.string()));
|
fParms.emplace(PATH, p.string());
|
||||||
|
|
||||||
fParms.insert(ParmList::value_type(RPT_DEBUG, std::string("0")));
|
fParms.emplace(RPT_DEBUG, "0");
|
||||||
fParms.insert(ParmList::value_type(USER, std::string()));
|
fParms.emplace(USER, "");
|
||||||
fParms.insert(ParmList::value_type(NO_OF_READ_BUFFER, std::string("5")));
|
fParms.emplace(NO_OF_READ_BUFFER, "5");
|
||||||
fParms.insert(ParmList::value_type(READ_BUFFER_CAPACITY, std::string("1048576")));
|
fParms.emplace(READ_BUFFER_CAPACITY, "1048576");
|
||||||
fParms.insert(ParmList::value_type(WRITE_BUFFER_SIZE, std::string("10485760")));
|
fParms.emplace(WRITE_BUFFER_SIZE, "10485760");
|
||||||
fParms.insert(ParmList::value_type(EXT, std::string("tbl")));
|
fParms.emplace(EXT, "tbl");
|
||||||
|
fParms.emplace(SKIP_ROWS, "0");
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// XMLGenData destructor
|
// XMLGenData destructor
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
/* virtual */
|
/* virtual */
|
||||||
XMLGenData::~XMLGenData()
|
XMLGenData::~XMLGenData() = default;
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Return value for the specified parm.
|
// Return value for the specified parm.
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
std::string XMLGenData::getParm(const std::string& key) const
|
std::string XMLGenData::getParm(const std::string& key) const
|
||||||
{
|
{
|
||||||
ParmList::const_iterator p = fParms.find(key);
|
auto p = fParms.find(key);
|
||||||
|
|
||||||
if (fParms.end() != p)
|
if (fParms.end() != p)
|
||||||
return p->second;
|
return p->second;
|
||||||
|
@ -60,10 +60,13 @@ class XMLGenData
|
|||||||
EXPORT const static std::string READ_BUFFER_CAPACITY;
|
EXPORT const static std::string READ_BUFFER_CAPACITY;
|
||||||
EXPORT const static std::string WRITE_BUFFER_SIZE;
|
EXPORT const static std::string WRITE_BUFFER_SIZE;
|
||||||
EXPORT const static std::string EXT;
|
EXPORT const static std::string EXT;
|
||||||
|
EXPORT const static std::string SKIP_ROWS;
|
||||||
|
|
||||||
/** @brief XMLGenData constructor
|
/** @brief XMLGenData constructor
|
||||||
*/
|
*/
|
||||||
EXPORT XMLGenData();
|
EXPORT XMLGenData();
|
||||||
|
XMLGenData(const XMLGenData&) = delete;
|
||||||
|
XMLGenData& operator=(const XMLGenData&) = delete;
|
||||||
|
|
||||||
/** @brief XMLGenData destructor
|
/** @brief XMLGenData destructor
|
||||||
*/
|
*/
|
||||||
@ -92,10 +95,6 @@ class XMLGenData
|
|||||||
ParmList fParms;
|
ParmList fParms;
|
||||||
std::string fSchema;
|
std::string fSchema;
|
||||||
LoadNames fLoadNames;
|
LoadNames fLoadNames;
|
||||||
|
|
||||||
private:
|
|
||||||
XMLGenData(const XMLGenData&); // disable default copy ctor
|
|
||||||
XMLGenData& operator=(const XMLGenData&); // disable default assignment
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace WriteEngine
|
} // namespace WriteEngine
|
||||||
|
@ -147,6 +147,11 @@ void XMLGenProc::startXMLFile()
|
|||||||
xmlTextWriterWriteElement(fWriter, BAD_CAST xmlTagTable[TAG_ESCAPE_CHAR],
|
xmlTextWriterWriteElement(fWriter, BAD_CAST xmlTagTable[TAG_ESCAPE_CHAR],
|
||||||
BAD_CAST fInputMgr->getParm(XMLGenData::ESCAPE_CHAR).c_str());
|
BAD_CAST fInputMgr->getParm(XMLGenData::ESCAPE_CHAR).c_str());
|
||||||
|
|
||||||
|
if (auto skipRows = fInputMgr->getParm(XMLGenData::SKIP_ROWS); !skipRows.empty())
|
||||||
|
{
|
||||||
|
xmlTextWriterWriteElement(fWriter, BAD_CAST xmlTagTable[TAG_SKIP_ROWS], BAD_CAST skipRows.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
// Added new tags for configurable parameters
|
// Added new tags for configurable parameters
|
||||||
xmlTextWriterStartElement(fWriter, BAD_CAST xmlTagTable[TAG_READ_BUFFERS]);
|
xmlTextWriterStartElement(fWriter, BAD_CAST xmlTagTable[TAG_READ_BUFFERS]);
|
||||||
xmlTextWriterWriteFormatAttribute(fWriter, BAD_CAST xmlTagTable[TAG_NO_OF_READ_BUFFERS], "%d",
|
xmlTextWriterWriteFormatAttribute(fWriter, BAD_CAST xmlTagTable[TAG_NO_OF_READ_BUFFERS], "%d",
|
||||||
|
@ -130,6 +130,7 @@ void XMLJob::printJobInfo(Log& logger) const
|
|||||||
oss1 << "Read Buffers: " << job.numberOfReadBuffers << endl;
|
oss1 << "Read Buffers: " << job.numberOfReadBuffers << endl;
|
||||||
oss1 << "Read Buffer Size: " << job.readBufferSize << endl;
|
oss1 << "Read Buffer Size: " << job.readBufferSize << endl;
|
||||||
oss1 << "setvbuf Size: " << job.writeBufferSize << endl;
|
oss1 << "setvbuf Size: " << job.writeBufferSize << endl;
|
||||||
|
oss1 << "Header rows : " << job.fSkipRows << endl;
|
||||||
oss1 << "Create Date : " << job.createDate << endl;
|
oss1 << "Create Date : " << job.createDate << endl;
|
||||||
oss1 << "Create Time : " << job.createTime << endl;
|
oss1 << "Create Time : " << job.createTime << endl;
|
||||||
oss1 << "Schema Name : " << job.schema << endl;
|
oss1 << "Schema Name : " << job.schema << endl;
|
||||||
@ -223,7 +224,8 @@ void XMLJob::printJobInfoBrief(Log& logger) const
|
|||||||
oss1 << "n/a";
|
oss1 << "n/a";
|
||||||
|
|
||||||
oss1 << "); ReadBufs(" << job.numberOfReadBuffers << "); ReadBufSize(" << job.readBufferSize
|
oss1 << "); ReadBufs(" << job.numberOfReadBuffers << "); ReadBufSize(" << job.readBufferSize
|
||||||
<< "); setvbufSize(" << job.writeBufferSize << ')';
|
<< "); setvbufSize(" << job.writeBufferSize << "); "
|
||||||
|
<< "SkipRows(" << job.fSkipRows << ")";
|
||||||
logger.logMsg(oss1.str(), MSGLVL_INFO2);
|
logger.logMsg(oss1.str(), MSGLVL_INFO2);
|
||||||
|
|
||||||
for (unsigned int i = 0; i < job.jobTableList.size(); i++)
|
for (unsigned int i = 0; i < job.jobTableList.size(); i++)
|
||||||
@ -316,6 +318,8 @@ bool XMLJob::processNode(xmlNode* pNode)
|
|||||||
setJobData(pNode, TAG_ENCLOSED_BY_CHAR, true, TYPE_CHAR);
|
setJobData(pNode, TAG_ENCLOSED_BY_CHAR, true, TYPE_CHAR);
|
||||||
else if (isTag(pNode, TAG_ESCAPE_CHAR))
|
else if (isTag(pNode, TAG_ESCAPE_CHAR))
|
||||||
setJobData(pNode, TAG_ESCAPE_CHAR, true, TYPE_CHAR);
|
setJobData(pNode, TAG_ESCAPE_CHAR, true, TYPE_CHAR);
|
||||||
|
else if (isTag(pNode, TAG_SKIP_ROWS))
|
||||||
|
setJobData(pNode, TAG_SKIP_ROWS, true, TYPE_INT);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ostringstream oss;
|
ostringstream oss;
|
||||||
@ -432,6 +436,12 @@ void XMLJob::setJobData(xmlNode* pNode, const xmlTag tag, bool bExpectContent, X
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case TAG_SKIP_ROWS:
|
||||||
|
{
|
||||||
|
fJob.fSkipRows = intVal;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -73,6 +73,7 @@ enum xmlTag
|
|||||||
TAG_TBL_OID,
|
TAG_TBL_OID,
|
||||||
TAG_WIDTH,
|
TAG_WIDTH,
|
||||||
TAG_SCHEMA_NAME,
|
TAG_SCHEMA_NAME,
|
||||||
|
TAG_SKIP_ROWS,
|
||||||
NUM_OF_XML_TAGS
|
NUM_OF_XML_TAGS
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -93,6 +94,7 @@ const char xmlTagTable[NUM_OF_XML_TAGS + 1][MAX_XML_TAG_NAME_SIZE] = {
|
|||||||
"origName", //@bug 3599: deprecated; kept for backwards compatibility
|
"origName", //@bug 3599: deprecated; kept for backwards compatibility
|
||||||
"precision", "scale",
|
"precision", "scale",
|
||||||
"tblName", //@bug 3599: replaces origName
|
"tblName", //@bug 3599: replaces origName
|
||||||
"tblOid", "width", "Name"};
|
"tblOid", "width", "Name",
|
||||||
|
"skipRows"};
|
||||||
|
|
||||||
} // namespace WriteEngine
|
} // namespace WriteEngine
|
||||||
|
Reference in New Issue
Block a user