Feature/mcol 4882 cpimport skip rows (#3594)

* feat(cpimport): MCOL-4882 add a parameter to skip header rows * chore(cpimport): MCOL-4882 Use boost::program_options for argument parsing * feat(cpimport.bin): MCOL-4882 Add missing changes * add test * fix clang * add missing cmdline argument * fix bug * Fix double line skipping * Fix incorrect --silent (-N) parsing * fix default --max-errors processing * fix overwriting default username * move initialization to member declarations
2025-07-30 19:23:07 +03:00 · 2025-07-11 21:35:43 +02:00
parent 1c8d5ec04e
commit 78c1b5034d
30 changed files with 1379 additions and 1469 deletions
--- a/writeengine/bulk/cpimport.cpp
+++ b/writeengine/bulk/cpimport.cpp
@ -49,6 +49,7 @@
 #include "dataconvert.h"
 #include "mcsconfig.h"
 #include "mariadb_my_sys.h"
+#include "we_cmdargs.h"

 using namespace std;
 using namespace WriteEngine;
@ -56,8 +57,8 @@ using namespace execplan;

 namespace
 {
-char* pgmName = 0;
 const std::string IMPORT_PATH_CWD(".");
+unique_ptr<WECmdArgs> cmdArgs;
 bool bDebug = false;
 uint32_t cpimportJobId = 0;

@ -88,103 +89,6 @@ const char* taskLabels[] = {"",
                            "processing data"};
 }  // namespace

-//------------------------------------------------------------------------------
-// Print command line usage
-//------------------------------------------------------------------------------
-void printUsage()
-{
-  cout << endl
-       << "Simple usage using positional parameters "
-          "(no XML job file):"
-       << endl
-       << "    cpimport.bin dbName tblName [loadFile] [-j jobID] " << endl
-       << "    [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl
-       << "    [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl
-       << "    [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
-          "[-d debugLevel] [-i] "
-       << endl
-       << "     [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
-       << "    [-U username]" << endl
-       << endl;
-
-  cout << endl
-       << "Traditional usage without positional parameters "
-          "(XML job file required):"
-       << endl
-       << "    cpimport.bin -j jobID " << endl
-       << "    [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl
-       << "    [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl
-       << "    [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] "
-          "[-d debugLevel] [-i] "
-       << endl
-       << "    [-p path] [-l loadFile]" << endl
-       << "     [-D] [-N] [-L rejectDir] [-T timeZone]" << endl
-       << "    [-U username]" << endl
-       << endl;
-
-  cout << "    Positional parameters:" << endl
-       << "        dbName    Name of database to load" << endl
-       << "        tblName   Name of table to load" << endl
-       << "        loadFile  Optional input file name in current directory, " << "unless a fully" << endl
-       << "                  qualified name is given.  If not given, " << "input read from stdin." << endl
-       << endl;
-
-  cout << "    Options:" << endl
-       << "        -b Number of read buffers" << endl
-       << "        -c Application read buffer size (in bytes)" << endl
-       << "        -d Print different level (1-3) debug message " << endl
-       << "        -e Maximum number of allowable errors per table" << endl
-       << "        -f Data file directory path; " << endl
-       << "           In simple usage:" << endl
-       << "             Default is current working directory." << endl
-       << "             -f option only applies if loadFile is specified." << endl
-       << "           In traditional usage: " << endl
-       << "             Default is <BulkRoot>/data/import." << endl
-       << "             'STDIN' (all caps) redirects input from stdin." << endl
-       << "        -h Print this message" << endl
-       << "        -i Print extended info to console, else this info only goes "
-          "to log file."
-       << endl
-       << "        -j Job id.  In simple usage, default is the table OID." << endl
-       << "        -l Name of input file to be loaded, relative to -f path," << endl
-       << "           unless a fully qualified input file name is given." << endl
-       << "        -n NullOption (0-treat the string NULL as data (default);" << endl
-       << "                       1-treat the string NULL as a NULL value)" << endl
-       << "        -p Path for XML job description file" << endl
-       << "        -r Number of readers" << endl
-       << "        -s 'c' is the delimiter between column values" << endl
-       << "        -w Number of parsers" << endl
-       << "        -B I/O library read buffer size (in bytes)" << endl
-       << "        -E Enclosed by character if field values are enclosed" << endl
-       << "        -C Escape character used in conjunction with 'enclosed by' " << "character," << endl
-       << "           or as part of NULL escape sequence ('\\N'); default is '\\'" << endl
-       << "        -I Binary import; binaryOpt 1-import NULL values" << endl
-       << "                                    2-saturate NULL values" << endl
-       << "        -S Treat string truncations as errors" << endl
-       << "        -D Disable timeout when waiting for table lock" << endl
-       << "        -N Disable console output" << endl
-       << "        -L send *.err and *.bad (reject) files here" << endl
-       << "        -T Timezone used for TIMESTAMP datatype" << endl
-       << "           Possible values: \"SYSTEM\" (default)" << endl
-       << "                          : Offset in the form +/-HH:MM" << endl
-       << endl
-       << "        -y S3 Authentication Key (for S3 imports)" << endl
-       << "        -K S3 Authentication Secret (for S3 imports)" << endl
-       << "        -t S3 Bucket (for S3 imports)" << endl
-       << "        -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl
-       << "        -g S3 Regions (for S3 imports)" << endl
-       << "        -U username of new data files owner. Default is mysql" << endl;
-
-  cout << "    Example1:" << endl
-       << "        cpimport.bin -j 1234" << endl
-       << "    Example2: Some column values are enclosed within double quotes." << endl
-       << "        cpimport.bin -j 3000 -E '\"'" << endl
-       << "    Example3: Import a nation table without a Job XML file" << endl
-       << "        cpimport.bin -j 301 tpch nation nation.tbl" << endl;
-
-  exit(EXIT_SUCCESS);
-}
-
 //------------------------------------------------------------------------------
 // Signal handler to catch SIGTERM signal to terminate the process
 //------------------------------------------------------------------------------
@ -227,40 +131,6 @@ void handleSigAbrt(int /*i*/)
  BulkStatus::setJobStatus(EXIT_FAILURE);
 }

-//------------------------------------------------------------------------------
-// If error occurs during startup, this function is called to log the specified
-// message and terminate the process.
-//------------------------------------------------------------------------------
-void startupError(const std::string& errMsg, bool showHint)
-{
-  BRMWrapper::getInstance()->finishCpimportJob(cpimportJobId);
-  // Log to console
-  if (!BulkLoad::disableConsoleOutput())
-    cerr << errMsg << endl;
-
-  if (showHint)
-  {
-    std::ostringstream oss;
-    oss << "Try '" << pgmName << " -h' for more information.";
-
-    if (!BulkLoad::disableConsoleOutput())
-      cerr << oss.str() << endl;
-  }
-
-  // Log to syslog
-  logging::Message::Args errMsgArgs;
-  errMsgArgs.add(errMsg);
-  SimpleSysLog::instance()->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0087);
-
-  std::string jobIdStr("0");
-  logging::Message::Args endMsgArgs;
-  endMsgArgs.add(jobIdStr);
-  endMsgArgs.add("FAILED");
-  SimpleSysLog::instance()->logMsg(endMsgArgs, logging::LOG_TYPE_INFO, logging::M0082);
-
-  exit(EXIT_FAILURE);
-}
-
 //------------------------------------------------------------------------------
 // Initialize signal handling
 //------------------------------------------------------------------------------
@ -307,540 +177,6 @@ void setupSignalHandlers()
  sigaction(SIGABRT, &act, 0);
 }

-//------------------------------------------------------------------------------
-// Parse the command line arguments
-//------------------------------------------------------------------------------
-void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJobIdStr,
-                      std::string& sXMLJobDir, std::string& sModuleIDandPID, bool& bLogInfo2ToConsole,
-                      std::string& xmlGenSchema, std::string& xmlGenTable, bool& bValidateColumnList)
-{
-  std::string importPath;
-  std::string rptFileName;
-  int option;
-  bool bImportFileArg = false;
-  BulkModeType bulkMode = BULK_MODE_LOCAL;
-  std::string jobUUID;
-
-  while ((option = getopt(argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:y:K:t:H:g:U:")) !=
-         EOF)
-  {
-    switch (option)
-    {
-      case 'b':  // -b: no. of read buffers
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-        {
-          startupError(std::string("Option -b is invalid or out of range."), true);
-        }
-
-        int noOfReadBuffers = lValue;
-        curJob.setReadBufferCount(noOfReadBuffers);
-        break;
-      }
-
-      case 'c':  // -c: read buffer size
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-        {
-          startupError(std::string("Option -c is invalid or out of range."), true);
-        }
-
-        int readBufferSize = lValue;
-        curJob.setReadBufferSize(readBufferSize);
-        break;
-      }
-
-      case 'd':  // -d: debug level
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-        {
-          startupError(std::string("Option -d is invalid or out of range."), true);
-        }
-
-        int debugLevel = lValue;
-
-        if (debugLevel > 0 && debugLevel <= 3)
-        {
-          bDebug = true;
-          curJob.setAllDebug((DebugLevel)debugLevel);
-
-          if (!BulkLoad::disableConsoleOutput())
-            cout << "\nDebug level is set to " << debugLevel << endl;
-        }
-
-        break;
-      }
-
-      case 'e':  // -e: max allowed errors
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
-        {
-          startupError(std::string("Option -e is invalid or out of range."), true);
-        }
-
-        int maxErrors = lValue;
-        curJob.setMaxErrorCount(maxErrors);
-        break;
-      }
-
-      case 'f':  // -f: import path
-      {
-        importPath = optarg;
-        std::string setAltErrMsg;
-
-        if (curJob.setAlternateImportDir(importPath, setAltErrMsg) != NO_ERROR)
-          startupError(setAltErrMsg, false);
-
-        break;
-      }
-
-      case 'h':  // -h: help
-      {
-        printUsage();
-        break;
-      }
-
-      case 'i':  // -i: log info to console
-      {
-        bLogInfo2ToConsole = true;
-        break;
-      }
-
-      case 'j':  // -j: jobID
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
-        {
-          startupError(std::string("Option -j is invalid or out of range."), true);
-        }
-
-        sJobIdStr = optarg;
-        break;
-      }
-
-      case 'k':  // -k: hidden option to keep (not delete)
-      {
-        //     bulk rollback meta-data files
-        curJob.setKeepRbMetaFiles(true);
-        break;
-      }
-
-      case 'l':  // -l: import load file(s)
-      {
-        bImportFileArg = true;
-        curJob.addToCmdLineImportFileList(std::string(optarg));
-        break;
-      }
-
-      case 'm':  // -m: bulk load mode
-      {
-        bulkMode = (BulkModeType)atoi(optarg);
-
-        if ((bulkMode != BULK_MODE_REMOTE_SINGLE_SRC) && (bulkMode != BULK_MODE_REMOTE_MULTIPLE_SRC) &&
-            (bulkMode != BULK_MODE_LOCAL))
-        {
-          startupError(std::string("Invalid bulk mode; can be 1,2, or 3"), true);
-        }
-
-        break;
-      }
-
-      case 'n':  // -n: treat "NULL" as null
-      {
-        int nullStringMode = atoi(optarg);
-
-        if ((nullStringMode != 0) && (nullStringMode != 1))
-        {
-          startupError(std::string("Invalid NULL option; value can be 0 or 1"), true);
-        }
-
-        if (nullStringMode)
-          curJob.setNullStringMode(true);
-        else
-          curJob.setNullStringMode(false);
-
-        break;
-      }
-
-      case 'p':  // -p: Job XML path
-      {
-        sXMLJobDir = optarg;
-        break;
-      }
-
-      case 'r':  // -r: num read threads
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-        {
-          startupError(std::string("Option -r is invalid or out of range."), true);
-        }
-
-        int numOfReaders = lValue;
-#if !defined(__LP64__) && !defined(_MSC_VER)
-
-        if (numOfReaders > 1)
-        {
-          cerr << "Note: resetting number of read threads to maximum" << endl;
-          numOfReaders = 1;
-        }
-
-#endif
-        curJob.setNoOfReadThreads(numOfReaders);
-
-        if (!BulkLoad::disableConsoleOutput())
-          cout << "number of read threads : " << numOfReaders << endl;
-
-        break;
-      }
-
-      case 's':  // -s: column delimiter
-      {
-        char delim;
-
-        if (!strcmp(optarg, "\\t"))
-        {
-          delim = '\t';
-
-          if (!BulkLoad::disableConsoleOutput())
-            cout << "Column delimiter : " << "\\t" << endl;
-        }
-        else
-        {
-          delim = optarg[0];
-
-          if (delim == '\t')  // special case to print a <TAB>
-          {
-            if (!BulkLoad::disableConsoleOutput())
-              cout << "Column delimiter : '\\t'" << endl;
-          }
-          else
-          {
-            if (!BulkLoad::disableConsoleOutput())
-              cout << "Column delimiter : " << delim << endl;
-          }
-        }
-
-        curJob.setColDelimiter(delim);
-        break;
-      }
-
-      case 'u':  // -u: import job UUID
-      {
-        jobUUID = optarg;
-        curJob.setJobUUID(jobUUID);
-        break;
-      }
-
-      case 'w':  // -w: num parse threads
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-        {
-          startupError(std::string("Option -w is invalid or out of range."), true);
-        }
-
-        int numOfParser = lValue;
-#if !defined(__LP64__) && !defined(_MSC_VER)
-
-        if (numOfParser > 3)
-        {
-          cerr << "Note: resetting number of parse threads to maximum" << endl;
-          numOfParser = 3;
-        }
-
-#endif
-        curJob.setNoOfParseThreads(numOfParser);
-
-        if (!BulkLoad::disableConsoleOutput())
-          cout << "number of parse threads : " << numOfParser << endl;
-
-        break;
-      }
-
-      case 'B':  // -B: setvbuf read size
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-        {
-          startupError(std::string("Option -B is invalid or out of range."), true);
-        }
-
-        int vbufReadSize = lValue;
-        curJob.setVbufReadSize(vbufReadSize);
-        break;
-      }
-
-      case 'C':  // -C: enclosed escape char
-      {
-        curJob.setEscapeChar(optarg[0]);
-
-        if (!BulkLoad::disableConsoleOutput())
-          cout << "Escape Character  : " << optarg[0] << endl;
-
-        break;
-      }
-
-      case 'E':  // -E: enclosed by char
-      {
-        curJob.setEnclosedByChar(optarg[0]);
-
-        if (!BulkLoad::disableConsoleOutput())
-          cout << "Enclosed by Character : " << optarg[0] << endl;
-
-        break;
-      }
-
-      case 'I':  // -I: Binary import mode
-      {
-        ImportDataMode importMode = (ImportDataMode)atoi(optarg);
-
-        if ((importMode != IMPORT_DATA_BIN_ACCEPT_NULL) && (importMode != IMPORT_DATA_BIN_SAT_NULL))
-        {
-          startupError(std::string("Invalid binary import option; value can be 1"
-                                   "(accept NULL values) or 2(saturate NULL values)"),
-                       true);
-        }
-
-        curJob.setImportDataMode(importMode);
-        break;
-      }
-
-      case 'L':  // -L: Error log directory
-      {
-        curJob.setErrorDir(optarg);
-        break;
-      }
-
-      case 'P':  // -P: Calling moduleid
-      {
-        //     and PID
-        sModuleIDandPID = optarg;
-        break;
-      }
-
-      case 'R':  // -R: distributed mode
-      {
-        //     report file
-        rptFileName = optarg;
-        break;
-      }
-
-      case 'S':  // -S: Char & VarChar data
-      {
-        //     greater than col def
-        curJob.setTruncationAsError(true);  //     are reported as err
-        break;
-      }
-
-      case 'T':
-      {
-        std::string timeZone = optarg;
-        long offset;
-
-        if (dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
-        {
-          startupError(std::string("Value for option -T is invalid"), true);
-        }
-
-        curJob.setTimeZone(offset);
-        break;
-      }
-
-      case 'X':  // Hidden extra options
-      {
-        if (!strcmp(optarg, "AllowMissingColumn"))
-          bValidateColumnList = false;
-
-        break;
-      }
-
-      case 'D':  // disable table lock waiting timeout
-      {
-        curJob.disableTimeOut(true);
-        break;
-      }
-
-      case 'N':  // silent the output to console
-      {
-        BulkLoad::disableConsoleOutput(true);
-        break;
-      }
-
-      case 'y':
-      {
-        curJob.setS3Key(optarg);
-        break;
-      }
-
-      case 'K':
-      {
-        curJob.setS3Secret(optarg);
-        break;
-      }
-
-      case 't':
-      {
-        curJob.setS3Bucket(optarg);
-        break;
-      }
-
-      case 'H':
-      {
-        curJob.setS3Host(optarg);
-        break;
-      }
-
-      case 'g':
-      {
-        curJob.setS3Region(optarg);
-        break;
-      }
-
-      case 'U':
-      {
-        curJob.setUsername(optarg);
-        break;
-      }
-
-      default:
-      {
-        ostringstream oss;
-        oss << "Unrecognized command line option (" << option << ")";
-        startupError(oss.str(), true);
-      }
-    }
-  }
-
-  curJob.setDefaultJobUUID();
-
-  // Inconsistent to specify -f STDIN with -l importFile
-  if ((bImportFileArg) && (importPath == "STDIN"))
-  {
-    startupError(std::string("-f STDIN is invalid with -l importFile."), true);
-  }
-
-  // If distributed mode, make sure report filename is specified and that we
-  // can create the file using the specified path.
-  if ((bulkMode == BULK_MODE_REMOTE_SINGLE_SRC) || (bulkMode == BULK_MODE_REMOTE_MULTIPLE_SRC))
-  {
-    if (rptFileName.empty())
-    {
-      startupError(std::string("Bulk modes 1 and 2 require -R rptFileName."), true);
-    }
-    else
-    {
-      std::ofstream rptFile(rptFileName.c_str());
-
-      if (rptFile.fail())
-      {
-        std::ostringstream oss;
-        oss << "Unable to open report file " << rptFileName;
-        startupError(oss.str(), false);
-      }
-
-      rptFile.close();
-    }
-
-    curJob.setBulkLoadMode(bulkMode, rptFileName);
-  }
-
-  // Get positional arguments, User can provide:
-  // 1. no positional parameters
-  // 2. Two positional parameters (schema and table names)
-  // 3. Three positional parameters (schema, table, and import file name)
-  if (optind < argc)  // see if db schema name is given
-  {
-    xmlGenSchema = argv[optind];  // 1st pos parm
-    optind++;
-
-    if (optind < argc)  // see if table name is given
-    {
-      // Validate invalid options in conjunction with 2-3 positional
-      // parameter mode, which means we are using temp Job XML file.
-      if (bImportFileArg)
-      {
-        startupError(std::string("-l importFile is invalid with positional parameters"), true);
-      }
-
-      if (!sXMLJobDir.empty())
-      {
-        startupError(std::string("-p path is invalid with positional parameters."), true);
-      }
-
-      if (importPath == "STDIN")
-      {
-        startupError(std::string("-f STDIN is invalid with positional parameters."), true);
-      }
-
-      xmlGenTable = argv[optind];  // 2nd pos parm
-      optind++;
-
-      if (optind < argc)  // see if input file name is given
-      {
-        // 3rd pos parm
-        curJob.addToCmdLineImportFileList(std::string(argv[optind]));
-
-        // Default to CWD if loadfile name given w/o -f path
-        if (importPath.empty())
-        {
-          std::string setAltErrMsg;
-
-          if (curJob.setAlternateImportDir(std::string("."), setAltErrMsg) != NO_ERROR)
-            startupError(setAltErrMsg, false);
-        }
-      }
-      else
-      {
-        // Invalid to specify -f if no load file name given
-        if (!importPath.empty())
-        {
-          startupError(std::string("-f requires 3rd positional parameter (load file name)."), true);
-        }
-
-        // Default to STDIN if no import file name given
-        std::string setAltErrMsg;
-
-        if (curJob.setAlternateImportDir(std::string("STDIN"), setAltErrMsg) != NO_ERROR)
-          startupError(setAltErrMsg, false);
-      }
-    }
-    else
-    {
-      startupError(std::string("No table name specified with schema."), true);
-    }
-  }
-  else
-  {
-    // JobID is a required parameter with no positional parm mode,
-    // because we need the jobid to identify the input job xml file.
-    if (sJobIdStr.empty())
-    {
-      startupError(std::string("No JobID specified."), true);
-    }
-  }
-}
-
 //------------------------------------------------------------------------------
 // Print the path of the input load file(s), and the name of the job xml file.
 //------------------------------------------------------------------------------
@ -857,8 +193,7 @@ void printInputSource(const std::string& alternateImportDir, const std::string&
    if (alternateImportDir == IMPORT_PATH_CWD)
    {
      char cwdBuf[4096];
-      char* bufPtr = &cwdBuf[0];
-      bufPtr = ::getcwd(cwdBuf, sizeof(cwdBuf));
+      char* bufPtr = ::getcwd(cwdBuf, sizeof(cwdBuf));

      if (!(BulkLoad::disableConsoleOutput()))
        cout << "Input file(s) will be read from : " << bufPtr << endl;
@ -900,14 +235,14 @@ void getTableOID(const std::string& xmlGenSchema, const std::string& xmlGenTable
    std::ostringstream oss;
    oss << "Unable to set default JobID; " << "Error getting OID for table " << tbl.schema << '.' << tbl.table
        << ": " << ex.what();
-    startupError(oss.str(), false);
+    cmdArgs->startupError(oss.str(), false);
  }
  catch (...)
  {
    std::ostringstream oss;
    oss << "Unable to set default JobID; " << "Unknown error getting OID for table " << tbl.schema << '.'
        << tbl.table;
-    startupError(oss.str(), false);
+    cmdArgs->startupError(oss.str(), false);
  }

  std::ostringstream oss;
@ -950,7 +285,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
  {
    std::ostringstream oss;
    oss << "cpimport.bin error creating temporary Job XML file name: " << xmlErrMsg;
-    startupError(oss.str(), false);
+    cmdArgs->startupError(oss.str(), false);
  }

  printInputSource(alternateImportDir, sFileName.string(), S3Bucket);
@ -970,7 +305,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
    {
      std::ostringstream oss;
      oss << "No columns for " << xmlGenSchema << '.' << xmlGenTable;
-      startupError(oss.str(), false);
+      cmdArgs->startupError(oss.str(), false);
    }
  }
  catch (runtime_error& ex)
@ -979,7 +314,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
    oss << "cpimport.bin runtime exception constructing temporary "
           "Job XML file: "
        << ex.what();
-    startupError(oss.str(), false);
+    cmdArgs->startupError(oss.str(), false);
  }
  catch (exception& ex)
  {
@ -987,13 +322,13 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob
    oss << "cpimport.bin exception constructing temporary "
           "Job XML file: "
        << ex.what();
-    startupError(oss.str(), false);
+    cmdArgs->startupError(oss.str(), false);
  }
  catch (...)
  {
-    startupError(std::string("cpimport.bin "
-                             "unknown exception constructing temporary Job XML file"),
-                 false);
+    cmdArgs->startupError(std::string("cpimport.bin "
+                                      "unknown exception constructing temporary Job XML file"),
+                          false);
  }

  genProc.writeXMLFile(sFileName.string());
@ -1009,9 +344,9 @@ void verifyNode()
  // Validate running on a PM
  if (localModuleType != "pm")
  {
-    startupError(std::string("Exiting, "
-                             "cpimport.bin can only be run on a PM node"),
-                 true);
+    cmdArgs->startupError(std::string("Exiting, "
+                                      "cpimport.bin can only be run on a PM node"),
+                          true);
  }
 }

@ -1049,34 +384,22 @@ int main(int argc, char** argv)
  setlocale(LC_NUMERIC, "C");

  // Initialize singleton instance of syslogging
-  if (argc > 0)
-    pgmName = argv[0];
-
  logging::IDBErrorInfo::instance();
  SimpleSysLog::instance()->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_BULK));

  // Log job initiation unless user is asking for help
+  cmdArgs = make_unique<WECmdArgs>(argc, argv);
  std::ostringstream ossArgList;
-  bool bHelpFlag = false;

  for (int m = 1; m < argc; m++)
  {
-    if (strcmp(argv[m], "-h") == 0)
-    {
-      bHelpFlag = true;
-      break;
-    }
-
    if (!strcmp(argv[m], "\t"))  // special case to print a <TAB>
      ossArgList << "'\\t'" << ' ';
    else
      ossArgList << argv[m] << ' ';
  }

-  if (!bHelpFlag)
-  {
-    logInitiateMsg(ossArgList.str().c_str());
-  }
+  logInitiateMsg(ossArgList.str().c_str());

  BulkLoad curJob;
  string sJobIdStr;
@ -1099,8 +422,8 @@ int main(int argc, char** argv)
    task = TASK_CMD_LINE_PARSING;
    string xmlGenSchema;
    string xmlGenTable;
-    parseCmdLineArgs(argc, argv, curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole,
-                     xmlGenSchema, xmlGenTable, bValidateColumnList);
+    cmdArgs->fillParams(curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole, xmlGenSchema,
+                        xmlGenTable, bValidateColumnList);

    //--------------------------------------------------------------------------
    // Save basename portion of program path from argv[0]
@ -1154,9 +477,9 @@ int main(int argc, char** argv)

    if (!BRMWrapper::getInstance()->isSystemReady())
    {
-      startupError(std::string("System is not ready.  Verify that ColumnStore is up and ready "
-                               "before running cpimport."),
-                   false);
+      cmdArgs->startupError(std::string("System is not ready.  Verify that ColumnStore is up and ready "
+                                        "before running cpimport."),
+                            false);
    }

    if (bDebug)
@ -1173,7 +496,7 @@ int main(int argc, char** argv)
      WErrorCodes ec;
      std::ostringstream oss;
      oss << ec.errorString(brmReadWriteStatus) << "  cpimport.bin is terminating.";
-      startupError(oss.str(), false);
+      cmdArgs->startupError(oss.str(), false);
    }

    if (bDebug)
@ -1190,7 +513,7 @@ int main(int argc, char** argv)
      WErrorCodes ec;
      std::ostringstream oss;
      oss << ec.errorString(brmShutdownPending) << "  cpimport.bin is terminating.";
-      startupError(oss.str(), false);
+      cmdArgs->startupError(oss.str(), false);
    }

    if (bDebug)
@ -1207,7 +530,7 @@ int main(int argc, char** argv)
      WErrorCodes ec;
      std::ostringstream oss;
      oss << ec.errorString(brmSuspendPending) << "  cpimport.bin is terminating.";
-      startupError(oss.str(), false);
+      cmdArgs->startupError(oss.str(), false);
    }

    if (bDebug)
@ -1268,7 +591,7 @@ int main(int argc, char** argv)
      {
        std::ostringstream oss;
        oss << "cpimport.bin error creating Job XML file name: " << xmlErrMsg;
-        startupError(oss.str(), false);
+        cmdArgs->startupError(oss.str(), false);
      }

      printInputSource(curJob.getAlternateImportDir(), sFileName.string(), curJob.getS3Bucket());
@ -1300,13 +623,14 @@ int main(int argc, char** argv)
    }

    rc = BRMWrapper::getInstance()->newCpimportJob(cpimportJobId);
+    // TODO kemm: pass cpimportJobId to WECmdArgs
    if (rc != NO_ERROR)
    {
      WErrorCodes ec;
      std::ostringstream oss;
      oss << "Error in creating new cpimport job on Controller node; " << ec.errorString(rc)
          << "; cpimport is terminating.";
-      startupError(oss.str(), false);
+      cmdArgs->startupError(oss.str(), false);
    }

    //--------------------------------------------------------------------------
@ -1321,7 +645,7 @@ int main(int argc, char** argv)
      WErrorCodes ec;
      std::ostringstream oss;
      oss << "Error in loading job information; " << ec.errorString(rc) << "; cpimport.bin is terminating.";
-      startupError(oss.str(), false);
+      cmdArgs->startupError(oss.str(), false);
    }

    if (bDebug)
@ -1353,7 +677,7 @@ int main(int argc, char** argv)

    if (task != TASK_PROCESS_DATA)
    {
-      startupError(exceptionMsg, false);
+      cmdArgs->startupError(exceptionMsg, false);
    }

    rc = ERR_UNKNOWN;
@ -1379,7 +703,7 @@ int main(int argc, char** argv)
      failMsg += exceptionMsg;
    }

-    endMsgArgs.add(failMsg.c_str());
+    endMsgArgs.add(failMsg);
  }
  else
  {