Feature/mcol 4882 cpimport skip rows (#3594)

* feat(cpimport): MCOL-4882 add a parameter to skip header rows
* chore(cpimport): MCOL-4882 Use boost::program_options for argument parsing
* feat(cpimport.bin): MCOL-4882 Add missing changes
* add test
* fix clang
* add missing cmdline argument
* fix bug
* Fix double lines skipping
* Fix incorrect --silent (-N) parsing
* fix default --max-errors processing
* fix overwriting default username
* move initialization to members declaration
2025-07-29 08:21:15 +03:00 · 2025-07-11 21:35:43 +02:00
parent 1c8d5ec04e
commit 78c1b5034d
30 changed files with 1379 additions and 1469 deletions
--- a/writeengine/splitter/CMakeLists.txt
+++ b/writeengine/splitter/CMakeLists.txt
@ -26,4 +26,5 @@ columnstore_link(
    batchloader
    threadpool
    marias3
+    boost_program_options
 )
--- a/writeengine/splitter/we_cmdargs.cpp
+++ b/writeengine/splitter/we_cmdargs.cpp
@ -29,6 +29,8 @@
 #include <exception>
 #include <stdexcept>
 #include <cerrno>
+#include <boost/program_options.hpp>
+namespace po = boost::program_options;
 using namespace std;

 #include <boost/uuid/uuid.hpp>
@ -50,38 +52,96 @@ namespace WriteEngine
 //----------------------------------------------------------------------
 //----------------------------------------------------------------------
 WECmdArgs::WECmdArgs(int argc, char** argv)
- : fMultiTableCount(0)
- , fJobLogOnly(false)
- , fHelp(false)
- , fMode(1)
- , fArgMode(-1)
- , fQuiteMode(true)
- , fConsoleLog(false)
- , fVerbose(0)
- , fBatchQty(10000)
- , fNoOfReadThrds(0)
- , fDebugLvl(0)
- , fMaxErrors(-1)
- , fReadBufSize(0)
- , fIOReadBufSize(0)
- , fSetBufSize(0)
- , fColDelim('|')
- , fEnclosedChar(0)
- , fEscChar(0)
- , fNoOfWriteThrds(0)
- , fNullStrMode(false)
- , fImportDataMode(IMPORT_DATA_TEXT)
- , fCpiInvoke(false)
- , fBlockMode3(false)
- , fbTruncationAsError(false)
- , fUUID(boost::uuids::nil_generator()())
- , fConsoleOutput(true)
- , fTimeZone("SYSTEM")
- , fErrorDir(string(MCSLOGDIR) + "/cpimport/")
 {
  try
  {
    appTestFunction();
+    fOptions = std::make_unique<po::options_description>();
+#define DECLARE_INT_ARG(name, stor, min, max, desc) \
+    (name,\
+      po::value<int>(&stor)\
+        ->notifier([](auto&& value) { checkIntArg(name, min, max, value); }),\
+      desc)
+
+    fOptions->add_options()
+      ("help,h", "Print this message.")
+      DECLARE_INT_ARG("read-buffer,b", fIOReadBufSize, 1, INT_MAX, "Number of read buffers.")
+      DECLARE_INT_ARG("read-buffer-size,c", fReadBufSize, 1, INT_MAX,
+        "Application read buffer size (in bytes)")
+      DECLARE_INT_ARG("debug,d", fDebugLvl, 1, 3, "Print different level(1-3) debug message")
+      ("verbose,v", po::value<string>())
+      ("silent,N", po::bool_switch())
+      DECLARE_INT_ARG("max-errors,e", fMaxErrors, 0, INT_MAX,
+          "Maximum number of allowable error per table per PM")
+      ("file-path,f", po::value<string>(&fPmFilePath),
+        "Data file directory path. Default is current working directory.\n"
+        "\tIn Mode 1, represents the local input file path.\n"
+        "\tIn Mode 2, represents the PM based input file path.\n"
+        "\tIn Mode 3, represents the local input file path.")
+      DECLARE_INT_ARG("mode,m", fArgMode, 0, 3,
+        "\t1 - rows will be loaded in a distributed manner across PMs.\n"
+        "\t2 - PM based input files loaded into their respective PM.\n"
+        "\t3 - input files will be loaded on the local PM.")
+      ("filename,l", po::value<string>(&fPmFile),
+        "Name of import file to be loaded, relative to 'file-path'")
+      DECLARE_INT_ARG("batch-quantity,q", fBatchQty, 1, INT_MAX,
+        "Batch quantity, Number of rows distributed per batch in Mode 1")
+      ("console-log,i", po::bool_switch(&fConsoleLog),
+        "Print extended info to console in Mode 3.")
+      ("job-id,j", po::value<string>(),
+        "Job ID. In simple usage, default is the table OID unless a fully qualified input "
+        "file name is given.")
+      ("null-strings,n", po::value(&fNullStrMode)->implicit_value(true),
+        "NullOption (0-treat the string NULL as data (default);\n"
+        "1-treat the string NULL as a NULL value)")
+      ("xml-job-path,p", po::value<string>(&fJobPath), "Path for the XML job description file.")
+      DECLARE_INT_ARG("readers,r", fNoOfReadThrds, 1, INT_MAX, "Number of readers.")
+      ("separator,s", po::value<string>(), "Delimiter between column values.")
+      DECLARE_INT_ARG("io-buffer-size,B", fSetBufSize, 1, INT_MAX,
+        "I/O library read buffer size (in bytes)")
+      DECLARE_INT_ARG("writers,w", fNoOfWriteThrds, 1, INT_MAX, "Number of parsers.")
+      ("enclosed-by,E", po::value<char>(&fEnclosedChar),
+        "Enclosed by character if field values are enclosed.")
+      ("escape-char,C", po::value<char>(&fEscChar)->default_value('\\'),
+        "Escape character used in conjunction with 'enclosed-by' "
+        "character, or as a part of NULL escape sequence ('\\N');\n"
+        "default is '\\'")
+      ("headers,O",
+        po::value<int>(&fSkipRows)->implicit_value(1)
+          ->notifier([](auto&& value) { checkIntArg("headers,O", 0, INT_MAX, value); }),
+        "Number of header rows to skip.")
+      ("binary-mode,I", po::value<int>(),
+        "Import binary data; how to treat NULL values:\n"
+        "\t1 - import NULL values\n"
+        "\t2 - saturate NULL values\n")
+      ("pm,P", po::value<vector<unsigned int>>(&fPmVec),
+        "List of PMs ex: -P 1,2,3. Default is all PMs.")
+      ("truncation-as-error,S", po::bool_switch(&fbTruncationAsError),
+        "Treat string truncations as errors.")
+      ("tz,T", po::value<string>(),
+        "Timezone used for TIMESTAMP datatype. Possible values:\n"
+        "\t\"SYSTEM\" (default)\n"
+        "\tOffset in the form +/-HH:MM")
+      ("s3-key,y", po::value<string>(&fS3Key),
+        "S3 Authentication Key (for S3 imports)")
+      ("s3-secret,K", po::value<string>(&fS3Secret),
+        "S3 Authentication Secret (for S3 imports)")
+      ("s3-bucket,t", po::value<string>(&fS3Bucket),
+        "S3 Bucket (for S3 imports)")
+      ("s3-hostname,H", po::value<string>(&fS3Host),
+        "S3 Hostname (for S3 imports, Amazon's S3 default)")
+      ("s3-region,g", po::value<string>(&fS3Region),
+        "S3 Region (for S3 imports)")
+      ("errors-dir,L", po::value<string>(&fErrorDir)->default_value(MCSLOGDIR),
+        "Directory for the output .err and .bad files")
+      ("username,U", po::value<string>(&fUsername), "Username of the files owner.")
+      ("dbname", po::value<string>(), "Name of the database to load")
+      ("table", po::value<string>(), "Name of table to load")
+      ("load-file", po::value<string>(),
+        "Optional input file name in current directory, "
+        "unless a fully qualified name is given. If not given, input read from STDIN.");
+
+#undef DECLARE_INT_ARG
    parseCmdLineArgs(argc, argv);
  }
  catch (std::exception& exp)
@ -92,6 +152,8 @@ WECmdArgs::WECmdArgs(int argc, char** argv)
  }
 }

+WECmdArgs::~WECmdArgs() = default;  // defined out of line: the header only forward-declares po::options_description
+
 //----------------------------------------------------------------------

 void WECmdArgs::appTestFunction()
@ -107,8 +169,18 @@ void WECmdArgs::appTestFunction()
  return;
 }

+// Validates an integer command-line argument against an inclusive range.
+//   name  - option name, used only to build the error message
+//   min   - smallest accepted value
+//   max   - largest accepted value
+//   value - value to check
+// Throws std::runtime_error naming the option and the range when value
+// falls outside [min, max]; otherwise returns without doing anything.
+void WECmdArgs::checkIntArg(const std::string& name, long min, long max, int value)
+{
+  const bool withinRange = (min <= value) && (value <= max);
+  if (withinRange)
+  {
+    return;
+  }
+
+  ostringstream message;
+  message << "Argument " << name << " is out of range [" << min << ", " << max << "]";
+  throw runtime_error(message.str());
+}
+
 //----------------------------------------------------------------------
-std::string WECmdArgs::getCpImportCmdLine()
+std::string WECmdArgs::getCpImportCmdLine(bool skipRows)
 {
  std::ostringstream aSS;
  std::string aCmdLine;
@ -185,6 +257,11 @@ std::string WECmdArgs::getCpImportCmdLine()
  if (fEscChar != 0)
    aSS << " -C " << fEscChar;

+  if (skipRows && fSkipRows)
+  {
+    aSS << " -O " << fSkipRows;
+  }
+
  if (fNullStrMode)
    aSS << " -n " << '1';

@ -321,6 +398,12 @@ bool WECmdArgs::checkForCornerCases()
  // BUG 4210
  this->checkJobIdCase();  // Need to do this before we go further

+  if (fSkipRows && fImportDataMode != IMPORT_DATA_TEXT)
+  {
+    cout << "Invalid option -O with binary file" << endl;
+    throw runtime_error("Invalid option -O with binary file");
+  }
+
  if (fMode == 0)
  {
    if (!fJobId.empty())
@ -522,52 +605,7 @@ void WECmdArgs::usage()
  cout << "\t\t\tunless a fully qualified name is given.\n";
  cout << "\t\t\tIf not given, input read from STDIN.\n";

-  cout << "\n\nOptions:\n"
-       << "\t-b\tNumber of read buffers\n"
-       << "\t-c\tApplication read buffer size(in bytes)\n"
-       << "\t-d\tPrint different level(1-3) debug message\n"
-       << "\t-e\tMax number of allowable error per table per PM\n"
-       << "\t-f\tData file directory path.\n"
-       << "\t\t\tDefault is current working directory.\n"
-       << "\t\t\tIn Mode 1, -f represents the local input file path.\n"
-       << "\t\t\tIn Mode 2, -f represents the PM based input file path.\n"
-       << "\t\t\tIn Mode 3, -f represents the local input file path.\n"
-       << "\t-l\tName of import file to be loaded, relative to -f path,\n"
-       << "\t-h\tPrint this message.\n"
-       << "\t-q\tBatch Quantity, Number of rows distributed per batch in Mode 1\n"
-       << "\t-i\tPrint extended info to console in Mode 3.\n"
-       << "\t-j\tJob ID. In simple usage, default is the table OID.\n"
-       << "\t\t\tunless a fully qualified input file name is given.\n"
-       << "\t-n\tNullOption (0-treat the string NULL as data (default);\n"
-       << "\t\t\t1-treat the string NULL as a NULL value)\n"
-       << "\t-p\tPath for XML job description file.\n"
-       << "\t-r\tNumber of readers.\n"
-       << "\t-s\t'c' is the delimiter between column values.\n"
-       << "\t-B\tI/O library read buffer size (in bytes)\n"
-       << "\t-w\tNumber of parsers.\n"
-       << "\t-E\tEnclosed by character if field values are enclosed.\n"
-       << "\t-C\tEscape character used in conjunction with 'enclosed by'\n"
-       << "\t\t\tcharacter, or as part of NULL escape sequence ('\\N');\n"
-       << "\t\t\tdefault is '\\'\n"
-       << "\t-I\tImport binary data; how to treat NULL values:\n"
-       << "\t\t\t1 - import NULL values\n"
-       << "\t\t\t2 - saturate NULL values\n"
-       << "\t-P\tList of PMs ex: -P 1,2,3. Default is all PMs.\n"
-       << "\t-S\tTreat string truncations as errors.\n"
-       << "\t-m\tmode\n"
-       << "\t\t\t1 - rows will be loaded in a distributed manner across PMs.\n"
-       << "\t\t\t2 - PM based input files loaded onto their respective PM.\n"
-       << "\t\t\t3 - input files will be loaded on the local PM.\n"
-       << "\t-T\tTimezone used for TIMESTAMP datatype.\n"
-       << "\t\tPossible values: \"SYSTEM\" (default)\n"
-       << "\t\t               : Offset in the form +/-HH:MM\n"
-       << "\t-y\tS3 Authentication Key (for S3 imports)\n"
-       << "\t-K\tS3 Authentication Secret (for S3 imports)\n"
-       << "\t-t\tS3 Bucket (for S3 imports)\n"
-       << "\t-H\tS3 Hostname (for S3 imports, Amazon's S3 default)\n"
-       << "\t-g\tS3 Region (for S3 imports)\n"
-       << "\t-L\tDirectory for the output .err and .bad files.\n"
-       << "\t\tDefault is " << string(MCSLOGDIR);
+  cout << "\n\n" << (*fOptions) << endl;

  cout << "\nExample1: Traditional usage\n"
       << "\tcpimport -j 1234";
@ -591,375 +629,112 @@ void WECmdArgs::usage()

 void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
 {
-  int aCh;
  std::string importPath;
  bool aJobType = false;

  if (argc > 0)
    fPrgmName = string(MCSBINDIR) + "/" + "cpimport.bin";  // argv[0] is splitter but we need cpimport

-  while ((aCh = getopt(argc, argv, "d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:Ny:K:t:H:g:U:L:")) != EOF)
+  po::positional_options_description pos_opt;
+  pos_opt.add("dbname", 1)
+    .add("table", 1)
+    .add("load-file", 1);
+
+  po::variables_map vm;
+  po::store(po::command_line_parser(argc, argv).options(*fOptions).positional(pos_opt).run(), vm);
+  po::notify(vm);
+
+  if (vm.contains("silent"))
  {
-    switch (aCh)
+    fConsoleOutput = !vm["silent"].as<bool>();
+  }
+  if (vm.contains("help"))
+  {
+    fHelp = true;
+    usage();
+    return;
+  }
+  if (vm.contains("separator"))
+  {
+    auto value = vm["separator"].as<std::string>();
+    if (value == "\\t")
    {
-      case 'm':
+      fColDelim = '\t';
+      if (fDebugLvl)
      {
-        fArgMode = atoi(optarg);
-
-        // cout << "Mode level set to " << fMode << endl;
-        if ((fArgMode > -1) && (fArgMode <= 3))
-        {
-        }
-        else
-          throw runtime_error("Wrong Mode level");
-
-        break;
+        cout << "Column delimiter : \\t" << endl;
      }
-
-      case 'B':
+    }
+    else
+    {
+      fColDelim = value[0];
+      if (fDebugLvl)
      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-          throw runtime_error("Option -B is invalid or out of range");
-
-        fSetBufSize = lValue;
-        break;
-      }
-
-      case 'b':
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-          throw runtime_error("Option -b is invalid or out of range");
-
-        fIOReadBufSize = lValue;
-        break;
-      }
-
-      case 'e':
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
-          throw runtime_error("Option -e is invalid or out of range");
-
-        fMaxErrors = lValue;
-        break;
-      }
-
-      case 'i':
-      {
-        fConsoleLog = true;
-        break;
-      }
-
-      case 'c':
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-          throw runtime_error("Option -c is invalid or out of range");
-
-        fReadBufSize = lValue;
-        break;
-      }
-
-      case 'j':  // -j: jobID
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX))
-          throw runtime_error("Option -j is invalid or out of range");
-
-        fJobId = optarg;
-        fOrigJobId = fJobId;  // in case if we need to split it.
-
-        if (0 == fJobId.length())
-          throw runtime_error("Wrong JobID Value");
-
-        aJobType = true;
-        break;
-      }
-
-      case 'v':  // verbose
-      {
-        string aVerbLen = optarg;
-        fVerbose = aVerbLen.length();
-        fDebugLvl = fVerbose;
-        break;
-      }
-
-      case 'd':  // -d debug
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-          throw runtime_error("Option -d is invalid or out of range");
-
-        fDebugLvl = lValue;
-
-        if (fDebugLvl > 0 && fDebugLvl <= 3)
-        {
-          cout << "\nDebug level set to " << fDebugLvl << endl;
-        }
-        else
-        {
-          throw runtime_error("Wrong Debug level");
-        }
-
-        break;
-      }
-
-      case 'r':  // -r: num read threads
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-          throw runtime_error("Option -r is invalid or out of range");
-
-        fNoOfReadThrds = lValue;
-        break;
-      }
-
-      case 'w':  // -w: num parse threads
-      {
-        errno = 0;
-        long lValue = strtol(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX))
-          throw runtime_error("Option -w is invalid or out of range");
-
-        fNoOfWriteThrds = lValue;
-        break;
-      }
-
-      case 's':  // -s: column delimiter
-      {
-        if (!strcmp(optarg, "\\t"))
-        {
-          fColDelim = '\t';
-
-          if (fDebugLvl)
-            cout << "Column delimiter : "
-                 << "\\t" << endl;
-        }
-        else
-        {
-          fColDelim = optarg[0];
-
-          if (fDebugLvl)
-            cout << "Column delimiter : " << fColDelim << endl;
-        }
-
-        break;
-      }
-
-      case 'l':  // -l: if JobId (-j), it can be input file
-      {
-        fPmFile = optarg;
-
-        if (0 == fPmFile.length())
-          throw runtime_error("Wrong local filename");
-
-        break;
-      }
-
-      case 'f':  // -f: import file path
-      {
-        fPmFilePath = optarg;
-        break;
-      }
-
-      case 'n':  // -n: treat "NULL" as null
-      {
-        // default is 0, ie it is equal to not giving this option
-        int nullStringMode = atoi(optarg);
-
-        if ((nullStringMode != 0) && (nullStringMode != 1))
-        {
-          throw(runtime_error("Invalid NULL option; value can be 0 or 1"));
-        }
-
-        if (nullStringMode)
-          fNullStrMode = true;
-        else
-          fNullStrMode = false;  // This is default
-
-        break;
-      }
-
-      case 'P':  // -p: list of PM's
-      {
-        try
-        {
-          std::string aPmList = optarg;
-
-          if (!str2PmList(aPmList, fPmVec))
-            throw(runtime_error("PM list is wrong"));
-        }
-        catch (runtime_error& ex)
-        {
-          throw(ex);
-        }
-
-        break;
-      }
-
-      case 'p':
-      {
-        fJobPath = optarg;
-        break;
-      }
-
-      case 'E':  // -E: enclosed by char
-      {
-        fEnclosedChar = optarg[0];
-        // cout << "Enclosed by Character : " << optarg[0] << endl;
-        break;
-      }
-
-      case 'C':  // -C: enclosed escape char
-      {
-        fEscChar = optarg[0];
-        // cout << "Escape Character  : " << optarg[0] << endl;
-        break;
-      }
-
-      case 'h':  // -h: help
-      {
-        // usage(); // will exit(1) here
-        fHelp = true;
-        break;
-      }
-
-      case 'I':  // -I: binary mode (null handling)
-      {
-        // default is text mode, unless -I option is specified
-        int binaryMode = atoi(optarg);
-
-        if (binaryMode == 1)
-        {
-          fImportDataMode = IMPORT_DATA_BIN_ACCEPT_NULL;
-        }
-        else if (binaryMode == 2)
-        {
-          fImportDataMode = IMPORT_DATA_BIN_SAT_NULL;
-        }
-        else
-        {
-          throw(runtime_error("Invalid Binary mode; value can be 1 or 2"));
-        }
-
-        break;
-      }
-
-      case 'S':  // -S: Treat string truncations as errors
-      {
-        setTruncationAsError(true);
-        // cout << "TruncationAsError  : true" << endl;
-        break;
-      }
-
-      case 'T':
-      {
-        std::string timeZone = optarg;
-        long offset;
-
-        if (timeZone != "SYSTEM" && dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset))
-        {
-          throw(runtime_error("Value for option -T is invalid"));
-        }
-
-        fTimeZone = timeZone;
-        break;
-      }
-
-      case 'q':  // -q: batch quantity - default value is 10000
-      {
-        errno = 0;
-        long long lValue = strtoll(optarg, 0, 10);
-
-        if ((errno != 0) || (lValue < 1) || (lValue > UINT_MAX))
-          throw runtime_error("Option -q is invalid or out of range");
-
-        fBatchQty = lValue;
-
-        if (fBatchQty < 10000)
-          fBatchQty = 10000;
-        else if (fBatchQty > 100000)
-          fBatchQty = 10000;
-
-        break;
-      }
-
-      case 'N':  //-N no console output
-      {
-        fConsoleOutput = false;
-        break;
-      }
-
-      case 'y':  //-y S3 Key
-      {
-        fS3Key = optarg;
-        break;
-      }
-
-      case 'K':  //-K S3 Secret
-      {
-        fS3Secret = optarg;
-        break;
-      }
-
-      case 'H':  //-H S3 Host
-      {
-        fS3Host = optarg;
-        break;
-      }
-
-      case 't':  //-t S3 bucket
-      {
-        fS3Bucket = optarg;
-        break;
-      }
-
-      case 'g':  //-g S3 Region
-      {
-        fS3Region = optarg;
-        break;
-      }
-
-      case 'U':  //-U username of the files owner
-      {
-        fUsername = optarg;
-        break;
-      }
-
-      case 'L':  // -L set the output location of .bad/.err files
-      {
-        fErrorDir = optarg;
-        break;
-      }
-
-      default:
-      {
-        std::string aErr = std::string("Unknown command line option ") + std::to_string(aCh);
-        // cout << "Unknown command line option " << aCh << endl;
-        throw(runtime_error(aErr));
+        cout << "Column delimiter : " << fColDelim << endl;
      }
    }
  }
+  if (vm.contains("binary-mode"))
+  {
+    int value = vm["binary-mode"].as<int>();
+    if (value == 1)
+    {
+      fImportDataMode = IMPORT_DATA_BIN_ACCEPT_NULL;
+    }
+    else if (value == 2)
+    {
+      fImportDataMode = IMPORT_DATA_BIN_SAT_NULL;
+    }
+    else
+    {
+      throw runtime_error("Invalid Binary mode; value can be 1 or 2");
+    }
+  }
+  if (vm.contains("tz"))
+  {
+    auto tz = vm["tz"].as<std::string>();
+    long offset;
+    if (tz != "SYSTEM" && dataconvert::timeZoneToOffset(tz.c_str(), tz.size(), &offset))
+    {
+      throw runtime_error("Value for option --tz/-T is invalid");
+    }
+    fTimeZone = tz;
+  }
+  if (vm.contains("job-id"))
+  {
+    errno = 0;
+    string optarg = vm["job-id"].as<std::string>();
+    long lValue = strtol(optarg.c_str(), nullptr, 10);
+    if (errno != 0 || lValue < 0 || lValue > INT_MAX)
+    {
+      throw runtime_error("Option --job-id/-j is invalid or out of range");
+    }
+    fJobId = optarg;
+    fOrigJobId = fJobId;

-  if (fHelp)
-    usage();  // BUG 4210
+    if (fJobId.empty())
+    {
+      throw runtime_error("Wrong JobID Value");
+    }
+
+    aJobType = true;
+  }
+  if (vm.contains("verbose"))
+  {
+    string optarg = vm["verbose"].as<std::string>();
+    fVerbose = fDebugLvl = optarg.length();
+  }
+  if (vm.contains("batch-quantity"))
+  {
+    if (fBatchQty < 10000)
+    {
+      fBatchQty = 10000;
+    }
+    else if (fBatchQty > 100000)
+    {
+      fBatchQty = 10000;
+    }
+  }

  if (fArgMode != -1)
    fMode = fArgMode;  // BUG 4210
@ -976,26 +751,23 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
    if (0 == fArgMode)
      throw runtime_error("Incompatible mode and option types");

-    if (optind < argc)
+    if (vm.contains("dbname"))
    {
-      fSchema = argv[optind];  // 1st pos parm
-      optind++;
+      fSchema = vm["dbname"].as<string>();

-      if (optind < argc)
-      {
-        fTable = argv[optind];  // 2nd pos parm
-        optind++;
-      }
-      else
+
+      if (!vm.contains("table"))
      {
        // if schema is there, table name should be there
        throw runtime_error("No table name specified with schema.");
      }

-      if (optind < argc)  // see if input file name is given
+      fTable = vm["table"].as<string>();  // 2nd pos parm
+
+      if (vm.contains("load-file"))  // see if input file name is given
      {
        // 3rd pos parm
-        fLocFile = argv[optind];
+        fLocFile = vm["load-file"].as<string>();

        if ((fLocFile.at(0) != '/') && (fLocFile != "STDIN"))
        {
@ -1074,7 +846,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
  // 1. no positional parameters	- Mode 0 & stdin
  // 2. Two positional parameters (schema and table names) - Mode 1/2, stdin
  // 3. Three positional parameters (schema, table, and import file name)
-  else if (optind < argc)  // see if db schema name is given
+  else if (vm.contains("dbname"))  // see if db schema name is given
  {
    if (fArgMode == 0)
    {
@ -1088,13 +860,12 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
      }
      else
      {
-        fLocFile = argv[optind];
-        optind++;
+        fLocFile = vm["dbname"].as<string>();
      }

-      if (optind < argc)  // dest filename provided
+      if (vm.contains("table"))  // dest filename provided
      {
-        fPmFile = argv[optind];
+        fPmFile = vm["table"].as<string>();

        if ((fPmFile.at(0) != '/') && (fS3Key.empty()))
        {
@ -1144,19 +915,16 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv)
      */
    }
    else
-      fSchema = argv[optind];  // 1st pos parm
+      fSchema = vm["dbname"].as<string>();  // 1st pos parm

-    optind++;
-
-    if (optind < argc)  // see if table name is given
+    if (vm.contains("table"))  // see if table name is given
    {
-      fTable = argv[optind];  // 2nd pos parm
-      optind++;
+      fTable = vm["table"].as<string>();  // 2nd pos parm

-      if (optind < argc)  // see if input file name is given
+      if (vm.contains("load-file"))  // see if input file name is given
      {
        // 3rd pos parm
-        fLocFile = argv[optind];
+        fLocFile = vm["load-file"].as<string>();

        // BUG 4379 if -f option given we need to use that path,
        // over riding bug 4231. look at the code below
@ -1543,9 +1311,7 @@ void WECmdArgs::setEnclByAndEscCharFromJobFile(std::string& JobName)
  if (fEnclosedChar == 0)  // check anything in Jobxml file
  {
    WEXmlgetter aXmlGetter(JobName);
-    vector<string> aSections;
-    aSections.push_back("BulkJob");
-    aSections.push_back("EnclosedByChar");
+    const vector<string> aSections{"BulkJob", "EnclosedByChar"};

    try
    {
@ -1569,9 +1335,7 @@ void WECmdArgs::setEnclByAndEscCharFromJobFile(std::string& JobName)
  if (fEscChar == 0)  // check anything in Jobxml file
  {
    WEXmlgetter aXmlGetter(JobName);
-    vector<string> aSections;
-    aSections.push_back("BulkJob");
-    aSections.push_back("EscapeChar");
+    const vector<string> aSections{"BulkJob", "EscapeChar"};

    try
    {
--- a/writeengine/splitter/we_cmdargs.h
+++ b/writeengine/splitter/we_cmdargs.h
@ -24,28 +24,33 @@
 #include <set>

 #include <boost/uuid/uuid.hpp>
+#include <boost/uuid/nil_generator.hpp>

 #include "we_xmlgetter.h"
 #include "we_type.h"

+namespace boost::program_options
+{
+class options_description;
+}
+
 namespace WriteEngine
 {
 class WECmdArgs
 {
 public:
  WECmdArgs(int argc, char** argv);
-  virtual ~WECmdArgs() = default;
+  virtual ~WECmdArgs();

  typedef std::vector<unsigned int> VecInts;
  typedef std::vector<std::string> VecArgs;

  void appTestFunction();
  void parseCmdLineArgs(int argc, char** argv);
-  std::string getCpImportCmdLine();
+  std::string getCpImportCmdLine(bool skipRows);
  void setSchemaAndTableFromJobFile(std::string& JobName);
  void setEnclByAndEscCharFromJobFile(std::string& JobName);
  void usage();
-  void usageMode3();
  bool checkForCornerCases();
  void checkForBulkLogDir(const std::string& BulkRoot);

@ -76,11 +81,11 @@ class WECmdArgs
  {
    return fLocFile;
  }
-  int getReadBufSize()
+  int getReadBufSize() const
  {
    return fReadBufSize;
  }
-  int getMode()
+  int getMode() const
  {
    return fMode;
  }
@ -88,36 +93,40 @@ class WECmdArgs
  {
    return fArgMode;
  }
-  bool isHelpMode()
+  bool isHelpMode() const
  {
    return fHelp;
  }
-  int getDebugLvl()
+  int getDebugLvl() const
  {
    return fDebugLvl;
  }
-  char getEnclChar()
+  char getEnclChar() const
  {
    return fEnclosedChar;
  }
-  char getEscChar()
+  char getEscChar() const
  {
    return fEscChar;
  }
-  char getDelimChar()
+  char getDelimChar() const
  {
    return fColDelim;
  }
+  // Number of header rows to skip, as stored in fSkipRows (initialised to 0).
+  int getSkipRows() const
+  {
+    return this->fSkipRows;
+  }
  ImportDataMode getImportDataMode() const
  {
    return fImportDataMode;
  }
-  bool getConsoleLog()
+  bool getConsoleLog() const
  {
    return fConsoleLog;
  }

-  bool isCpimportInvokeMode()
+  bool isCpimportInvokeMode() const
  {
    return (fBlockMode3) ? false : fCpiInvoke;
  }
@ -125,11 +134,15 @@ class WECmdArgs
  {
    return fQuiteMode;
  }
-  void setJobId(std::string fJobId)
+  void setJobId(const std::string& fJobId)
  {
    this->fJobId = fJobId;
  }
-  void setLocFile(std::string fLocFile)
+  // Copies the current job ID (fJobId) into fOrigJobId, the original job ID.
+  void setOrigJobId()
+  {
+    fOrigJobId = fJobId;
+  }
+  void setLocFile(const std::string& fLocFile)
  {
    this->fLocFile = fLocFile;
  }
@ -141,7 +154,7 @@ class WECmdArgs
  {
    this->fArgMode = ArgMode;
  }
-  void setPmFile(std::string fPmFile)
+  void setPmFile(const std::string& fPmFile)
  {
    this->fPmFile = fPmFile;
  }
@ -183,7 +196,7 @@ class WECmdArgs
  {
    fUUID = jobUUID;
  }
-  bool getConsoleOutput()
+  bool getConsoleOutput() const
  {
    return fConsoleOutput;
  }
@ -194,7 +207,7 @@ class WECmdArgs

  bool getPmStatus(int Id);
  bool str2PmList(std::string& PmList, VecInts& V);
-  int getPmVecSize()
+  size_t getPmVecSize() const
  {
    return fPmVec.size();
  }
@ -265,7 +278,7 @@ class WECmdArgs
  {
    return fErrorDir;
  }
-  void setErrorDir(std::string fErrorDir)
+  void setErrorDir(const std::string& fErrorDir)
  {
    this->fErrorDir = fErrorDir;
  }
@ -273,24 +286,26 @@ class WECmdArgs
  std::string PrepMode2ListOfFiles(std::string& FileName);  // Bug 4342
  void getColumnList(std::set<std::string>& columnList) const;

+ private:
+  static void checkIntArg(const std::string& name, long min, long max, int value);
 private:  // variables for SplitterApp
  VecArgs fVecArgs;
  VecInts fPmVec;

  VecArgs fVecJobFiles;         // JobFiles splitter from master JobFile
-  int fMultiTableCount;         // MultiTable count
+  int fMultiTableCount{0};      // MultiTable count
  VecArgs fColFldsFromJobFile;  // List of columns from any job file, that
-  // represent fields in the import data
+                                // represent fields in the import data

  std::string fJobId;       // JobID
  std::string fOrigJobId;   // Original JobID, in case we have to split it
-  bool fJobLogOnly;         // Job number is only for log filename only
-  bool fHelp;               // Help mode
-  int fMode;                // splitter Mode
-  int fArgMode;             // Argument mode, dep. on this fMode is decided.
-  bool fQuiteMode;          // in quite mode or not
-  bool fConsoleLog;         // Log everything to console - w.r.t cpimport
-  int fVerbose;             // how many v's
+  bool fJobLogOnly{false};  // Job number is used for the log filename only
+  bool fHelp{false};        // Help mode
+  int fMode{1};             // splitter Mode
+  int fArgMode{-1};         // Argument mode, dep. on this fMode is decided.
+  bool fQuiteMode{true};    // in quiet mode or not
+  bool fConsoleLog{false};  // Log everything to console - w.r.t cpimport
+  int fVerbose{0};          // how many v's
  std::string fPmFile;      // FileName at PM
  std::string fPmFilePath;  // Path of input file in PM
  std::string fLocFile;     // Local file name
@ -305,32 +320,33 @@ class WECmdArgs
  std::string fS3Host;      // S3 Host
  std::string fS3Region;    // S3 Region

-  unsigned int fBatchQty;  // No. of batch Qty.
-  int fNoOfReadThrds;      // No. of read buffers
-  // std::string fConfig;	// config filename
-  int fDebugLvl;                   // Debug level
-  int fMaxErrors;                  // Max allowable errors
-  int fReadBufSize;                // Read buffer size
-  int fIOReadBufSize;              // I/O read buffer size
-  int fSetBufSize;                 // Buff size w/setvbuf
-  char fColDelim;                  // column delimiter
-  char fEnclosedChar;              // enclosed by char
-  char fEscChar;                   // esc char
-  int fNoOfWriteThrds;             // No. of write threads
-  bool fNullStrMode;               // set null string mode - treat null as null
-  ImportDataMode fImportDataMode;  // Importing text or binary data
-  std::string fPrgmName;           // argv[0]
-  std::string fSchema;             // Schema name - positional parmater
-  std::string fTable;              // Table name - table name parameter
+  int fBatchQty{10000};     // No. of batch Qty.
+  int fNoOfReadThrds{0};    // No. of read buffers
+  int fDebugLvl{0};         // Debug level
+  int fMaxErrors{-1};       // Max allowable errors
+  int fReadBufSize{0};      // Read buffer size
+  int fIOReadBufSize{0};    // I/O read buffer size
+  int fSetBufSize{0};       // Buff size w/setvbuf
+  char fColDelim{'|'};      // column delimiter
+  char fEnclosedChar{0};    // enclosed by char
+  char fEscChar{0};         // esc char
+  int fSkipRows{0};         // number of header rows to skip
+  int fNoOfWriteThrds{0};   // No. of write threads
+  bool fNullStrMode{false}; // set null string mode - treat null as null
+  ImportDataMode fImportDataMode{IMPORT_DATA_TEXT};  // Importing text or binary data
+  std::string fPrgmName;    // argv[0]
+  std::string fSchema;      // Schema name - positional parameter
+  std::string fTable;       // Table name - table name parameter

-  bool fCpiInvoke;           // invoke cpimport in mode 3
-  bool fBlockMode3;          // Do not allow Mode 3
-  bool fbTruncationAsError;  // Treat string truncation as error
-  boost::uuids::uuid fUUID;
-  bool fConsoleOutput;    // If false, no output to console.
-  std::string fTimeZone;  // Timezone to use for TIMESTAMP datatype
-  std::string fUsername;  // Username of the data files owner
-  std::string fErrorDir;
+  bool fCpiInvoke{false};            // invoke cpimport in mode 3
+  bool fBlockMode3{false};           // Do not allow Mode 3
+  bool fbTruncationAsError{false};   // Treat string truncation as error
+  boost::uuids::uuid fUUID{boost::uuids::nil_generator()()};
+  bool fConsoleOutput{true};         // If false, no output to console.
+  std::string fTimeZone{"SYSTEM"};   // Timezone to use for TIMESTAMP datatype
+  std::string fUsername;             // Username of the data files owner
+  std::string fErrorDir{MCSLOGDIR "/cpimport/"};
+  std::unique_ptr<boost::program_options::options_description> fOptions;
 };
 //----------------------------------------------------------------------

--- a/writeengine/splitter/we_filereadthread.cpp
+++ b/writeengine/splitter/we_filereadthread.cpp
@ -79,6 +79,7 @@ WEFileReadThread::WEFileReadThread(WESDHandler& aSdh)
 , fEncl('\0')
 , fEsc('\\')
 , fDelim('|')
+ , fSkipRows(0)
 {
  // TODO batch qty to get from config
  fBatchQty = 10000;
@ -187,6 +188,8 @@ void WEFileReadThread::setup(std::string FileName)
    if (aEncl != 0)
      fEnclEsc = true;

+    fSkipRows = fSdh.getSkipRows();
+
    // BUG 4342 - Need to support "list of infiles"
    // chkForListOfFiles(FileName); - List prepared in sdhandler.

@ -216,12 +219,10 @@ void WEFileReadThread::setup(std::string FileName)

 //------------------------------------------------------------------------------

-bool WEFileReadThread::chkForListOfFiles(std::string& FileName)
+bool WEFileReadThread::chkForListOfFiles(const std::string& fileName)
 {
  // cout << "Inside chkForListOfFiles("<< FileName << ")" << endl;
-  std::string aFileName = FileName;
-
-  istringstream iss(aFileName);
+  istringstream iss(fileName);
  ostringstream oss;
  size_t start = 0, end = 0;
  const char* sep = " ,|";
@ -229,8 +230,8 @@ bool WEFileReadThread::chkForListOfFiles(std::string& FileName)

  do
  {
-    end = aFileName.find_first_of(sep, start);
-    std::string aFile = aFileName.substr(start, end - start);
+    end = fileName.find_first_of(sep, start);
+    std::string aFile = fileName.substr(start, end - start);
    if (aFile == "STDIN" || aFile == "stdin")
      aFile = "/dev/stdin";

@ -270,9 +271,9 @@ std::string WEFileReadThread::getNextInputDataFile()
 }
 //------------------------------------------------------------------------------

-void WEFileReadThread::add2InputDataFileList(std::string& FileName)
+void WEFileReadThread::add2InputDataFileList(const std::string& fileName)
 {
-  fInfileList.push_front(FileName);
+  fInfileList.push_front(fileName);
 }
 //------------------------------------------------------------------------------

@ -371,17 +372,33 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs)

  // For now we are going to send KEEPALIVES
  //*Sbs << (ByteStream::byte)(WE_CLT_SRV_KEEPALIVE);
-  if ((fInFile.good()) && (!fInFile.eof()))
+  if (fInFile.good() && !fInFile.eof())
  {
    // cout << "Inside WEFileReadThread::readDataFile" << endl;
    // char aBuff[1024*1024];			// TODO May have to change it later
    // char*pStart = aBuff;
    unsigned int aIdx = 0;
    int aLen = 0;
-    *Sbs << (ByteStream::byte)(WE_CLT_SRV_DATA);
+    *Sbs << static_cast<ByteStream::byte>(WE_CLT_SRV_DATA);

-    while ((!fInFile.eof()) && (aIdx < getBatchQty()))
+    while (!fInFile.eof() && aIdx < getBatchQty())
    {
+      if (fSkipRows > 0)
+      {
+        fSkipRows--;
+        fInFile.getline(fBuff, fBuffSize - 1);
+        if (fSdh.getDebugLvl() > 3)
+        {
+          aLen = fInFile.gcount();
+          if (aLen > 0 && aLen < fBuffSize - 2)
+          {
+            fBuff[aLen - 1] = 0;
+            cout << "Skip header row (" << fSkipRows<< " to go): " << fBuff << endl;
+          }
+        }
+        continue;
+      }
+
      if (fEnclEsc)
      {
        // pStart = aBuff;
@ -551,6 +568,9 @@ void WEFileReadThread::openInFile()
      fInFile.rdbuf(fIfFile.rdbuf());  //@BUG 4326
    }

+    // Got new file, so reset fSkipRows
+    fSkipRows = fSdh.getSkipRows();
+
    //@BUG 4326  -below three lines commented out
    //		if (!fInFile.is_open()) fInFile.open(fInFileName.c_str());
    //		if (!fInFile.good())
@ -657,13 +677,13 @@ void WEFileReadThread::initS3Connection(const WECmdArgs& args)
    s3Host = args.getS3Host();
    ms3_library_init();
    s3Connection =
-        ms3_init(s3Key.c_str(), s3Secret.c_str(), s3Region.c_str(), (s3Host.empty() ? NULL : s3Host.c_str()));
+        ms3_init(s3Key.c_str(), s3Secret.c_str(), s3Region.c_str(), (s3Host.empty() ? nullptr : s3Host.c_str()));
    if (!s3Connection)
      throw runtime_error("failed to get an S3 connection");
  }
  else
-    s3Connection = NULL;
-  buf = NULL;
+    s3Connection = nullptr;
+  buf = nullptr;
 }

 //------------------------------------------------------------------------------
--- a/writeengine/splitter/we_filereadthread.h
+++ b/writeengine/splitter/we_filereadthread.h
@ -42,13 +42,11 @@ class WEFileReadThread;
 class WEReadThreadRunner
 {
 public:
-  WEReadThreadRunner(WEFileReadThread& Owner) : fRef(Owner)
+  explicit WEReadThreadRunner(WEFileReadThread& Owner) : fRef(Owner)
  {
    // ctor
  }
-  ~WEReadThreadRunner()
-  {
-  }
+  ~WEReadThreadRunner() = default;

  void operator()();  // Thread function

@ -61,7 +59,7 @@ class WEReadThreadRunner
 class WEFileReadThread
 {
 public:
-  WEFileReadThread(WESDHandler& aSdh);
+  explicit WEFileReadThread(WESDHandler& aSdh);
  virtual ~WEFileReadThread();

  void reset();
@ -82,9 +80,9 @@ class WEFileReadThread
  {
    return fContinue;
  }
-  void setContinue(bool fContinue)
+  void setContinue(bool cont)
  {
-    this->fContinue = fContinue;
+    fContinue = cont;
  }
  std::string getInFileName() const
  {
@ -98,30 +96,34 @@ class WEFileReadThread
  {
    return fBatchQty;
  }
-  void setFpThread(boost::thread* fpThread)
+  void setFpThread(boost::thread* pThread)
  {
-    this->fpThread = fpThread;
+    fpThread = pThread;
  }
-  void setInFileName(std::string fInFileName)
+  void setInFileName(const std::string& inFileName)
  {
-    if ((0 == fInFileName.compare("STDIN")) || (0 == fInFileName.compare("stdin")))
-      this->fInFileName = "/dev/stdin";
+    if (0 == inFileName.compare("STDIN") || 0 == inFileName.compare("stdin"))
+    {
+      fInFileName = "/dev/stdin";
+    }
    else
-      this->fInFileName = fInFileName;
+    {
+      fInFileName = inFileName;
+    }
  }
  //@BUG 4326
  const std::istream& getInFile() const
  {
    return fInFile;
  }
-  void setBatchQty(unsigned int BatchQty)
+  void setBatchQty(unsigned int batchQty)
  {
-    fBatchQty = BatchQty;
+    fBatchQty = batchQty;
  }

-  bool chkForListOfFiles(std::string& FileName);
+  bool chkForListOfFiles(const std::string& fileName);
  std::string getNextInputDataFile();
-  void add2InputDataFileList(std::string& FileName);
+  void add2InputDataFileList(const std::string& fileName);

 private:
  enum
@ -130,9 +132,9 @@ class WEFileReadThread
  };

  // don't allow anyone else to set
-  void setTgtPmId(unsigned int fTgtPmId)
+  void setTgtPmId(unsigned int tgtPmId)
  {
-    this->fTgtPmId = fTgtPmId;
+    fTgtPmId = tgtPmId;
  }

  WESDHandler& fSdh;
@ -148,11 +150,12 @@ class WEFileReadThread

  unsigned int fTgtPmId;
  unsigned int fBatchQty;
-  bool fEnclEsc;  // Encl/Esc char is set
-  char fEncl;     // Encl char
-  char fEsc;      // Esc char
-  char fDelim;    // Column Delimit char
-  char* fBuff;    // main data buffer
+  bool fEnclEsc;    // Encl/Esc char is set
+  char fEncl;       // Encl char
+  char fEsc;        // Esc char
+  char fDelim;      // Column Delimit char
+  size_t fSkipRows; // Header rows to skip
+  char* fBuff;      // main data buffer
  int fBuffSize;

  /* To support mode 1 imports from objects on S3 */
--- a/writeengine/splitter/we_sdhandler.cpp
+++ b/writeengine/splitter/we_sdhandler.cpp
@ -767,7 +767,7 @@ void WESDHandler::setup()
    oss << "Running distributed import (mode ";
    oss << fRef.fCmdArgs.getMode() << ") on ";

-    if (fRef.fCmdArgs.getPmVecSize() == fPmCount)
+    if (fRef.fCmdArgs.getPmVecSize() == static_cast<size_t>(fPmCount))
      oss << "all PMs...";
    else
    {
@ -2548,20 +2548,20 @@ void WESDHandler::exportJobFile(std::string& JobId, std::string& JobFileName)
 }

 //------------------------------------------------------------------------------
-bool WESDHandler::getConsoleLog()
+bool WESDHandler::getConsoleLog() const
 {
  return fRef.fCmdArgs.getConsoleLog();
 }
 //------------------------------------------------------------------------------

-char WESDHandler::getEnclChar()
+char WESDHandler::getEnclChar() const
 {
  return fRef.fCmdArgs.getEnclChar();
 }

 //------------------------------------------------------------------------------

-char WESDHandler::getEscChar()
+char WESDHandler::getEscChar() const
 {
  return fRef.fCmdArgs.getEscChar();
 }
@ -2575,11 +2575,16 @@ int WESDHandler::getReadBufSize()

 //------------------------------------------------------------------------------

-char WESDHandler::getDelimChar()
+char WESDHandler::getDelimChar() const
 {
  return fRef.fCmdArgs.getDelimChar();
 }

+size_t WESDHandler::getSkipRows() const
+{
+  return fRef.fCmdArgs.getSkipRows();
+}
+
 //------------------------------------------------------------------------------

 std::string WESDHandler::getTableName() const
--- a/writeengine/splitter/we_sdhandler.h
+++ b/writeengine/splitter/we_sdhandler.h
@ -143,10 +143,11 @@ class WESDHandler
  void sendHeartbeats();
  std::string getTableName() const;
  std::string getSchemaName() const;
-  char getEnclChar();
-  char getEscChar();
-  char getDelimChar();
-  bool getConsoleLog();
+  char getEnclChar() const;
+  char getEscChar() const;
+  char getDelimChar() const;
+  size_t getSkipRows() const;
+  bool getConsoleLog() const;
  int getReadBufSize();
  ImportDataMode getImportDataMode() const;
  void sysLog(const logging::Message::Args& msgArgs, logging::LOG_TYPE logType,
--- a/writeengine/splitter/we_splclient.h
+++ b/writeengine/splitter/we_splclient.h
@ -46,9 +46,7 @@ class WEColOORInfo  // Column Out-Of-Range Info
  WEColOORInfo() : fColNum(0), fColType(execplan::CalpontSystemCatalog::INT), fNoOfOORs(0)
  {
  }
-  ~WEColOORInfo()
-  {
-  }
+  ~WEColOORInfo() = default;

 public:
  int fColNum;
@ -63,14 +61,12 @@ class WESdHandlerException : public std::exception
 {
 public:
  std::string fWhat;
-  WESdHandlerException(std::string& What) throw()
+  explicit WESdHandlerException(const std::string& What) noexcept
  {
    fWhat = What;
  }
-  virtual ~WESdHandlerException() throw()
-  {
-  }
-  virtual const char* what() const throw()
+  ~WESdHandlerException() noexcept override = default;
+  const char* what() const noexcept override
  {
    return fWhat.c_str();
  }
@ -82,12 +78,10 @@ class WESdHandlerException : public std::exception
 class WESplClientRunner
 {
 public:
-  WESplClientRunner(WESplClient& Sc) : fOwner(Sc)
+  explicit WESplClientRunner(WESplClient& Sc) : fOwner(Sc)
  { /* ctor */
  }
-  virtual ~WESplClientRunner()
-  { /* dtor */
-  }
+  virtual ~WESplClientRunner() = default;
  void operator()();

 public:
@ -389,9 +383,7 @@ class WESplClient
    WERowsUploadInfo() : fRowsRead(0), fRowsInserted(0)
    {
    }
-    ~WERowsUploadInfo()
-    {
-    }
+    ~WERowsUploadInfo() = default;

   public:
    int64_t fRowsRead;
--- a/writeengine/splitter/we_splitterapp.cpp
+++ b/writeengine/splitter/we_splitterapp.cpp
@ -64,7 +64,6 @@ WESplitterApp::WESplitterApp(WECmdArgs& CmdArgs) : fCmdArgs(CmdArgs), fDh(*this)
  fpSysLog = SimpleSysLog::instance();
  fpSysLog->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_SPLIT));
  setupSignalHandlers();
-  std::string err;
  fDh.setDebugLvl(fCmdArgs.getDebugLvl());

  fDh.check4CpiInvokeMode();
@ -100,6 +99,7 @@ WESplitterApp::WESplitterApp(WECmdArgs& CmdArgs) : fCmdArgs(CmdArgs), fDh(*this)
      }
      catch (std::exception& ex)
      {
+        std::string err;
        // err = string("Error in constructing WESplitterApp") + ex.what();
        err = ex.what();  // cleaning up for BUG 4298
        logging::Message::Args errMsgArgs;
@ -139,10 +139,10 @@ WESplitterApp::~WESplitterApp()
  // fDh.shutdown();
  usleep(1000);  // 1 millisec just checking

-  std::string aStr = "Calling WESplitterApp Destructor\n";
-
  if (fDh.getDebugLvl())
-    cout << aStr << endl;
+  {
+    cout << "Calling WESplitterApp Destructor" << endl;
+  }
 }

 //------------------------------------------------------------------------------
@ -151,18 +151,18 @@ WESplitterApp::~WESplitterApp()

 void WESplitterApp::setupSignalHandlers()
 {
-  struct sigaction sa;
+  struct sigaction sa{};
  memset(&sa, 0, sizeof(sa));
  sa.sa_handler = WESplitterApp::onSigInterrupt;
-  sigaction(SIGINT, &sa, 0);
+  sigaction(SIGINT, &sa, nullptr);
  sa.sa_handler = WESplitterApp::onSigTerminate;
-  sigaction(SIGTERM, &sa, 0);
+  sigaction(SIGTERM, &sa, nullptr);
  sa.sa_handler = SIG_IGN;
-  sigaction(SIGPIPE, &sa, 0);
+  sigaction(SIGPIPE, &sa, nullptr);
  sa.sa_handler = WESplitterApp::onSigHup;
-  sigaction(SIGHUP, &sa, 0);
+  sigaction(SIGHUP, &sa, nullptr);
  sa.sa_handler = WESplitterApp::onSigInterrupt;
-  sigaction(SIGUSR1, &sa, 0);
+  sigaction(SIGUSR1, &sa, nullptr);
  /*
      signal(SIGPIPE, SIG_IGN);
      signal(SIGINT, WESplitterApp::onSigInterrupt);
@ -258,7 +258,7 @@ void WESplitterApp::processMessages()
      }

      aBs.restart();
-      std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine();
+      std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine(false);
      fDh.fLog.logMsg(aCpImpCmd, MSGLVL_INFO2);

      if (fDh.getDebugLvl())
@ -315,7 +315,7 @@ void WESplitterApp::processMessages()
      }

      aBs.restart();
-      std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine();
+      std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine(false);
      fDh.fLog.logMsg(aCpImpCmd, MSGLVL_INFO2);

      if (fDh.getDebugLvl())
@ -467,7 +467,7 @@ void WESplitterApp::invokeCpimport()
  fCmdArgs.setJobUUID(u);

  fCmdArgs.setMode(3);
-  std::string aCmdLineStr = fCmdArgs.getCpImportCmdLine();
+  std::string aCmdLineStr = fCmdArgs.getCpImportCmdLine(true);

  if (fDh.getDebugLvl())
    cout << "CPI CmdLineArgs : " << aCmdLineStr << endl;
@ -477,7 +477,6 @@ void WESplitterApp::invokeCpimport()
  std::istringstream ss(aCmdLineStr);
  std::string arg;
  std::vector<std::string> v2;
-  v2.reserve(50);

  while (ss >> arg)
  {
@ -490,7 +489,7 @@ void WESplitterApp::invokeCpimport()
    Cmds.push_back(const_cast<char*>(v2[j].c_str()));
  }

-  Cmds.push_back(0);  // null terminate
+  Cmds.push_back(nullptr);  // null terminate

  int aRet = execvp(Cmds[0], &Cmds[0]);  // NOTE - works with full Path

@ -515,7 +514,7 @@ void WESplitterApp::updateWithJobFile(int aIdx)
 int main(int argc, char** argv)
 {
  std::string err;
-  std::cin.sync_with_stdio(false);
+  std::istream::sync_with_stdio(false);

  try
  {
@ -528,7 +527,7 @@ int main(int argc, char** argv)
      for (int idx = 0; idx < aTblCnt; idx++)
      {
        aWESplitterApp.fDh.reset();
-        aWESplitterApp.fContinue = true;
+        WriteEngine::WESplitterApp::fContinue = true;
        aWESplitterApp.updateWithJobFile(idx);

        try
@ -541,10 +540,10 @@ int main(int argc, char** argv)
          err = ex.what();  // cleaning up for BUG 4298
          logging::Message::Args errMsgArgs;
          errMsgArgs.add(err);
-          aWESplitterApp.fpSysLog->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0000);
+          WriteEngine::WESplitterApp::fpSysLog->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0000);
          SPLTR_EXIT_STATUS = 1;
          aWESplitterApp.fDh.fLog.logMsg(err, WriteEngine::MSGLVL_ERROR);
-          aWESplitterApp.fContinue = false;
+          WriteEngine::WESplitterApp::fContinue = false;
          // throw runtime_error(err); BUG 4298
        }

--- a/writeengine/splitter/we_xmlgetter.cpp
+++ b/writeengine/splitter/we_xmlgetter.cpp
@ -46,20 +46,23 @@ namespace WriteEngine
 //------------------------------------------------------------------------------
 // WEXmlgetter constructor
 //------------------------------------------------------------------------------
-WEXmlgetter::WEXmlgetter(std::string& ConfigName) : fConfigName(ConfigName), fDoc(NULL), fpRoot(NULL)
+WEXmlgetter::WEXmlgetter(const std::string& ConfigName)
+ : fConfigName(ConfigName)
+ , fDoc(nullptr)
+ , fpRoot(nullptr)
 {
  //  xmlNodePtr curPtr;
  fDoc = xmlParseFile(ConfigName.c_str());

-  if (fDoc == NULL)
+  if (fDoc == nullptr)
    throw runtime_error("WEXmlgetter::getConfig(): no XML document!");

  fpRoot = xmlDocGetRootElement(fDoc);

-  if (fpRoot == NULL)
+  if (fpRoot == nullptr)
  {
    xmlFreeDoc(fDoc);
-    fDoc = NULL;
+    fDoc = nullptr;
    throw runtime_error("WEXmlgetter::getConfig(): no XML Root Tag!");
  }
 }
@ -70,24 +73,24 @@ WEXmlgetter::WEXmlgetter(std::string& ConfigName) : fConfigName(ConfigName), fDo
 WEXmlgetter::~WEXmlgetter()
 {
  xmlFreeDoc(fDoc);
-  fDoc = NULL;
+  fDoc = nullptr;
 }

 //------------------------------------------------------------------------------
 // Get/return the property or attribute value (strVal) for the specified xml tag
 // (pNode) and property/attribute (pTag)
 //------------------------------------------------------------------------------
-bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal) const
+bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal)
 {
-  xmlChar* pTmp = NULL;
+  xmlChar* pTmp = nullptr;
  bool bFound = false;

-  pTmp = xmlGetProp(const_cast<xmlNode*>(pNode), (xmlChar*)pTag);
+  pTmp = xmlGetProp(pNode, reinterpret_cast<const xmlChar*>(pTag));

  if (pTmp)
  {
    bFound = true;
-    strVal = (char*)pTmp;
+    strVal = reinterpret_cast<char*>(pTmp);
    xmlFree(pTmp);
  }
  else
@ -101,19 +104,19 @@ bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std::
 //------------------------------------------------------------------------------
 // Get/return the node content (strVal) for the specified xml tag (pNode)
 //------------------------------------------------------------------------------
-bool WEXmlgetter::getNodeContent(const xmlNode* pNode, std::string& strVal) const
+bool WEXmlgetter::getNodeContent(const xmlNode* pNode, std::string& strVal)
 {
-  xmlChar* pTmp = NULL;
+  xmlChar* pTmp = nullptr;
  bool bFound = false;

-  if (pNode->children != NULL)
+  if (pNode->children != nullptr)
  {
    pTmp = xmlNodeGetContent(pNode->children);

    if (pTmp)
    {
      bFound = true;
-      strVal = (char*)pTmp;
+      strVal = reinterpret_cast<char*>(pTmp);
      xmlFree(pTmp);
    }
    else
@ -152,29 +155,29 @@ void WEXmlgetter::getConfig(const string& section, const string& name, vector<st
 {
  string res;

-  if (section.length() == 0)
+  if (section.empty())
    throw invalid_argument("Config::getConfig: section must have a length");

-  xmlNode* pPtr = fpRoot->xmlChildrenNode;
+  const xmlNode* pPtr = fpRoot->xmlChildrenNode;

-  while (pPtr != NULL)
+  while (pPtr != nullptr)
  {
    // cout << "pPtr->name:    " <<
    //	(const xmlChar*)pPtr->name << std::endl;

-    if ((!xmlStrcmp(pPtr->name, (const xmlChar*)section.c_str())))
+    if ((!xmlStrcmp(pPtr->name, reinterpret_cast<const xmlChar*>(section.c_str()))))
    {
      xmlNodePtr pPtr2 = pPtr->xmlChildrenNode;

-      while (pPtr2 != NULL)
+      while (pPtr2 != nullptr)
      {
        // cout << "  pPtr2->name: " <<
        //	(const xmlChar*)pPtr2->name << std::endl;

-        if ((!xmlStrcmp(pPtr2->name, (const xmlChar*)name.c_str())))
+        if ((!xmlStrcmp(pPtr2->name, reinterpret_cast<const xmlChar*>(name.c_str()))))
        {
          xmlNodePtr pPtr3 = pPtr2->xmlChildrenNode;
-          values.push_back((const char*)pPtr3->content);
+          values.emplace_back(reinterpret_cast<const char*>(pPtr3->content));

          // cout << "    pPtr3->name: " <<
          //	(const xmlChar*)pPtr3->name <<
@ -204,8 +207,8 @@ std::string WEXmlgetter::getValue(const vector<string>& sections) const
 {
  std::string aRet;
  const xmlNode* pPtr = fpRoot;
-  int aSize = sections.size();
-  int aIdx = 0;
+  auto aSize = sections.size();
+  size_t aIdx = 0;

  // cout << aSize << endl;
  while (aIdx < aSize)
@ -213,7 +216,7 @@ std::string WEXmlgetter::getValue(const vector<string>& sections) const
    // cout << aIdx <<" "<< sections[aIdx] << endl;
    pPtr = getNode(pPtr, sections[aIdx]);

-    if ((pPtr == NULL) || (aIdx == aSize - 1))
+    if ((pPtr == nullptr) || (aIdx == aSize - 1))
      break;
    else
    {
@ -223,7 +226,7 @@ std::string WEXmlgetter::getValue(const vector<string>& sections) const
    }
  }

-  if (pPtr != NULL)
+  if (pPtr != nullptr)
  {
    // aRet = (const char*)pPtr->content;
    std::string aBuff;
@ -240,17 +243,17 @@ std::string WEXmlgetter::getValue(const vector<string>& sections) const
 // a node with the specified name (section).  The xmlNode (if found) is
 // returned.
 //------------------------------------------------------------------------------
-const xmlNode* WEXmlgetter::getNode(const xmlNode* pParent, const string& section) const
+const xmlNode* WEXmlgetter::getNode(const xmlNode* pParent, const string& section)
 {
-  if (pParent == NULL)
-    return NULL;
+  if (pParent == nullptr)
+    return nullptr;

  const xmlNode* pPtr = pParent;

-  while (pPtr != NULL)
+  while (pPtr != nullptr)
  {
    // cout << "getNode Name " << (const char*)pPtr->name << endl;
-    if (!xmlStrcmp(pPtr->name, (const xmlChar*)section.c_str()))
+    if (!xmlStrcmp(pPtr->name, reinterpret_cast<const xmlChar*>(section.c_str())))
      return pPtr;
    else
      pPtr = pPtr->next;
@ -268,12 +271,12 @@ std::string WEXmlgetter::getAttribute(const vector<string>& sections, const stri
 {
  std::string aRet;
  const xmlNode* pPtr = fpRoot;
-  int aSize = sections.size();
+  auto aSize = sections.size();

  if (aSize == 0)
    throw invalid_argument("WEXmlgetter::getAttribute(): section must be valid");

-  int aIdx = 0;
+  size_t aIdx = 0;

  // cout << aSize << endl;
  while (aIdx < aSize)
@ -281,7 +284,7 @@ std::string WEXmlgetter::getAttribute(const vector<string>& sections, const stri
    // cout << aIdx <<" "<< sections[aIdx] << endl;
    pPtr = getNode(pPtr, sections[aIdx]);

-    if ((pPtr == NULL) || (aIdx == aSize - 1))
+    if ((pPtr == nullptr) || (aIdx == aSize - 1))
      break;
    else
    {
@ -291,7 +294,7 @@ std::string WEXmlgetter::getAttribute(const vector<string>& sections, const stri
    }
  }

-  if (pPtr != NULL)
+  if (pPtr != nullptr)
  {
    std::string aBuff;

@ -315,10 +318,10 @@ std::string WEXmlgetter::getAttribute(const vector<string>& sections, const stri
 // is returned.
 //------------------------------------------------------------------------------
 void WEXmlgetter::getAttributeListForAllChildren(const vector<string>& sections, const string& attributeTag,
-                                                 vector<string>& attributeValues)
+                                                 vector<string>& attributeValues) const
 {
  const xmlNode* pPtr = fpRoot;
-  int aSize = sections.size();
+  auto aSize = sections.size();

  if (aSize == 0)
  {
@ -328,13 +331,13 @@ void WEXmlgetter::getAttributeListForAllChildren(const vector<string>& sections,
  }

  // Step down the branch that has the nodes of interest
-  int aIdx = 0;
+  size_t aIdx = 0;

  while (aIdx < aSize)
  {
    pPtr = getNode(pPtr, sections[aIdx]);

-    if ((pPtr == NULL) || (aIdx == aSize - 1))
+    if ((pPtr == nullptr) || (aIdx == aSize - 1))
    {
      break;
    }
@ -347,9 +350,9 @@ void WEXmlgetter::getAttributeListForAllChildren(const vector<string>& sections,

  // Look for all the "matching" nodes at the end of the branch, and
  // get the requested attribute value for each matching node.
-  if (pPtr != NULL)
+  if (pPtr != nullptr)
  {
-    while (pPtr != NULL)
+    while (pPtr != nullptr)
    {
      std::string attrib;

--- a/writeengine/splitter/we_xmlgetter.h
+++ b/writeengine/splitter/we_xmlgetter.h
@ -36,23 +36,23 @@ namespace WriteEngine
 class WEXmlgetter
 {
 public:
-  WEXmlgetter(std::string& ConfigName);
-  virtual ~WEXmlgetter();
+  explicit WEXmlgetter(const std::string& ConfigName);
+  ~WEXmlgetter();

 public:
  //..Public methods
-  std::string getValue(const std::vector<std::string>& section) const;
+  std::string getValue(const std::vector<std::string>& sections) const;
  std::string getAttribute(const std::vector<std::string>& sections, const std::string& Tag) const;
  void getConfig(const std::string& section, const std::string& name, std::vector<std::string>& values) const;
  void getAttributeListForAllChildren(const std::vector<std::string>& sections,
                                      const std::string& attributeTag,
-                                      std::vector<std::string>& attributeValues);
+                                      std::vector<std::string>& attributeValues) const;

 private:
  //..Private methods
-  const xmlNode* getNode(const xmlNode* pParent, const std::string& section) const;
-  bool getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal) const;
-  bool getNodeContent(const xmlNode* pNode, std::string& strVal) const;
+  static const xmlNode* getNode(const xmlNode* pParent, const std::string& section);
+  static bool getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal);
+  static bool getNodeContent(const xmlNode* pNode, std::string& strVal);

  //..Private data members
  std::string fConfigName;  // xml filename