From a4dde484c59e307232e093007e25b69513301a69 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 11 Jul 2025 16:57:08 +0000 Subject: [PATCH 01/51] chore(build): no build without PLUGIN_COLUMNSTORE --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ee826f6c8..8172ab1e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,3 +1,7 @@ +if("NO" STREQUAL "${PLUGIN_COLUMNSTORE}" OR NOT DEFINED PLUGIN_COLUMNSTORE) + return() +endif() + cmake_minimum_required(VERSION 3.13) project(Columnstore) From 78c1b5034d3ce55a3da79ca1638d8fdb503c2ad5 Mon Sep 17 00:00:00 2001 From: Alexey Antipovsky Date: Fri, 11 Jul 2025 21:35:43 +0200 Subject: [PATCH 02/51] Feature/mcol 4882 cpimport skip rows (#3594) * feat(cpimport): MCOL-4882 add a parameter to skip header rows * chore(cpimport): MCOL-4882 Use boost::program_options to arguments parsing * feat(cpimport.bin): MCOL-4882 Add missing changes * add test * fix clang * add missing cmdline argument * fix bug * Fix double lines skipping * Fix incorrect --silent (-N) parsing * fix default --max-errors processing * fix overwriting default username * move initialization to members declaration --- .../r/MCOL-4882-cpimport-skip-headers.result | 22 + .../t/MCOL-4882-cpimport-skip-headers.test | 43 + writeengine/bulk/CMakeLists.txt | 3 +- writeengine/bulk/cpimport.cpp | 738 +----------------- writeengine/bulk/we_bulkload.cpp | 35 +- writeengine/bulk/we_bulkload.h | 88 ++- writeengine/bulk/we_bulkloadbuffer.cpp | 28 +- writeengine/bulk/we_bulkloadbuffer.h | 10 +- writeengine/bulk/we_cmdargs.cpp | 559 +++++++++++++ writeengine/bulk/we_cmdargs.h | 130 +++ writeengine/bulk/we_tableinfo.cpp | 18 +- writeengine/bulk/we_tableinfo.h | 15 +- writeengine/server/we_dataloader.cpp | 5 +- writeengine/shared/we_type.h | 2 + writeengine/splitter/CMakeLists.txt | 1 + writeengine/splitter/we_cmdargs.cpp | 682 ++++++---------- writeengine/splitter/we_cmdargs.h | 120 +-- 
writeengine/splitter/we_filereadthread.cpp | 48 +- writeengine/splitter/we_filereadthread.h | 51 +- writeengine/splitter/we_sdhandler.cpp | 15 +- writeengine/splitter/we_sdhandler.h | 9 +- writeengine/splitter/we_splclient.h | 22 +- writeengine/splitter/we_splitterapp.cpp | 37 +- writeengine/splitter/we_xmlgetter.cpp | 81 +- writeengine/splitter/we_xmlgetter.h | 14 +- writeengine/xml/we_xmlgendata.cpp | 44 +- writeengine/xml/we_xmlgendata.h | 7 +- writeengine/xml/we_xmlgenproc.cpp | 5 + writeengine/xml/we_xmljob.cpp | 12 +- writeengine/xml/we_xmltag.h | 4 +- 30 files changed, 1379 insertions(+), 1469 deletions(-) create mode 100644 mysql-test/columnstore/basic/r/MCOL-4882-cpimport-skip-headers.result create mode 100644 mysql-test/columnstore/basic/t/MCOL-4882-cpimport-skip-headers.test create mode 100644 writeengine/bulk/we_cmdargs.cpp create mode 100644 writeengine/bulk/we_cmdargs.h diff --git a/mysql-test/columnstore/basic/r/MCOL-4882-cpimport-skip-headers.result b/mysql-test/columnstore/basic/r/MCOL-4882-cpimport-skip-headers.result new file mode 100644 index 000000000..3b209e3bd --- /dev/null +++ b/mysql-test/columnstore/basic/r/MCOL-4882-cpimport-skip-headers.result @@ -0,0 +1,22 @@ +DROP DATABASE IF EXISTS mcol4882; +CREATE DATABASE mcol4882; +USE mcol4882; +CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; +SELECT * FROM t1; +col1 col2 +1 test1 +2 test2 +3 test3 +TRUNCATE t1; +SELECT * FROM t1; +col1 col2 +2 test2 +3 test3 +TRUNCATE t1; +SELECT * FROM t1; +col1 col2 +3 test3 +TRUNCATE t1; +SELECT * FROM t1; +col1 col2 +DROP DATABASE mcol4882; diff --git a/mysql-test/columnstore/basic/t/MCOL-4882-cpimport-skip-headers.test b/mysql-test/columnstore/basic/t/MCOL-4882-cpimport-skip-headers.test new file mode 100644 index 000000000..167bb8f65 --- /dev/null +++ b/mysql-test/columnstore/basic/t/MCOL-4882-cpimport-skip-headers.test @@ -0,0 +1,43 @@ +if (!$MYSQL_TEST_ROOT){ + skip Should be run by root to execute cpimport; +} + +-- source 
../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS mcol4882; +--enable_warnings + +CREATE DATABASE mcol4882; +USE mcol4882; + +CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; + +--exec printf '1,test1\n2,test2\n3,test3\n' > /tmp/mcol4882.csv + +--disable_result_log +--exec $MCS_CPIMPORT -s , mcol4882 t1 /tmp/mcol4882.csv +--enable_result_log +SELECT * FROM t1; +TRUNCATE t1; + +--disable_result_log +--exec $MCS_CPIMPORT -s , --headers -- mcol4882 t1 /tmp/mcol4882.csv +--enable_result_log +SELECT * FROM t1; +TRUNCATE t1; + +--disable_result_log +--exec $MCS_CPIMPORT -s , --headers 2 mcol4882 t1 /tmp/mcol4882.csv +--enable_result_log +SELECT * FROM t1; +TRUNCATE t1; + +--disable_result_log +--exec $MCS_CPIMPORT -s , --headers 5 mcol4882 t1 /tmp/mcol4882.csv +--enable_result_log +SELECT * FROM t1; + +# Clean UP +--exec rm -f /tmp/mcol4882.csv +DROP DATABASE mcol4882; diff --git a/writeengine/bulk/CMakeLists.txt b/writeengine/bulk/CMakeLists.txt index 15068d037..0bb90a586 100644 --- a/writeengine/bulk/CMakeLists.txt +++ b/writeengine/bulk/CMakeLists.txt @@ -9,6 +9,7 @@ set(we_bulk_STAT_SRCS we_bulkload.cpp we_bulkloadbuffer.cpp we_bulkstatus.cpp + we_cmdargs.cpp we_colopbulk.cpp we_colbuf.cpp we_colbufcompressed.cpp @@ -28,7 +29,7 @@ set(we_bulk_STAT_SRCS add_definitions(-D_FILE_OFFSET_BITS=64) columnstore_static_library(we_bulk ${we_bulk_STAT_SRCS}) -columnstore_link(we_bulk ${NETSNMP_LIBRARIES} loggingcpp) +columnstore_link(we_bulk ${NETSNMP_LIBRARIES} loggingcpp boost_program_options) remove_definitions(-D_FILE_OFFSET_BITS=64) diff --git a/writeengine/bulk/cpimport.cpp b/writeengine/bulk/cpimport.cpp index ddf07a83d..3715c658d 100644 --- a/writeengine/bulk/cpimport.cpp +++ b/writeengine/bulk/cpimport.cpp @@ -49,6 +49,7 @@ #include "dataconvert.h" #include "mcsconfig.h" #include "mariadb_my_sys.h" +#include "we_cmdargs.h" using namespace std; using namespace WriteEngine; @@ -56,8 +57,8 @@ using namespace execplan; namespace 
{ -char* pgmName = 0; const std::string IMPORT_PATH_CWD("."); +unique_ptr cmdArgs; bool bDebug = false; uint32_t cpimportJobId = 0; @@ -88,103 +89,6 @@ const char* taskLabels[] = {"", "processing data"}; } // namespace -//------------------------------------------------------------------------------ -// Print command line usage -//------------------------------------------------------------------------------ -void printUsage() -{ - cout << endl - << "Simple usage using positional parameters " - "(no XML job file):" - << endl - << " cpimport.bin dbName tblName [loadFile] [-j jobID] " << endl - << " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl - << " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl - << " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] " - "[-d debugLevel] [-i] " - << endl - << " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl - << " [-U username]" << endl - << endl; - - cout << endl - << "Traditional usage without positional parameters " - "(XML job file required):" - << endl - << " cpimport.bin -j jobID " << endl - << " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl - << " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl - << " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] " - "[-d debugLevel] [-i] " - << endl - << " [-p path] [-l loadFile]" << endl - << " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl - << " [-U username]" << endl - << endl; - - cout << " Positional parameters:" << endl - << " dbName Name of database to load" << endl - << " tblName Name of table to load" << endl - << " loadFile Optional input file name in current directory, " << "unless a fully" << endl - << " qualified name is given. If not given, " << "input read from stdin." 
<< endl - << endl; - - cout << " Options:" << endl - << " -b Number of read buffers" << endl - << " -c Application read buffer size (in bytes)" << endl - << " -d Print different level (1-3) debug message " << endl - << " -e Maximum number of allowable errors per table" << endl - << " -f Data file directory path; " << endl - << " In simple usage:" << endl - << " Default is current working directory." << endl - << " -f option only applies if loadFile is specified." << endl - << " In traditional usage: " << endl - << " Default is /data/import." << endl - << " 'STDIN' (all caps) redirects input from stdin." << endl - << " -h Print this message" << endl - << " -i Print extended info to console, else this info only goes " - "to log file." - << endl - << " -j Job id. In simple usage, default is the table OID." << endl - << " -l Name of input file to be loaded, relative to -f path," << endl - << " unless a fully qualified input file name is given." << endl - << " -n NullOption (0-treat the string NULL as data (default);" << endl - << " 1-treat the string NULL as a NULL value)" << endl - << " -p Path for XML job description file" << endl - << " -r Number of readers" << endl - << " -s 'c' is the delimiter between column values" << endl - << " -w Number of parsers" << endl - << " -B I/O library read buffer size (in bytes)" << endl - << " -E Enclosed by character if field values are enclosed" << endl - << " -C Escape character used in conjunction with 'enclosed by' " << "character," << endl - << " or as part of NULL escape sequence ('\\N'); default is '\\'" << endl - << " -I Binary import; binaryOpt 1-import NULL values" << endl - << " 2-saturate NULL values" << endl - << " -S Treat string truncations as errors" << endl - << " -D Disable timeout when waiting for table lock" << endl - << " -N Disable console output" << endl - << " -L send *.err and *.bad (reject) files here" << endl - << " -T Timezone used for TIMESTAMP datatype" << endl - << " Possible values: \"SYSTEM\" 
(default)" << endl - << " : Offset in the form +/-HH:MM" << endl - << endl - << " -y S3 Authentication Key (for S3 imports)" << endl - << " -K S3 Authentication Secret (for S3 imports)" << endl - << " -t S3 Bucket (for S3 imports)" << endl - << " -H S3 Hostname (for S3 imports, Amazon's S3 default)" << endl - << " -g S3 Regions (for S3 imports)" << endl - << " -U username of new data files owner. Default is mysql" << endl; - - cout << " Example1:" << endl - << " cpimport.bin -j 1234" << endl - << " Example2: Some column values are enclosed within double quotes." << endl - << " cpimport.bin -j 3000 -E '\"'" << endl - << " Example3: Import a nation table without a Job XML file" << endl - << " cpimport.bin -j 301 tpch nation nation.tbl" << endl; - - exit(EXIT_SUCCESS); -} - //------------------------------------------------------------------------------ // Signal handler to catch SIGTERM signal to terminate the process //------------------------------------------------------------------------------ @@ -227,40 +131,6 @@ void handleSigAbrt(int /*i*/) BulkStatus::setJobStatus(EXIT_FAILURE); } -//------------------------------------------------------------------------------ -// If error occurs during startup, this function is called to log the specified -// message and terminate the process. 
-//------------------------------------------------------------------------------ -void startupError(const std::string& errMsg, bool showHint) -{ - BRMWrapper::getInstance()->finishCpimportJob(cpimportJobId); - // Log to console - if (!BulkLoad::disableConsoleOutput()) - cerr << errMsg << endl; - - if (showHint) - { - std::ostringstream oss; - oss << "Try '" << pgmName << " -h' for more information."; - - if (!BulkLoad::disableConsoleOutput()) - cerr << oss.str() << endl; - } - - // Log to syslog - logging::Message::Args errMsgArgs; - errMsgArgs.add(errMsg); - SimpleSysLog::instance()->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0087); - - std::string jobIdStr("0"); - logging::Message::Args endMsgArgs; - endMsgArgs.add(jobIdStr); - endMsgArgs.add("FAILED"); - SimpleSysLog::instance()->logMsg(endMsgArgs, logging::LOG_TYPE_INFO, logging::M0082); - - exit(EXIT_FAILURE); -} - //------------------------------------------------------------------------------ // Initialize signal handling //------------------------------------------------------------------------------ @@ -307,540 +177,6 @@ void setupSignalHandlers() sigaction(SIGABRT, &act, 0); } -//------------------------------------------------------------------------------ -// Parse the command line arguments -//------------------------------------------------------------------------------ -void parseCmdLineArgs(int argc, char** argv, BulkLoad& curJob, std::string& sJobIdStr, - std::string& sXMLJobDir, std::string& sModuleIDandPID, bool& bLogInfo2ToConsole, - std::string& xmlGenSchema, std::string& xmlGenTable, bool& bValidateColumnList) -{ - std::string importPath; - std::string rptFileName; - int option; - bool bImportFileArg = false; - BulkModeType bulkMode = BULK_MODE_LOCAL; - std::string jobUUID; - - while ((option = getopt(argc, argv, "b:c:d:e:f:hij:kl:m:n:p:r:s:u:w:B:C:DE:I:P:R:ST:X:NL:y:K:t:H:g:U:")) != - EOF) - { - switch (option) - { - case 'b': // -b: no. 
of read buffers - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - { - startupError(std::string("Option -b is invalid or out of range."), true); - } - - int noOfReadBuffers = lValue; - curJob.setReadBufferCount(noOfReadBuffers); - break; - } - - case 'c': // -c: read buffer size - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - { - startupError(std::string("Option -c is invalid or out of range."), true); - } - - int readBufferSize = lValue; - curJob.setReadBufferSize(readBufferSize); - break; - } - - case 'd': // -d: debug level - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - { - startupError(std::string("Option -d is invalid or out of range."), true); - } - - int debugLevel = lValue; - - if (debugLevel > 0 && debugLevel <= 3) - { - bDebug = true; - curJob.setAllDebug((DebugLevel)debugLevel); - - if (!BulkLoad::disableConsoleOutput()) - cout << "\nDebug level is set to " << debugLevel << endl; - } - - break; - } - - case 'e': // -e: max allowed errors - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX)) - { - startupError(std::string("Option -e is invalid or out of range."), true); - } - - int maxErrors = lValue; - curJob.setMaxErrorCount(maxErrors); - break; - } - - case 'f': // -f: import path - { - importPath = optarg; - std::string setAltErrMsg; - - if (curJob.setAlternateImportDir(importPath, setAltErrMsg) != NO_ERROR) - startupError(setAltErrMsg, false); - - break; - } - - case 'h': // -h: help - { - printUsage(); - break; - } - - case 'i': // -i: log info to console - { - bLogInfo2ToConsole = true; - break; - } - - case 'j': // -j: jobID - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX)) - { - startupError(std::string("Option -j is 
invalid or out of range."), true); - } - - sJobIdStr = optarg; - break; - } - - case 'k': // -k: hidden option to keep (not delete) - { - // bulk rollback meta-data files - curJob.setKeepRbMetaFiles(true); - break; - } - - case 'l': // -l: import load file(s) - { - bImportFileArg = true; - curJob.addToCmdLineImportFileList(std::string(optarg)); - break; - } - - case 'm': // -m: bulk load mode - { - bulkMode = (BulkModeType)atoi(optarg); - - if ((bulkMode != BULK_MODE_REMOTE_SINGLE_SRC) && (bulkMode != BULK_MODE_REMOTE_MULTIPLE_SRC) && - (bulkMode != BULK_MODE_LOCAL)) - { - startupError(std::string("Invalid bulk mode; can be 1,2, or 3"), true); - } - - break; - } - - case 'n': // -n: treat "NULL" as null - { - int nullStringMode = atoi(optarg); - - if ((nullStringMode != 0) && (nullStringMode != 1)) - { - startupError(std::string("Invalid NULL option; value can be 0 or 1"), true); - } - - if (nullStringMode) - curJob.setNullStringMode(true); - else - curJob.setNullStringMode(false); - - break; - } - - case 'p': // -p: Job XML path - { - sXMLJobDir = optarg; - break; - } - - case 'r': // -r: num read threads - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - { - startupError(std::string("Option -r is invalid or out of range."), true); - } - - int numOfReaders = lValue; -#if !defined(__LP64__) && !defined(_MSC_VER) - - if (numOfReaders > 1) - { - cerr << "Note: resetting number of read threads to maximum" << endl; - numOfReaders = 1; - } - -#endif - curJob.setNoOfReadThreads(numOfReaders); - - if (!BulkLoad::disableConsoleOutput()) - cout << "number of read threads : " << numOfReaders << endl; - - break; - } - - case 's': // -s: column delimiter - { - char delim; - - if (!strcmp(optarg, "\\t")) - { - delim = '\t'; - - if (!BulkLoad::disableConsoleOutput()) - cout << "Column delimiter : " << "\\t" << endl; - } - else - { - delim = optarg[0]; - - if (delim == '\t') // special case to print a - { - if 
(!BulkLoad::disableConsoleOutput()) - cout << "Column delimiter : '\\t'" << endl; - } - else - { - if (!BulkLoad::disableConsoleOutput()) - cout << "Column delimiter : " << delim << endl; - } - } - - curJob.setColDelimiter(delim); - break; - } - - case 'u': // -u: import job UUID - { - jobUUID = optarg; - curJob.setJobUUID(jobUUID); - break; - } - - case 'w': // -w: num parse threads - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - { - startupError(std::string("Option -w is invalid or out of range."), true); - } - - int numOfParser = lValue; -#if !defined(__LP64__) && !defined(_MSC_VER) - - if (numOfParser > 3) - { - cerr << "Note: resetting number of parse threads to maximum" << endl; - numOfParser = 3; - } - -#endif - curJob.setNoOfParseThreads(numOfParser); - - if (!BulkLoad::disableConsoleOutput()) - cout << "number of parse threads : " << numOfParser << endl; - - break; - } - - case 'B': // -B: setvbuf read size - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - { - startupError(std::string("Option -B is invalid or out of range."), true); - } - - int vbufReadSize = lValue; - curJob.setVbufReadSize(vbufReadSize); - break; - } - - case 'C': // -C: enclosed escape char - { - curJob.setEscapeChar(optarg[0]); - - if (!BulkLoad::disableConsoleOutput()) - cout << "Escape Character : " << optarg[0] << endl; - - break; - } - - case 'E': // -E: enclosed by char - { - curJob.setEnclosedByChar(optarg[0]); - - if (!BulkLoad::disableConsoleOutput()) - cout << "Enclosed by Character : " << optarg[0] << endl; - - break; - } - - case 'I': // -I: Binary import mode - { - ImportDataMode importMode = (ImportDataMode)atoi(optarg); - - if ((importMode != IMPORT_DATA_BIN_ACCEPT_NULL) && (importMode != IMPORT_DATA_BIN_SAT_NULL)) - { - startupError(std::string("Invalid binary import option; value can be 1" - "(accept NULL values) or 2(saturate NULL 
values)"), - true); - } - - curJob.setImportDataMode(importMode); - break; - } - - case 'L': // -L: Error log directory - { - curJob.setErrorDir(optarg); - break; - } - - case 'P': // -P: Calling moduleid - { - // and PID - sModuleIDandPID = optarg; - break; - } - - case 'R': // -R: distributed mode - { - // report file - rptFileName = optarg; - break; - } - - case 'S': // -S: Char & VarChar data - { - // greater than col def - curJob.setTruncationAsError(true); // are reported as err - break; - } - - case 'T': - { - std::string timeZone = optarg; - long offset; - - if (dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset)) - { - startupError(std::string("Value for option -T is invalid"), true); - } - - curJob.setTimeZone(offset); - break; - } - - case 'X': // Hidden extra options - { - if (!strcmp(optarg, "AllowMissingColumn")) - bValidateColumnList = false; - - break; - } - - case 'D': // disable table lock waiting timeout - { - curJob.disableTimeOut(true); - break; - } - - case 'N': // silent the output to console - { - BulkLoad::disableConsoleOutput(true); - break; - } - - case 'y': - { - curJob.setS3Key(optarg); - break; - } - - case 'K': - { - curJob.setS3Secret(optarg); - break; - } - - case 't': - { - curJob.setS3Bucket(optarg); - break; - } - - case 'H': - { - curJob.setS3Host(optarg); - break; - } - - case 'g': - { - curJob.setS3Region(optarg); - break; - } - - case 'U': - { - curJob.setUsername(optarg); - break; - } - - default: - { - ostringstream oss; - oss << "Unrecognized command line option (" << option << ")"; - startupError(oss.str(), true); - } - } - } - - curJob.setDefaultJobUUID(); - - // Inconsistent to specify -f STDIN with -l importFile - if ((bImportFileArg) && (importPath == "STDIN")) - { - startupError(std::string("-f STDIN is invalid with -l importFile."), true); - } - - // If distributed mode, make sure report filename is specified and that we - // can create the file using the specified path. 
- if ((bulkMode == BULK_MODE_REMOTE_SINGLE_SRC) || (bulkMode == BULK_MODE_REMOTE_MULTIPLE_SRC)) - { - if (rptFileName.empty()) - { - startupError(std::string("Bulk modes 1 and 2 require -R rptFileName."), true); - } - else - { - std::ofstream rptFile(rptFileName.c_str()); - - if (rptFile.fail()) - { - std::ostringstream oss; - oss << "Unable to open report file " << rptFileName; - startupError(oss.str(), false); - } - - rptFile.close(); - } - - curJob.setBulkLoadMode(bulkMode, rptFileName); - } - - // Get positional arguments, User can provide: - // 1. no positional parameters - // 2. Two positional parameters (schema and table names) - // 3. Three positional parameters (schema, table, and import file name) - if (optind < argc) // see if db schema name is given - { - xmlGenSchema = argv[optind]; // 1st pos parm - optind++; - - if (optind < argc) // see if table name is given - { - // Validate invalid options in conjunction with 2-3 positional - // parameter mode, which means we are using temp Job XML file. 
- if (bImportFileArg) - { - startupError(std::string("-l importFile is invalid with positional parameters"), true); - } - - if (!sXMLJobDir.empty()) - { - startupError(std::string("-p path is invalid with positional parameters."), true); - } - - if (importPath == "STDIN") - { - startupError(std::string("-f STDIN is invalid with positional parameters."), true); - } - - xmlGenTable = argv[optind]; // 2nd pos parm - optind++; - - if (optind < argc) // see if input file name is given - { - // 3rd pos parm - curJob.addToCmdLineImportFileList(std::string(argv[optind])); - - // Default to CWD if loadfile name given w/o -f path - if (importPath.empty()) - { - std::string setAltErrMsg; - - if (curJob.setAlternateImportDir(std::string("."), setAltErrMsg) != NO_ERROR) - startupError(setAltErrMsg, false); - } - } - else - { - // Invalid to specify -f if no load file name given - if (!importPath.empty()) - { - startupError(std::string("-f requires 3rd positional parameter (load file name)."), true); - } - - // Default to STDIN if no import file name given - std::string setAltErrMsg; - - if (curJob.setAlternateImportDir(std::string("STDIN"), setAltErrMsg) != NO_ERROR) - startupError(setAltErrMsg, false); - } - } - else - { - startupError(std::string("No table name specified with schema."), true); - } - } - else - { - // JobID is a required parameter with no positional parm mode, - // because we need the jobid to identify the input job xml file. - if (sJobIdStr.empty()) - { - startupError(std::string("No JobID specified."), true); - } - } -} - //------------------------------------------------------------------------------ // Print the path of the input load file(s), and the name of the job xml file. 
//------------------------------------------------------------------------------ @@ -857,8 +193,7 @@ void printInputSource(const std::string& alternateImportDir, const std::string& if (alternateImportDir == IMPORT_PATH_CWD) { char cwdBuf[4096]; - char* bufPtr = &cwdBuf[0]; - bufPtr = ::getcwd(cwdBuf, sizeof(cwdBuf)); + char* bufPtr = ::getcwd(cwdBuf, sizeof(cwdBuf)); if (!(BulkLoad::disableConsoleOutput())) cout << "Input file(s) will be read from : " << bufPtr << endl; @@ -900,14 +235,14 @@ void getTableOID(const std::string& xmlGenSchema, const std::string& xmlGenTable std::ostringstream oss; oss << "Unable to set default JobID; " << "Error getting OID for table " << tbl.schema << '.' << tbl.table << ": " << ex.what(); - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } catch (...) { std::ostringstream oss; oss << "Unable to set default JobID; " << "Unknown error getting OID for table " << tbl.schema << '.' << tbl.table; - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } std::ostringstream oss; @@ -950,7 +285,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob { std::ostringstream oss; oss << "cpimport.bin error creating temporary Job XML file name: " << xmlErrMsg; - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } printInputSource(alternateImportDir, sFileName.string(), S3Bucket); @@ -970,7 +305,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob { std::ostringstream oss; oss << "No columns for " << xmlGenSchema << '.' 
<< xmlGenTable; - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } } catch (runtime_error& ex) @@ -979,7 +314,7 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob oss << "cpimport.bin runtime exception constructing temporary " "Job XML file: " << ex.what(); - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } catch (exception& ex) { @@ -987,13 +322,13 @@ void constructTempXmlFile(const std::string& tempJobDir, const std::string& sJob oss << "cpimport.bin exception constructing temporary " "Job XML file: " << ex.what(); - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } catch (...) { - startupError(std::string("cpimport.bin " - "unknown exception constructing temporary Job XML file"), - false); + cmdArgs->startupError(std::string("cpimport.bin " + "unknown exception constructing temporary Job XML file"), + false); } genProc.writeXMLFile(sFileName.string()); @@ -1009,9 +344,9 @@ void verifyNode() // Validate running on a PM if (localModuleType != "pm") { - startupError(std::string("Exiting, " - "cpimport.bin can only be run on a PM node"), - true); + cmdArgs->startupError(std::string("Exiting, " + "cpimport.bin can only be run on a PM node"), + true); } } @@ -1049,34 +384,22 @@ int main(int argc, char** argv) setlocale(LC_NUMERIC, "C"); // Initialize singleton instance of syslogging - if (argc > 0) - pgmName = argv[0]; - logging::IDBErrorInfo::instance(); SimpleSysLog::instance()->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_BULK)); // Log job initiation unless user is asking for help + cmdArgs = make_unique(argc, argv); std::ostringstream ossArgList; - bool bHelpFlag = false; for (int m = 1; m < argc; m++) { - if (strcmp(argv[m], "-h") == 0) - { - bHelpFlag = true; - break; - } - if (!strcmp(argv[m], "\t")) // special case to print a ossArgList << "'\\t'" << ' '; else ossArgList << argv[m] << ' '; } - if (!bHelpFlag) - { - 
logInitiateMsg(ossArgList.str().c_str()); - } + logInitiateMsg(ossArgList.str().c_str()); BulkLoad curJob; string sJobIdStr; @@ -1099,8 +422,8 @@ int main(int argc, char** argv) task = TASK_CMD_LINE_PARSING; string xmlGenSchema; string xmlGenTable; - parseCmdLineArgs(argc, argv, curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole, - xmlGenSchema, xmlGenTable, bValidateColumnList); + cmdArgs->fillParams(curJob, sJobIdStr, sXMLJobDir, sModuleIDandPID, bLogInfo2ToConsole, xmlGenSchema, + xmlGenTable, bValidateColumnList); //-------------------------------------------------------------------------- // Save basename portion of program path from argv[0] @@ -1154,9 +477,9 @@ int main(int argc, char** argv) if (!BRMWrapper::getInstance()->isSystemReady()) { - startupError(std::string("System is not ready. Verify that ColumnStore is up and ready " - "before running cpimport."), - false); + cmdArgs->startupError(std::string("System is not ready. Verify that ColumnStore is up and ready " + "before running cpimport."), + false); } if (bDebug) @@ -1173,7 +496,7 @@ int main(int argc, char** argv) WErrorCodes ec; std::ostringstream oss; oss << ec.errorString(brmReadWriteStatus) << " cpimport.bin is terminating."; - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } if (bDebug) @@ -1190,7 +513,7 @@ int main(int argc, char** argv) WErrorCodes ec; std::ostringstream oss; oss << ec.errorString(brmShutdownPending) << " cpimport.bin is terminating."; - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } if (bDebug) @@ -1207,7 +530,7 @@ int main(int argc, char** argv) WErrorCodes ec; std::ostringstream oss; oss << ec.errorString(brmSuspendPending) << " cpimport.bin is terminating."; - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } if (bDebug) @@ -1268,7 +591,7 @@ int main(int argc, char** argv) { std::ostringstream oss; oss << "cpimport.bin error creating Job XML file name: " << xmlErrMsg; - 
startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } printInputSource(curJob.getAlternateImportDir(), sFileName.string(), curJob.getS3Bucket()); @@ -1300,13 +623,14 @@ int main(int argc, char** argv) } rc = BRMWrapper::getInstance()->newCpimportJob(cpimportJobId); + // TODO kemm: pass cpimportJobId to WECmdArgs if (rc != NO_ERROR) { WErrorCodes ec; std::ostringstream oss; oss << "Error in creating new cpimport job on Controller node; " << ec.errorString(rc) << "; cpimport is terminating."; - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } //-------------------------------------------------------------------------- @@ -1321,7 +645,7 @@ int main(int argc, char** argv) WErrorCodes ec; std::ostringstream oss; oss << "Error in loading job information; " << ec.errorString(rc) << "; cpimport.bin is terminating."; - startupError(oss.str(), false); + cmdArgs->startupError(oss.str(), false); } if (bDebug) @@ -1353,7 +677,7 @@ int main(int argc, char** argv) if (task != TASK_PROCESS_DATA) { - startupError(exceptionMsg, false); + cmdArgs->startupError(exceptionMsg, false); } rc = ERR_UNKNOWN; @@ -1379,7 +703,7 @@ int main(int argc, char** argv) failMsg += exceptionMsg; } - endMsgArgs.add(failMsg.c_str()); + endMsgArgs.add(failMsg); } else { diff --git a/writeengine/bulk/we_bulkload.cpp b/writeengine/bulk/we_bulkload.cpp index 9d259abc1..2b4f1ca1f 100644 --- a/writeengine/bulk/we_bulkload.cpp +++ b/writeengine/bulk/we_bulkload.cpp @@ -72,7 +72,7 @@ const std::string ERR_LOG_SUFFIX = ".err"; // Job err log file suffix namespace WriteEngine { /* static */ std::vector> BulkLoad::fTableInfo; -/* static */ boost::mutex* BulkLoad::fDDLMutex = 0; +/* static */ boost::mutex* BulkLoad::fDDLMutex = new boost::mutex(); /* static */ const std::string BulkLoad::DIR_BULK_JOB("job"); /* static */ const std::string BulkLoad::DIR_BULK_TEMP_JOB("tmpjob"); @@ -140,35 +140,8 @@ struct CancellationThread // Constructor 
//------------------------------------------------------------------------------ BulkLoad::BulkLoad() - : fColOp(new ColumnOpBulk()) - , fColDelim('\0') - , fNoOfBuffers(-1) - , fBufferSize(-1) - , fFileVbufSize(-1) - , fMaxErrors(-1) - , fNoOfParseThreads(3) - , fNoOfReadThreads(1) - , fKeepRbMetaFiles(false) - , fNullStringMode(false) - , fEnclosedByChar('\0') - , // not enabled unless user overrides enclosed by char - fEscapeChar('\0') - , fTotalTime(0.0) - , fBulkMode(BULK_MODE_LOCAL) - , fbTruncationAsError(false) - , fImportDataMode(IMPORT_DATA_TEXT) - , fbContinue(false) - , fDisableTimeOut(false) - , fUUID(boost::uuids::nil_generator()()) - , fTimeZone(dataconvert::systemTimeZoneOffset()) - , fUsername("mysql") // MCOL-4328 default file owner { - fTableInfo.clear(); setDebugLevel(DEBUG_0); - - fDDLMutex = new boost::mutex(); - memset(&fStartTime, 0, sizeof(timeval)); - memset(&fEndTime, 0, sizeof(timeval)); } //------------------------------------------------------------------------------ @@ -540,6 +513,7 @@ int BulkLoad::preProcess(Job& job, int tableNo, std::shared_ptr& tabl tableInfo->setImportDataMode(fImportDataMode); tableInfo->setTimeZone(fTimeZone); tableInfo->setJobUUID(fUUID); + tableInfo->setSkipRows(fSkipRows); // MCOL-4328 Get username gid and uid if they are set // We inject uid and gid into TableInfo and All ColumnInfo-s later. 
@@ -1002,6 +976,11 @@ int BulkLoad::processJob() fEscapeChar = '\\'; } + if (fSkipRows == 0) + { + fSkipRows = curJob.fSkipRows; + } + // std::cout << "bulkload::fEnclosedByChar<" << fEnclosedByChar << '>' << // std::endl << "bulkload::fEscapeChar<" << fEscapeChar << '>' << std::endl; diff --git a/writeengine/bulk/we_bulkload.h b/writeengine/bulk/we_bulkload.h index 91be178c8..f9ab26ffd 100644 --- a/writeengine/bulk/we_bulkload.h +++ b/writeengine/bulk/we_bulkload.h @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include @@ -48,12 +48,7 @@ #include #include #include - -#if 0 // defined(_MSC_VER) && defined(WE_BULKLOAD_DLLEXPORT) -#define EXPORT __declspec(dllexport) -#else -#define EXPORT -#endif +#include /** Namespace WriteEngine */ namespace WriteEngine @@ -65,18 +60,18 @@ class BulkLoad : public FileOp /** * @brief BulkLoad constructor */ - EXPORT BulkLoad(); + BulkLoad(); /** * @brief BulkLoad destructor */ - EXPORT ~BulkLoad() override; + ~BulkLoad() override; /** * @brief Load job information */ - EXPORT int loadJobInfo(const std::string& fullFileName, bool bUseTempJobFile, int argc, char** argv, - bool bLogInfo2ToConsole, bool bValidateColumnList); + int loadJobInfo(const std::string& fullFileName, bool bUseTempJobFile, int argc, char** argv, + bool bLogInfo2ToConsole, bool bValidateColumnList); /** * @brief Pre process jobs to validate and assign values to the job structure @@ -91,7 +86,7 @@ class BulkLoad : public FileOp /** * @brief Process job */ - EXPORT int processJob(); + int processJob(); /** * @brief Set Debug level for this BulkLoad object and any data members @@ -126,12 +121,13 @@ class BulkLoad : public FileOp return fUUID; } - EXPORT int setAlternateImportDir(const std::string& loadDir, std::string& errMsg); + int setAlternateImportDir(const std::string& loadDir, std::string& errMsg); void setImportDataMode(ImportDataMode importMode); void setColDelimiter(char delim); void setBulkLoadMode(BulkModeType bulkMode, const 
std::string& rptFileName); void setEnclosedByChar(char enChar); void setEscapeChar(char esChar); + void setSkipRows(size_t skipRows); void setKeepRbMetaFiles(bool keepMeta); void setMaxErrorCount(unsigned int maxErrors); void setNoOfParseThreads(int parseThreads); @@ -181,7 +177,7 @@ class BulkLoad : public FileOp //-------------------------------------------------------------------------- XMLJob fJobInfo; // current job information - boost::scoped_ptr fColOp; // column operation + boost::scoped_ptr fColOp{new ColumnOpBulk()}; // column operation std::string fRootDir; // job process root directory std::string fJobFileName; // job description file name @@ -189,49 +185,50 @@ class BulkLoad : public FileOp Log fLog; // logger int fNumOfParser; // total number of parser - char fColDelim; // delimits col values within a row + char fColDelim{0}; // delimits col values within a row - int fNoOfBuffers; // Number of read buffers - int fBufferSize; // Read buffer size - int fFileVbufSize; // Internal file system buffer size - long long fMaxErrors; // Max allowable errors per job + int fNoOfBuffers{-1}; // Number of read buffers + int fBufferSize{-1}; // Read buffer size + int fFileVbufSize{-1}; // Internal file system buffer size + long long fMaxErrors{-1}; // Max allowable errors per job std::string fAlternateImportDir; // Alternate bulk import directory std::string fErrorDir; // Opt. 
where error records record std::string fProcessName; // Application process name static std::vector> fTableInfo; // Vector of Table information - int fNoOfParseThreads; // Number of parse threads - int fNoOfReadThreads; // Number of read threads + int fNoOfParseThreads{3}; // Number of parse threads + int fNoOfReadThreads{1}; // Number of read threads boost::thread_group fReadThreads; // Read thread group boost::thread_group fParseThreads; // Parse thread group boost::mutex fReadMutex; // Manages table selection by each - // read thread + // read thread boost::mutex fParseMutex; // Manages table/buffer/column - // selection by each parsing thread - BRM::TxnID fTxnID; // TransID acquired from SessionMgr - bool fKeepRbMetaFiles; // Keep/delete bulkRB metadata files - bool fNullStringMode; // Treat "NULL" as NULL value - char fEnclosedByChar; // Char used to enclose column value - char fEscapeChar; // Escape char within enclosed value - timeval fStartTime; // job start time - timeval fEndTime; // job end time - double fTotalTime; // elapsed time for current phase - std::vector fCmdLineImportFiles; // Import Files from cmd line - BulkModeType fBulkMode; // Distributed bulk mode (1,2, or 3) - std::string fBRMRptFileName; // Name of distributed mode rpt file - bool fbTruncationAsError; // Treat string truncation as error - ImportDataMode fImportDataMode; // Importing text or binary data - bool fbContinue; // true when read and parse r running + // selection by each parsing thread + BRM::TxnID fTxnID; // TransID acquired from SessionMgr + bool fKeepRbMetaFiles{false}; // Keep/delete bulkRB metadata files + bool fNullStringMode{false}; // Treat "NULL" as NULL value + char fEnclosedByChar{0}; // Char used to enclose column value + char fEscapeChar{0}; // Escape char within enclosed value + size_t fSkipRows{0}; // Header rows to skip + timeval fStartTime{0, 0}; // job start time + timeval fEndTime{0, 0}; // job end time + double fTotalTime{0.0}; // elapsed time for current 
phase + std::vector fCmdLineImportFiles; // Import Files from cmd line + BulkModeType fBulkMode{BULK_MODE_LOCAL}; // Distributed bulk mode (1,2, or 3) + std::string fBRMRptFileName; // Name of distributed mode rpt file + bool fbTruncationAsError{false}; // Treat string truncation as error + ImportDataMode fImportDataMode{IMPORT_DATA_TEXT}; // Importing text or binary data + bool fbContinue{false}; // true when read and parse r running // static boost::mutex* fDDLMutex; // Insure only 1 DDL op at a time - EXPORT static const std::string DIR_BULK_JOB; // Bulk job directory - EXPORT static const std::string DIR_BULK_TEMP_JOB; // Dir for tmp job files + static const std::string DIR_BULK_JOB; // Bulk job directory + static const std::string DIR_BULK_TEMP_JOB; // Dir for tmp job files static const std::string DIR_BULK_IMPORT; // Bulk job import dir static const std::string DIR_BULK_LOG; // Bulk job log directory - bool fDisableTimeOut; // disable timeout when waiting for table lock - boost::uuids::uuid fUUID; // job UUID + bool fDisableTimeOut{false}; // disable timeout when waiting for table lock + boost::uuids::uuid fUUID{boost::uuids::nil_generator()()}; // job UUID static bool fNoConsoleOutput; // disable output to console - long fTimeZone; // Timezone offset (in seconds) relative to UTC, + long fTimeZone{dataconvert::systemTimeZoneOffset()};// Timezone offset (in seconds) relative to UTC, // to use for TIMESTAMP data type. For example, // for EST which is UTC-5:00, offset will be -18000s. 
std::string fS3Key; // S3 Key @@ -239,7 +236,7 @@ class BulkLoad : public FileOp std::string fS3Host; // S3 Host std::string fS3Bucket; // S3 Bucket std::string fS3Region; // S3 Region - std::string fUsername; // data files owner name mysql by default + std::string fUsername{"mysql"}; // data files owner name mysql by default //-------------------------------------------------------------------------- // Private Functions @@ -417,6 +414,11 @@ inline void BulkLoad::setEscapeChar(char esChar) fEscapeChar = esChar; } +inline void BulkLoad::setSkipRows(size_t skipRows) +{ + fSkipRows = skipRows; +} + inline void BulkLoad::setImportDataMode(ImportDataMode importMode) { fImportDataMode = importMode; diff --git a/writeengine/bulk/we_bulkloadbuffer.cpp b/writeengine/bulk/we_bulkloadbuffer.cpp index 8b74b8c15..cd43d6cd0 100644 --- a/writeengine/bulk/we_bulkloadbuffer.cpp +++ b/writeengine/bulk/we_bulkloadbuffer.cpp @@ -2047,8 +2047,8 @@ int BulkLoadBuffer::parseDictSection(ColumnInfo& columnInfo, int tokenPos, RID s } int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length, - size_t* parse_length, RID& totalReadRows, RID& correctTotalRows, - const boost::ptr_vector& columnsInfo, + size_t* parse_length, size_t& skipRows, RID& totalReadRows, + RID& correctTotalRows, const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall) { boost::mutex::scoped_lock lock(fSyncUpdatesBLB); @@ -2119,7 +2119,7 @@ int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const ch if (fImportDataMode == IMPORT_DATA_TEXT) { - tokenize(columnsInfo, allowedErrCntThisCall); + tokenize(columnsInfo, allowedErrCntThisCall, skipRows); } else { @@ -2150,8 +2150,9 @@ int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const ch // correctTotalRows (input/output) - total valid row count from tokenize() // (cumulative) //------------------------------------------------------------------------------ -int 
BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, RID& totalReadRows, - RID& correctTotalRows, const boost::ptr_vector& columnsInfo, +int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, size_t& skipRows, + RID& totalReadRows, RID& correctTotalRows, + const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall) { boost::mutex::scoped_lock lock(fSyncUpdatesBLB); @@ -2164,10 +2165,10 @@ int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* hand { memcpy(fData, fOverflowBuf, fOverflowSize); - if (fOverflowBuf != NULL) + if (fOverflowBuf != nullptr) { delete[] fOverflowBuf; - fOverflowBuf = NULL; + fOverflowBuf = nullptr; } } @@ -2219,7 +2220,7 @@ int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* hand if (fImportDataMode == IMPORT_DATA_TEXT) { - tokenize(columnsInfo, allowedErrCntThisCall); + tokenize(columnsInfo, allowedErrCntThisCall, skipRows); } else { @@ -2276,7 +2277,7 @@ int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* hand // depending on whether the user has enabled the "enclosed by" feature. 
//------------------------------------------------------------------------------ void BulkLoadBuffer::tokenize(const boost::ptr_vector& columnsInfo, - unsigned int allowedErrCntThisCall) + unsigned int allowedErrCntThisCall, size_t& skipRows) { unsigned offset = 0; // length of field unsigned curCol = 0; // dest db column counter within a row @@ -2334,6 +2335,15 @@ void BulkLoadBuffer::tokenize(const boost::ptr_vector& columnsInfo, while (p < pEndOfData) { c = *p; + if (UNLIKELY(skipRows > 0)) + { + if (c == NEWLINE_CHAR) + { + --skipRows; + } + ++p; + continue; + } // If we have stripped "enclosed" characters, then save raw data if (rawDataRowLength > 0) diff --git a/writeengine/bulk/we_bulkloadbuffer.h b/writeengine/bulk/we_bulkloadbuffer.h index 509c725c7..475c98663 100644 --- a/writeengine/bulk/we_bulkloadbuffer.h +++ b/writeengine/bulk/we_bulkloadbuffer.h @@ -215,7 +215,8 @@ class BulkLoadBuffer /** @brief tokenize the buffer contents and fill up the token array. */ - void tokenize(const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall); + void tokenize(const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall, + size_t& skipRows); /** @brief Binary tokenization of the buffer, and fill up the token array. 
*/ @@ -273,13 +274,14 @@ class BulkLoadBuffer bool tryAndLockColumn(const int& columnId, const int& id); int fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length, - size_t* parse_length, RID& totalReadRows, RID& correctTotalRows, + size_t* parse_length, size_t& skipRows, RID& totalReadRows, RID& correctTotalRows, const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall); /** @brief Read the table data into the buffer */ - int fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, RID& totalRows, RID& correctTotalRows, - const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall); + int fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, size_t& skipRows, RID& totalRows, + RID& correctTotalRows, const boost::ptr_vector& columnsInfo, + unsigned int allowedErrCntThisCall); /** @brief Get the overflow size */ diff --git a/writeengine/bulk/we_cmdargs.cpp b/writeengine/bulk/we_cmdargs.cpp new file mode 100644 index 000000000..1989f0719 --- /dev/null +++ b/writeengine/bulk/we_cmdargs.cpp @@ -0,0 +1,559 @@ +/* Copyright (C) 2014 InfiniDB, Inc. + Copyright (C) 2016 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include "we_simplesyslog.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +namespace po = boost::program_options; +using namespace std; + +#include +#include +#include +#include + +#include "dataconvert.h" +#include "liboamcpp.h" +using namespace oam; + +#include "we_cmdargs.h" + +#include "mcsconfig.h" + +namespace WriteEngine +{ +//---------------------------------------------------------------------- +//---------------------------------------------------------------------- +WECmdArgs::WECmdArgs(int argc, char** argv) +{ + try + { + fOptions = std::make_unique(); + fVisibleOptions = std::make_unique(); +#define DECLARE_INT_ARG(name, stor, min, max, desc) \ + (name,\ + po::value(&stor)\ + ->notifier([this](auto&& value) { checkIntArg(name, min, max, value); }),\ + desc) + + fVisibleOptions->add_options() + ("help,h", "Print this message.") + DECLARE_INT_ARG("read-buffer,b", fIOReadBufSize, 1, INT_MAX, "Number of read buffers.") + DECLARE_INT_ARG("read-buffer-size,c", fReadBufSize, 1, INT_MAX, + "Application read buffer size (in bytes)") + DECLARE_INT_ARG("debug,d", fDebugLvl, 1, 3, "Print different level(1-3) debug message") + DECLARE_INT_ARG("max-errors,e", fMaxErrors, 0, INT_MAX, + "Maximum number of allowable error per table per PM") + ("file-path,f", po::value(&fPmFilePath), + "Data file directory path. 
Default is current working directory.\n" + "\tIn Mode 1, represents the local input file path.\n" + "\tIn Mode 2, represents the PM based input file path.\n" + "\tIn Mode 3, represents the local input file path.") + DECLARE_INT_ARG("mode,m", fArgMode, 1, 3, + "\t1 - rows will be loaded in a distributed manner across PMs.\n" + "\t2 - PM based input files loaded into their respective PM.\n" + "\t3 - input files will be loaded on the local PM.") + ("filename,l", po::value(&fPmFile), + "Name of import file to be loaded, relative to 'file-path'") + ("console-log,i", po::bool_switch(&fConsoleLog), + "Print extended info to console in Mode 3.") + ("job-id,j", po::value(), + "Job ID. In simple usage, default is the table OID unless a fully qualified input " + "file name is given.") + ("null-strings,n", po::value(&fNullStrMode)->implicit_value(true), + "NullOption (0-treat the string NULL as data (default);\n" + "1-treat the string NULL as a NULL value)") + ("xml-job-path,p", po::value(&fJobPath), "Path for the XML job description file.") + DECLARE_INT_ARG("readers,r", fNoOfReadThrds, 1, INT_MAX, "Number of readers.") + ("separator,s", po::value(), "Delimiter between column values.") + DECLARE_INT_ARG("io-buffer-size,B", fSetBufSize, 1, INT_MAX, + "I/O library read buffer size (in bytes)") + DECLARE_INT_ARG("writers,w", fNoOfWriteThrds, 1, INT_MAX, "Number of parsers.") + ("enclosed-by,E", po::value(&fEnclosedChar), + "Enclosed by character if field values are enclosed.") + ("escape-char,C", po::value(&fEscChar)->default_value('\\'), + "Escape character used in conjunction with 'enclosed-by'" + "character, or as a part of NULL escape sequence ('\\N');\n" + "default is '\\'") + ("headers,O", + po::value(&fSkipRows)->implicit_value(1) + ->notifier([this](auto&& value) { checkIntArg("headers,O", 0, INT_MAX, value); }), + "Number of header rows to skip.") + ("binary-mode,I", po::value(), + "Import binary data; how to treat NULL values:\n" + "\t1 - import NULL values\n" + "\t2 - 
saturate NULL values\n") + ("calling-module,P", po::value(&fModuleIDandPID), "Calling module ID and PID.") + ("truncation-as-error,S", po::bool_switch(&fbTruncationAsError), + "Treat string truncations as errors.") + ("tz,T", po::value(), + "Timezone used for TIMESTAMP datatype. Possible values:\n" + "\t\"SYSTEM\" (default)\n" + "\tOffset in the form +/-HH:MM") + ("disable-tablelock-timeout,D", po::bool_switch(&fDisableTableLockTimeOut), + "Disable timeout when waiting for table lock.") + ("silent,N", po::bool_switch(&fSilent), "Disable console output.") + ("s3-key,y", po::value(&fS3Key), + "S3 Authentication Key (for S3 imports)") + ("s3-secret,K", po::value(&fS3Secret), + "S3 Authentication Secret (for S3 imports)") + ("s3-bucket,t", po::value(&fS3Bucket), + "S3 Bucket (for S3 imports)") + ("s3-hostname,H", po::value(&fS3Host), + "S3 Hostname (for S3 imports, Amazon's S3 default)") + ("s3-region,g", po::value(&fS3Region), + "S3 Region (for S3 imports)") + ("errors-dir,L", po::value(&fErrorDir)->default_value(MCSLOGDIR), + "Directory for the output .err and .bad files") + ("job-uuid,u", po::value(&fUUID), "import job UUID") + ("username,U", po::value(&fUsername), "Username of the files owner.") + ("dbname", po::value(), "Name of the database to load") + ("table", po::value(), "Name of table to load") + ("load-file", po::value(), + "Optional input file name in current directory, " + "unless a fully qualified name is given. 
If not given, input read from STDIN."); + + po::options_description hidden("Hidden options"); + hidden.add_options() + ("keep-rollback-metadata,k", po::bool_switch(&fKeepRollbackMetaData), + "Keep rollback metadata.") + ("report-file,R", po::value(&fReportFilename), "Report file name.") + ("allow-missing-columns,X", po::value(), "Allow missing columns."); + + fOptions->add(*fVisibleOptions).add(hidden); + +#undef DECLARE_INT_ARG + parseCmdLineArgs(argc, argv); + } + catch (std::exception& exp) + { + startupError(exp.what(), true); + } +} + +WECmdArgs::~WECmdArgs() = default; + +//---------------------------------------------------------------------- + +void WECmdArgs::checkIntArg(const std::string& name, long min, long max, int value) const +{ + if (value < min || value > max) + { + ostringstream oss; + oss << "Argument " << name << " is out of range [" << min << ", " << max << "]"; + startupError(oss.str(), true); + } +} + +//---------------------------------------------------------------------- + +void WECmdArgs::usage() const +{ + cout << endl + << "Simple usage using positional parameters " + "(no XML job file):" + << endl + << " " << fPrgmName << " dbName tblName [loadFile] [-j jobID] " << endl + << " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl + << " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl + << " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] " + "[-d debugLevel] [-i] " + << endl + << " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl + << " [-U username]" << endl + << endl; + + cout << endl + << "Traditional usage without positional parameters " + "(XML job file required):" + << endl + << " " << fPrgmName << " -j jobID " << endl + << " [-h] [-r readers] [-w parsers] [-s c] [-f path] [-b readBufs] " << endl + << " [-c readBufSize] [-e maxErrs] [-B libBufSize] [-n NullOption] " << endl + << " [-E encloseChar] [-C escapeChar] [-I binaryOpt] [-S] " + "[-d debugLevel] [-i] " + << endl + << " [-p 
path] [-l loadFile]" << endl + << " [-D] [-N] [-L rejectDir] [-T timeZone]" << endl + << " [-U username]" << endl + << endl; + + cout << "\n\n" << (*fVisibleOptions) << endl; + + cout << " Example1:" << endl + << " " << fPrgmName << " -j 1234" << endl + << " Example2: Some column values are enclosed within double quotes." << endl + << " " << fPrgmName << " -j 3000 -E '\"'" << endl + << " Example3: Import a nation table without a Job XML file" << endl + << " " << fPrgmName << " -j 301 tpch nation nation.tbl" << endl; + + exit(1); +} + +//----------------------------------------------------------------------------- + +void WECmdArgs::parseCmdLineArgs(int argc, char** argv) +{ + std::string importPath; + + if (argc > 0) + fPrgmName = string(MCSBINDIR) + "/" + "cpimport.bin"; // argv[0] is splitter but we need cpimport + + po::positional_options_description pos_opt; + pos_opt.add("dbname", 1) + .add("table", 1) + .add("load-file", 1); + + po::variables_map vm; + po::store(po::command_line_parser(argc, argv).options(*fOptions).positional(pos_opt).run(), vm); + po::notify(vm); + + if (vm.contains("help")) + { + fHelp = true; + usage(); + return; + } + if (vm.contains("separator")) + { + auto value = vm["separator"].as(); + if (value == "\\t") + { + fColDelim = '\t'; + } + else + { + fColDelim = value[0]; + } + } + if (vm.contains("binary-mode")) + { + int value = vm["binary-mode"].as(); + if (value == 1) + { + fImportDataMode = IMPORT_DATA_BIN_ACCEPT_NULL; + } + else if (value == 2) + { + fImportDataMode = IMPORT_DATA_BIN_SAT_NULL; + } + else + { + startupError("Invalid Binary mode; value can be 1 or 2"); + } + } + if (vm.contains("tz")) + { + auto tz = vm["tz"].as(); + long offset; + if (tz != "SYSTEM" && dataconvert::timeZoneToOffset(tz.c_str(), tz.size(), &offset)) + { + startupError("Value for option --tz/-T is invalid"); + } + fTimeZone = tz; + } + if (vm.contains("job-id")) + { + errno = 0; + string optarg = vm["job-id"].as(); + long lValue = strtol(optarg.c_str(), 
nullptr, 10); + if (errno != 0 || lValue < 0 || lValue > INT_MAX) + { + startupError("Option --job-id/-j is invalid or outof range"); + } + fJobId = optarg; + fOrigJobId = fJobId; + + if (0 == fJobId.length()) + { + startupError("Wrong JobID Value"); + } + } + if (vm.contains("allow-missing-columns")) + { + if (vm["allow-missing-columns"].as() == "AllowMissingColumn") + { + fAllowMissingColumn = true; + } + } + + if (fArgMode != -1) + fMode = fArgMode; // BUG 4210 + + if (2 == fArgMode && fPmFilePath.empty()) + throw runtime_error("-f option is mandatory with mode 2."); + + if (vm.contains("dbname")) + { + fSchema = vm["dbname"].as(); + } + if (vm.contains("table")) + { + fTable = vm["table"].as(); + } + if (vm.contains("load-file")) + { + fLocFile = vm["load-file"].as(); + } +} + +void WECmdArgs::fillParams(BulkLoad& curJob, std::string& sJobIdStr, std::string& sXMLJobDir, + std::string& sModuleIDandPID, bool& bLogInfo2ToConsole, std::string& xmlGenSchema, + std::string& xmlGenTable, bool& bValidateColumnList) +{ + std::string importPath; + std::string rptFileName; + bool bImportFileArg = false; + BulkModeType bulkMode = BULK_MODE_LOCAL; + std::string jobUUID; + + curJob.setReadBufferCount(fIOReadBufSize); + curJob.setReadBufferSize(fReadBufSize); + if (fMaxErrors >= 0) + { + curJob.setMaxErrorCount(fMaxErrors); + } + if (!fPmFilePath.empty()) + { + importPath = fPmFilePath; + string setAltErrMsg; + if (curJob.setAlternateImportDir(importPath, setAltErrMsg) != NO_ERROR) + { + startupError(setAltErrMsg, false); + } + } + bLogInfo2ToConsole = fConsoleLog; + sJobIdStr = fJobId; + curJob.setKeepRbMetaFiles(fKeepRollbackMetaData); + bulkMode = static_cast(fMode); + curJob.setNullStringMode(fNullStrMode); + sXMLJobDir = fJobPath; + curJob.setNoOfReadThreads(fNoOfReadThrds); + curJob.setColDelimiter(fColDelim); + curJob.setJobUUID(fUUID); + curJob.setNoOfParseThreads(fNoOfWriteThrds); + curJob.setVbufReadSize(fReadBufSize); + if (fEscChar != -1) + { + 
curJob.setEscapeChar(fEscChar); + } + if (fEnclosedChar != -1) + { + curJob.setEnclosedByChar(fEnclosedChar); + } + curJob.setImportDataMode(fImportDataMode); + curJob.setErrorDir(fErrorDir); + sModuleIDandPID = fModuleIDandPID; + rptFileName = fReportFilename; + curJob.setTruncationAsError(fbTruncationAsError); + if (!fTimeZone.empty()) + { + long offset; + if (dataconvert::timeZoneToOffset(fTimeZone.c_str(), fTimeZone.size(), &offset)) + { + startupError("Invalid timezone specified"); + } + curJob.setTimeZone(offset); + } + bValidateColumnList = !fAllowMissingColumn; + curJob.disableTimeOut(fDisableTableLockTimeOut); + curJob.disableConsoleOutput(fSilent); + curJob.setS3Key(fS3Key); + curJob.setS3Bucket(fS3Bucket); + curJob.setS3Secret(fS3Secret); + curJob.setS3Region(fS3Region); + curJob.setS3Host(fS3Host); + if (!fUsername.empty()) + { + curJob.setUsername(fUsername); + } + curJob.setSkipRows(fSkipRows); + + curJob.setDefaultJobUUID(); + + // Inconsistent to specify -f STDIN with -l importFile + if (bImportFileArg && importPath == "STDIN") + { + startupError(std::string("-f STDIN is invalid with -l importFile."), true); + } + + // If distributed mode, make sure report filename is specified and that we + // can create the file using the specified path. + if (bulkMode == BULK_MODE_REMOTE_SINGLE_SRC || bulkMode == BULK_MODE_REMOTE_MULTIPLE_SRC) + { + if (rptFileName.empty()) + { + startupError(std::string("Bulk modes 1 and 2 require -R rptFileName."), true); + } + else + { + std::ofstream rptFile(rptFileName.c_str()); + + if (rptFile.fail()) + { + std::ostringstream oss; + oss << "Unable to open report file " << rptFileName; + startupError(oss.str(), false); + } + + rptFile.close(); + } + + curJob.setBulkLoadMode(bulkMode, rptFileName); + } + + // Get positional arguments, User can provide: + // 1. no positional parameters + // 2. Two positional parameters (schema and table names) + // 3. 
Three positional parameters (schema, table, and import file name) + if (!fSchema.empty()) + { + xmlGenSchema = fSchema; + + if (!fTable.empty()) + { + // Validate invalid options in conjunction with 2-3 positional + // parameter mode, which means we are using temp Job XML file. + if (bImportFileArg) + { + startupError(std::string("-l importFile is invalid with positional parameters"), true); + } + + if (!sXMLJobDir.empty()) + { + startupError(std::string("-p path is invalid with positional parameters."), true); + } + + if (importPath == "STDIN") + { + startupError(std::string("-f STDIN is invalid with positional parameters."), true); + } + + xmlGenTable = fTable; + + if (!fLocFile.empty()) + { + // 3rd pos parm + curJob.addToCmdLineImportFileList(fLocFile); + + // Default to CWD if loadfile name given w/o -f path + if (importPath.empty()) + { + std::string setAltErrMsg; + + if (curJob.setAlternateImportDir(std::string("."), setAltErrMsg) != NO_ERROR) + startupError(setAltErrMsg, false); + } + } + else + { + // Invalid to specify -f if no load file name given + if (!importPath.empty()) + { + startupError(std::string("-f requires 3rd positional parameter (load file name)."), true); + } + + // Default to STDIN if no import file name given + std::string setAltErrMsg; + + if (curJob.setAlternateImportDir(std::string("STDIN"), setAltErrMsg) != NO_ERROR) + startupError(setAltErrMsg, false); + } + } + else + { + startupError(std::string("No table name specified with schema."), true); + } + } + else + { + // JobID is a required parameter with no positional parm mode, + // because we need the jobid to identify the input job xml file. 
+ if (sJobIdStr.empty()) + { + startupError(std::string("No JobID specified."), true); + } + } + + // Dump some configuration info + if (!fSilent) + { + if (fDebugLvl != 0) + { + cout << "Debug level is set to " << fDebugLvl << endl; + } + if (fNoOfReadThrds != 0) + { + cout << "number of read threads : " << fNoOfReadThrds << endl; + } + cout << "Column delimiter : " << (fColDelim == '\t' ? "\\t" : string{fColDelim}) << endl; + if (fNoOfWriteThrds != 0) + { + cout << "number of parse threads : " << fNoOfWriteThrds << endl; + } + if (fEscChar != 0) + { + cout << "Escape Character : " << fEscChar << endl; + } + if (fEnclosedChar != 0) + { + cout << "Enclosed by Character : " << fEnclosedChar << endl; + } + } +} + +void WECmdArgs::startupError(const std::string& errMsg, bool showHint) const +{ + BRMWrapper::getInstance()->finishCpimportJob(fCpimportJobId); + // Log to console + if (!BulkLoad::disableConsoleOutput()) + cerr << errMsg << endl; + + if (showHint && !fSilent) + { + cerr << "Try '" << fPrgmName << " -h' for more information." << endl; + } + + // Log to syslog + logging::Message::Args errMsgArgs; + errMsgArgs.add(errMsg); + SimpleSysLog::instance()->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0087); + + std::string jobIdStr("0"); + logging::Message::Args endMsgArgs; + endMsgArgs.add(jobIdStr); + endMsgArgs.add("FAILED"); + SimpleSysLog::instance()->logMsg(endMsgArgs, logging::LOG_TYPE_INFO, logging::M0082); + + exit(EXIT_FAILURE); +} + +} /* namespace WriteEngine */ diff --git a/writeengine/bulk/we_cmdargs.h b/writeengine/bulk/we_cmdargs.h new file mode 100644 index 000000000..5446fdc61 --- /dev/null +++ b/writeengine/bulk/we_cmdargs.h @@ -0,0 +1,130 @@ +/* Copyright (C) 2014 InfiniDB, Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/******************************************************************************* + * $Id$ + * + *******************************************************************************/ +#pragma once + +#include +#include +#include "we_bulkload.h" + +#include "we_type.h" + +namespace boost::program_options +{ +class options_description; +} + +namespace WriteEngine +{ +class WECmdArgs +{ +public: + WECmdArgs(int argc, char** argv); + ~WECmdArgs(); + + using VecInts = std::vector; + using VecArgs = std::vector; + + void parseCmdLineArgs(int argc, char** argv); + void usage() const; + bool checkForCornerCases(); + + void startupError(const std::string& errMsg, bool showHint = false) const; + void fillParams(BulkLoad& curJob, std::string& sJobIdStr, + std::string& sXMLJobDir, std::string& sModuleIDandPID, bool& bLogInfo2ToConsole, + std::string& xmlGenSchema, std::string& xmlGenTable, bool& bValidateColumnList); + + void setCpimportJobId(uint32_t cpimportJobId) + { + fCpimportJobId = cpimportJobId; + } + +private: + void checkIntArg(const std::string& name, long min, long max, int value) const; + VecArgs fVecArgs; + VecInts fPmVec; + + VecArgs fVecJobFiles; // JobFiles splitter from master JobFile + int fMultiTableCount{0}; // MultiTable count + VecArgs fColFldsFromJobFile; // List of columns from any job file, that + // represent fields in the import data + + std::string fJobId; // JobID + std::string fOrigJobId; // Original JobID, in case we have to split it + bool fJobLogOnly{false}; // Job number is only for log 
filename only + bool fHelp{false}; // Help mode + int fMode{BULK_MODE_LOCAL}; // splitter Mode + int fArgMode{-1}; // Argument mode, dep. on this fMode is decided. + bool fQuiteMode{true}; // in quiet mode or not + bool fConsoleLog{false}; // Log everything to console - w.r.t cpimport + std::string fPmFile; // FileName at PM + std::string fPmFilePath; // Path of input file in PM + std::string fLocFile; // Local file name + std::string fBrmRptFile; // BRM report file + std::string fJobPath; // Path to Job File + std::string fTmpFileDir; // Temp file directory. + std::string fBulkRoot; // Bulk Root path + std::string fJobFile; // Job File Name + std::string fS3Key; // S3 key + std::string fS3Secret; // S3 Secret + std::string fS3Bucket; // S3 Bucket + std::string fS3Host; // S3 Host + std::string fS3Region; // S3 Region + + int fNoOfReadThrds{1}; // No. of read threads + int fDebugLvl{0}; // Debug level + int fMaxErrors{-1}; // Max allowable errors + int fReadBufSize{-1}; // Read buffer size + int fIOReadBufSize{-1}; // I/O read buffer size + int fSetBufSize{0}; // Buff size w/setvbuf + char fColDelim{0}; // column delimiter + char fEnclosedChar{0}; // enclosed by char + char fEscChar{0}; // esc char + int fSkipRows{0}; // skip header + int fNoOfWriteThrds{3}; // No. of write threads + bool fNullStrMode{false}; // set null string mode - treat null as null + ImportDataMode fImportDataMode{IMPORT_DATA_TEXT}; // Importing text or binary data + std::string fPrgmName; // argv[0] + std::string fSchema; // Schema name - positional parameter + std::string fTable; // Table name - table name parameter + + bool fBlockMode3{false}; // Do not allow Mode 3 + bool fbTruncationAsError{false}; // Treat string truncation as error + std::string fUUID{boost::uuids::to_string(boost::uuids::nil_generator()())}; + bool fConsoleOutput{true}; // If false, no output to console. 
+ std::string fTimeZone{"SYSTEM"}; // Timezone to use for TIMESTAMP datatype + std::string fUsername; // Username of the data files owner + std::string fErrorDir{MCSLOGDIR "/cpimport"}; + bool fDisableTableLockTimeOut{false}; + bool fSilent{false}; + std::string fModuleIDandPID; + + std::string fReportFilename; + bool fKeepRollbackMetaData{false}; + bool fAllowMissingColumn{false}; + + uint32_t fCpimportJobId{}; + + std::unique_ptr fOptions; + std::unique_ptr fVisibleOptions; +}; + +} // namespace WriteEngine diff --git a/writeengine/bulk/we_tableinfo.cpp b/writeengine/bulk/we_tableinfo.cpp index 6b23fa26c..1df5b752b 100644 --- a/writeengine/bulk/we_tableinfo.cpp +++ b/writeengine/bulk/we_tableinfo.cpp @@ -145,6 +145,8 @@ TableInfo::TableInfo(Log* logger, const BRM::TxnID txnID, const string& processN , fNullStringMode(false) , fEnclosedByChar('\0') , fEscapeChar('\\') + , fSkipRows(0) + , fSkipRowsCur(0) , fProcessingBegun(false) , fBulkMode(BULK_MODE_LOCAL) , fBRMReporter(logger, tableName) @@ -269,7 +271,7 @@ int TableInfo::readTableData() int fileCounter = 0; unsigned long long qtSentAt = 0; - if (fHandle == NULL) + if (fHandle == nullptr) { fFileName = fLoadFileList[fileCounter]; int rc = openTableFile(); @@ -421,13 +423,14 @@ int TableInfo::readTableData() if (fReadFromS3) { readRc = fBuffers[readBufNo].fillFromMemory(fBuffers[prevReadBuf], fFileBuffer, fS3ReadLength, - &fS3ParseLength, totalRowsPerInputFile, validTotalRows, - fColumns, allowedErrCntThisCall); + &fS3ParseLength, fSkipRowsCur, totalRowsPerInputFile, + validTotalRows, fColumns, allowedErrCntThisCall); } else { - readRc = fBuffers[readBufNo].fillFromFile(fBuffers[prevReadBuf], fHandle, totalRowsPerInputFile, - validTotalRows, fColumns, allowedErrCntThisCall); + readRc = fBuffers[readBufNo].fillFromFile(fBuffers[prevReadBuf], fHandle, fSkipRowsCur, + totalRowsPerInputFile, validTotalRows, fColumns, + allowedErrCntThisCall); } if (readRc != NO_ERROR) @@ -1208,7 +1211,6 @@ bool 
TableInfo::bufferReadyForParse(const int& bufferId, bool report) const int TableInfo::initializeBuffers(int noOfBuffers, const JobFieldRefList& jobFieldRefList, unsigned int fixedBinaryRecLen) { - fReadBufCount = noOfBuffers; // initialize and populate the buffer vector. @@ -1258,7 +1260,7 @@ void TableInfo::addColumn(ColumnInfo* info) //------------------------------------------------------------------------------ int TableInfo::openTableFile() { - if (fHandle != NULL) + if (fHandle != nullptr) return NO_ERROR; if (fReadFromStdin) @@ -1322,6 +1324,8 @@ int TableInfo::openTableFile() fLog->logMsg(oss.str(), MSGLVL_INFO2); } + fSkipRowsCur = fSkipRows; + return NO_ERROR; } diff --git a/writeengine/bulk/we_tableinfo.h b/writeengine/bulk/we_tableinfo.h index 996661cf9..3d4e836a8 100644 --- a/writeengine/bulk/we_tableinfo.h +++ b/writeengine/bulk/we_tableinfo.h @@ -148,8 +148,9 @@ class TableInfo : public WeUIDGID size_t fS3ParseLength; bool fNullStringMode; // Treat "NULL" as a null value char fEnclosedByChar; // Character to enclose col values - char fEscapeChar; // Escape character used in conjunc- - // tion with fEnclosedByChar + char fEscapeChar; // Escape character used in conjunction with fEnclosedByChar + size_t fSkipRows; // Header rows to skip + size_t fSkipRowsCur; // Header rows left to skip in the current file bool fProcessingBegun; // Has processing begun on this tbl BulkModeType fBulkMode; // Distributed bulk mode (1,2, or 3) std::string fBRMRptFileName; // Name of distributed mode rpt file @@ -334,6 +335,10 @@ class TableInfo : public WeUIDGID */ void setEscapeChar(char esChar); + /** @brief Set how many header rows should be skipped. + */ + void setSkipRows(size_t skipRows); + /** @brief Has processing begun for this table. 
*/ bool hasProcessingBegun(); @@ -579,6 +584,12 @@ inline void TableInfo::setEscapeChar(char esChar) fEscapeChar = esChar; } +inline void TableInfo::setSkipRows(size_t skipRows) +{ + fSkipRows = skipRows; +} + + inline void TableInfo::setFileBufferSize(const int fileBufSize) { fFileBufSize = fileBufSize; diff --git a/writeengine/server/we_dataloader.cpp b/writeengine/server/we_dataloader.cpp index 022963db5..16d648fd6 100644 --- a/writeengine/server/we_dataloader.cpp +++ b/writeengine/server/we_dataloader.cpp @@ -239,12 +239,13 @@ bool WEDataLoader::setupCpimport() // fork the cpimport std::string aCmdLine = fCmdLineStr; std::istringstream ss(aCmdLine); std::string arg; - std::vector v2(20, ""); + std::vector v2; unsigned int i = 0; while (ss >> arg) { - v2[i++] = arg; + v2.push_back(arg); + i++; } for (unsigned int j = 0; j < i; ++j) diff --git a/writeengine/shared/we_type.h b/writeengine/shared/we_type.h index 4b74c0efd..faef43018 100644 --- a/writeengine/shared/we_type.h +++ b/writeengine/shared/we_type.h @@ -525,6 +525,7 @@ struct Job /** @brief Job Structure */ int numberOfReadBuffers; unsigned readBufferSize; unsigned writeBufferSize; + int fSkipRows; Job() : id(0) , fDelimiter('|') @@ -533,6 +534,7 @@ struct Job /** @brief Job Structure */ , numberOfReadBuffers(0) , readBufferSize(0) , writeBufferSize(0) + , fSkipRows(0) { } }; diff --git a/writeengine/splitter/CMakeLists.txt b/writeengine/splitter/CMakeLists.txt index 96656adf9..7b05928f6 100644 --- a/writeengine/splitter/CMakeLists.txt +++ b/writeengine/splitter/CMakeLists.txt @@ -26,4 +26,5 @@ columnstore_link( batchloader threadpool marias3 + boost_program_options ) diff --git a/writeengine/splitter/we_cmdargs.cpp b/writeengine/splitter/we_cmdargs.cpp index 32d126346..34f67ccb7 100644 --- a/writeengine/splitter/we_cmdargs.cpp +++ b/writeengine/splitter/we_cmdargs.cpp @@ -29,6 +29,8 @@ #include #include #include +#include +namespace po = boost::program_options; using namespace std; #include @@ -50,38 
+52,96 @@ namespace WriteEngine //---------------------------------------------------------------------- //---------------------------------------------------------------------- WECmdArgs::WECmdArgs(int argc, char** argv) - : fMultiTableCount(0) - , fJobLogOnly(false) - , fHelp(false) - , fMode(1) - , fArgMode(-1) - , fQuiteMode(true) - , fConsoleLog(false) - , fVerbose(0) - , fBatchQty(10000) - , fNoOfReadThrds(0) - , fDebugLvl(0) - , fMaxErrors(-1) - , fReadBufSize(0) - , fIOReadBufSize(0) - , fSetBufSize(0) - , fColDelim('|') - , fEnclosedChar(0) - , fEscChar(0) - , fNoOfWriteThrds(0) - , fNullStrMode(false) - , fImportDataMode(IMPORT_DATA_TEXT) - , fCpiInvoke(false) - , fBlockMode3(false) - , fbTruncationAsError(false) - , fUUID(boost::uuids::nil_generator()()) - , fConsoleOutput(true) - , fTimeZone("SYSTEM") - , fErrorDir(string(MCSLOGDIR) + "/cpimport/") { try { appTestFunction(); + fOptions = std::make_unique(); +#define DECLARE_INT_ARG(name, stor, min, max, desc) \ + (name,\ + po::value(&stor)\ + ->notifier([](auto&& value) { checkIntArg(name, min, max, value); }),\ + desc) + + fOptions->add_options() + ("help,h", "Print this message.") + DECLARE_INT_ARG("read-buffer,b", fIOReadBufSize, 1, INT_MAX, "Number of read buffers.") + DECLARE_INT_ARG("read-buffer-size,c", fReadBufSize, 1, INT_MAX, + "Application read buffer size (in bytes)") + DECLARE_INT_ARG("debug,d", fDebugLvl, 1, 3, "Print different level(1-3) debug message") + ("verbose,v", po::value()) + ("silent,N", po::bool_switch()) + DECLARE_INT_ARG("max-errors,e", fMaxErrors, 0, INT_MAX, + "Maximum number of allowable error per table per PM") + ("file-path,f", po::value(&fPmFilePath), + "Data file directory path. 
Default is current working directory.\n" + "\tIn Mode 1, represents the local input file path.\n" + "\tIn Mode 2, represents the PM based input file path.\n" + "\tIn Mode 3, represents the local input file path.") + DECLARE_INT_ARG("mode,m", fArgMode, 0, 3, + "\t1 - rows will be loaded in a distributed manner across PMs.\n" + "\t2 - PM based input files loaded into their respective PM.\n" + "\t3 - input files will be loaded on the local PM.") + ("filename,l", po::value(&fPmFile), + "Name of import file to be loaded, relative to 'file-path'") + DECLARE_INT_ARG("batch-quantity,q", fBatchQty, 1, INT_MAX, + "Batch quantity, Number of rows distributed per batch in Mode 1") + ("console-log,i", po::bool_switch(&fConsoleLog), + "Print extended info to console in Mode 3.") + ("job-id,j", po::value(), + "Job ID. In simple usage, default is the table OID unless a fully qualified input " + "file name is given.") + ("null-strings,n", po::value(&fNullStrMode)->implicit_value(true), + "NullOption (0-treat the string NULL as data (default);\n" + "1-treat the string NULL as a NULL value)") + ("xml-job-path,p", po::value(&fJobPath), "Path for the XML job description file.") + DECLARE_INT_ARG("readers,r", fNoOfReadThrds, 1, INT_MAX, "Number of readers.") + ("separator,s", po::value(), "Delimiter between column values.") + DECLARE_INT_ARG("io-buffer-size,B", fSetBufSize, 1, INT_MAX, + "I/O library read buffer size (in bytes)") + DECLARE_INT_ARG("writers,w", fNoOfWriteThrds, 1, INT_MAX, "Number of parsers.") + ("enclosed-by,E", po::value(&fEnclosedChar), + "Enclosed by character if field values are enclosed.") + ("escape-char,C", po::value(&fEscChar)->default_value('\\'), + "Escape character used in conjunction with 'enclosed-by' " + "character, or as a part of NULL escape sequence ('\\N');\n" + "default is '\\'") + ("headers,O", + po::value(&fSkipRows)->implicit_value(1) + ->notifier([](auto&& value) { checkIntArg("headers,O", 0, INT_MAX, value); }), + "Number of header rows to skip.") 
+ ("binary-mode,I", po::value(), + "Import binary data; how to treat NULL values:\n" + "\t1 - import NULL values\n" + "\t2 - saturate NULL values\n") + ("pm,P", po::value>(&fPmVec), + "List of PMs ex: -P 1,2,3. Default is all PMs.") + ("truncation-as-error,S", po::bool_switch(&fbTruncationAsError), + "Treat string truncations as errors.") + ("tz,T", po::value(), + "Timezone used for TIMESTAMP datatype. Possible values:\n" + "\t\"SYSTEM\" (default)\n" + "\tOffset in the form +/-HH:MM") + ("s3-key,y", po::value(&fS3Key), + "S3 Authentication Key (for S3 imports)") + ("s3-secret,K", po::value(&fS3Secret), + "S3 Authentication Secret (for S3 imports)") + ("s3-bucket,t", po::value(&fS3Bucket), + "S3 Bucket (for S3 imports)") + ("s3-hostname,H", po::value(&fS3Host), + "S3 Hostname (for S3 imports, Amazon's S3 default)") + ("s3-region,g", po::value(&fS3Region), + "S3 Region (for S3 imports)") + ("errors-dir,L", po::value(&fErrorDir)->default_value(MCSLOGDIR), + "Directory for the output .err and .bad files") + ("username,U", po::value(&fUsername), "Username of the files owner.") + ("dbname", po::value(), "Name of the database to load") + ("table", po::value(), "Name of table to load") + ("load-file", po::value(), + "Optional input file name in current directory, " + "unless a fully qualified name is given. 
If not given, input read from STDIN."); + +#undef DECLARE_INT_ARG parseCmdLineArgs(argc, argv); } catch (std::exception& exp) @@ -92,6 +152,8 @@ WECmdArgs::WECmdArgs(int argc, char** argv) } } +WECmdArgs::~WECmdArgs() = default; + //---------------------------------------------------------------------- void WECmdArgs::appTestFunction() @@ -107,8 +169,18 @@ void WECmdArgs::appTestFunction() return; } +void WECmdArgs::checkIntArg(const std::string& name, long min, long max, int value) +{ + if (value < min || value > max) + { + ostringstream oss; + oss << "Argument " << name << " is out of range [" << min << ", " << max << "]"; + throw runtime_error(oss.str()); + } +} + //---------------------------------------------------------------------- -std::string WECmdArgs::getCpImportCmdLine() +std::string WECmdArgs::getCpImportCmdLine(bool skipRows) { std::ostringstream aSS; std::string aCmdLine; @@ -185,6 +257,11 @@ std::string WECmdArgs::getCpImportCmdLine() if (fEscChar != 0) aSS << " -C " << fEscChar; + if (skipRows && fSkipRows) + { + aSS << " -O " << fSkipRows; + } + if (fNullStrMode) aSS << " -n " << '1'; @@ -321,6 +398,12 @@ bool WECmdArgs::checkForCornerCases() // BUG 4210 this->checkJobIdCase(); // Need to do this before we go further + if (fSkipRows && fImportDataMode != IMPORT_DATA_TEXT) + { + cout << "Invalid option -O with binary file" << endl; + throw runtime_error("Invalid option -O with binary file"); + } + if (fMode == 0) { if (!fJobId.empty()) @@ -522,52 +605,7 @@ void WECmdArgs::usage() cout << "\t\t\tunless a fully qualified name is given.\n"; cout << "\t\t\tIf not given, input read from STDIN.\n"; - cout << "\n\nOptions:\n" - << "\t-b\tNumber of read buffers\n" - << "\t-c\tApplication read buffer size(in bytes)\n" - << "\t-d\tPrint different level(1-3) debug message\n" - << "\t-e\tMax number of allowable error per table per PM\n" - << "\t-f\tData file directory path.\n" - << "\t\t\tDefault is current working directory.\n" - << "\t\t\tIn Mode 1, -f 
represents the local input file path.\n" - << "\t\t\tIn Mode 2, -f represents the PM based input file path.\n" - << "\t\t\tIn Mode 3, -f represents the local input file path.\n" - << "\t-l\tName of import file to be loaded, relative to -f path,\n" - << "\t-h\tPrint this message.\n" - << "\t-q\tBatch Quantity, Number of rows distributed per batch in Mode 1\n" - << "\t-i\tPrint extended info to console in Mode 3.\n" - << "\t-j\tJob ID. In simple usage, default is the table OID.\n" - << "\t\t\tunless a fully qualified input file name is given.\n" - << "\t-n\tNullOption (0-treat the string NULL as data (default);\n" - << "\t\t\t1-treat the string NULL as a NULL value)\n" - << "\t-p\tPath for XML job description file.\n" - << "\t-r\tNumber of readers.\n" - << "\t-s\t'c' is the delimiter between column values.\n" - << "\t-B\tI/O library read buffer size (in bytes)\n" - << "\t-w\tNumber of parsers.\n" - << "\t-E\tEnclosed by character if field values are enclosed.\n" - << "\t-C\tEscape character used in conjunction with 'enclosed by'\n" - << "\t\t\tcharacter, or as part of NULL escape sequence ('\\N');\n" - << "\t\t\tdefault is '\\'\n" - << "\t-I\tImport binary data; how to treat NULL values:\n" - << "\t\t\t1 - import NULL values\n" - << "\t\t\t2 - saturate NULL values\n" - << "\t-P\tList of PMs ex: -P 1,2,3. 
Default is all PMs.\n" - << "\t-S\tTreat string truncations as errors.\n" - << "\t-m\tmode\n" - << "\t\t\t1 - rows will be loaded in a distributed manner across PMs.\n" - << "\t\t\t2 - PM based input files loaded onto their respective PM.\n" - << "\t\t\t3 - input files will be loaded on the local PM.\n" - << "\t-T\tTimezone used for TIMESTAMP datatype.\n" - << "\t\tPossible values: \"SYSTEM\" (default)\n" - << "\t\t : Offset in the form +/-HH:MM\n" - << "\t-y\tS3 Authentication Key (for S3 imports)\n" - << "\t-K\tS3 Authentication Secret (for S3 imports)\n" - << "\t-t\tS3 Bucket (for S3 imports)\n" - << "\t-H\tS3 Hostname (for S3 imports, Amazon's S3 default)\n" - << "\t-g\tS3 Region (for S3 imports)\n" - << "\t-L\tDirectory for the output .err and .bad files.\n" - << "\t\tDefault is " << string(MCSLOGDIR); + cout << "\n\n" << (*fOptions) << endl; cout << "\nExample1: Traditional usage\n" << "\tcpimport -j 1234"; @@ -591,375 +629,112 @@ void WECmdArgs::usage() void WECmdArgs::parseCmdLineArgs(int argc, char** argv) { - int aCh; std::string importPath; bool aJobType = false; if (argc > 0) fPrgmName = string(MCSBINDIR) + "/" + "cpimport.bin"; // argv[0] is splitter but we need cpimport - while ((aCh = getopt(argc, argv, "d:j:w:s:v:l:r:b:e:B:f:q:ihm:E:C:P:I:n:p:c:ST:Ny:K:t:H:g:U:L:")) != EOF) + po::positional_options_description pos_opt; + pos_opt.add("dbname", 1) + .add("table", 1) + .add("load-file", 1); + + po::variables_map vm; + po::store(po::command_line_parser(argc, argv).options(*fOptions).positional(pos_opt).run(), vm); + po::notify(vm); + + if (vm.contains("silent")) { - switch (aCh) + fConsoleOutput = !vm["silent"].as(); + } + if (vm.contains("help")) + { + fHelp = true; + usage(); + return; + } + if (vm.contains("separator")) + { + auto value = vm["separator"].as(); + if (value == "\\t") { - case 'm': + fColDelim = '\t'; + if (fDebugLvl) { - fArgMode = atoi(optarg); - - // cout << "Mode level set to " << fMode << endl; - if ((fArgMode > -1) && (fArgMode <= 
3)) - { - } - else - throw runtime_error("Wrong Mode level"); - - break; + cout << "Column delimiter : \\t" << endl; } - - case 'B': + } + else + { + fColDelim = value[0]; + if (fDebugLvl) { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - throw runtime_error("Option -B is invalid or out of range"); - - fSetBufSize = lValue; - break; - } - - case 'b': - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - throw runtime_error("Option -b is invalid or out of range"); - - fIOReadBufSize = lValue; - break; - } - - case 'e': - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX)) - throw runtime_error("Option -e is invalid or out of range"); - - fMaxErrors = lValue; - break; - } - - case 'i': - { - fConsoleLog = true; - break; - } - - case 'c': - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - throw runtime_error("Option -c is invalid or out of range"); - - fReadBufSize = lValue; - break; - } - - case 'j': // -j: jobID - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 0) || (lValue > INT_MAX)) - throw runtime_error("Option -j is invalid or out of range"); - - fJobId = optarg; - fOrigJobId = fJobId; // in case if we need to split it. 
- - if (0 == fJobId.length()) - throw runtime_error("Wrong JobID Value"); - - aJobType = true; - break; - } - - case 'v': // verbose - { - string aVerbLen = optarg; - fVerbose = aVerbLen.length(); - fDebugLvl = fVerbose; - break; - } - - case 'd': // -d debug - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - throw runtime_error("Option -d is invalid or out of range"); - - fDebugLvl = lValue; - - if (fDebugLvl > 0 && fDebugLvl <= 3) - { - cout << "\nDebug level set to " << fDebugLvl << endl; - } - else - { - throw runtime_error("Wrong Debug level"); - } - - break; - } - - case 'r': // -r: num read threads - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - throw runtime_error("Option -r is invalid or out of range"); - - fNoOfReadThrds = lValue; - break; - } - - case 'w': // -w: num parse threads - { - errno = 0; - long lValue = strtol(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > INT_MAX)) - throw runtime_error("Option -w is invalid or out of range"); - - fNoOfWriteThrds = lValue; - break; - } - - case 's': // -s: column delimiter - { - if (!strcmp(optarg, "\\t")) - { - fColDelim = '\t'; - - if (fDebugLvl) - cout << "Column delimiter : " - << "\\t" << endl; - } - else - { - fColDelim = optarg[0]; - - if (fDebugLvl) - cout << "Column delimiter : " << fColDelim << endl; - } - - break; - } - - case 'l': // -l: if JobId (-j), it can be input file - { - fPmFile = optarg; - - if (0 == fPmFile.length()) - throw runtime_error("Wrong local filename"); - - break; - } - - case 'f': // -f: import file path - { - fPmFilePath = optarg; - break; - } - - case 'n': // -n: treat "NULL" as null - { - // default is 0, ie it is equal to not giving this option - int nullStringMode = atoi(optarg); - - if ((nullStringMode != 0) && (nullStringMode != 1)) - { - throw(runtime_error("Invalid NULL option; value can be 0 or 1")); - } - - if 
(nullStringMode) - fNullStrMode = true; - else - fNullStrMode = false; // This is default - - break; - } - - case 'P': // -p: list of PM's - { - try - { - std::string aPmList = optarg; - - if (!str2PmList(aPmList, fPmVec)) - throw(runtime_error("PM list is wrong")); - } - catch (runtime_error& ex) - { - throw(ex); - } - - break; - } - - case 'p': - { - fJobPath = optarg; - break; - } - - case 'E': // -E: enclosed by char - { - fEnclosedChar = optarg[0]; - // cout << "Enclosed by Character : " << optarg[0] << endl; - break; - } - - case 'C': // -C: enclosed escape char - { - fEscChar = optarg[0]; - // cout << "Escape Character : " << optarg[0] << endl; - break; - } - - case 'h': // -h: help - { - // usage(); // will exit(1) here - fHelp = true; - break; - } - - case 'I': // -I: binary mode (null handling) - { - // default is text mode, unless -I option is specified - int binaryMode = atoi(optarg); - - if (binaryMode == 1) - { - fImportDataMode = IMPORT_DATA_BIN_ACCEPT_NULL; - } - else if (binaryMode == 2) - { - fImportDataMode = IMPORT_DATA_BIN_SAT_NULL; - } - else - { - throw(runtime_error("Invalid Binary mode; value can be 1 or 2")); - } - - break; - } - - case 'S': // -S: Treat string truncations as errors - { - setTruncationAsError(true); - // cout << "TruncationAsError : true" << endl; - break; - } - - case 'T': - { - std::string timeZone = optarg; - long offset; - - if (timeZone != "SYSTEM" && dataconvert::timeZoneToOffset(timeZone.c_str(), timeZone.size(), &offset)) - { - throw(runtime_error("Value for option -T is invalid")); - } - - fTimeZone = timeZone; - break; - } - - case 'q': // -q: batch quantity - default value is 10000 - { - errno = 0; - long long lValue = strtoll(optarg, 0, 10); - - if ((errno != 0) || (lValue < 1) || (lValue > UINT_MAX)) - throw runtime_error("Option -q is invalid or out of range"); - - fBatchQty = lValue; - - if (fBatchQty < 10000) - fBatchQty = 10000; - else if (fBatchQty > 100000) - fBatchQty = 10000; - - break; - } - - case 
'N': //-N no console output - { - fConsoleOutput = false; - break; - } - - case 'y': //-y S3 Key - { - fS3Key = optarg; - break; - } - - case 'K': //-K S3 Secret - { - fS3Secret = optarg; - break; - } - - case 'H': //-H S3 Host - { - fS3Host = optarg; - break; - } - - case 't': //-t S3 bucket - { - fS3Bucket = optarg; - break; - } - - case 'g': //-g S3 Region - { - fS3Region = optarg; - break; - } - - case 'U': //-U username of the files owner - { - fUsername = optarg; - break; - } - - case 'L': // -L set the output location of .bad/.err files - { - fErrorDir = optarg; - break; - } - - default: - { - std::string aErr = std::string("Unknown command line option ") + std::to_string(aCh); - // cout << "Unknown command line option " << aCh << endl; - throw(runtime_error(aErr)); + cout << "Column delimiter : " << fColDelim << endl; } } } + if (vm.contains("binary-mode")) + { + int value = vm["binary-mode"].as(); + if (value == 1) + { + fImportDataMode = IMPORT_DATA_BIN_ACCEPT_NULL; + } + else if (value == 2) + { + fImportDataMode = IMPORT_DATA_BIN_SAT_NULL; + } + else + { + throw runtime_error("Invalid Binary mode; value can be 1 or 2"); + } + } + if (vm.contains("tz")) + { + auto tz = vm["tz"].as(); + long offset; + if (tz != "SYSTEM" && dataconvert::timeZoneToOffset(tz.c_str(), tz.size(), &offset)) + { + throw runtime_error("Value for option --tz/-T is invalid"); + } + fTimeZone = tz; + } + if (vm.contains("job-id")) + { + errno = 0; + string optarg = vm["job-id"].as(); + long lValue = strtol(optarg.c_str(), nullptr, 10); + if (errno != 0 || lValue < 0 || lValue > INT_MAX) + { + throw runtime_error("Option --job-id/-j is invalid or out of range"); + } + fJobId = optarg; + fOrigJobId = fJobId; - if (fHelp) - usage(); // BUG 4210 + if (fJobId.empty()) + { + throw runtime_error("Wrong JobID Value"); + } + + aJobType = true; + } + if (vm.contains("verbose")) + { + string optarg = vm["verbose"].as(); + fVerbose = fDebugLvl = optarg.length(); + } + if 
(vm.contains("batch-quantity")) + { + if (fBatchQty < 10000) + { + fBatchQty = 10000; + } + else if (fBatchQty > 100000) + { + fBatchQty = 10000; + } + } if (fArgMode != -1) fMode = fArgMode; // BUG 4210 @@ -976,26 +751,23 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) if (0 == fArgMode) throw runtime_error("Incompatible mode and option types"); - if (optind < argc) + if (vm.contains("dbname")) { - fSchema = argv[optind]; // 1st pos parm - optind++; + fSchema = vm["dbname"].as(); - if (optind < argc) - { - fTable = argv[optind]; // 2nd pos parm - optind++; - } - else + + if (!vm.contains("table")) { // if schema is there, table name should be there throw runtime_error("No table name specified with schema."); } - if (optind < argc) // see if input file name is given + fTable = vm["table"].as(); // 2nd pos parm + + if (vm.contains("load-file")) // see if input file name is given { // 3rd pos parm - fLocFile = argv[optind]; + fLocFile = vm["load-file"].as(); if ((fLocFile.at(0) != '/') && (fLocFile != "STDIN")) { @@ -1074,7 +846,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) // 1. no positional parameters - Mode 0 & stdin // 2. Two positional parameters (schema and table names) - Mode 1/2, stdin // 3. 
Three positional parameters (schema, table, and import file name) - else if (optind < argc) // see if db schema name is given + else if (vm.contains("dbname")) // see if db schema name is given { if (fArgMode == 0) { @@ -1088,13 +860,12 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) } else { - fLocFile = argv[optind]; - optind++; + fLocFile = vm["dbname"].as(); } - if (optind < argc) // dest filename provided + if (vm.contains("table")) // dest filename provided { - fPmFile = argv[optind]; + fPmFile = vm["table"].as(); if ((fPmFile.at(0) != '/') && (fS3Key.empty())) { @@ -1144,19 +915,16 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) */ } else - fSchema = argv[optind]; // 1st pos parm + fSchema = vm["dbname"].as(); // 1st pos parm - optind++; - - if (optind < argc) // see if table name is given + if (vm.contains("table")) // see if table name is given { - fTable = argv[optind]; // 2nd pos parm - optind++; + fTable = vm["table"].as(); // 2nd pos parm - if (optind < argc) // see if input file name is given + if (vm.contains("load-file")) // see if input file name is given { // 3rd pos parm - fLocFile = argv[optind]; + fLocFile = vm["load-file"].as(); // BUG 4379 if -f option given we need to use that path, // over riding bug 4231. 
look at the code below @@ -1543,9 +1311,7 @@ void WECmdArgs::setEnclByAndEscCharFromJobFile(std::string& JobName) if (fEnclosedChar == 0) // check anything in Jobxml file { WEXmlgetter aXmlGetter(JobName); - vector aSections; - aSections.push_back("BulkJob"); - aSections.push_back("EnclosedByChar"); + const vector aSections{"BulkJob", "EnclosedByChar"}; try { @@ -1569,9 +1335,7 @@ void WECmdArgs::setEnclByAndEscCharFromJobFile(std::string& JobName) if (fEscChar == 0) // check anything in Jobxml file { WEXmlgetter aXmlGetter(JobName); - vector aSections; - aSections.push_back("BulkJob"); - aSections.push_back("EscapeChar"); + const vector aSections{"BulkJob", "EscapeChar"}; try { diff --git a/writeengine/splitter/we_cmdargs.h b/writeengine/splitter/we_cmdargs.h index 64eb3dedb..64d17b598 100644 --- a/writeengine/splitter/we_cmdargs.h +++ b/writeengine/splitter/we_cmdargs.h @@ -24,28 +24,33 @@ #include #include +#include #include "we_xmlgetter.h" #include "we_type.h" +namespace boost::program_options +{ +class options_description; +} + namespace WriteEngine { class WECmdArgs { public: WECmdArgs(int argc, char** argv); - virtual ~WECmdArgs() = default; + virtual ~WECmdArgs(); typedef std::vector VecInts; typedef std::vector VecArgs; void appTestFunction(); void parseCmdLineArgs(int argc, char** argv); - std::string getCpImportCmdLine(); + std::string getCpImportCmdLine(bool skipRows); void setSchemaAndTableFromJobFile(std::string& JobName); void setEnclByAndEscCharFromJobFile(std::string& JobName); void usage(); - void usageMode3(); bool checkForCornerCases(); void checkForBulkLogDir(const std::string& BulkRoot); @@ -76,11 +81,11 @@ class WECmdArgs { return fLocFile; } - int getReadBufSize() + int getReadBufSize() const { return fReadBufSize; } - int getMode() + int getMode() const { return fMode; } @@ -88,36 +93,40 @@ class WECmdArgs { return fArgMode; } - bool isHelpMode() + bool isHelpMode() const { return fHelp; } - int getDebugLvl() + int getDebugLvl() const { 
return fDebugLvl; } - char getEnclChar() + char getEnclChar() const { return fEnclosedChar; } - char getEscChar() + char getEscChar() const { return fEscChar; } - char getDelimChar() + char getDelimChar() const { return fColDelim; } + int getSkipRows() const + { + return fSkipRows; + } ImportDataMode getImportDataMode() const { return fImportDataMode; } - bool getConsoleLog() + bool getConsoleLog() const { return fConsoleLog; } - bool isCpimportInvokeMode() + bool isCpimportInvokeMode() const { return (fBlockMode3) ? false : fCpiInvoke; } @@ -125,11 +134,15 @@ class WECmdArgs { return fQuiteMode; } - void setJobId(std::string fJobId) + void setJobId(const std::string& fJobId) { this->fJobId = fJobId; } - void setLocFile(std::string fLocFile) + void setOrigJobId() + { + this->fOrigJobId = fJobId; + } + void setLocFile(const std::string& fLocFile) { this->fLocFile = fLocFile; } @@ -141,7 +154,7 @@ class WECmdArgs { this->fArgMode = ArgMode; } - void setPmFile(std::string fPmFile) + void setPmFile(const std::string& fPmFile) { this->fPmFile = fPmFile; } @@ -183,7 +196,7 @@ class WECmdArgs { fUUID = jobUUID; } - bool getConsoleOutput() + bool getConsoleOutput() const { return fConsoleOutput; } @@ -194,7 +207,7 @@ class WECmdArgs bool getPmStatus(int Id); bool str2PmList(std::string& PmList, VecInts& V); - int getPmVecSize() + size_t getPmVecSize() const { return fPmVec.size(); } @@ -265,7 +278,7 @@ class WECmdArgs { return fErrorDir; } - void setErrorDir(std::string fErrorDir) + void setErrorDir(const std::string& fErrorDir) { this->fErrorDir = fErrorDir; } @@ -273,24 +286,26 @@ class WECmdArgs std::string PrepMode2ListOfFiles(std::string& FileName); // Bug 4342 void getColumnList(std::set& columnList) const; + private: + static void checkIntArg(const std::string& name, long min, long max, int value); private: // variables for SplitterApp VecArgs fVecArgs; VecInts fPmVec; VecArgs fVecJobFiles; // JobFiles splitter from master JobFile - int fMultiTableCount; // 
MultiTable count + int fMultiTableCount{0}; // MultiTable count VecArgs fColFldsFromJobFile; // List of columns from any job file, that - // represent fields in the import data + // represent fields in the import data std::string fJobId; // JobID std::string fOrigJobId; // Original JobID, in case we have to split it - bool fJobLogOnly; // Job number is only for log filename only - bool fHelp; // Help mode - int fMode; // splitter Mode - int fArgMode; // Argument mode, dep. on this fMode is decided. - bool fQuiteMode; // in quite mode or not - bool fConsoleLog; // Log everything to console - w.r.t cpimport - int fVerbose; // how many v's + bool fJobLogOnly{false}; // Job number is only for log filename only + bool fHelp{false}; // Help mode + int fMode{1}; // splitter Mode + int fArgMode{-1}; // Argument mode, dep. on this fMode is decided. + bool fQuiteMode{true}; // in quite mode or not + bool fConsoleLog{false}; // Log everything to console - w.r.t cpimport + int fVerbose{0}; // how many v's std::string fPmFile; // FileName at PM std::string fPmFilePath; // Path of input file in PM std::string fLocFile; // Local file name @@ -305,32 +320,33 @@ class WECmdArgs std::string fS3Host; // S3 Host std::string fS3Region; // S3 Region - unsigned int fBatchQty; // No. of batch Qty. - int fNoOfReadThrds; // No. of read buffers - // std::string fConfig; // config filename - int fDebugLvl; // Debug level - int fMaxErrors; // Max allowable errors - int fReadBufSize; // Read buffer size - int fIOReadBufSize; // I/O read buffer size - int fSetBufSize; // Buff size w/setvbuf - char fColDelim; // column delimiter - char fEnclosedChar; // enclosed by char - char fEscChar; // esc char - int fNoOfWriteThrds; // No. 
of write threads - bool fNullStrMode; // set null string mode - treat null as null - ImportDataMode fImportDataMode; // Importing text or binary data - std::string fPrgmName; // argv[0] - std::string fSchema; // Schema name - positional parmater - std::string fTable; // Table name - table name parameter + int fBatchQty{10000}; // No. of batch Qty. + int fNoOfReadThrds{0}; // No. of read buffers + int fDebugLvl{0}; // Debug level + int fMaxErrors{-1}; // Max allowable errors + int fReadBufSize{0}; // Read buffer size + int fIOReadBufSize{0}; // I/O read buffer size + int fSetBufSize{0}; // Buff size w/setvbuf + char fColDelim{'|'}; // column delimiter + char fEnclosedChar{0}; // enclosed by char + char fEscChar{0}; // esc char + int fSkipRows{0}; // skip header + int fNoOfWriteThrds{0}; // No. of write threads + bool fNullStrMode{false}; // set null string mode - treat null as null + ImportDataMode fImportDataMode{IMPORT_DATA_TEXT}; // Importing text or binary data + std::string fPrgmName; // argv[0] + std::string fSchema; // Schema name - positional parmater + std::string fTable; // Table name - table name parameter - bool fCpiInvoke; // invoke cpimport in mode 3 - bool fBlockMode3; // Do not allow Mode 3 - bool fbTruncationAsError; // Treat string truncation as error - boost::uuids::uuid fUUID; - bool fConsoleOutput; // If false, no output to console. - std::string fTimeZone; // Timezone to use for TIMESTAMP datatype - std::string fUsername; // Username of the data files owner - std::string fErrorDir; + bool fCpiInvoke{false}; // invoke cpimport in mode 3 + bool fBlockMode3{false}; // Do not allow Mode 3 + bool fbTruncationAsError{false}; // Treat string truncation as error + boost::uuids::uuid fUUID{boost::uuids::nil_generator()()}; + bool fConsoleOutput{true}; // If false, no output to console. 
+ std::string fTimeZone{"SYSTEM"}; // Timezone to use for TIMESTAMP datatype + std::string fUsername; // Username of the data files owner + std::string fErrorDir{MCSLOGDIR "/cpimport/"}; + std::unique_ptr fOptions; }; //---------------------------------------------------------------------- diff --git a/writeengine/splitter/we_filereadthread.cpp b/writeengine/splitter/we_filereadthread.cpp index ac7e84d65..ea83d229d 100644 --- a/writeengine/splitter/we_filereadthread.cpp +++ b/writeengine/splitter/we_filereadthread.cpp @@ -79,6 +79,7 @@ WEFileReadThread::WEFileReadThread(WESDHandler& aSdh) , fEncl('\0') , fEsc('\\') , fDelim('|') + , fSkipRows(0) { // TODO batch qty to get from config fBatchQty = 10000; @@ -187,6 +188,8 @@ void WEFileReadThread::setup(std::string FileName) if (aEncl != 0) fEnclEsc = true; + fSkipRows = fSdh.getSkipRows(); + // BUG 4342 - Need to support "list of infiles" // chkForListOfFiles(FileName); - List prepared in sdhandler. @@ -216,12 +219,10 @@ void WEFileReadThread::setup(std::string FileName) //------------------------------------------------------------------------------ -bool WEFileReadThread::chkForListOfFiles(std::string& FileName) +bool WEFileReadThread::chkForListOfFiles(const std::string& fileName) { // cout << "Inside chkForListOfFiles("<< FileName << ")" << endl; - std::string aFileName = FileName; - - istringstream iss(aFileName); + istringstream iss(fileName); ostringstream oss; size_t start = 0, end = 0; const char* sep = " ,|"; @@ -229,8 +230,8 @@ bool WEFileReadThread::chkForListOfFiles(std::string& FileName) do { - end = aFileName.find_first_of(sep, start); - std::string aFile = aFileName.substr(start, end - start); + end = fileName.find_first_of(sep, start); + std::string aFile = fileName.substr(start, end - start); if (aFile == "STDIN" || aFile == "stdin") aFile = "/dev/stdin"; @@ -270,9 +271,9 @@ std::string WEFileReadThread::getNextInputDataFile() } 
//------------------------------------------------------------------------------ -void WEFileReadThread::add2InputDataFileList(std::string& FileName) +void WEFileReadThread::add2InputDataFileList(const std::string& fileName) { - fInfileList.push_front(FileName); + fInfileList.push_front(fileName); } //------------------------------------------------------------------------------ @@ -371,17 +372,33 @@ unsigned int WEFileReadThread::readDataFile(messageqcpp::SBS& Sbs) // For now we are going to send KEEPALIVES //*Sbs << (ByteStream::byte)(WE_CLT_SRV_KEEPALIVE); - if ((fInFile.good()) && (!fInFile.eof())) + if (fInFile.good() && !fInFile.eof()) { // cout << "Inside WEFileReadThread::readDataFile" << endl; // char aBuff[1024*1024]; // TODO May have to change it later // char*pStart = aBuff; unsigned int aIdx = 0; int aLen = 0; - *Sbs << (ByteStream::byte)(WE_CLT_SRV_DATA); + *Sbs << static_cast(WE_CLT_SRV_DATA); - while ((!fInFile.eof()) && (aIdx < getBatchQty())) + while (!fInFile.eof() && aIdx < getBatchQty()) { + if (fSkipRows > 0) + { + fSkipRows--; + fInFile.getline(fBuff, fBuffSize - 1); + if (fSdh.getDebugLvl() > 3) + { + aLen = fInFile.gcount(); + if (aLen > 0 && aLen < fBuffSize - 2) + { + fBuff[aLen - 1] = 0; + cout << "Skip header row (" << fSkipRows<< " to go): " << fBuff << endl; + } + } + continue; + } + if (fEnclEsc) { // pStart = aBuff; @@ -551,6 +568,9 @@ void WEFileReadThread::openInFile() fInFile.rdbuf(fIfFile.rdbuf()); //@BUG 4326 } + // Got new file, so reset fSkipRows + fSkipRows = fSdh.getSkipRows(); + //@BUG 4326 -below three lines commented out // if (!fInFile.is_open()) fInFile.open(fInFileName.c_str()); // if (!fInFile.good()) @@ -657,13 +677,13 @@ void WEFileReadThread::initS3Connection(const WECmdArgs& args) s3Host = args.getS3Host(); ms3_library_init(); s3Connection = - ms3_init(s3Key.c_str(), s3Secret.c_str(), s3Region.c_str(), (s3Host.empty() ? 
NULL : s3Host.c_str())); + ms3_init(s3Key.c_str(), s3Secret.c_str(), s3Region.c_str(), (s3Host.empty() ? nullptr : s3Host.c_str())); if (!s3Connection) throw runtime_error("failed to get an S3 connection"); } else - s3Connection = NULL; - buf = NULL; + s3Connection = nullptr; + buf = nullptr; } //------------------------------------------------------------------------------ diff --git a/writeengine/splitter/we_filereadthread.h b/writeengine/splitter/we_filereadthread.h index 83e819500..665b3bf95 100644 --- a/writeengine/splitter/we_filereadthread.h +++ b/writeengine/splitter/we_filereadthread.h @@ -42,13 +42,11 @@ class WEFileReadThread; class WEReadThreadRunner { public: - WEReadThreadRunner(WEFileReadThread& Owner) : fRef(Owner) + explicit WEReadThreadRunner(WEFileReadThread& Owner) : fRef(Owner) { // ctor } - ~WEReadThreadRunner() - { - } + ~WEReadThreadRunner() = default; void operator()(); // Thread function @@ -61,7 +59,7 @@ class WEReadThreadRunner class WEFileReadThread { public: - WEFileReadThread(WESDHandler& aSdh); + explicit WEFileReadThread(WESDHandler& aSdh); virtual ~WEFileReadThread(); void reset(); @@ -82,9 +80,9 @@ class WEFileReadThread { return fContinue; } - void setContinue(bool fContinue) + void setContinue(bool cont) { - this->fContinue = fContinue; + fContinue = cont; } std::string getInFileName() const { @@ -98,30 +96,34 @@ class WEFileReadThread { return fBatchQty; } - void setFpThread(boost::thread* fpThread) + void setFpThread(boost::thread* pThread) { - this->fpThread = fpThread; + fpThread = pThread; } - void setInFileName(std::string fInFileName) + void setInFileName(const std::string& inFileName) { - if ((0 == fInFileName.compare("STDIN")) || (0 == fInFileName.compare("stdin"))) - this->fInFileName = "/dev/stdin"; + if (0 == inFileName.compare("STDIN") || 0 == inFileName.compare("stdin")) + { + fInFileName = "/dev/stdin"; + } else - this->fInFileName = fInFileName; + { + fInFileName = inFileName; + } } //@BUG 4326 const 
std::istream& getInFile() const { return fInFile; } - void setBatchQty(unsigned int BatchQty) + void setBatchQty(unsigned int batchQty) { - fBatchQty = BatchQty; + fBatchQty = batchQty; } - bool chkForListOfFiles(std::string& FileName); + bool chkForListOfFiles(const std::string& fileName); std::string getNextInputDataFile(); - void add2InputDataFileList(std::string& FileName); + void add2InputDataFileList(const std::string& fileName); private: enum @@ -130,9 +132,9 @@ class WEFileReadThread }; // don't allow anyone else to set - void setTgtPmId(unsigned int fTgtPmId) + void setTgtPmId(unsigned int tgtPmId) { - this->fTgtPmId = fTgtPmId; + fTgtPmId = tgtPmId; } WESDHandler& fSdh; @@ -148,11 +150,12 @@ class WEFileReadThread unsigned int fTgtPmId; unsigned int fBatchQty; - bool fEnclEsc; // Encl/Esc char is set - char fEncl; // Encl char - char fEsc; // Esc char - char fDelim; // Column Delimit char - char* fBuff; // main data buffer + bool fEnclEsc; // Encl/Esc char is set + char fEncl; // Encl char + char fEsc; // Esc char + char fDelim; // Column Delimit char + size_t fSkipRows; // Header rows to skip + char* fBuff; // main data buffer int fBuffSize; /* To support mode 1 imports from objects on S3 */ diff --git a/writeengine/splitter/we_sdhandler.cpp b/writeengine/splitter/we_sdhandler.cpp index d4eb6af6e..e5a6d688b 100644 --- a/writeengine/splitter/we_sdhandler.cpp +++ b/writeengine/splitter/we_sdhandler.cpp @@ -767,7 +767,7 @@ void WESDHandler::setup() oss << "Running distributed import (mode "; oss << fRef.fCmdArgs.getMode() << ") on "; - if (fRef.fCmdArgs.getPmVecSize() == fPmCount) + if (fRef.fCmdArgs.getPmVecSize() == static_cast(fPmCount)) oss << "all PMs..."; else { @@ -2548,20 +2548,20 @@ void WESDHandler::exportJobFile(std::string& JobId, std::string& JobFileName) } //------------------------------------------------------------------------------ -bool WESDHandler::getConsoleLog() +bool WESDHandler::getConsoleLog() const { return 
fRef.fCmdArgs.getConsoleLog(); } //------------------------------------------------------------------------------ -char WESDHandler::getEnclChar() +char WESDHandler::getEnclChar() const { return fRef.fCmdArgs.getEnclChar(); } //------------------------------------------------------------------------------ -char WESDHandler::getEscChar() +char WESDHandler::getEscChar() const { return fRef.fCmdArgs.getEscChar(); } @@ -2575,11 +2575,16 @@ int WESDHandler::getReadBufSize() //------------------------------------------------------------------------------ -char WESDHandler::getDelimChar() +char WESDHandler::getDelimChar() const { return fRef.fCmdArgs.getDelimChar(); } +size_t WESDHandler::getSkipRows() const +{ + return fRef.fCmdArgs.getSkipRows(); +} + //------------------------------------------------------------------------------ std::string WESDHandler::getTableName() const diff --git a/writeengine/splitter/we_sdhandler.h b/writeengine/splitter/we_sdhandler.h index ec9e6a78d..55ef07178 100644 --- a/writeengine/splitter/we_sdhandler.h +++ b/writeengine/splitter/we_sdhandler.h @@ -143,10 +143,11 @@ class WESDHandler void sendHeartbeats(); std::string getTableName() const; std::string getSchemaName() const; - char getEnclChar(); - char getEscChar(); - char getDelimChar(); - bool getConsoleLog(); + char getEnclChar() const; + char getEscChar() const; + char getDelimChar() const; + size_t getSkipRows() const; + bool getConsoleLog() const; int getReadBufSize(); ImportDataMode getImportDataMode() const; void sysLog(const logging::Message::Args& msgArgs, logging::LOG_TYPE logType, diff --git a/writeengine/splitter/we_splclient.h b/writeengine/splitter/we_splclient.h index e38ee0d25..ae3b84a22 100644 --- a/writeengine/splitter/we_splclient.h +++ b/writeengine/splitter/we_splclient.h @@ -46,9 +46,7 @@ class WEColOORInfo // Column Out-Of-Range Info WEColOORInfo() : fColNum(0), fColType(execplan::CalpontSystemCatalog::INT), fNoOfOORs(0) { } - ~WEColOORInfo() - { - } + 
~WEColOORInfo() = default; public: int fColNum; @@ -63,14 +61,12 @@ class WESdHandlerException : public std::exception { public: std::string fWhat; - WESdHandlerException(std::string& What) throw() + explicit WESdHandlerException(const std::string& What) noexcept { fWhat = What; } - virtual ~WESdHandlerException() throw() - { - } - virtual const char* what() const throw() + ~WESdHandlerException() noexcept override = default; + const char* what() const noexcept override { return fWhat.c_str(); } @@ -82,12 +78,10 @@ class WESdHandlerException : public std::exception class WESplClientRunner { public: - WESplClientRunner(WESplClient& Sc) : fOwner(Sc) + explicit WESplClientRunner(WESplClient& Sc) : fOwner(Sc) { /* ctor */ } - virtual ~WESplClientRunner() - { /* dtor */ - } + virtual ~WESplClientRunner() = default; void operator()(); public: @@ -389,9 +383,7 @@ class WESplClient WERowsUploadInfo() : fRowsRead(0), fRowsInserted(0) { } - ~WERowsUploadInfo() - { - } + ~WERowsUploadInfo() = default; public: int64_t fRowsRead; diff --git a/writeengine/splitter/we_splitterapp.cpp b/writeengine/splitter/we_splitterapp.cpp index a9f5a6d2f..4f670899e 100644 --- a/writeengine/splitter/we_splitterapp.cpp +++ b/writeengine/splitter/we_splitterapp.cpp @@ -64,7 +64,6 @@ WESplitterApp::WESplitterApp(WECmdArgs& CmdArgs) : fCmdArgs(CmdArgs), fDh(*this) fpSysLog = SimpleSysLog::instance(); fpSysLog->setLoggingID(logging::LoggingID(SUBSYSTEM_ID_WE_SPLIT)); setupSignalHandlers(); - std::string err; fDh.setDebugLvl(fCmdArgs.getDebugLvl()); fDh.check4CpiInvokeMode(); @@ -100,6 +99,7 @@ WESplitterApp::WESplitterApp(WECmdArgs& CmdArgs) : fCmdArgs(CmdArgs), fDh(*this) } catch (std::exception& ex) { + std::string err; // err = string("Error in constructing WESplitterApp") + ex.what(); err = ex.what(); // cleaning up for BUG 4298 logging::Message::Args errMsgArgs; @@ -139,10 +139,10 @@ WESplitterApp::~WESplitterApp() // fDh.shutdown(); usleep(1000); // 1 millisec just checking - std::string aStr 
= "Calling WESplitterApp Destructor\n"; - if (fDh.getDebugLvl()) - cout << aStr << endl; + { + cout << "Calling WESplitterApp Destructor" << endl; + } } //------------------------------------------------------------------------------ @@ -151,18 +151,18 @@ WESplitterApp::~WESplitterApp() void WESplitterApp::setupSignalHandlers() { - struct sigaction sa; + struct sigaction sa{}; memset(&sa, 0, sizeof(sa)); sa.sa_handler = WESplitterApp::onSigInterrupt; - sigaction(SIGINT, &sa, 0); + sigaction(SIGINT, &sa, nullptr); sa.sa_handler = WESplitterApp::onSigTerminate; - sigaction(SIGTERM, &sa, 0); + sigaction(SIGTERM, &sa, nullptr); sa.sa_handler = SIG_IGN; - sigaction(SIGPIPE, &sa, 0); + sigaction(SIGPIPE, &sa, nullptr); sa.sa_handler = WESplitterApp::onSigHup; - sigaction(SIGHUP, &sa, 0); + sigaction(SIGHUP, &sa, nullptr); sa.sa_handler = WESplitterApp::onSigInterrupt; - sigaction(SIGUSR1, &sa, 0); + sigaction(SIGUSR1, &sa, nullptr); /* signal(SIGPIPE, SIG_IGN); signal(SIGINT, WESplitterApp::onSigInterrupt); @@ -258,7 +258,7 @@ void WESplitterApp::processMessages() } aBs.restart(); - std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine(); + std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine(false); fDh.fLog.logMsg(aCpImpCmd, MSGLVL_INFO2); if (fDh.getDebugLvl()) @@ -315,7 +315,7 @@ void WESplitterApp::processMessages() } aBs.restart(); - std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine(); + std::string aCpImpCmd = fCmdArgs.getCpImportCmdLine(false); fDh.fLog.logMsg(aCpImpCmd, MSGLVL_INFO2); if (fDh.getDebugLvl()) @@ -467,7 +467,7 @@ void WESplitterApp::invokeCpimport() fCmdArgs.setJobUUID(u); fCmdArgs.setMode(3); - std::string aCmdLineStr = fCmdArgs.getCpImportCmdLine(); + std::string aCmdLineStr = fCmdArgs.getCpImportCmdLine(true); if (fDh.getDebugLvl()) cout << "CPI CmdLineArgs : " << aCmdLineStr << endl; @@ -477,7 +477,6 @@ void WESplitterApp::invokeCpimport() std::istringstream ss(aCmdLineStr); std::string arg; std::vector v2; - v2.reserve(50); while (ss >> arg) { 
@@ -490,7 +489,7 @@ void WESplitterApp::invokeCpimport() Cmds.push_back(const_cast(v2[j].c_str())); } - Cmds.push_back(0); // null terminate + Cmds.push_back(nullptr); // null terminate int aRet = execvp(Cmds[0], &Cmds[0]); // NOTE - works with full Path @@ -515,7 +514,7 @@ void WESplitterApp::updateWithJobFile(int aIdx) int main(int argc, char** argv) { std::string err; - std::cin.sync_with_stdio(false); + std::istream::sync_with_stdio(false); try { @@ -528,7 +527,7 @@ int main(int argc, char** argv) for (int idx = 0; idx < aTblCnt; idx++) { aWESplitterApp.fDh.reset(); - aWESplitterApp.fContinue = true; + WriteEngine::WESplitterApp::fContinue = true; aWESplitterApp.updateWithJobFile(idx); try @@ -541,10 +540,10 @@ int main(int argc, char** argv) err = ex.what(); // cleaning up for BUG 4298 logging::Message::Args errMsgArgs; errMsgArgs.add(err); - aWESplitterApp.fpSysLog->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0000); + WriteEngine::WESplitterApp::fpSysLog->logMsg(errMsgArgs, logging::LOG_TYPE_ERROR, logging::M0000); SPLTR_EXIT_STATUS = 1; aWESplitterApp.fDh.fLog.logMsg(err, WriteEngine::MSGLVL_ERROR); - aWESplitterApp.fContinue = false; + WriteEngine::WESplitterApp::fContinue = false; // throw runtime_error(err); BUG 4298 } diff --git a/writeengine/splitter/we_xmlgetter.cpp b/writeengine/splitter/we_xmlgetter.cpp index 222e4fd72..03992cb35 100644 --- a/writeengine/splitter/we_xmlgetter.cpp +++ b/writeengine/splitter/we_xmlgetter.cpp @@ -46,20 +46,23 @@ namespace WriteEngine //------------------------------------------------------------------------------ // WEXmlgetter constructor //------------------------------------------------------------------------------ -WEXmlgetter::WEXmlgetter(std::string& ConfigName) : fConfigName(ConfigName), fDoc(NULL), fpRoot(NULL) +WEXmlgetter::WEXmlgetter(const std::string& ConfigName) + : fConfigName(ConfigName) + , fDoc(nullptr) + , fpRoot(nullptr) { // xmlNodePtr curPtr; fDoc = xmlParseFile(ConfigName.c_str()); - if 
(fDoc == NULL) + if (fDoc == nullptr) throw runtime_error("WEXmlgetter::getConfig(): no XML document!"); fpRoot = xmlDocGetRootElement(fDoc); - if (fpRoot == NULL) + if (fpRoot == nullptr) { xmlFreeDoc(fDoc); - fDoc = NULL; + fDoc = nullptr; throw runtime_error("WEXmlgetter::getConfig(): no XML Root Tag!"); } } @@ -70,24 +73,24 @@ WEXmlgetter::WEXmlgetter(std::string& ConfigName) : fConfigName(ConfigName), fDo WEXmlgetter::~WEXmlgetter() { xmlFreeDoc(fDoc); - fDoc = NULL; + fDoc = nullptr; } //------------------------------------------------------------------------------ // Get/return the property or attribute value (strVal) for the specified xml tag // (pNode) and property/attribute (pTag) //------------------------------------------------------------------------------ -bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal) const +bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal) { - xmlChar* pTmp = NULL; + xmlChar* pTmp = nullptr; bool bFound = false; - pTmp = xmlGetProp(const_cast(pNode), (xmlChar*)pTag); + pTmp = xmlGetProp(pNode, reinterpret_cast(pTag)); if (pTmp) { bFound = true; - strVal = (char*)pTmp; + strVal = reinterpret_cast(pTmp); xmlFree(pTmp); } else @@ -101,19 +104,19 @@ bool WEXmlgetter::getNodeAttribute(const xmlNode* pNode, const char* pTag, std:: //------------------------------------------------------------------------------ // Get/return the node content (strVal) for the specified xml tag (pNode) //------------------------------------------------------------------------------ -bool WEXmlgetter::getNodeContent(const xmlNode* pNode, std::string& strVal) const +bool WEXmlgetter::getNodeContent(const xmlNode* pNode, std::string& strVal) { - xmlChar* pTmp = NULL; + xmlChar* pTmp = nullptr; bool bFound = false; - if (pNode->children != NULL) + if (pNode->children != nullptr) { pTmp = xmlNodeGetContent(pNode->children); if (pTmp) { bFound = true; - strVal = 
(char*)pTmp; + strVal = reinterpret_cast(pTmp); xmlFree(pTmp); } else @@ -152,29 +155,29 @@ void WEXmlgetter::getConfig(const string& section, const string& name, vectorxmlChildrenNode; + const xmlNode* pPtr = fpRoot->xmlChildrenNode; - while (pPtr != NULL) + while (pPtr != nullptr) { // cout << "pPtr->name: " << // (const xmlChar*)pPtr->name << std::endl; - if ((!xmlStrcmp(pPtr->name, (const xmlChar*)section.c_str()))) + if ((!xmlStrcmp(pPtr->name, reinterpret_cast(section.c_str())))) { xmlNodePtr pPtr2 = pPtr->xmlChildrenNode; - while (pPtr2 != NULL) + while (pPtr2 != nullptr) { // cout << " pPtr2->name: " << // (const xmlChar*)pPtr2->name << std::endl; - if ((!xmlStrcmp(pPtr2->name, (const xmlChar*)name.c_str()))) + if ((!xmlStrcmp(pPtr2->name, reinterpret_cast(name.c_str())))) { xmlNodePtr pPtr3 = pPtr2->xmlChildrenNode; - values.push_back((const char*)pPtr3->content); + values.emplace_back(reinterpret_cast(pPtr3->content)); // cout << " pPtr3->name: " << // (const xmlChar*)pPtr3->name << @@ -204,8 +207,8 @@ std::string WEXmlgetter::getValue(const vector& sections) const { std::string aRet; const xmlNode* pPtr = fpRoot; - int aSize = sections.size(); - int aIdx = 0; + auto aSize = sections.size(); + size_t aIdx = 0; // cout << aSize << endl; while (aIdx < aSize) @@ -213,7 +216,7 @@ std::string WEXmlgetter::getValue(const vector& sections) const // cout << aIdx <<" "<< sections[aIdx] << endl; pPtr = getNode(pPtr, sections[aIdx]); - if ((pPtr == NULL) || (aIdx == aSize - 1)) + if ((pPtr == nullptr) || (aIdx == aSize - 1)) break; else { @@ -223,7 +226,7 @@ std::string WEXmlgetter::getValue(const vector& sections) const } } - if (pPtr != NULL) + if (pPtr != nullptr) { // aRet = (const char*)pPtr->content; std::string aBuff; @@ -240,17 +243,17 @@ std::string WEXmlgetter::getValue(const vector& sections) const // a node with the specified name (section). The xmlNode (if found) is // returned. 
//------------------------------------------------------------------------------ -const xmlNode* WEXmlgetter::getNode(const xmlNode* pParent, const string& section) const +const xmlNode* WEXmlgetter::getNode(const xmlNode* pParent, const string& section) { - if (pParent == NULL) - return NULL; + if (pParent == nullptr) + return nullptr; const xmlNode* pPtr = pParent; - while (pPtr != NULL) + while (pPtr != nullptr) { // cout << "getNode Name " << (const char*)pPtr->name << endl; - if (!xmlStrcmp(pPtr->name, (const xmlChar*)section.c_str())) + if (!xmlStrcmp(pPtr->name, reinterpret_cast(section.c_str()))) return pPtr; else pPtr = pPtr->next; @@ -268,12 +271,12 @@ std::string WEXmlgetter::getAttribute(const vector& sections, const stri { std::string aRet; const xmlNode* pPtr = fpRoot; - int aSize = sections.size(); + auto aSize = sections.size(); if (aSize == 0) throw invalid_argument("WEXmlgetter::getAttribute(): section must be valid"); - int aIdx = 0; + size_t aIdx = 0; // cout << aSize << endl; while (aIdx < aSize) @@ -281,7 +284,7 @@ std::string WEXmlgetter::getAttribute(const vector& sections, const stri // cout << aIdx <<" "<< sections[aIdx] << endl; pPtr = getNode(pPtr, sections[aIdx]); - if ((pPtr == NULL) || (aIdx == aSize - 1)) + if ((pPtr == nullptr) || (aIdx == aSize - 1)) break; else { @@ -291,7 +294,7 @@ std::string WEXmlgetter::getAttribute(const vector& sections, const stri } } - if (pPtr != NULL) + if (pPtr != nullptr) { std::string aBuff; @@ -315,10 +318,10 @@ std::string WEXmlgetter::getAttribute(const vector& sections, const stri // is returned. 
//------------------------------------------------------------------------------ void WEXmlgetter::getAttributeListForAllChildren(const vector& sections, const string& attributeTag, - vector& attributeValues) + vector& attributeValues) const { const xmlNode* pPtr = fpRoot; - int aSize = sections.size(); + auto aSize = sections.size(); if (aSize == 0) { @@ -328,13 +331,13 @@ void WEXmlgetter::getAttributeListForAllChildren(const vector& sections, } // Step down the branch that has the nodes of interest - int aIdx = 0; + size_t aIdx = 0; while (aIdx < aSize) { pPtr = getNode(pPtr, sections[aIdx]); - if ((pPtr == NULL) || (aIdx == aSize - 1)) + if ((pPtr == nullptr) || (aIdx == aSize - 1)) { break; } @@ -347,9 +350,9 @@ void WEXmlgetter::getAttributeListForAllChildren(const vector& sections, // Look for all the "matching" nodes at the end of the branch, and // get the requested attribute value for each matching node. - if (pPtr != NULL) + if (pPtr != nullptr) { - while (pPtr != NULL) + while (pPtr != nullptr) { std::string attrib; diff --git a/writeengine/splitter/we_xmlgetter.h b/writeengine/splitter/we_xmlgetter.h index ba1480328..1d566b79c 100644 --- a/writeengine/splitter/we_xmlgetter.h +++ b/writeengine/splitter/we_xmlgetter.h @@ -36,23 +36,23 @@ namespace WriteEngine class WEXmlgetter { public: - WEXmlgetter(std::string& ConfigName); - virtual ~WEXmlgetter(); + explicit WEXmlgetter(const std::string& ConfigName); + ~WEXmlgetter(); public: //..Public methods - std::string getValue(const std::vector& section) const; + std::string getValue(const std::vector& sections) const; std::string getAttribute(const std::vector& sections, const std::string& Tag) const; void getConfig(const std::string& section, const std::string& name, std::vector& values) const; void getAttributeListForAllChildren(const std::vector& sections, const std::string& attributeTag, - std::vector& attributeValues); + std::vector& attributeValues) const; private: //..Private methods - const xmlNode* 
getNode(const xmlNode* pParent, const std::string& section) const; - bool getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal) const; - bool getNodeContent(const xmlNode* pNode, std::string& strVal) const; + static const xmlNode* getNode(const xmlNode* pParent, const std::string& section); + static bool getNodeAttribute(const xmlNode* pNode, const char* pTag, std::string& strVal); + static bool getNodeContent(const xmlNode* pNode, std::string& strVal); //..Private data members std::string fConfigName; // xml filename diff --git a/writeengine/xml/we_xmlgendata.cpp b/writeengine/xml/we_xmlgendata.cpp index e619875f1..e977f44dd 100644 --- a/writeengine/xml/we_xmlgendata.cpp +++ b/writeengine/xml/we_xmlgendata.cpp @@ -35,18 +35,19 @@ namespace WriteEngine { /* static */ const std::string XMLGenData::DELIMITER("-d"); /* static */ const std::string XMLGenData::DESCRIPTION("-s"); - /* static */ const std::string XMLGenData::ENCLOSED_BY_CHAR("-E"); - /* static */ const std::string XMLGenData::ESCAPE_CHAR("-C"); - /* static */ const std::string XMLGenData::JOBID("-j"); +/* static */ const std::string XMLGenData::ENCLOSED_BY_CHAR("-E"); +/* static */ const std::string XMLGenData::ESCAPE_CHAR("-C"); +/* static */ const std::string XMLGenData::JOBID("-j"); /* static */ const std::string XMLGenData::MAXERROR("-e"); /* static */ const std::string XMLGenData::NAME("-n"); /* static */ const std::string XMLGenData::PATH("-p"); - /* static */ const std::string XMLGenData::RPT_DEBUG("-b"); +/* static */ const std::string XMLGenData::RPT_DEBUG("-b"); /* static */ const std::string XMLGenData::USER("-u"); /* static */ const std::string XMLGenData::NO_OF_READ_BUFFER("-r"); /* static */ const std::string XMLGenData::READ_BUFFER_CAPACITY("-c"); /* static */ const std::string XMLGenData::WRITE_BUFFER_SIZE("-w"); /* static */ const std::string XMLGenData::EXT("-x"); +/* static */ const std::string XMLGenData::SKIP_ROWS("-O"); 
//------------------------------------------------------------------------------ // XMLGenData constructor @@ -54,39 +55,38 @@ namespace WriteEngine //------------------------------------------------------------------------------ XMLGenData::XMLGenData() { - fParms.insert(ParmList::value_type(DELIMITER, std::string("|"))); - fParms.insert(ParmList::value_type(DESCRIPTION, std::string())); - fParms.insert(ParmList::value_type(ENCLOSED_BY_CHAR, std::string(""))); - fParms.insert(ParmList::value_type(ESCAPE_CHAR, std::string("\\"))); - fParms.insert(ParmList::value_type(JOBID, std::string("299"))); - fParms.insert(ParmList::value_type(MAXERROR, std::string("10"))); - fParms.insert(ParmList::value_type(NAME, std::string())); + fParms.emplace(DELIMITER, "|"); + fParms.emplace(DESCRIPTION, ""); + fParms.emplace(ENCLOSED_BY_CHAR, ""); + fParms.emplace(ESCAPE_CHAR, "\\"); + fParms.emplace(JOBID, "299"); + fParms.emplace(MAXERROR, "10"); + fParms.emplace(NAME, ""); boost::filesystem::path p{std::string(Config::getBulkRoot())}; p /= JOBDIR; - fParms.insert(ParmList::value_type(PATH, p.string())); + fParms.emplace(PATH, p.string()); - fParms.insert(ParmList::value_type(RPT_DEBUG, std::string("0"))); - fParms.insert(ParmList::value_type(USER, std::string())); - fParms.insert(ParmList::value_type(NO_OF_READ_BUFFER, std::string("5"))); - fParms.insert(ParmList::value_type(READ_BUFFER_CAPACITY, std::string("1048576"))); - fParms.insert(ParmList::value_type(WRITE_BUFFER_SIZE, std::string("10485760"))); - fParms.insert(ParmList::value_type(EXT, std::string("tbl"))); + fParms.emplace(RPT_DEBUG, "0"); + fParms.emplace(USER, ""); + fParms.emplace(NO_OF_READ_BUFFER, "5"); + fParms.emplace(READ_BUFFER_CAPACITY, "1048576"); + fParms.emplace(WRITE_BUFFER_SIZE, "10485760"); + fParms.emplace(EXT, "tbl"); + fParms.emplace(SKIP_ROWS, "0"); } //------------------------------------------------------------------------------ // XMLGenData destructor 
//------------------------------------------------------------------------------ /* virtual */ -XMLGenData::~XMLGenData() -{ -} +XMLGenData::~XMLGenData() = default; //------------------------------------------------------------------------------ // Return value for the specified parm. //------------------------------------------------------------------------------ std::string XMLGenData::getParm(const std::string& key) const { - ParmList::const_iterator p = fParms.find(key); + auto p = fParms.find(key); if (fParms.end() != p) return p->second; diff --git a/writeengine/xml/we_xmlgendata.h b/writeengine/xml/we_xmlgendata.h index 633f1ed8a..be9b4a2a9 100644 --- a/writeengine/xml/we_xmlgendata.h +++ b/writeengine/xml/we_xmlgendata.h @@ -60,10 +60,13 @@ class XMLGenData EXPORT const static std::string READ_BUFFER_CAPACITY; EXPORT const static std::string WRITE_BUFFER_SIZE; EXPORT const static std::string EXT; + EXPORT const static std::string SKIP_ROWS; /** @brief XMLGenData constructor */ EXPORT XMLGenData(); + XMLGenData(const XMLGenData&) = delete; + XMLGenData& operator=(const XMLGenData&) = delete; /** @brief XMLGenData destructor */ @@ -92,10 +95,6 @@ class XMLGenData ParmList fParms; std::string fSchema; LoadNames fLoadNames; - - private: - XMLGenData(const XMLGenData&); // disable default copy ctor - XMLGenData& operator=(const XMLGenData&); // disable default assignment }; } // namespace WriteEngine diff --git a/writeengine/xml/we_xmlgenproc.cpp b/writeengine/xml/we_xmlgenproc.cpp index 784f21891..c21251261 100644 --- a/writeengine/xml/we_xmlgenproc.cpp +++ b/writeengine/xml/we_xmlgenproc.cpp @@ -147,6 +147,11 @@ void XMLGenProc::startXMLFile() xmlTextWriterWriteElement(fWriter, BAD_CAST xmlTagTable[TAG_ESCAPE_CHAR], BAD_CAST fInputMgr->getParm(XMLGenData::ESCAPE_CHAR).c_str()); + if (auto skipRows = fInputMgr->getParm(XMLGenData::SKIP_ROWS); !skipRows.empty()) + { + xmlTextWriterWriteElement(fWriter, BAD_CAST xmlTagTable[TAG_SKIP_ROWS], BAD_CAST 
skipRows.c_str()); + } + // Added new tags for configurable parameters xmlTextWriterStartElement(fWriter, BAD_CAST xmlTagTable[TAG_READ_BUFFERS]); xmlTextWriterWriteFormatAttribute(fWriter, BAD_CAST xmlTagTable[TAG_NO_OF_READ_BUFFERS], "%d", diff --git a/writeengine/xml/we_xmljob.cpp b/writeengine/xml/we_xmljob.cpp index f37efeaeb..34b50e026 100644 --- a/writeengine/xml/we_xmljob.cpp +++ b/writeengine/xml/we_xmljob.cpp @@ -130,6 +130,7 @@ void XMLJob::printJobInfo(Log& logger) const oss1 << "Read Buffers: " << job.numberOfReadBuffers << endl; oss1 << "Read Buffer Size: " << job.readBufferSize << endl; oss1 << "setvbuf Size: " << job.writeBufferSize << endl; + oss1 << "Header rows : " << job.fSkipRows << endl; oss1 << "Create Date : " << job.createDate << endl; oss1 << "Create Time : " << job.createTime << endl; oss1 << "Schema Name : " << job.schema << endl; @@ -223,7 +224,8 @@ void XMLJob::printJobInfoBrief(Log& logger) const oss1 << "n/a"; oss1 << "); ReadBufs(" << job.numberOfReadBuffers << "); ReadBufSize(" << job.readBufferSize - << "); setvbufSize(" << job.writeBufferSize << ')'; + << "); setvbufSize(" << job.writeBufferSize << "); " + << "SkipRows(" << job.fSkipRows << ")"; logger.logMsg(oss1.str(), MSGLVL_INFO2); for (unsigned int i = 0; i < job.jobTableList.size(); i++) @@ -316,6 +318,8 @@ bool XMLJob::processNode(xmlNode* pNode) setJobData(pNode, TAG_ENCLOSED_BY_CHAR, true, TYPE_CHAR); else if (isTag(pNode, TAG_ESCAPE_CHAR)) setJobData(pNode, TAG_ESCAPE_CHAR, true, TYPE_CHAR); + else if (isTag(pNode, TAG_SKIP_ROWS)) + setJobData(pNode, TAG_SKIP_ROWS, true, TYPE_INT); else { ostringstream oss; @@ -432,6 +436,12 @@ void XMLJob::setJobData(xmlNode* pNode, const xmlTag tag, bool bExpectContent, X break; } + case TAG_SKIP_ROWS: + { + fJob.fSkipRows = intVal; + break; + } + default: break; } } diff --git a/writeengine/xml/we_xmltag.h b/writeengine/xml/we_xmltag.h index 96a14f810..e3bc51cdf 100644 --- a/writeengine/xml/we_xmltag.h +++ 
b/writeengine/xml/we_xmltag.h @@ -73,6 +73,7 @@ enum xmlTag TAG_TBL_OID, TAG_WIDTH, TAG_SCHEMA_NAME, + TAG_SKIP_ROWS, NUM_OF_XML_TAGS }; @@ -93,6 +94,7 @@ const char xmlTagTable[NUM_OF_XML_TAGS + 1][MAX_XML_TAG_NAME_SIZE] = { "origName", //@bug 3599: deprecated; kept for backwards compatibility "precision", "scale", "tblName", //@bug 3599: replaces origName - "tblOid", "width", "Name"}; + "tblOid", "width", "Name", + "skipRows"}; } // namespace WriteEngine From 43eecec7f3a3b30d2bcd05895c19d49905200556 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 12 Jul 2025 16:12:14 +0200 Subject: [PATCH 03/51] Revert "chore(build): no build without PLUGIN_COLUMNSTORE" This reverts commit a4dde484c59e307232e093007e25b69513301a69. --- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8172ab1e1..ee826f6c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,3 @@ -if("NO" STREQUAL "${PLUGIN_COLUMNSTORE}" OR NOT DEFINED PLUGIN_COLUMNSTORE) - return() -endif() - cmake_minimum_required(VERSION 3.13) project(Columnstore) From 25c364a5208d722b1d54aeab2823066cd9a9afc6 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sat, 12 Jul 2025 16:11:02 +0200 Subject: [PATCH 04/51] MCOL-6090 do not install anything unconditionally before checking whether the target exists --- CMakeLists.txt | 3 +++ cmake/configureEngine.cmake | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ee826f6c8..b86fc76e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,9 @@ add_subdirectory(dbcon/mysql) if(NOT TARGET columnstore) return() endif() +# releasenum is used by external scripts for various tasks. Leave it alone. 
+columnstore_install_file(${CMAKE_CURRENT_BINARY_DIR}/build/releasenum ${ENGINE_SUPPORTDIR}) +columnstore_install_file(${CMAKE_CURRENT_BINARY_DIR}/gitversionEngine ${ENGINE_SUPPORTDIR}) set(COMPONENTS utils diff --git a/cmake/configureEngine.cmake b/cmake/configureEngine.cmake index cd8bff26a..0e909ff43 100644 --- a/cmake/configureEngine.cmake +++ b/cmake/configureEngine.cmake @@ -771,12 +771,8 @@ else() set(GIT_VERSION "source") endif() -# releasenum is used by external scripts for various tasks. Leave it alone. configure_file(${CMAKE_CURRENT_SOURCE_DIR}/build/releasenum.in ${CMAKE_CURRENT_BINARY_DIR}/build/releasenum IMMEDIATE) -columnstore_install_file(${CMAKE_CURRENT_BINARY_DIR}/build/releasenum ${ENGINE_SUPPORTDIR}) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/columnstoreversion.h.in ${CMAKE_CURRENT_SOURCE_DIR}/columnstoreversion.h) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/mcsconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/mcsconfig.h) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gitversionEngine.in ${CMAKE_CURRENT_BINARY_DIR}/gitversionEngine IMMEDIATE) - -columnstore_install_file(${CMAKE_CURRENT_BINARY_DIR}/gitversionEngine ${ENGINE_SUPPORTDIR}) From 7dca1da8f2a03f580078698085b7e3195932bd97 Mon Sep 17 00:00:00 2001 From: Aleksei Antipovskii Date: Sun, 13 Jul 2025 17:25:29 +0200 Subject: [PATCH 05/51] fix(cpimport): MCOL-4882 fix -L arg default value --- writeengine/bulk/we_cmdargs.cpp | 7 +++++-- writeengine/splitter/we_cmdargs.cpp | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/writeengine/bulk/we_cmdargs.cpp b/writeengine/bulk/we_cmdargs.cpp index 1989f0719..132771002 100644 --- a/writeengine/bulk/we_cmdargs.cpp +++ b/writeengine/bulk/we_cmdargs.cpp @@ -131,7 +131,7 @@ WECmdArgs::WECmdArgs(int argc, char** argv) "S3 Hostname (for S3 imports, Amazon's S3 default)") ("s3-region,g", po::value(&fS3Region), "S3 Region (for S3 imports)") - ("errors-dir,L", po::value(&fErrorDir)->default_value(MCSLOGDIR), + ("errors-dir,L", 
po::value(&fErrorDir)->default_value(fErrorDir), "Directory for the output .err and .bad files") ("job-uuid,u", po::value(&fUUID), "import job UUID") ("username,U", po::value(&fUsername), "Username of the files owner.") @@ -514,7 +514,10 @@ void WECmdArgs::fillParams(BulkLoad& curJob, std::string& sJobIdStr, std::string { cout << "number of read threads : " << fNoOfReadThrds << endl; } - cout << "Column delimiter : " << (fColDelim == '\t' ? "\\t" : string{fColDelim}) << endl; + if (fColDelim != 0) + { + cout << "Column delimiter : " << (fColDelim == '\t' ? "\\t" : string{fColDelim}) << endl; + } if (fNoOfWriteThrds != 0) { cout << "number of parse threads : " << fNoOfWriteThrds << endl; diff --git a/writeengine/splitter/we_cmdargs.cpp b/writeengine/splitter/we_cmdargs.cpp index 34f67ccb7..816e2a9f6 100644 --- a/writeengine/splitter/we_cmdargs.cpp +++ b/writeengine/splitter/we_cmdargs.cpp @@ -132,7 +132,7 @@ WECmdArgs::WECmdArgs(int argc, char** argv) "S3 Hostname (for S3 imports, Amazon's S3 default)") ("s3-region,g", po::value(&fS3Region), "S3 Region (for S3 imports)") - ("errors-dir,L", po::value(&fErrorDir)->default_value(MCSLOGDIR), + ("errors-dir,L", po::value(&fErrorDir)->default_value(fErrorDir), "Directory for the output .err and .bad files") ("username,U", po::value(&fUsername), "Username of the files owner.") ("dbname", po::value(), "Name of the database to load") From 1ce46b5e0ba72636af7fc801a6c096c86672cfa4 Mon Sep 17 00:00:00 2001 From: Aleksei Antipovskii Date: Fri, 20 Jun 2025 17:20:40 +0200 Subject: [PATCH 06/51] feature(cpimport): MCOL-5164 ignore all errors (`-e all`) --- .../r/MCOL-5164-max-errors-and-report.result | 51 ++++++++++++++++++ .../t/MCOL-5164-max-errors-and-report.test | 52 +++++++++++++++++++ writeengine/bulk/we_bulkload.cpp | 4 +- writeengine/bulk/we_bulkload.h | 11 ++-- writeengine/bulk/we_bulkloadbuffer.cpp | 12 ++--- writeengine/bulk/we_bulkloadbuffer.h | 8 +-- writeengine/bulk/we_cmdargs.cpp | 26 +++++++--- 
writeengine/bulk/we_cmdargs.h | 2 +- writeengine/bulk/we_tableinfo.cpp | 8 ++- writeengine/bulk/we_tableinfo.h | 10 ++-- writeengine/shared/we_type.h | 7 +++ writeengine/splitter/we_cmdargs.cpp | 27 ++++++++-- writeengine/splitter/we_cmdargs.h | 2 +- writeengine/xml/we_xmlgenproc.cpp | 12 ++++- writeengine/xml/we_xmljob.cpp | 33 ++++++------ 15 files changed, 209 insertions(+), 56 deletions(-) create mode 100644 mysql-test/columnstore/basic/r/MCOL-5164-max-errors-and-report.result create mode 100644 mysql-test/columnstore/basic/t/MCOL-5164-max-errors-and-report.test diff --git a/mysql-test/columnstore/basic/r/MCOL-5164-max-errors-and-report.result b/mysql-test/columnstore/basic/r/MCOL-5164-max-errors-and-report.result new file mode 100644 index 000000000..904096a42 --- /dev/null +++ b/mysql-test/columnstore/basic/r/MCOL-5164-max-errors-and-report.result @@ -0,0 +1,51 @@ +DROP DATABASE IF EXISTS mcol5164; +CREATE DATABASE mcol5164; +USE mcol5164; +CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; +SELECT * FROM t1; +col1 col2 +Rejected rows: +0,test0,wrong +1,test1,wrong +2,test2,wrong +3,test3,wrong +4,test4,wrong +5,test5,wrong +6,test6,wrong +7,test7,wrong +8,test8,wrong +9,test9,wrong +10,test10,wrong +TRUNCATE t1; +SELECT * FROM t1; +col1 col2 +11 test11-good +Rejected rows: +0,test0,wrong +1,test1,wrong +2,test2,wrong +3,test3,wrong +4,test4,wrong +5,test5,wrong +6,test6,wrong +7,test7,wrong +8,test8,wrong +9,test9,wrong +10,test10,wrong +TRUNCATE t1; +SELECT * FROM t1; +col1 col2 +11 test11-good +Rejected rows: +0,test0,wrong +1,test1,wrong +2,test2,wrong +3,test3,wrong +4,test4,wrong +5,test5,wrong +6,test6,wrong +7,test7,wrong +8,test8,wrong +9,test9,wrong +10,test10,wrong +DROP DATABASE mcol5164; diff --git a/mysql-test/columnstore/basic/t/MCOL-5164-max-errors-and-report.test b/mysql-test/columnstore/basic/t/MCOL-5164-max-errors-and-report.test new file mode 100644 index 000000000..68c6cfe80 --- /dev/null +++ 
b/mysql-test/columnstore/basic/t/MCOL-5164-max-errors-and-report.test @@ -0,0 +1,52 @@ +if (!$MYSQL_TEST_ROOT){ + skip Should be run by root to execute cpimport; +} + +-- source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS mcol5164; +--enable_warnings + +CREATE DATABASE mcol5164; +USE mcol5164; + +CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; + +--exec mkdir -p /tmp/mtr-mcol5164 +--exec awk 'BEGIN { for (i = 0; i < 11; i++) { printf "%d,test%d,wrong\n", i, i; }; printf "%d,test%d-good", i, i; }' > /tmp/mtr-mcol5164/mcol5164.csv + +--disable_result_log +--error 1 # exceeds default max-errors +--exec $MCS_CPIMPORT -s , -L /tmp/mtr-mcol5164 mcol5164 t1 /tmp/mtr-mcol5164/mcol5164.csv +--enable_result_log +SELECT * FROM t1; +--exec echo Rejected rows: +--exec cat /tmp/mtr-mcol5164/mcol5164.csv*.bad +--exec rm -f /tmp/mtr-mcol5164/*.err +--exec rm -f /tmp/mtr-mcol5164/*.bad +TRUNCATE t1; + +# implicitly set max-errors +--disable_result_log +--exec $MCS_CPIMPORT -s , -e 11 -L /tmp/mtr-mcol5164 mcol5164 t1 /tmp/mtr-mcol5164/mcol5164.csv +--enable_result_log +SELECT * FROM t1; +--exec echo Rejected rows: +--exec cat /tmp/mtr-mcol5164/mcol5164.csv*.bad +--exec rm -f /tmp/mtr-mcol5164/*.err +--exec rm -f /tmp/mtr-mcol5164/*.bad +TRUNCATE t1; + +# max-errors = all +--disable_result_log +--exec $MCS_CPIMPORT -s , -e all -L /tmp/mtr-mcol5164 mcol5164 t1 /tmp/mtr-mcol5164/mcol5164.csv +--enable_result_log +SELECT * FROM t1; +--exec echo Rejected rows: +--exec cat /tmp/mtr-mcol5164/mcol5164.csv*.bad + + +# Clean UP +--exec rm -rf /tmp/mtr-mcol5164 +DROP DATABASE mcol5164; diff --git a/writeengine/bulk/we_bulkload.cpp b/writeengine/bulk/we_bulkload.cpp index 2b4f1ca1f..9e2b22099 100644 --- a/writeengine/bulk/we_bulkload.cpp +++ b/writeengine/bulk/we_bulkload.cpp @@ -530,14 +530,14 @@ int BulkLoad::preProcess(Job& job, int tableNo, std::shared_ptr& tabl if (pwd) tableInfo->setUIDGID(pwd->pw_uid, pwd->pw_gid); - if (fMaxErrors != 
-1) + if (fMaxErrors != MAX_ERRORS_DEFAULT) tableInfo->setMaxErrorRows(fMaxErrors); else tableInfo->setMaxErrorRows(job.jobTableList[tableNo].maxErrNum); // @bug 3929: cpimport.bin error messaging using up too much memory. // Validate that max allowed error count is within valid range - long long maxErrNum = tableInfo->getMaxErrorRows(); + int maxErrNum = tableInfo->getMaxErrorRows(); if (maxErrNum > MAX_ALLOW_ERROR_COUNT) { diff --git a/writeengine/bulk/we_bulkload.h b/writeengine/bulk/we_bulkload.h index f9ab26ffd..34585b4f2 100644 --- a/writeengine/bulk/we_bulkload.h +++ b/writeengine/bulk/we_bulkload.h @@ -129,7 +129,7 @@ class BulkLoad : public FileOp void setEscapeChar(char esChar); void setSkipRows(size_t skipRows); void setKeepRbMetaFiles(bool keepMeta); - void setMaxErrorCount(unsigned int maxErrors); + void setMaxErrorCount(int maxErrors); void setNoOfParseThreads(int parseThreads); void setNoOfReadThreads(int readThreads); void setNullStringMode(bool bMode); @@ -184,13 +184,13 @@ class BulkLoad : public FileOp Log fLog; // logger - int fNumOfParser; // total number of parser + int fNumOfParser{0}; // total number of parser char fColDelim{0}; // delimits col values within a row int fNoOfBuffers{-1}; // Number of read buffers int fBufferSize{-1}; // Read buffer size int fFileVbufSize{-1}; // Internal file system buffer size - long long fMaxErrors{-1}; // Max allowable errors per job + long long fMaxErrors{MAX_ERRORS_DEFAULT}; // Max allowable errors per job std::string fAlternateImportDir; // Alternate bulk import directory std::string fErrorDir; // Opt. where error records record std::string fProcessName; // Application process name @@ -429,10 +429,7 @@ inline void BulkLoad::setKeepRbMetaFiles(bool keepMeta) fKeepRbMetaFiles = keepMeta; } -// Mutator takes an unsigned int, but we store in a long long, because... 
-// TableInfo which eventually needs this attribute, takes an unsigned int, -// but we want to be able to init to -1, to indicate when it has not been set. -inline void BulkLoad::setMaxErrorCount(unsigned int maxErrors) +inline void BulkLoad::setMaxErrorCount(int maxErrors) { fMaxErrors = maxErrors; } diff --git a/writeengine/bulk/we_bulkloadbuffer.cpp b/writeengine/bulk/we_bulkloadbuffer.cpp index cd43d6cd0..5d65b3215 100644 --- a/writeengine/bulk/we_bulkloadbuffer.cpp +++ b/writeengine/bulk/we_bulkloadbuffer.cpp @@ -2049,7 +2049,7 @@ int BulkLoadBuffer::parseDictSection(ColumnInfo& columnInfo, int tokenPos, RID s int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length, size_t* parse_length, size_t& skipRows, RID& totalReadRows, RID& correctTotalRows, const boost::ptr_vector& columnsInfo, - unsigned int allowedErrCntThisCall) + int allowedErrCntThisCall) { boost::mutex::scoped_lock lock(fSyncUpdatesBLB); reset(); @@ -2153,7 +2153,7 @@ int BulkLoadBuffer::fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const ch int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, size_t& skipRows, RID& totalReadRows, RID& correctTotalRows, const boost::ptr_vector& columnsInfo, - unsigned int allowedErrCntThisCall) + int allowedErrCntThisCall) { boost::mutex::scoped_lock lock(fSyncUpdatesBLB); reset(); @@ -2277,7 +2277,7 @@ int BulkLoadBuffer::fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* hand // depending on whether the user has enabled the "enclosed by" feature. 
//------------------------------------------------------------------------------ void BulkLoadBuffer::tokenize(const boost::ptr_vector& columnsInfo, - unsigned int allowedErrCntThisCall, size_t& skipRows) + int allowedErrCntThisCall, size_t& skipRows) { unsigned offset = 0; // length of field unsigned curCol = 0; // dest db column counter within a row @@ -2789,7 +2789,7 @@ void BulkLoadBuffer::tokenize(const boost::ptr_vector& columnsInfo, // Quit if we exceed max allowable errors for this call. // We set lastRowHead = p, so that the code that follows this // loop won't try to save any data in fOverflowBuf. - if (errorCount > allowedErrCntThisCall) + if (allowedErrCntThisCall != MAX_ERRORS_ALL && errorCount > static_cast(allowedErrCntThisCall)) { lastRowHead = p + 1; p++; @@ -2928,7 +2928,7 @@ void BulkLoadBuffer::resizeTokenArray() // then tokenize() will stop reading data and exit. //------------------------------------------------------------------------------ int BulkLoadBuffer::tokenizeBinary(const boost::ptr_vector& columnsInfo, - unsigned int allowedErrCntThisCall, bool bEndOfData) + int allowedErrCntThisCall, bool bEndOfData) { unsigned curCol = 0; // dest db column counter within a row unsigned curRowNum = 0; // "total" number of rows read during this call @@ -3082,7 +3082,7 @@ int BulkLoadBuffer::tokenizeBinary(const boost::ptr_vector& columnsI errorCount++; // Quit if we exceed max allowable errors for this call - if (errorCount > allowedErrCntThisCall) + if (allowedErrCntThisCall != MAX_ERRORS_ALL && errorCount > static_cast(allowedErrCntThisCall)) break; } diff --git a/writeengine/bulk/we_bulkloadbuffer.h b/writeengine/bulk/we_bulkloadbuffer.h index 475c98663..fafc330ee 100644 --- a/writeengine/bulk/we_bulkloadbuffer.h +++ b/writeengine/bulk/we_bulkloadbuffer.h @@ -215,12 +215,12 @@ class BulkLoadBuffer /** @brief tokenize the buffer contents and fill up the token array. 
*/ - void tokenize(const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall, + void tokenize(const boost::ptr_vector& columnsInfo, int allowedErrCntThisCall, size_t& skipRows); /** @brief Binary tokenization of the buffer, and fill up the token array. */ - int tokenizeBinary(const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall, + int tokenizeBinary(const boost::ptr_vector& columnsInfo, int allowedErrCntThisCall, bool bEndOfData); /** @brief Determine if specified value is NULL or not. @@ -275,13 +275,13 @@ class BulkLoadBuffer int fillFromMemory(const BulkLoadBuffer& overFlowBufIn, const char* input, size_t length, size_t* parse_length, size_t& skipRows, RID& totalReadRows, RID& correctTotalRows, - const boost::ptr_vector& columnsInfo, unsigned int allowedErrCntThisCall); + const boost::ptr_vector& columnsInfo, int allowedErrCntThisCall); /** @brief Read the table data into the buffer */ int fillFromFile(const BulkLoadBuffer& overFlowBufIn, FILE* handle, size_t& skipRows, RID& totalRows, RID& correctTotalRows, const boost::ptr_vector& columnsInfo, - unsigned int allowedErrCntThisCall); + int allowedErrCntThisCall); /** @brief Get the overflow size */ diff --git a/writeengine/bulk/we_cmdargs.cpp b/writeengine/bulk/we_cmdargs.cpp index 132771002..640e42867 100644 --- a/writeengine/bulk/we_cmdargs.cpp +++ b/writeengine/bulk/we_cmdargs.cpp @@ -70,8 +70,7 @@ WECmdArgs::WECmdArgs(int argc, char** argv) DECLARE_INT_ARG("read-buffer-size,c", fReadBufSize, 1, INT_MAX, "Application read buffer size (in bytes)") DECLARE_INT_ARG("debug,d", fDebugLvl, 1, 3, "Print different level(1-3) debug message") - DECLARE_INT_ARG("max-errors,e", fMaxErrors, 0, INT_MAX, - "Maximum number of allowable error per table per PM") + ("max-errors,e", po::value(), "Maximum number (or 'all') of allowable error per table per PM") ("file-path,f", po::value(&fPmFilePath), "Data file directory path. 
Default is current working directory.\n" "\tIn Mode 1, represents the local input file path.\n" @@ -304,6 +303,24 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) fAllowMissingColumn = true; } } + if (vm.contains("max-errors")) + { + auto optarg= vm["max-errors"].as(); + if (optarg == "all") + { + fMaxErrors = MAX_ERRORS_ALL; + } + else + { + errno = 0; + long lValue = strtol(optarg.c_str(), nullptr, 10); + if (errno != 0 || lValue < 0 || lValue > INT_MAX) + { + startupError("Option --max-errors/-e is invalid or out of range"); + } + fMaxErrors = lValue; + } + } if (fArgMode != -1) fMode = fArgMode; // BUG 4210 @@ -337,10 +354,7 @@ void WECmdArgs::fillParams(BulkLoad& curJob, std::string& sJobIdStr, std::string curJob.setReadBufferCount(fIOReadBufSize); curJob.setReadBufferSize(fReadBufSize); - if (fMaxErrors >= 0) - { - curJob.setMaxErrorCount(fMaxErrors); - } + curJob.setMaxErrorCount(fMaxErrors); if (!fPmFilePath.empty()) { importPath = fPmFilePath; diff --git a/writeengine/bulk/we_cmdargs.h b/writeengine/bulk/we_cmdargs.h index 5446fdc61..6eb272f32 100644 --- a/writeengine/bulk/we_cmdargs.h +++ b/writeengine/bulk/we_cmdargs.h @@ -91,7 +91,7 @@ private: int fNoOfReadThrds{1}; // No. of read buffers int fDebugLvl{0}; // Debug level - int fMaxErrors{-1}; // Max allowable errors + int fMaxErrors{MAX_ERRORS_DEFAULT}; // Max allowable errors int fReadBufSize{-1}; // Read buffer size int fIOReadBufSize{-1}; // I/O read buffer size int fSetBufSize{0}; // Buff size w/setvbuf diff --git a/writeengine/bulk/we_tableinfo.cpp b/writeengine/bulk/we_tableinfo.cpp index 1df5b752b..d9a2c5888 100644 --- a/writeengine/bulk/we_tableinfo.cpp +++ b/writeengine/bulk/we_tableinfo.cpp @@ -412,7 +412,11 @@ int TableInfo::readTableData() // We keep a running total of read errors; fMaxErrorRows specifies // the error limit. Here's where we see how many more errors we // still have below the limit, and we pass this to fillFromFile(). 
- unsigned allowedErrCntThisCall = ((fMaxErrorRows > fTotalErrRows) ? (fMaxErrorRows - fTotalErrRows) : 0); + int allowedErrCntThisCall; + if (fMaxErrorRows == MAX_ERRORS_ALL) + allowedErrCntThisCall = MAX_ERRORS_ALL; + else + allowedErrCntThisCall = static_cast(fMaxErrorRows) > fTotalErrRows ? fMaxErrorRows - fTotalErrRows : 0; // Fill in the specified buffer. // fTotalReadRowsPerInputFile is ongoing total number of rows read, @@ -485,7 +489,7 @@ int TableInfo::readTableData() writeErrorList(&fBuffers[readBufNo].getErrorRows(), &fBuffers[readBufNo].getExactErrorRows(), false); fBuffers[readBufNo].clearErrRows(); - if (fTotalErrRows > fMaxErrorRows) + if (fMaxErrorRows != MAX_ERRORS_ALL && fTotalErrRows > static_cast(fMaxErrorRows)) { // flush the reject data file and output the rejected rows // flush err file and output the rejected row id and the reason. diff --git a/writeengine/bulk/we_tableinfo.h b/writeengine/bulk/we_tableinfo.h index 3d4e836a8..3cbfe204e 100644 --- a/writeengine/bulk/we_tableinfo.h +++ b/writeengine/bulk/we_tableinfo.h @@ -85,7 +85,7 @@ class TableInfo : public WeUIDGID // for this table. Is volatile to // insure parser & reader threads // see the latest value. - unsigned fMaxErrorRows; // Maximum error rows + int fMaxErrorRows; // Maximum error rows int fLastBufferId; // Id of the last buffer char* fFileBuffer; // File buffer passed to setvbuf() int fCurrentParseBuffer; // Id of leading current buffer being @@ -298,7 +298,7 @@ class TableInfo : public WeUIDGID /** @brief Get the number of maximum allowed error rows */ - unsigned getMaxErrorRows() const; + int getMaxErrorRows() const; /** @brief retrieve the tuncation as error setting for this * import. 
When set, this causes char and varchar strings @@ -309,7 +309,7 @@ class TableInfo : public WeUIDGID /** @brief set the maximum number of error rows allowed */ - void setMaxErrorRows(const unsigned int maxErrorRows); + void setMaxErrorRows(int maxErrorRows); /** @brief Set mode to treat "NULL" string as NULL value or not. */ @@ -513,7 +513,7 @@ inline Status TableInfo::getStatusTI() const return fStatusTI; } -inline unsigned TableInfo::getMaxErrorRows() const +inline int TableInfo::getMaxErrorRows() const { return fMaxErrorRows; } @@ -630,7 +630,7 @@ inline void TableInfo::setLoadFilesInput(bool bReadFromStdin, bool bReadFromS3, fS3Region = s3region; } -inline void TableInfo::setMaxErrorRows(const unsigned int maxErrorRows) +inline void TableInfo::setMaxErrorRows(int maxErrorRows) { fMaxErrorRows = maxErrorRows; } diff --git a/writeengine/shared/we_type.h b/writeengine/shared/we_type.h index faef43018..7772351ab 100644 --- a/writeengine/shared/we_type.h +++ b/writeengine/shared/we_type.h @@ -144,6 +144,13 @@ enum ImportDataMode IMPORT_DATA_BIN_SAT_NULL = 2 }; +// Max number of ignored errors +enum MaxErrors +{ + MAX_ERRORS_DEFAULT = -1, // default value + MAX_ERRORS_ALL = -2 // special case: ignore all errors +}; + /** * the set of Calpont column data type names; MUST match ColDataType in * calpontsystemcatalog.h. 
diff --git a/writeengine/splitter/we_cmdargs.cpp b/writeengine/splitter/we_cmdargs.cpp index 816e2a9f6..87ed30e1d 100644 --- a/writeengine/splitter/we_cmdargs.cpp +++ b/writeengine/splitter/we_cmdargs.cpp @@ -71,8 +71,7 @@ WECmdArgs::WECmdArgs(int argc, char** argv) DECLARE_INT_ARG("debug,d", fDebugLvl, 1, 3, "Print different level(1-3) debug message") ("verbose,v", po::value()) ("silent,N", po::bool_switch()) - DECLARE_INT_ARG("max-errors,e", fMaxErrors, 0, INT_MAX, - "Maximum number of allowable error per table per PM") + ("max-errors,e", po::value(), "Maximum number (or 'all') of allowable error per table per PM") ("file-path,f", po::value(&fPmFilePath), "Data file directory path. Default is current working directory.\n" "\tIn Mode 1, represents the local input file path.\n" @@ -230,7 +229,9 @@ std::string WECmdArgs::getCpImportCmdLine(bool skipRows) if (fNoOfWriteThrds > 0) aSS << " -w " << fNoOfWriteThrds; - if (fMaxErrors >= 0) + if (fMaxErrors == MAX_ERRORS_ALL) + aSS << " -e all "; + else if (fMaxErrors != MAX_ERRORS_DEFAULT) aSS << " -e " << fMaxErrors; // BUG 5088 @@ -446,7 +447,7 @@ bool WECmdArgs::checkForCornerCases() cout << "Invalid option -b with Mode 0" << endl; throw(runtime_error("Mismatched options.")); } - else if (fMaxErrors >= 0) + else if (fMaxErrors >= 0 || fMaxErrors == MAX_ERRORS_ALL) { cout << "Invalid option -e with Mode 0" << endl; throw(runtime_error("Mismatched options.")); @@ -735,6 +736,24 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) fBatchQty = 10000; } } + if (vm.contains("max-errors")) + { + auto optarg = vm["max-errors"].as(); + if (optarg == "all") + { + fMaxErrors = MAX_ERRORS_ALL; + } + else + { + errno = 0; + long lValue = strtol(optarg.c_str(), nullptr, 10); + if (errno != 0 || lValue < 0 || lValue > INT_MAX) + { + throw runtime_error("Option --max-errors/-e is invalid or out of range"); + } + fMaxErrors = lValue; + } + } if (fArgMode != -1) fMode = fArgMode; // BUG 4210 diff --git 
a/writeengine/splitter/we_cmdargs.h b/writeengine/splitter/we_cmdargs.h index 64d17b598..96b2aa1c3 100644 --- a/writeengine/splitter/we_cmdargs.h +++ b/writeengine/splitter/we_cmdargs.h @@ -323,7 +323,7 @@ class WECmdArgs int fBatchQty{10000}; // No. of batch Qty. int fNoOfReadThrds{0}; // No. of read buffers int fDebugLvl{0}; // Debug level - int fMaxErrors{-1}; // Max allowable errors + int fMaxErrors{MAX_ERRORS_DEFAULT}; // Max allowable errors int fReadBufSize{0}; // Read buffer size int fIOReadBufSize{0}; // I/O read buffer size int fSetBufSize{0}; // Buff size w/setvbuf diff --git a/writeengine/xml/we_xmlgenproc.cpp b/writeengine/xml/we_xmlgenproc.cpp index c21251261..de4d5771a 100644 --- a/writeengine/xml/we_xmlgenproc.cpp +++ b/writeengine/xml/we_xmlgenproc.cpp @@ -230,8 +230,16 @@ void XMLGenProc::makeTableData(const CalpontSystemCatalog::TableName& table, con } xmlTextWriterWriteAttribute(fWriter, BAD_CAST xmlTagTable[TAG_LOAD_NAME], BAD_CAST tmp.c_str()); - xmlTextWriterWriteFormatAttribute(fWriter, BAD_CAST xmlTagTable[TAG_MAX_ERR_ROW], "%d", - atoi(fInputMgr->getParm(XMLGenData::MAXERROR).c_str())); + auto sMaxErrors = fInputMgr->getParm(XMLGenData::MAXERROR); + if (sMaxErrors == "all") + { + xmlTextWriterWriteAttribute(fWriter, BAD_CAST xmlTagTable[TAG_MAX_ERR_ROW], BAD_CAST sMaxErrors.c_str()); + } + else + { + xmlTextWriterWriteFormatAttribute(fWriter, BAD_CAST xmlTagTable[TAG_MAX_ERR_ROW], "%d", + atoi(sMaxErrors.c_str())); + } } kount++; diff --git a/writeengine/xml/we_xmljob.cpp b/writeengine/xml/we_xmljob.cpp index 34b50e026..fd7bd85d6 100644 --- a/writeengine/xml/we_xmljob.cpp +++ b/writeengine/xml/we_xmljob.cpp @@ -49,9 +49,7 @@ namespace WriteEngine // Constructor //------------------------------------------------------------------------------ XMLJob::XMLJob() - : fDeleteTempFile(false) - , fValidateColList(true) - , fTimeZone(dataconvert::systemTimeZoneOffset()) + : fDeleteTempFile(false), fValidateColList(true), 
fTimeZone(dataconvert::systemTimeZoneOffset()) { } @@ -197,7 +195,7 @@ void XMLJob::printJobInfo(Log& logger) const logger.logMsg(oss3.str(), MSGLVL_INFO2); } // end of loop through columns in a table - } // end of loop through tables + } // end of loop through tables } //------------------------------------------------------------------------------ @@ -473,8 +471,13 @@ void XMLJob::setJobDataTable(xmlNode* pNode) if (getNodeAttributeStr(pNode, xmlTagTable[TAG_LOAD_NAME], bufString)) curTable.loadFileName = bufString; - if (getNodeAttribute(pNode, xmlTagTable[TAG_MAX_ERR_ROW], &intVal, TYPE_INT)) - curTable.maxErrNum = intVal; + if (getNodeAttributeStr(pNode, xmlTagTable[TAG_MAX_ERR_ROW], bufString)) + { + if (bufString == "all") + curTable.maxErrNum = MAX_ERRORS_ALL; + else + curTable.maxErrNum = atoi(bufString.c_str()); + } fJob.jobTableList.push_back(curTable); } @@ -683,7 +686,6 @@ void XMLJob::initSatLimits(JobColumn& curColumn) const { curColumn.fMaxIntSat = dataconvert::decimalRangeUp(curColumn.precision); curColumn.fMinIntSat = -curColumn.fMaxIntSat; - } else if (curColumn.typeName == ColDataTypeStr[CalpontSystemCatalog::UDECIMAL]) { @@ -987,12 +989,13 @@ void XMLJob::fillInXMLDataNotNullDefault(const std::string& fullTblName, { if (LIKELY(colType.colWidth == datatypes::MAXDECIMALWIDTH)) { - col.fDefaultWideDecimal = colType.decimal128FromString(col_defaultValue.safeString(), &bDefaultConvertError); + col.fDefaultWideDecimal = + colType.decimal128FromString(col_defaultValue.safeString(), &bDefaultConvertError); } else { - col.fDefaultInt = Convertor::convertDecimalString(col_defaultValue.str(), - col_defaultValue.length(), colType.scale); + col.fDefaultInt = Convertor::convertDecimalString(col_defaultValue.str(), col_defaultValue.length(), + colType.scale); if (errno == ERANGE) bDefaultConvertError = true; @@ -1004,9 +1007,8 @@ void XMLJob::fillInXMLDataNotNullDefault(const std::string& fullTblName, case execplan::CalpontSystemCatalog::DATE: { int 
convertStatus; - int32_t dt = dataconvert::DataConvert::convertColumnDate(col_defaultValue.str(), - dataconvert::CALPONTDATE_ENUM, convertStatus, - col_defaultValue.length()); + int32_t dt = dataconvert::DataConvert::convertColumnDate( + col_defaultValue.str(), dataconvert::CALPONTDATE_ENUM, convertStatus, col_defaultValue.length()); if (convertStatus != 0) bDefaultConvertError = true; @@ -1046,9 +1048,8 @@ void XMLJob::fillInXMLDataNotNullDefault(const std::string& fullTblName, case execplan::CalpontSystemCatalog::TIME: { int convertStatus; - int64_t dt = dataconvert::DataConvert::convertColumnTime(col_defaultValue.str(), - dataconvert::CALPONTTIME_ENUM, convertStatus, - col_defaultValue.length()); + int64_t dt = dataconvert::DataConvert::convertColumnTime( + col_defaultValue.str(), dataconvert::CALPONTTIME_ENUM, convertStatus, col_defaultValue.length()); if (convertStatus != 0) bDefaultConvertError = true; From c30b490027bfaca720fe990994e5b923e9935682 Mon Sep 17 00:00:00 2001 From: Aleksei Antipovskii Date: Thu, 10 Jul 2025 02:02:03 +0200 Subject: [PATCH 07/51] skip bad rows report test on multinode setup --- .../r/MCOL-5164-max-errors-and-report.result | 51 ------------------- .../basic/r/MCOL-5164-max-errors.result | 15 ++++++ .../basic/r/MCOL-5164-report-bad.result | 20 ++++++++ ...-report.test => MCOL-5164-max-errors.test} | 15 +----- .../basic/t/MCOL-5164-report-bad.test | 35 +++++++++++++ .../columnstore/include/check_multinode.inc | 4 ++ 6 files changed, 76 insertions(+), 64 deletions(-) delete mode 100644 mysql-test/columnstore/basic/r/MCOL-5164-max-errors-and-report.result create mode 100644 mysql-test/columnstore/basic/r/MCOL-5164-max-errors.result create mode 100644 mysql-test/columnstore/basic/r/MCOL-5164-report-bad.result rename mysql-test/columnstore/basic/t/{MCOL-5164-max-errors-and-report.test => MCOL-5164-max-errors.test} (66%) create mode 100644 mysql-test/columnstore/basic/t/MCOL-5164-report-bad.test create mode 100644 
mysql-test/columnstore/include/check_multinode.inc diff --git a/mysql-test/columnstore/basic/r/MCOL-5164-max-errors-and-report.result b/mysql-test/columnstore/basic/r/MCOL-5164-max-errors-and-report.result deleted file mode 100644 index 904096a42..000000000 --- a/mysql-test/columnstore/basic/r/MCOL-5164-max-errors-and-report.result +++ /dev/null @@ -1,51 +0,0 @@ -DROP DATABASE IF EXISTS mcol5164; -CREATE DATABASE mcol5164; -USE mcol5164; -CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; -SELECT * FROM t1; -col1 col2 -Rejected rows: -0,test0,wrong -1,test1,wrong -2,test2,wrong -3,test3,wrong -4,test4,wrong -5,test5,wrong -6,test6,wrong -7,test7,wrong -8,test8,wrong -9,test9,wrong -10,test10,wrong -TRUNCATE t1; -SELECT * FROM t1; -col1 col2 -11 test11-good -Rejected rows: -0,test0,wrong -1,test1,wrong -2,test2,wrong -3,test3,wrong -4,test4,wrong -5,test5,wrong -6,test6,wrong -7,test7,wrong -8,test8,wrong -9,test9,wrong -10,test10,wrong -TRUNCATE t1; -SELECT * FROM t1; -col1 col2 -11 test11-good -Rejected rows: -0,test0,wrong -1,test1,wrong -2,test2,wrong -3,test3,wrong -4,test4,wrong -5,test5,wrong -6,test6,wrong -7,test7,wrong -8,test8,wrong -9,test9,wrong -10,test10,wrong -DROP DATABASE mcol5164; diff --git a/mysql-test/columnstore/basic/r/MCOL-5164-max-errors.result b/mysql-test/columnstore/basic/r/MCOL-5164-max-errors.result new file mode 100644 index 000000000..a3ef737aa --- /dev/null +++ b/mysql-test/columnstore/basic/r/MCOL-5164-max-errors.result @@ -0,0 +1,15 @@ +DROP DATABASE IF EXISTS mcol5164; +CREATE DATABASE mcol5164; +USE mcol5164; +CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; +SELECT * FROM t1; +col1 col2 +TRUNCATE t1; +SELECT * FROM t1; +col1 col2 +11 test11-good +TRUNCATE t1; +SELECT * FROM t1; +col1 col2 +11 test11-good +DROP DATABASE mcol5164; diff --git a/mysql-test/columnstore/basic/r/MCOL-5164-report-bad.result b/mysql-test/columnstore/basic/r/MCOL-5164-report-bad.result new file mode 100644 index 
000000000..78f9ecb2a --- /dev/null +++ b/mysql-test/columnstore/basic/r/MCOL-5164-report-bad.result @@ -0,0 +1,20 @@ +DROP DATABASE IF EXISTS mcol5164rep; +CREATE DATABASE mcol5164rep; +USE mcol5164rep; +CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; +SELECT * FROM t1; +col1 col2 +11 test11-good +Rejected rows: +0,test0,wrong +1,test1,wrong +2,test2,wrong +3,test3,wrong +4,test4,wrong +5,test5,wrong +6,test6,wrong +7,test7,wrong +8,test8,wrong +9,test9,wrong +10,test10,wrong +DROP DATABASE mcol5164rep; diff --git a/mysql-test/columnstore/basic/t/MCOL-5164-max-errors-and-report.test b/mysql-test/columnstore/basic/t/MCOL-5164-max-errors.test similarity index 66% rename from mysql-test/columnstore/basic/t/MCOL-5164-max-errors-and-report.test rename to mysql-test/columnstore/basic/t/MCOL-5164-max-errors.test index 68c6cfe80..02601268d 100644 --- a/mysql-test/columnstore/basic/t/MCOL-5164-max-errors-and-report.test +++ b/mysql-test/columnstore/basic/t/MCOL-5164-max-errors.test @@ -2,7 +2,7 @@ if (!$MYSQL_TEST_ROOT){ skip Should be run by root to execute cpimport; } --- source ../include/have_columnstore.inc +--source ../include/have_columnstore.inc --disable_warnings DROP DATABASE IF EXISTS mcol5164; @@ -14,17 +14,13 @@ USE mcol5164; CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; --exec mkdir -p /tmp/mtr-mcol5164 ---exec awk 'BEGIN { for (i = 0; i < 11; i++) { printf "%d,test%d,wrong\n", i, i; }; printf "%d,test%d-good", i, i; }' > /tmp/mtr-mcol5164/mcol5164.csv +--exec awk 'BEGIN { for (i = 0; i < 11; i++) { printf "%d,test%d,wrong\n", i, i; }; printf "%d,test%d-good\n", i, i; }' > /tmp/mtr-mcol5164/mcol5164.csv --disable_result_log --error 1 # exceeds default max-errors --exec $MCS_CPIMPORT -s , -L /tmp/mtr-mcol5164 mcol5164 t1 /tmp/mtr-mcol5164/mcol5164.csv --enable_result_log SELECT * FROM t1; ---exec echo Rejected rows: ---exec cat /tmp/mtr-mcol5164/mcol5164.csv*.bad ---exec rm -f /tmp/mtr-mcol5164/*.err ---exec rm -f 
/tmp/mtr-mcol5164/*.bad TRUNCATE t1; # implicitly set max-errors @@ -32,10 +28,6 @@ TRUNCATE t1; --exec $MCS_CPIMPORT -s , -e 11 -L /tmp/mtr-mcol5164 mcol5164 t1 /tmp/mtr-mcol5164/mcol5164.csv --enable_result_log SELECT * FROM t1; ---exec echo Rejected rows: ---exec cat /tmp/mtr-mcol5164/mcol5164.csv*.bad ---exec rm -f /tmp/mtr-mcol5164/*.err ---exec rm -f /tmp/mtr-mcol5164/*.bad TRUNCATE t1; # max-errors = all @@ -43,9 +35,6 @@ TRUNCATE t1; --exec $MCS_CPIMPORT -s , -e all -L /tmp/mtr-mcol5164 mcol5164 t1 /tmp/mtr-mcol5164/mcol5164.csv --enable_result_log SELECT * FROM t1; ---exec echo Rejected rows: ---exec cat /tmp/mtr-mcol5164/mcol5164.csv*.bad - # Clean UP --exec rm -rf /tmp/mtr-mcol5164 diff --git a/mysql-test/columnstore/basic/t/MCOL-5164-report-bad.test b/mysql-test/columnstore/basic/t/MCOL-5164-report-bad.test new file mode 100644 index 000000000..0b06c4a55 --- /dev/null +++ b/mysql-test/columnstore/basic/t/MCOL-5164-report-bad.test @@ -0,0 +1,35 @@ +if (!$MYSQL_TEST_ROOT){ + skip Should be run by root to execute cpimport; +} + +--source ../include/have_columnstore.inc +--source ../include/check_multinode.inc + +--if ($columnstore_nodes_count != 1) { + --skip This test makes sense when run on a single-node setup +--} + +--disable_warnings +DROP DATABASE IF EXISTS mcol5164rep; +--enable_warnings + +CREATE DATABASE mcol5164rep; +USE mcol5164rep; + +CREATE TABLE t1(col1 INT, col2 VARCHAR(64)) ENGINE=Columnstore; + +--exec mkdir -p /tmp/mtr-mcol5164rep +--exec awk 'BEGIN { for (i = 0; i < 11; i++) { printf "%d,test%d,wrong\n", i, i; }; printf "%d,test%d-good\n", i, i; }' > /tmp/mtr-mcol5164rep/mcol5164rep.csv + +--disable_result_log +--exec $MCS_CPIMPORT -e all -s , -L /tmp/mtr-mcol5164rep mcol5164rep t1 /tmp/mtr-mcol5164rep/mcol5164rep.csv +--enable_result_log +SELECT * FROM t1; +--exec echo Rejected rows: +--exec cat /tmp/mtr-mcol5164rep/mcol5164rep.csv*.bad +--exec rm -f /tmp/mtr-mcol5164rep/mcol5164rep.csv*.err +--exec rm -f 
/tmp/mtr-mcol5164rep/mcol5164rep.csv*.bad + +# Clean UP +--exec rm -rf /tmp/mtr-mcol5164rep +DROP DATABASE mcol5164rep; diff --git a/mysql-test/columnstore/include/check_multinode.inc b/mysql-test/columnstore/include/check_multinode.inc new file mode 100644 index 000000000..a1355900e --- /dev/null +++ b/mysql-test/columnstore/include/check_multinode.inc @@ -0,0 +1,4 @@ +--let CHECK_MULTINODE_RESULT=$MYSQL_TMP_DIR/check_multinode_result.inc +--exec echo "--let columnstore_nodes_count=`mcsGetConfig PrimitiveServers Count`" > $CHECK_MULTINODE_RESULT +--source $CHECK_MULTINODE_RESULT +--remove_file $CHECK_MULTINODE_RESULT From dcc60c96b7a89ecb3039b8f3077f30800aa3e520 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Tue, 15 Jul 2025 15:22:35 +0000 Subject: [PATCH 08/51] Downgrade nlochman json for gcc 8.5 from 3.12 to 3.11.3 --- utils/json/json.hpp | 2837 ++++++++++++++++--------------------------- 1 file changed, 1038 insertions(+), 1799 deletions(-) diff --git a/utils/json/json.hpp b/utils/json/json.hpp index 82d69f7c5..8b72ea653 100644 --- a/utils/json/json.hpp +++ b/utils/json/json.hpp @@ -1,9 +1,9 @@ // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT /****************************************************************************\ @@ -34,10 +34,10 @@ // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -47,10 +47,10 @@ // #include // __ _____ _____ _____ // __| | __| | | | JSON 
for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -59,24 +59,20 @@ #ifndef JSON_SKIP_LIBRARY_VERSION_CHECK #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) - #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 12 || NLOHMANN_JSON_VERSION_PATCH != 0 + #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 3 #warning "Already included a different version of the library!" #endif #endif #endif #define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) -#define NLOHMANN_JSON_VERSION_MINOR 12 // NOLINT(modernize-macro-to-enum) -#define NLOHMANN_JSON_VERSION_PATCH 0 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_PATCH 3 // NOLINT(modernize-macro-to-enum) #ifndef JSON_DIAGNOSTICS #define JSON_DIAGNOSTICS 0 #endif -#ifndef JSON_DIAGNOSTIC_POSITIONS - #define JSON_DIAGNOSTIC_POSITIONS 0 -#endif - #ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 #endif @@ -87,12 +83,6 @@ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS #endif -#if JSON_DIAGNOSTIC_POSITIONS - #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS _dp -#else - #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS -#endif - #if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp #else @@ -104,15 +94,14 @@ #endif // Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b, c) json_abi ## a ## b ## c -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b, c) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, 
b, c) +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) #define NLOHMANN_JSON_ABI_TAGS \ NLOHMANN_JSON_ABI_TAGS_CONCAT( \ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON, \ - NLOHMANN_JSON_ABI_TAG_DIAGNOSTIC_POSITIONS) + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) // Construct the namespace version component #define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ @@ -160,10 +149,10 @@ // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -173,9 +162,6 @@ #include // forward_list #include // inserter, front_inserter, end #include // map -#ifdef JSON_HAS_CPP_17 - #include // optional -#endif #include // string #include // tuple, make_tuple #include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible @@ -186,10 +172,10 @@ // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -206,10 +192,10 @@ // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -222,10 +208,10 @@ // #include 
// __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -234,10 +220,10 @@ // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -247,10 +233,10 @@ // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -334,11 +320,11 @@ NLOHMANN_JSON_NAMESPACE_END // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann -// SPDX-FileCopyrightText: 2016 - 2021 Evan Nemerson +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson // SPDX-License-Identifier: MIT /* Hedley - https://nemequ.github.io/hedley @@ -2398,20 +2384,15 @@ JSON_HEDLEY_DIAGNOSTIC_POP // C++ language standard detection // if the user manually specified the used c++ version this is skipped -#if !defined(JSON_HAS_CPP_23) && !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) - #if (defined(__cplusplus) && __cplusplus > 
202002L) || (defined(_MSVC_LANG) && _MSVC_LANG > 202002L) - #define JSON_HAS_CPP_23 +#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) + #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) #define JSON_HAS_CPP_20 #define JSON_HAS_CPP_17 #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus > 201703L) || (defined(_MSVC_LANG) && _MSVC_LANG > 201703L) - #define JSON_HAS_CPP_20 + #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 #define JSON_HAS_CPP_17 #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus > 201402L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus > 201103L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) #define JSON_HAS_CPP_14 #endif // the cpp 11 flag is always specified because it is the minimal required version @@ -2587,9 +2568,7 @@ JSON_HEDLEY_DIAGNOSTIC_POP template \ inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ { \ - /* NOLINTNEXTLINE(modernize-type-traits) we use C++11 */ \ static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ - /* NOLINTNEXTLINE(modernize-avoid-c-arrays) we don't want to depend on */ \ static const std::pair m[] = __VA_ARGS__; \ auto it = std::find_if(std::begin(m), std::end(m), \ [e](const std::pair& ej_pair) -> bool \ @@ -2601,9 +2580,7 @@ JSON_HEDLEY_DIAGNOSTIC_POP template \ inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ { \ - /* NOLINTNEXTLINE(modernize-type-traits) we use C++11 */ \ static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ - /* NOLINTNEXTLINE(modernize-avoid-c-arrays) we don't want to depend on */ \ static const std::pair 
m[] = __VA_ARGS__; \ auto it = std::find_if(std::begin(m), std::end(m), \ [&j](const std::pair& ej_pair) -> bool \ @@ -2766,146 +2743,42 @@ JSON_HEDLEY_DIAGNOSTIC_POP #define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; #define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); -#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = !nlohmann_json_j.is_null() ? nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1) : nlohmann_json_default_obj.v1; +#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); /*! @brief macro @def NLOHMANN_DEFINE_TYPE_INTRUSIVE @since version 3.9.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_intrusive/ */ #define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ - template::value, int> = 0> \ - friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - template::value, int> = 0> \ - friend void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT -@since version 3.11.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_intrusive/ -*/ #define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) 
\ - template::value, int> = 0> \ - friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - template::value, int> = 0> \ - friend void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE -@since version 3.11.3 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_intrusive/ -*/ #define NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ - template::value, int> = 0> \ - friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } /*! @brief macro @def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE @since version 3.9.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_non_intrusive/ */ #define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) 
\ - template::value, int> = 0> \ - void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - template::value, int> = 0> \ - void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT -@since version 3.11.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_non_intrusive/ -*/ -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ - template::value, int> = 0> \ - void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - template::value, int> = 0> \ - void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } - -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE -@since version 3.11.3 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_type_non_intrusive/ -*/ #define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ - template::value, int> = 0> \ - void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } -/*! 
-@brief macro -@def NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE -@since version 3.12.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ -*/ -#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE(Type, BaseType, ...) \ - template::value, int> = 0> \ - friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - template::value, int> = 0> \ - friend void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } - -/*! -@brief macro -@def NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT -@since version 3.12.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ -*/ -#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_WITH_DEFAULT(Type, BaseType, ...) \ - template::value, int> = 0> \ - friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - template::value, int> = 0> \ - friend void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } - -/*! -@brief macro -@def NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE -@since version 3.12.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ -*/ -#define NLOHMANN_DEFINE_DERIVED_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, BaseType, ...) 
\ - template::value, int> = 0> \ - friend void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } - -/*! -@brief macro -@def NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE -@since version 3.12.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ -*/ -#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE(Type, BaseType, ...) \ - template::value, int> = 0> \ - void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - template::value, int> = 0> \ - void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } - -/*! -@brief macro -@def NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT -@since version 3.12.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ -*/ -#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, BaseType, ...) \ - template::value, int> = 0> \ - void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - template::value, int> = 0> \ - void from_json(const BasicJsonType& nlohmann_json_j, Type& nlohmann_json_t) { nlohmann::from_json(nlohmann_json_j, static_cast(nlohmann_json_t)); const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } - -/*! 
-@brief macro -@def NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE -@since version 3.12.0 -@sa https://json.nlohmann.me/api/macros/nlohmann_define_derived_type/ -*/ -#define NLOHMANN_DEFINE_DERIVED_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, BaseType, ...) \ - template::value, int> = 0> \ - void to_json(BasicJsonType& nlohmann_json_j, const Type& nlohmann_json_t) { nlohmann::to_json(nlohmann_json_j, static_cast(nlohmann_json_t)); NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } // inspired from https://stackoverflow.com/a/26745591 -// allows calling any std function as if (e.g., with begin): +// allows to call any std function as if (e.g. 
with begin): // using std::begin; begin(x); // // it allows using the detected idiom to retrieve the return type @@ -3066,10 +2939,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -3141,10 +3014,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -3183,10 +3056,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-FileCopyrightText: 2018 The Abseil Authors // SPDX-License-Identifier: MIT @@ -3346,7 +3219,7 @@ struct static_const #endif template -constexpr std::array make_array(Args&& ... args) +inline constexpr std::array make_array(Args&& ... 
args) { return std::array {{static_cast(std::forward(args))...}}; } @@ -3357,27 +3230,27 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT #include // numeric_limits -#include // char_traits -#include // tuple #include // false_type, is_constructible, is_integral, is_same, true_type #include // declval +#include // tuple +#include // char_traits // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -3420,7 +3293,7 @@ struct iterator_traits template struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> - : iterator_types + : iterator_types { }; @@ -3442,10 +3315,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -3462,10 +3335,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // 
SPDX-License-Identifier: MIT @@ -3486,10 +3359,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT #ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ @@ -3751,7 +3624,7 @@ struct char_traits : std::char_traits static constexpr int_type eof() noexcept { - return static_cast(std::char_traits::eof()); + return static_cast(EOF); } }; @@ -3775,7 +3648,7 @@ struct char_traits : std::char_traits static constexpr int_type eof() noexcept { - return static_cast(std::char_traits::eof()); + return static_cast(EOF); } }; @@ -3801,19 +3674,19 @@ struct is_default_constructible : std::is_default_constructible {}; template struct is_default_constructible> - : conjunction, is_default_constructible> {}; + : conjunction, is_default_constructible> {}; template struct is_default_constructible> - : conjunction, is_default_constructible> {}; + : conjunction, is_default_constructible> {}; template struct is_default_constructible> - : conjunction...> {}; + : conjunction...> {}; template struct is_default_constructible> - : conjunction...> {}; + : conjunction...> {}; template struct is_constructible : std::is_constructible {}; @@ -4011,8 +3884,8 @@ is_detected::value&& // special case for types like std::filesystem::path whose iterator's value_type are themselves // c.f. 
https://github.com/nlohmann/json/pull/3073 !std::is_same>::value&& -is_complete_type < -detected_t>::value >> + is_complete_type < + detected_t>::value >> { using value_type = range_value_t; @@ -4135,12 +4008,12 @@ using is_usable_as_key_type = typename std::conditional < template> using is_usable_as_basic_json_key_type = typename std::conditional < - is_usable_as_key_type::value - && !is_json_iterator_of::value, - std::true_type, - std::false_type >::type; + is_usable_as_key_type::value + && !is_json_iterator_of::value, + std::true_type, + std::false_type >::type; template using detect_erase_with_key_type = decltype(std::declval().erase(std::declval())); @@ -4274,7 +4147,7 @@ struct value_in_range_of_impl1 }; template -constexpr bool value_in_range_of(T val) +inline constexpr bool value_in_range_of(T val) { return value_in_range_of_impl1::test(val); } @@ -4290,7 +4163,7 @@ namespace impl { template -constexpr bool is_c_string() +inline constexpr bool is_c_string() { using TUnExt = typename std::remove_extent::type; using TUnCVExt = typename std::remove_cv::type; @@ -4318,7 +4191,7 @@ namespace impl { template -constexpr bool is_transparent() +inline constexpr bool is_transparent() { return is_detected::value; } @@ -4337,10 +4210,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -4485,18 +4358,6 @@ inline OutStringType concat(Args && ... args) NLOHMANN_JSON_NAMESPACE_END -// With -Wweak-vtables, Clang will complain about the exception classes as they -// have no out-of-line virtual method definitions and their vtable will be -// emitted in every translation unit. 
This issue cannot be fixed with a -// header-only library as there is no implementation file to move these -// functions to. As a result, we suppress this warning here to avoid client -// code to stumble over this. See https://github.com/nlohmann/json/issues/4087 -// for a discussion. -#if defined(__clang__) - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wweak-vtables" -#endif - NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { @@ -4591,34 +4452,16 @@ class exception : public std::exception { return concat(a, '/', detail::escape(b)); }); - - return concat('(', str, ") ", get_byte_positions(leaf_element)); + return concat('(', str, ") "); #else - return get_byte_positions(leaf_element); + static_cast(leaf_element); + return ""; #endif } private: /// an exception object as storage for error messages std::runtime_error m; -#if JSON_DIAGNOSTIC_POSITIONS - template - static std::string get_byte_positions(const BasicJsonType* leaf_element) - { - if ((leaf_element->start_pos() != std::string::npos) && (leaf_element->end_pos() != std::string::npos)) - { - return concat("(bytes ", std::to_string(leaf_element->start_pos()), "-", std::to_string(leaf_element->end_pos()), ") "); - } - return ""; - } -#else - template - static std::string get_byte_positions(const BasicJsonType* leaf_element) - { - static_cast(leaf_element); - return ""; - } -#endif }; /// @brief exception indicating a parse error @@ -4746,10 +4589,6 @@ class other_error : public exception } // namespace detail NLOHMANN_JSON_NAMESPACE_END -#if defined(__clang__) - #pragma clang diagnostic pop -#endif - // #include // #include @@ -4757,10 +4596,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels 
Lohmann // SPDX-License-Identifier: MIT @@ -4781,10 +4620,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -4801,7 +4640,7 @@ namespace std_fs = std::experimental::filesystem; } // namespace detail NLOHMANN_JSON_NAMESPACE_END #elif JSON_HAS_FILESYSTEM -#include // NOLINT(build/c++17) +#include NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { @@ -4831,24 +4670,6 @@ inline void from_json(const BasicJsonType& j, typename std::nullptr_t& n) n = nullptr; } -#ifdef JSON_HAS_CPP_17 -#ifndef JSON_USE_IMPLICIT_CONVERSIONS -template -void from_json(const BasicJsonType& j, std::optional& opt) -{ - if (j.is_null()) - { - opt = std::nullopt; - } - else - { - opt.emplace(j.template get()); - } -} - -#endif // JSON_USE_IMPLICIT_CONVERSIONS -#endif // JSON_HAS_CPP_17 - // overloads for basic_json template parameters template < typename BasicJsonType, typename ArithmeticType, enable_if_t < std::is_arithmetic::value&& @@ -4996,54 +4817,6 @@ auto from_json(const BasicJsonType& j, T (&arr)[N]) // NOLINT(cppcoreguidelines } } -template -auto from_json(const BasicJsonType& j, T (&arr)[N1][N2]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) --> decltype(j.template get(), void()) -{ - for (std::size_t i1 = 0; i1 < N1; ++i1) - { - for (std::size_t i2 = 0; i2 < N2; ++i2) - { - arr[i1][i2] = j.at(i1).at(i2).template get(); - } - } -} - -template -auto from_json(const BasicJsonType& j, T (&arr)[N1][N2][N3]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) --> decltype(j.template get(), void()) -{ - for (std::size_t i1 = 0; i1 < N1; ++i1) - { - for (std::size_t i2 = 0; i2 < 
N2; ++i2) - { - for (std::size_t i3 = 0; i3 < N3; ++i3) - { - arr[i1][i2][i3] = j.at(i1).at(i2).at(i3).template get(); - } - } - } -} - -template -auto from_json(const BasicJsonType& j, T (&arr)[N1][N2][N3][N4]) // NOLINT(cppcoreguidelines-avoid-c-arrays,hicpp-avoid-c-arrays,modernize-avoid-c-arrays) --> decltype(j.template get(), void()) -{ - for (std::size_t i1 = 0; i1 < N1; ++i1) - { - for (std::size_t i2 = 0; i2 < N2; ++i2) - { - for (std::size_t i3 = 0; i3 < N3; ++i3) - { - for (std::size_t i4 = 0; i4 < N4; ++i4) - { - arr[i1][i2][i3][i4] = j.at(i1).at(i2).at(i3).at(i4).template get(); - } - } - } - } -} - template inline void from_json_array_impl(const BasicJsonType& j, typename BasicJsonType::array_t& arr, priority_tag<3> /*unused*/) { @@ -5129,7 +4902,7 @@ void()) template < typename BasicJsonType, typename T, std::size_t... Idx > std::array from_json_inplace_array_impl(BasicJsonType&& j, - identity_tag> /*unused*/, index_sequence /*unused*/) + identity_tag> /*unused*/, index_sequence /*unused*/) { return { { std::forward(j).at(Idx).template get()... 
} }; } @@ -5233,12 +5006,6 @@ std::tuple from_json_tuple_impl_base(BasicJsonType&& j, index_sequence< return std::make_tuple(std::forward(j).at(Idx).template get()...); } -template -std::tuple<> from_json_tuple_impl_base(BasicJsonType& /*unused*/, index_sequence<> /*unused*/) -{ - return {}; -} - template < typename BasicJsonType, class A1, class A2 > std::pair from_json_tuple_impl(BasicJsonType&& j, identity_tag> /*unused*/, priority_tag<0> /*unused*/) { @@ -5324,12 +5091,7 @@ inline void from_json(const BasicJsonType& j, std_fs::path& p) { JSON_THROW(type_error::create(302, concat("type must be string, but is ", j.type_name()), &j)); } - const auto& s = *j.template get_ptr(); -#ifdef JSON_HAS_CPP_20 - p = std_fs::path(std::u8string_view(reinterpret_cast(s.data()), s.size())); -#else - p = std_fs::u8path(s); // accepts UTF-8 encoded std::string in C++17, deprecated in C++20 -#endif + p = *j.template get_ptr(); } #endif @@ -5364,20 +5126,14 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT -// #include -// JSON_HAS_CPP_17 -#ifdef JSON_HAS_CPP_17 - #include // optional -#endif - #include // copy #include // begin, end #include // string @@ -5390,16 +5146,17 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT #include // size_t -#include // forward_iterator_tag +#include // input_iterator_tag +#include // 
string, to_string #include // tuple_size, get, tuple_element #include // move @@ -5411,46 +5168,6 @@ NLOHMANN_JSON_NAMESPACE_END // #include -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // size_t -#include // string, to_string - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -template -void int_to_string(StringType& target, std::size_t value) -{ - // For ADL - using std::to_string; - target = to_string(value); -} - -template -StringType to_string(std::size_t value) -{ - StringType result; - int_to_string(result, value); - return result; -} - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - // #include @@ -5458,6 +5175,13 @@ NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { +template +void int_to_string( string_type& target, std::size_t value ) +{ + // For ADL + using std::to_string; + target = to_string(value); +} template class iteration_proxy_value { public: @@ -5465,7 +5189,7 @@ template class iteration_proxy_value using value_type = iteration_proxy_value; using pointer = value_type *; using reference = value_type &; - using iterator_category = std::forward_iterator_tag; + using iterator_category = std::input_iterator_tag; using string_type = typename std::remove_cv< typename std::remove_reference().key() ) >::type >::type; private: @@ -5645,7 +5369,7 @@ namespace std #endif template class tuple_size<::nlohmann::detail::iteration_proxy_value> // NOLINT(cert-dcl58-cpp) - : public std::integral_constant {}; + : public std::integral_constant {}; template class tuple_element> // NOLINT(cert-dcl58-cpp) @@ -5666,6 +5390,8 @@ class tuple_element> inline constexpr bool ::std::ranges::enable_borrowed_range<::nlohmann::detail::iteration_proxy> = true; #endif +// #include + // #include // #include @@ 
-5911,22 +5637,6 @@ struct external_constructor // to_json // ///////////// -#ifdef JSON_HAS_CPP_17 -template::value, int> = 0> -void to_json(BasicJsonType& j, const std::optional& opt) -{ - if (opt.has_value()) - { - j = *opt; - } - else - { - j = nullptr; - } -} -#endif - template::value, int> = 0> inline void to_json(BasicJsonType& j, T b) noexcept @@ -5987,8 +5697,7 @@ template::type; - static constexpr value_t integral_value_t = std::is_unsigned::value ? value_t::number_unsigned : value_t::number_integer; - external_constructor::construct(j, static_cast(e)); + external_constructor::construct(j, static_cast(e)); } #endif // JSON_DISABLE_ENUM_SERIALIZATION @@ -6073,13 +5782,6 @@ inline void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence< j = { std::get(t)... }; } -template -inline void to_json_tuple_impl(BasicJsonType& j, const Tuple& /*unused*/, index_sequence<> /*unused*/) -{ - using array_t = typename BasicJsonType::array_t; - j = array_t(); -} - template::value, int > = 0> inline void to_json(BasicJsonType& j, const T& t) { @@ -6090,12 +5792,7 @@ inline void to_json(BasicJsonType& j, const T& t) template inline void to_json(BasicJsonType& j, const std_fs::path& p) { -#ifdef JSON_HAS_CPP_20 - const std::u8string s = p.u8string(); - j = std::string(s.begin(), s.end()); -#else - j = p.u8string(); // returns std::string in C++17 -#endif + j = p.string(); } #endif @@ -6170,10 +5867,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -6282,10 +5979,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | 
| | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -6415,10 +6112,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -6435,19 +6132,16 @@ NLOHMANN_JSON_NAMESPACE_END #include // char_traits, string #include // make_pair, move #include // vector -#ifdef __cpp_lib_byteswap - #include //byteswap -#endif // #include // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -6467,8 +6161,6 @@ NLOHMANN_JSON_NAMESPACE_END #include // istream #endif // JSON_NO_IO -// #include - // #include // #include @@ -6516,13 +6208,6 @@ class file_input_adapter return std::fgetc(m_file); } - // returns the number of characters successfully read - template - std::size_t get_elements(T* dest, std::size_t count = 1) - { - return fread(dest, 1, sizeof(T) * count, m_file); - } - private: /// the file pointer to read from std::FILE* m_file; @@ -6582,17 +6267,6 @@ class input_stream_adapter return res; } - template - std::size_t get_elements(T* dest, std::size_t count = 1) - { - auto res = static_cast(sb->sgetn(reinterpret_cast(dest), static_cast(count * sizeof(T)))); - if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T))) - { - is->clear(is->rdstate() | std::ios::eofbit); - } - return res; 
- } - private: /// the associated input stream std::istream* is = nullptr; @@ -6624,26 +6298,6 @@ class iterator_input_adapter return char_traits::eof(); } - // for general iterators, we cannot really do something better than falling back to processing the range one-by-one - template - std::size_t get_elements(T* dest, std::size_t count = 1) - { - auto* ptr = reinterpret_cast(dest); - for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index) - { - if (JSON_HEDLEY_LIKELY(current != end)) - { - ptr[read_index] = static_cast(*current); - std::advance(current, 1); - } - else - { - return read_index; - } - } - return count * sizeof(T); - } - private: IteratorType current; IteratorType end; @@ -6807,13 +6461,6 @@ class wide_string_input_adapter return utf8_bytes[utf8_bytes_index++]; } - // parsing binary with wchar doesn't make sense, but since the parsing mode can be runtime, we need something here - template - std::size_t get_elements(T* /*dest*/, std::size_t /*count*/ = 1) - { - JSON_THROW(parse_error::create(112, 1, "wide string type cannot be interpreted as binary data", nullptr)); - } - private: BaseInputAdapter base_adapter; @@ -6910,17 +6557,10 @@ typename container_input_adapter_factory_impl::container_input_adapter_factory::create(container); } -// specialization for std::string -using string_input_adapter_type = decltype(input_adapter(std::declval())); - #ifndef JSON_NO_IO // Special cases with fast paths inline file_input_adapter input_adapter(std::FILE* file) { - if (file == nullptr) - { - JSON_THROW(parse_error::create(101, 0, "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); - } return file_input_adapter(file); } @@ -6947,13 +6587,9 @@ template < typename CharT, int >::type = 0 > contiguous_bytes_input_adapter input_adapter(CharT b) { - if (b == nullptr) - { - JSON_THROW(parse_error::create(101, 0, "attempting to parse an empty input; check that your input string or 
stream contains the expected JSON", nullptr)); - } auto length = std::strlen(reinterpret_cast(b)); const auto* ptr = reinterpret_cast(b); - return input_adapter(ptr, ptr + length); // cppcheck-suppress[nullPointerArithmeticRedundantCheck] + return input_adapter(ptr, ptr + length); } template @@ -6999,29 +6635,742 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT #include #include // string -#include // enable_if_t #include // move #include // vector // #include +// #include + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +/*! +@brief SAX interface + +This class describes the SAX interface used by @ref nlohmann::json::sax_parse. +Each function is called in different situations while the input is parsed. The +boolean return value informs the parser whether to continue processing the +input. +*/ +template +struct json_sax +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + + /*! + @brief a null value was read + @return whether parsing should proceed + */ + virtual bool null() = 0; + + /*! + @brief a boolean value was read + @param[in] val boolean value + @return whether parsing should proceed + */ + virtual bool boolean(bool val) = 0; + + /*! + @brief an integer number was read + @param[in] val integer value + @return whether parsing should proceed + */ + virtual bool number_integer(number_integer_t val) = 0; + + /*! 
+ @brief an unsigned integer number was read + @param[in] val unsigned integer value + @return whether parsing should proceed + */ + virtual bool number_unsigned(number_unsigned_t val) = 0; + + /*! + @brief a floating-point number was read + @param[in] val floating-point value + @param[in] s raw token value + @return whether parsing should proceed + */ + virtual bool number_float(number_float_t val, const string_t& s) = 0; + + /*! + @brief a string value was read + @param[in] val string value + @return whether parsing should proceed + @note It is safe to move the passed string value. + */ + virtual bool string(string_t& val) = 0; + + /*! + @brief a binary value was read + @param[in] val binary value + @return whether parsing should proceed + @note It is safe to move the passed binary value. + */ + virtual bool binary(binary_t& val) = 0; + + /*! + @brief the beginning of an object was read + @param[in] elements number of object elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_object(std::size_t elements) = 0; + + /*! + @brief an object key was read + @param[in] val object key + @return whether parsing should proceed + @note It is safe to move the passed string. + */ + virtual bool key(string_t& val) = 0; + + /*! + @brief the end of an object was read + @return whether parsing should proceed + */ + virtual bool end_object() = 0; + + /*! + @brief the beginning of an array was read + @param[in] elements number of array elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_array(std::size_t elements) = 0; + + /*! + @brief the end of an array was read + @return whether parsing should proceed + */ + virtual bool end_array() = 0; + + /*! 
+ @brief a parse error occurred + @param[in] position the position in the input where the error occurs + @param[in] last_token the last read token + @param[in] ex an exception object describing the error + @return whether parsing should proceed (must return false) + */ + virtual bool parse_error(std::size_t position, + const std::string& last_token, + const detail::exception& ex) = 0; + + json_sax() = default; + json_sax(const json_sax&) = default; + json_sax(json_sax&&) noexcept = default; + json_sax& operator=(const json_sax&) = default; + json_sax& operator=(json_sax&&) noexcept = default; + virtual ~json_sax() = default; +}; + +namespace detail +{ +/*! +@brief SAX implementation to create a JSON value from SAX events + +This class implements the @ref json_sax interface and processes the SAX events +to create a JSON value which makes it basically a DOM parser. The structure or +hierarchy of the JSON value is managed by the stack `ref_stack` which contains +a pointer to the respective array or object for each recursion depth. + +After successful parsing, the value that is passed by reference to the +constructor contains the parsed value. + +@tparam BasicJsonType the JSON type +*/ +template +class json_sax_dom_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + + /*! 
+ @param[in,out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) + : root(r), allow_exceptions(allow_exceptions_) + {} + + // make class move-only + json_sax_dom_parser(const json_sax_dom_parser&) = delete; + json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; + json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_parser() = default; + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } + + bool start_object(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + + return true; + } + + bool key(string_t& val) + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); + + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); + return true; + } + + bool end_object() + { + 
JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); + + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } + + return true; + } + + bool end_array() + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_array()); + + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } + + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast(ex); + if (allow_exceptions) + { + JSON_THROW(ex); + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. 
+ @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + JSON_HEDLEY_RETURNS_NON_NULL + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType(std::forward(v)); + return &root; + } + + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); + return &(ref_stack.back()->m_data.m_value.array->back()); + } + + JSON_ASSERT(ref_stack.back()->is_object()); + JSON_ASSERT(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack {}; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; +}; + +template +class json_sax_dom_callback_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using parser_callback_t = typename BasicJsonType::parser_callback_t; + using parse_event_t = typename BasicJsonType::parse_event_t; + + json_sax_dom_callback_parser(BasicJsonType& r, + const parser_callback_t cb, + const bool allow_exceptions_ = true) + : root(r), callback(cb), allow_exceptions(allow_exceptions_) + { + keep_stack.push_back(true); + } + + // make class move-only + json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = 
default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_callback_parser() = default; + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } + + bool start_object(std::size_t len) + { + // check callback for object start + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); + keep_stack.push_back(keep); + + auto val = handle_value(BasicJsonType::value_t::object, true); + ref_stack.push_back(val.second); + + // check object limit + if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } + + return true; + } + + bool key(string_t& val) + { + BasicJsonType k = BasicJsonType(val); + + // check callback for key + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); + key_keep_stack.push_back(keep); + + // add discarded value at given key and store the reference for later + if (keep && ref_stack.back()) + { + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); + } + + return true; + } + 
+ bool end_object() + { + if (ref_stack.back()) + { + if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) + { + // discard object + *ref_stack.back() = discarded; + } + else + { + ref_stack.back()->set_parents(); + } + } + + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) + { + // remove discarded value + for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) + { + if (it->is_discarded()) + { + ref_stack.back()->erase(it); + break; + } + } + } + + return true; + } + + bool start_array(std::size_t len) + { + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); + keep_stack.push_back(keep); + + auto val = handle_value(BasicJsonType::value_t::array, true); + ref_stack.push_back(val.second); + + // check array limit + if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } + + return true; + } + + bool end_array() + { + bool keep = true; + + if (ref_stack.back()) + { + keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); + if (keep) + { + ref_stack.back()->set_parents(); + } + else + { + // discard array + *ref_stack.back() = discarded; + } + } + + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + // remove discarded value + if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->pop_back(); + } + + return true; + } + + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast(ex); + 
if (allow_exceptions) + { + JSON_THROW(ex); + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + /*! + @param[in] v value to add to the JSON value we build during parsing + @param[in] skip_callback whether we should skip calling the callback + function; this is required after start_array() and + start_object() SAX events, because otherwise we would call the + callback function with an empty array or object, respectively. + + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + + @return pair of boolean (whether value should be kept) and pointer (to the + passed value in the ref_stack hierarchy; nullptr if not kept) + */ + template + std::pair handle_value(Value&& v, const bool skip_callback = false) + { + JSON_ASSERT(!keep_stack.empty()); + + // do not handle this value if we know it would be added to a discarded + // container + if (!keep_stack.back()) + { + return {false, nullptr}; + } + + // create value + auto value = BasicJsonType(std::forward(v)); + + // check callback + const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); + + // do not handle this value if we just learnt it shall be discarded + if (!keep) + { + return {false, nullptr}; + } + + if (ref_stack.empty()) + { + root = std::move(value); + return {true, & root}; + } + + // skip this value if we already decided to skip the parent + // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) + if (!ref_stack.back()) + { + return {false, nullptr}; + } + + // we now only expect arrays and objects + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + // array + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); + return {true, & 
(ref_stack.back()->m_data.m_value.array->back())}; + } + + // object + JSON_ASSERT(ref_stack.back()->is_object()); + // check if we should store an element for the current key + JSON_ASSERT(!key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + key_keep_stack.pop_back(); + + if (!store_element) + { + return {false, nullptr}; + } + + JSON_ASSERT(object_element); + *object_element = std::move(value); + return {true, object_element}; + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack {}; + /// stack to manage which values to keep + std::vector keep_stack {}; + /// stack to manage which object keys to keep + std::vector key_keep_stack {}; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// callback function + const parser_callback_t callback = nullptr; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// a discarded value for the callback + BasicJsonType discarded = BasicJsonType::value_t::discarded; +}; + +template +class json_sax_acceptor +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + + bool null() + { + return true; + } + + bool boolean(bool /*unused*/) + { + return true; + } + + bool number_integer(number_integer_t /*unused*/) + { + return true; + } + + bool number_unsigned(number_unsigned_t /*unused*/) + { + return true; + } + + bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) + { + return true; + } + + bool string(string_t& /*unused*/) + { + return true; + } + + bool binary(binary_t& /*unused*/) + { + return 
true; + } + + bool start_object(std::size_t /*unused*/ = static_cast(-1)) + { + return true; + } + + bool key(string_t& /*unused*/) + { + return true; + } + + bool end_object() + { + return true; + } + + bool start_array(std::size_t /*unused*/ = static_cast(-1)) + { + return true; + } + + bool end_array() + { + return true; + } + + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) + { + return false; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -7989,7 +8338,7 @@ class lexer : public lexer_base locale's decimal point is used instead of `.` to work with the locale-dependent converters. */ - token_type scan_number() // lgtm [cpp/use-of-goto] `goto` is used in this function to implement the number-parsing state machine described above. By design, any finite input will eventually reach the "done" state or return token_type::parse_error. In each intermediate state, 1 byte of the input is appended to the token_buffer vector, and only the already initialized variables token_buffer, number_type, and error_message are manipulated. 
+ token_type scan_number() // lgtm [cpp/use-of-goto] { // reset token_buffer to store the number's bytes reset(); @@ -8071,7 +8420,6 @@ scan_number_zero: case '.': { add(decimal_point_char); - decimal_point_position = token_buffer.size() - 1; goto scan_number_decimal1; } @@ -8108,7 +8456,6 @@ scan_number_any1: case '.': { add(decimal_point_char); - decimal_point_position = token_buffer.size() - 1; goto scan_number_decimal1; } @@ -8269,7 +8616,7 @@ scan_number_done: // we are done scanning a number) unget(); - char* endptr = nullptr; // NOLINT(misc-const-correctness,cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) errno = 0; // try to parse integers first and fall back to floats @@ -8280,7 +8627,7 @@ scan_number_done: // we checked the number format before JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - if (errno != ERANGE) + if (errno == 0) { value_unsigned = static_cast(x); if (value_unsigned == x) @@ -8296,7 +8643,7 @@ scan_number_done: // we checked the number format before JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - if (errno != ERANGE) + if (errno == 0) { value_integer = static_cast(x); if (value_integer == x) @@ -8346,7 +8693,6 @@ scan_number_done: { token_buffer.clear(); token_string.clear(); - decimal_point_position = std::string::npos; token_string.push_back(char_traits::to_char_type(current)); } @@ -8455,11 +8801,6 @@ scan_number_done: /// return current string value (implicitly resets the token; useful only once) string_t& get_string() { - // translate decimal points from locale back to '.' (#4084) - if (decimal_point_char != '.' 
&& decimal_point_position != std::string::npos) - { - token_buffer[decimal_point_position] = '.'; - } return token_buffer; } @@ -8657,8 +8998,6 @@ scan_number_done: /// the decimal point const char_int_type decimal_point_char = '.'; - /// the position of the decimal point in the input - std::size_t decimal_point_position = std::string::npos; }; } // namespace detail @@ -8666,986 +9005,13 @@ NLOHMANN_JSON_NAMESPACE_END // #include -// #include - -NLOHMANN_JSON_NAMESPACE_BEGIN - -/*! -@brief SAX interface - -This class describes the SAX interface used by @ref nlohmann::json::sax_parse. -Each function is called in different situations while the input is parsed. The -boolean return value informs the parser whether to continue processing the -input. -*/ -template -struct json_sax -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - /*! - @brief a null value was read - @return whether parsing should proceed - */ - virtual bool null() = 0; - - /*! - @brief a boolean value was read - @param[in] val boolean value - @return whether parsing should proceed - */ - virtual bool boolean(bool val) = 0; - - /*! - @brief an integer number was read - @param[in] val integer value - @return whether parsing should proceed - */ - virtual bool number_integer(number_integer_t val) = 0; - - /*! - @brief an unsigned integer number was read - @param[in] val unsigned integer value - @return whether parsing should proceed - */ - virtual bool number_unsigned(number_unsigned_t val) = 0; - - /*! - @brief a floating-point number was read - @param[in] val floating-point value - @param[in] s raw token value - @return whether parsing should proceed - */ - virtual bool number_float(number_float_t val, const string_t& s) = 0; - - /*! 
- @brief a string value was read - @param[in] val string value - @return whether parsing should proceed - @note It is safe to move the passed string value. - */ - virtual bool string(string_t& val) = 0; - - /*! - @brief a binary value was read - @param[in] val binary value - @return whether parsing should proceed - @note It is safe to move the passed binary value. - */ - virtual bool binary(binary_t& val) = 0; - - /*! - @brief the beginning of an object was read - @param[in] elements number of object elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_object(std::size_t elements) = 0; - - /*! - @brief an object key was read - @param[in] val object key - @return whether parsing should proceed - @note It is safe to move the passed string. - */ - virtual bool key(string_t& val) = 0; - - /*! - @brief the end of an object was read - @return whether parsing should proceed - */ - virtual bool end_object() = 0; - - /*! - @brief the beginning of an array was read - @param[in] elements number of array elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_array(std::size_t elements) = 0; - - /*! - @brief the end of an array was read - @return whether parsing should proceed - */ - virtual bool end_array() = 0; - - /*! 
- @brief a parse error occurred - @param[in] position the position in the input where the error occurs - @param[in] last_token the last read token - @param[in] ex an exception object describing the error - @return whether parsing should proceed (must return false) - */ - virtual bool parse_error(std::size_t position, - const std::string& last_token, - const detail::exception& ex) = 0; - - json_sax() = default; - json_sax(const json_sax&) = default; - json_sax(json_sax&&) noexcept = default; - json_sax& operator=(const json_sax&) = default; - json_sax& operator=(json_sax&&) noexcept = default; - virtual ~json_sax() = default; -}; - -namespace detail -{ -constexpr std::size_t unknown_size() -{ - return (std::numeric_limits::max)(); -} - -/*! -@brief SAX implementation to create a JSON value from SAX events - -This class implements the @ref json_sax interface and processes the SAX events -to create a JSON value which makes it basically a DOM parser. The structure or -hierarchy of the JSON value is managed by the stack `ref_stack` which contains -a pointer to the respective array or object for each recursion depth. - -After successful parsing, the value that is passed by reference to the -constructor contains the parsed value. - -@tparam BasicJsonType the JSON type -*/ -template -class json_sax_dom_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using lexer_t = lexer; - - /*! 
- @param[in,out] r reference to a JSON value that is manipulated while - parsing - @param[in] allow_exceptions_ whether parse errors yield exceptions - */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) - : root(r), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) - {} - - // make class move-only - json_sax_dom_parser(const json_sax_dom_parser&) = delete; - json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; - json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_parser() = default; - - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } - - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } - - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } - - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } - - bool string(string_t& val) - { - handle_value(val); - return true; - } - - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } - - bool start_object(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); - -#if JSON_DIAGNOSTIC_POSITIONS - // Manually set the start position of the object here. - // Ensure this is after the call to handle_value to ensure correct start position. - if (m_lexer_ref) - { - // Lexer has read the first character of the object, so - // subtract 1 from the position to get the correct start position. 
- ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; - } -#endif - - if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } - - return true; - } - - bool key(string_t& val) - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_object()); - - // add null at given key and store the reference for later - object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); - return true; - } - - bool end_object() - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_object()); - -#if JSON_DIAGNOSTIC_POSITIONS - if (m_lexer_ref) - { - // Lexer's position is past the closing brace, so set that as the end position. - ref_stack.back()->end_position = m_lexer_ref->get_position(); - } -#endif - - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } - - bool start_array(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); - -#if JSON_DIAGNOSTIC_POSITIONS - // Manually set the start position of the array here. - // Ensure this is after the call to handle_value to ensure correct start position. - if (m_lexer_ref) - { - ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; - } -#endif - - if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); - } - - return true; - } - - bool end_array() - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_array()); - -#if JSON_DIAGNOSTIC_POSITIONS - if (m_lexer_ref) - { - // Lexer's position is past the closing bracket, so set that as the end position. 
- ref_stack.back()->end_position = m_lexer_ref->get_position(); - } -#endif - - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } - - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } - - constexpr bool is_errored() const - { - return errored; - } - - private: - -#if JSON_DIAGNOSTIC_POSITIONS - void handle_diagnostic_positions_for_json_value(BasicJsonType& v) - { - if (m_lexer_ref) - { - // Lexer has read past the current field value, so set the end position to the current position. - // The start position will be set below based on the length of the string representation - // of the value. - v.end_position = m_lexer_ref->get_position(); - - switch (v.type()) - { - case value_t::boolean: - { - // 4 and 5 are the string length of "true" and "false" - v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); - break; - } - - case value_t::null: - { - // 4 is the string length of "null" - v.start_position = v.end_position - 4; - break; - } - - case value_t::string: - { - // include the length of the quotes, which is 2 - v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; - break; - } - - // As we handle the start and end positions for values created during parsing, - // we do not expect the following value type to be called. Regardless, set the positions - // in case this is created manually or through a different constructor. Exclude from lcov - // since the exact condition of this switch is esoteric. 
- // LCOV_EXCL_START - case value_t::discarded: - { - v.end_position = std::string::npos; - v.start_position = v.end_position; - break; - } - // LCOV_EXCL_STOP - case value_t::binary: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::number_float: - { - v.start_position = v.end_position - m_lexer_ref->get_string().size(); - break; - } - case value_t::object: - case value_t::array: - { - // object and array are handled in start_object() and start_array() handlers - // skip setting the values here. - break; - } - default: // LCOV_EXCL_LINE - // Handle all possible types discretely, default handler should never be reached. - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE - } - } - } -#endif - - /*! - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - */ - template - JSON_HEDLEY_RETURNS_NON_NULL - BasicJsonType* handle_value(Value&& v) - { - if (ref_stack.empty()) - { - root = BasicJsonType(std::forward(v)); - -#if JSON_DIAGNOSTIC_POSITIONS - handle_diagnostic_positions_for_json_value(root); -#endif - - return &root; - } - - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); - - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); - -#if JSON_DIAGNOSTIC_POSITIONS - handle_diagnostic_positions_for_json_value(ref_stack.back()->m_data.m_value.array->back()); -#endif - - return &(ref_stack.back()->m_data.m_value.array->back()); - } - - JSON_ASSERT(ref_stack.back()->is_object()); - JSON_ASSERT(object_element); - *object_element = BasicJsonType(std::forward(v)); - -#if JSON_DIAGNOSTIC_POSITIONS - handle_diagnostic_positions_for_json_value(*object_element); -#endif - - return object_element; - } - - /// the parsed JSON value - BasicJsonType& root; - 
/// stack to model hierarchy of values - std::vector ref_stack {}; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; - /// the lexer reference to obtain the current position - lexer_t* m_lexer_ref = nullptr; -}; - -template -class json_sax_dom_callback_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using parser_callback_t = typename BasicJsonType::parser_callback_t; - using parse_event_t = typename BasicJsonType::parse_event_t; - using lexer_t = lexer; - - json_sax_dom_callback_parser(BasicJsonType& r, - parser_callback_t cb, - const bool allow_exceptions_ = true, - lexer_t* lexer_ = nullptr) - : root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_) - { - keep_stack.push_back(true); - } - - // make class move-only - json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_callback_parser() = default; - - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } - - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } - - bool 
number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } - - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } - - bool string(string_t& val) - { - handle_value(val); - return true; - } - - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } - - bool start_object(std::size_t len) - { - // check callback for object start - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::object, true); - ref_stack.push_back(val.second); - - if (ref_stack.back()) - { - -#if JSON_DIAGNOSTIC_POSITIONS - // Manually set the start position of the object here. - // Ensure this is after the call to handle_value to ensure correct start position. - if (m_lexer_ref) - { - // Lexer has read the first character of the object, so - // subtract 1 from the position to get the correct start position. 
- ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; - } -#endif - - // check object limit - if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } - } - return true; - } - - bool key(string_t& val) - { - BasicJsonType k = BasicJsonType(val); - - // check callback for key - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); - key_keep_stack.push_back(keep); - - // add discarded value at given key and store the reference for later - if (keep && ref_stack.back()) - { - object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); - } - - return true; - } - - bool end_object() - { - if (ref_stack.back()) - { - if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) - { - // discard object - *ref_stack.back() = discarded; - -#if JSON_DIAGNOSTIC_POSITIONS - // Set start/end positions for discarded object. - handle_diagnostic_positions_for_json_value(*ref_stack.back()); -#endif - } - else - { - -#if JSON_DIAGNOSTIC_POSITIONS - if (m_lexer_ref) - { - // Lexer's position is past the closing brace, so set that as the end position. 
- ref_stack.back()->end_position = m_lexer_ref->get_position(); - } -#endif - - ref_stack.back()->set_parents(); - } - } - - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) - { - // remove discarded value - for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) - { - if (it->is_discarded()) - { - ref_stack.back()->erase(it); - break; - } - } - } - - return true; - } - - bool start_array(std::size_t len) - { - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::array, true); - ref_stack.push_back(val.second); - - if (ref_stack.back()) - { - -#if JSON_DIAGNOSTIC_POSITIONS - // Manually set the start position of the array here. - // Ensure this is after the call to handle_value to ensure correct start position. - if (m_lexer_ref) - { - // Lexer has read the first character of the array, so - // subtract 1 from the position to get the correct start position. - ref_stack.back()->start_position = m_lexer_ref->get_position() - 1; - } -#endif - - // check array limit - if (JSON_HEDLEY_UNLIKELY(len != detail::unknown_size() && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); - } - } - - return true; - } - - bool end_array() - { - bool keep = true; - - if (ref_stack.back()) - { - keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); - if (keep) - { - -#if JSON_DIAGNOSTIC_POSITIONS - if (m_lexer_ref) - { - // Lexer's position is past the closing bracket, so set that as the end position. 
- ref_stack.back()->end_position = m_lexer_ref->get_position(); - } -#endif - - ref_stack.back()->set_parents(); - } - else - { - // discard array - *ref_stack.back() = discarded; - -#if JSON_DIAGNOSTIC_POSITIONS - // Set start/end positions for discarded array. - handle_diagnostic_positions_for_json_value(*ref_stack.back()); -#endif - } - } - - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - // remove discarded value - if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->pop_back(); - } - - return true; - } - - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } - - constexpr bool is_errored() const - { - return errored; - } - - private: - -#if JSON_DIAGNOSTIC_POSITIONS - void handle_diagnostic_positions_for_json_value(BasicJsonType& v) - { - if (m_lexer_ref) - { - // Lexer has read past the current field value, so set the end position to the current position. - // The start position will be set below based on the length of the string representation - // of the value. - v.end_position = m_lexer_ref->get_position(); - - switch (v.type()) - { - case value_t::boolean: - { - // 4 and 5 are the string length of "true" and "false" - v.start_position = v.end_position - (v.m_data.m_value.boolean ? 
4 : 5); - break; - } - - case value_t::null: - { - // 4 is the string length of "null" - v.start_position = v.end_position - 4; - break; - } - - case value_t::string: - { - // include the length of the quotes, which is 2 - v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; - break; - } - - case value_t::discarded: - { - v.end_position = std::string::npos; - v.start_position = v.end_position; - break; - } - - case value_t::binary: - case value_t::number_integer: - case value_t::number_unsigned: - case value_t::number_float: - { - v.start_position = v.end_position - m_lexer_ref->get_string().size(); - break; - } - - case value_t::object: - case value_t::array: - { - // object and array are handled in start_object() and start_array() handlers - // skip setting the values here. - break; - } - default: // LCOV_EXCL_LINE - // Handle all possible types discretely, default handler should never be reached. - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert,-warnings-as-errors) LCOV_EXCL_LINE - } - } - } -#endif - - /*! - @param[in] v value to add to the JSON value we build during parsing - @param[in] skip_callback whether we should skip calling the callback - function; this is required after start_array() and - start_object() SAX events, because otherwise we would call the - callback function with an empty array or object, respectively. - - @invariant If the ref stack is empty, then the passed value will be the new - root. 
- @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - - @return pair of boolean (whether value should be kept) and pointer (to the - passed value in the ref_stack hierarchy; nullptr if not kept) - */ - template - std::pair handle_value(Value&& v, const bool skip_callback = false) - { - JSON_ASSERT(!keep_stack.empty()); - - // do not handle this value if we know it would be added to a discarded - // container - if (!keep_stack.back()) - { - return {false, nullptr}; - } - - // create value - auto value = BasicJsonType(std::forward(v)); - -#if JSON_DIAGNOSTIC_POSITIONS - handle_diagnostic_positions_for_json_value(value); -#endif - - // check callback - const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); - - // do not handle this value if we just learnt it shall be discarded - if (!keep) - { - return {false, nullptr}; - } - - if (ref_stack.empty()) - { - root = std::move(value); - return {true, & root}; - } - - // skip this value if we already decided to skip the parent - // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) - if (!ref_stack.back()) - { - return {false, nullptr}; - } - - // we now only expect arrays and objects - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); - - // array - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); - return {true, & (ref_stack.back()->m_data.m_value.array->back())}; - } - - // object - JSON_ASSERT(ref_stack.back()->is_object()); - // check if we should store an element for the current key - JSON_ASSERT(!key_keep_stack.empty()); - const bool store_element = key_keep_stack.back(); - key_keep_stack.pop_back(); - - if (!store_element) - { - return {false, nullptr}; - } - - JSON_ASSERT(object_element); - *object_element = std::move(value); - return {true, object_element}; - } - - /// the parsed JSON value - 
BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// stack to manage which values to keep - std::vector keep_stack {}; // NOLINT(readability-redundant-member-init) - /// stack to manage which object keys to keep - std::vector key_keep_stack {}; // NOLINT(readability-redundant-member-init) - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// callback function - const parser_callback_t callback = nullptr; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; - /// a discarded value for the callback - BasicJsonType discarded = BasicJsonType::value_t::discarded; - /// the lexer reference to obtain the current position - lexer_t* m_lexer_ref = nullptr; -}; - -template -class json_sax_acceptor -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - bool null() - { - return true; - } - - bool boolean(bool /*unused*/) - { - return true; - } - - bool number_integer(number_integer_t /*unused*/) - { - return true; - } - - bool number_unsigned(number_unsigned_t /*unused*/) - { - return true; - } - - bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) - { - return true; - } - - bool string(string_t& /*unused*/) - { - return true; - } - - bool binary(binary_t& /*unused*/) - { - return true; - } - - bool start_object(std::size_t /*unused*/ = detail::unknown_size()) - { - return true; - } - - bool key(string_t& /*unused*/) - { - return true; - } - - bool end_object() - { - return true; - } - - bool start_array(std::size_t /*unused*/ = detail::unknown_size()) - { - return true; - } - - bool 
end_array() - { - return true; - } - - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) - { - return false; - } -}; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include - -// #include - // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -9841,7 +9207,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! @brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; @@ -9948,7 +9314,7 @@ class binary_reader std::int32_t document_size{}; get_number(input_format_t::bson, document_size); - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) { return false; } @@ -10104,12 +9470,6 @@ class binary_reader return get_number(input_format_t::bson, value) && sax->number_integer(value); } - case 0x11: // uint64 - { - std::uint64_t value{}; - return get_number(input_format_t::bson, value) && sax->number_unsigned(value); - } - default: // anything else not supported (yet) { std::array cr{{}}; @@ -10176,7 +9536,7 @@ class binary_reader std::int32_t document_size{}; get_number(input_format_t::bson, document_size); - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) { return false; } @@ -10436,7 +9796,7 @@ class binary_reader } case 0x9F: // array (indefinite length) - return get_cbor_array(detail::unknown_size(), tag_handler); + return get_cbor_array(static_cast(-1), tag_handler); // map (0x00..0x17 pairs of data 
items follow) case 0xA0: @@ -10490,7 +9850,7 @@ class binary_reader } case 0xBF: // map (indefinite length) - return get_cbor_object(detail::unknown_size(), tag_handler); + return get_cbor_object(static_cast(-1), tag_handler); case 0xC6: // tagged item case 0xC7: @@ -10878,7 +10238,7 @@ class binary_reader } /*! - @param[in] len the length of the array or detail::unknown_size() for an + @param[in] len the length of the array or static_cast(-1) for an array of indefinite size @param[in] tag_handler how CBOR tags should be treated @return whether array creation completed @@ -10891,7 +10251,7 @@ class binary_reader return false; } - if (len != detail::unknown_size()) + if (len != static_cast(-1)) { for (std::size_t i = 0; i < len; ++i) { @@ -10916,7 +10276,7 @@ class binary_reader } /*! - @param[in] len the length of the object or detail::unknown_size() for an + @param[in] len the length of the object or static_cast(-1) for an object of indefinite size @param[in] tag_handler how CBOR tags should be treated @return whether object creation completed @@ -10932,7 +10292,7 @@ class binary_reader if (len != 0) { string_t key; - if (len != detail::unknown_size()) + if (len != static_cast(-1)) { for (std::size_t i = 0; i < len; ++i) { @@ -12095,16 +11455,6 @@ class binary_reader case 'Z': // null return sax->null(); - case 'B': // byte - { - if (input_format != input_format_t::bjdata) - { - break; - } - std::uint8_t number{}; - return get_number(input_format, number) && sax->number_unsigned(number); - } - case 'U': { std::uint8_t number{}; @@ -12305,7 +11655,7 @@ class binary_reader return false; } - if (size_and_type.second == 'C' || size_and_type.second == 'B') + if (size_and_type.second == 'C') { size_and_type.second = 'U'; } @@ -12327,13 +11677,6 @@ class binary_reader return (sax->end_array() && sax->end_object()); } - // If BJData type marker is 'B' decode as binary - if (input_format == input_format_t::bjdata && size_and_type.first != npos && size_and_type.second == 
'B') - { - binary_t result; - return get_binary(input_format, size_and_type.first, result) && sax->binary(result); - } - if (size_and_type.first != npos) { if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) @@ -12367,7 +11710,7 @@ class binary_reader } else { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) { return false; } @@ -12445,7 +11788,7 @@ class binary_reader } else { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) { return false; } @@ -12556,29 +11899,6 @@ class binary_reader return current = ia.get_character(); } - /*! - @brief get_to read into a primitive type - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns false instead - - @return bool, whether the read was successful - */ - template - bool get_to(T& dest, const input_format_t format, const char* context) - { - auto new_chars_read = ia.get_elements(&dest); - chars_read += new_chars_read; - if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T))) - { - // in case of failure, advance position by 1 to report failing location - ++chars_read; - sax->parse_error(chars_read, "", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr)); - return false; - } - return true; - } - /*! 
@return character read from the input after ignoring all 'N' entries */ @@ -12593,28 +11913,6 @@ class binary_reader return current; } - template - static void byte_swap(NumberType& number) - { - constexpr std::size_t sz = sizeof(number); -#ifdef __cpp_lib_byteswap - if constexpr (sz == 1) - { - return; - } - if constexpr(std::is_integral_v) - { - number = std::byteswap(number); - return; - } -#endif - auto* ptr = reinterpret_cast(&number); - for (std::size_t i = 0; i < sz / 2; ++i) - { - std::swap(ptr[i], ptr[sz - i - 1]); - } - } - /* @brief read a number from the input @@ -12633,16 +11931,29 @@ class binary_reader template bool get_number(const input_format_t format, NumberType& result) { - // read in the original format + // step 1: read input into array with system's byte order + std::array vec{}; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number"))) + { + return false; + } - if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number"))) - { - return false; - } - if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) - { - byte_swap(result); + // reverse byte order prior to conversion if necessary + if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata)) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); return true; } @@ -12781,7 +12092,7 @@ class binary_reader } private: - static JSON_INLINE_VARIABLE constexpr std::size_t npos = detail::unknown_size(); + static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast(-1); /// input adapter InputAdapterType ia; @@ -12807,7 +12118,6 @@ class binary_reader #define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \ make_array( \ - bjd_type{'B', "byte"}, \ bjd_type{'C', "char"}, \ bjd_type{'D', 
"double"}, \ bjd_type{'I', "int16"}, \ @@ -12850,10 +12160,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -12927,10 +12237,10 @@ class parser public: /// a parser reading from an input adapter explicit parser(InputAdapterType&& adapter, - parser_callback_t cb = nullptr, + const parser_callback_t cb = nullptr, const bool allow_exceptions_ = true, const bool skip_comments = false) - : callback(std::move(cb)) + : callback(cb) , m_lexer(std::move(adapter), skip_comments) , allow_exceptions(allow_exceptions_) { @@ -12952,7 +12262,7 @@ class parser { if (callback) { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions, &m_lexer); + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -12980,7 +12290,7 @@ class parser } else { - json_sax_dom_parser sdp(result, allow_exceptions, &m_lexer); + json_sax_dom_parser sdp(result, allow_exceptions); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -13052,7 +12362,7 @@ class parser { case token_type::begin_object: { - if (JSON_HEDLEY_UNLIKELY(!sax->start_object(detail::unknown_size()))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_object(static_cast(-1)))) { return false; } @@ -13097,7 +12407,7 @@ class parser case token_type::begin_array: { - if (JSON_HEDLEY_UNLIKELY(!sax->start_array(detail::unknown_size()))) + if (JSON_HEDLEY_UNLIKELY(!sax->start_array(static_cast(-1)))) { return false; } @@ -13379,10 +12689,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// 
| | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -13392,10 +12702,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -13551,10 +12861,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -14021,7 +13331,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: equal - @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. + @pre The iterator is initialized; i.e. `m_object != nullptr`. 
*/ template < typename IterImpl, detail::enable_if_t < (std::is_same::value || std::is_same::value), std::nullptr_t > = nullptr > bool operator==(const IterImpl& other) const @@ -14032,11 +13342,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers", m_object)); } - // value-initialized forward iterators can be compared, and must compare equal to other value-initialized iterators of the same type #4493 - if (m_object == nullptr) - { - return true; - } + JSON_ASSERT(m_object != nullptr); switch (m_object->m_data.m_type) { @@ -14061,7 +13367,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: not equal - @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. + @pre The iterator is initialized; i.e. `m_object != nullptr`. */ template < typename IterImpl, detail::enable_if_t < (std::is_same::value || std::is_same::value), std::nullptr_t > = nullptr > bool operator!=(const IterImpl& other) const @@ -14071,7 +13377,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: smaller - @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. + @pre The iterator is initialized; i.e. `m_object != nullptr`. 
*/ bool operator<(const iter_impl& other) const { @@ -14081,12 +13387,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers", m_object)); } - // value-initialized forward iterators can be compared, and must compare equal to other value-initialized iterators of the same type #4493 - if (m_object == nullptr) - { - // the iterators are both value-initialized and are to be considered equal, but this function checks for smaller, so we return false - return false; - } + JSON_ASSERT(m_object != nullptr); switch (m_object->m_data.m_type) { @@ -14111,7 +13412,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: less than or equal - @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. + @pre The iterator is initialized; i.e. `m_object != nullptr`. */ bool operator<=(const iter_impl& other) const { @@ -14120,7 +13421,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: greater than - @pre (1) Both iterators are initialized to point to the same object, or (2) both iterators are value-initialized. + @pre The iterator is initialized; i.e. `m_object != nullptr`. */ bool operator>(const iter_impl& other) const { @@ -14129,7 +13430,7 @@ class iter_impl // NOLINT(cppcoreguidelines-special-member-functions,hicpp-speci /*! @brief comparison: greater than or equal - @pre (1) The iterator is initialized; i.e. `m_object != nullptr`, or (2) both iterators are value-initialized. + @pre The iterator is initialized; i.e. `m_object != nullptr`. 
*/ bool operator>=(const iter_impl& other) const { @@ -14322,10 +13623,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -14457,10 +13758,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -14499,10 +13800,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -14732,7 +14033,7 @@ class json_pointer } const char* p = s.c_str(); - char* p_end = nullptr; // NOLINT(misc-const-correctness) + char* p_end = nullptr; errno = 0; // strtoull doesn't reset errno const unsigned long long res = std::strtoull(p, &p_end, 10); // NOLINT(runtime/int) if (p == p_end // invalid input or empty string @@ -15254,7 +14555,7 @@ class json_pointer // iterate array and use index as reference string for (std::size_t i = 0; i < value.m_data.m_value.array->size(); ++i) { - flatten(detail::concat(reference_string, '/', std::to_string(i)), + flatten(detail::concat(reference_string, '/', std::to_string(i)), value.m_data.m_value.array->operator[](i), result); } } @@ -15273,7 +14574,7 @@ class 
json_pointer // iterate object and use keys as reference string for (const auto& element : *value.m_data.m_value.object) { - flatten(detail::concat(reference_string, '/', detail::escape(element.first)), element.second, result); + flatten(detail::concat(reference_string, '/', detail::escape(element.first)), element.second, result); } } break; @@ -15494,10 +14795,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -15579,8 +14880,6 @@ NLOHMANN_JSON_NAMESPACE_END // #include -// #include - // #include // #include @@ -15588,10 +14887,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -15614,10 +14913,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -15768,13 +15067,6 @@ NLOHMANN_JSON_NAMESPACE_BEGIN namespace detail { -/// how to encode BJData -enum class bjdata_version_t -{ - draft2, - draft3, -}; - /////////////////// // binary writer // /////////////////// @@ -16359,7 +15651,7 @@ class binary_writer case value_t::binary: { // step 0: determine if the binary 
type has a set subtype to - // determine whether to use the ext or fixext types + // determine whether or not to use the ext or fixext types const bool use_ext = j.m_data.m_value.binary->has_subtype(); // step 1: write control byte and the byte string length @@ -16482,14 +15774,11 @@ class binary_writer @param[in] use_type whether to use '$' prefixes (optimized format) @param[in] add_prefix whether prefixes need to be used for this value @param[in] use_bjdata whether write in BJData format, default is false - @param[in] bjdata_version which BJData version to use, default is draft2 */ void write_ubjson(const BasicJsonType& j, const bool use_count, const bool use_type, const bool add_prefix = true, - const bool use_bjdata = false, const bjdata_version_t bjdata_version = bjdata_version_t::draft2) + const bool use_bjdata = false) { - const bool bjdata_draft3 = use_bjdata && bjdata_version == bjdata_version_t::draft3; - switch (j.type()) { case value_t::null: @@ -16579,7 +15868,7 @@ class binary_writer for (const auto& el : *j.m_data.m_value.array) { - write_ubjson(el, use_count, use_type, prefix_required, use_bjdata, bjdata_version); + write_ubjson(el, use_count, use_type, prefix_required, use_bjdata); } if (!use_count) @@ -16597,11 +15886,11 @@ class binary_writer oa->write_character(to_char_type('[')); } - if (use_type && (bjdata_draft3 || !j.m_data.m_value.binary->empty())) + if (use_type && !j.m_data.m_value.binary->empty()) { JSON_ASSERT(use_count); oa->write_character(to_char_type('$')); - oa->write_character(bjdata_draft3 ? 'B' : 'U'); + oa->write_character('U'); } if (use_count) @@ -16620,7 +15909,7 @@ class binary_writer { for (size_t i = 0; i < j.m_data.m_value.binary->size(); ++i) { - oa->write_character(to_char_type(bjdata_draft3 ? 
'B' : 'U')); + oa->write_character(to_char_type('U')); oa->write_character(j.m_data.m_value.binary->data()[i]); } } @@ -16637,7 +15926,7 @@ class binary_writer { if (use_bjdata && j.m_data.m_value.object->size() == 3 && j.m_data.m_value.object->find("_ArrayType_") != j.m_data.m_value.object->end() && j.m_data.m_value.object->find("_ArraySize_") != j.m_data.m_value.object->end() && j.m_data.m_value.object->find("_ArrayData_") != j.m_data.m_value.object->end()) { - if (!write_bjdata_ndarray(*j.m_data.m_value.object, use_count, use_type, bjdata_version)) // decode bjdata ndarray in the JData format (https://github.com/NeuroJSON/jdata) + if (!write_bjdata_ndarray(*j.m_data.m_value.object, use_count, use_type)) // decode bjdata ndarray in the JData format (https://github.com/NeuroJSON/jdata) { break; } @@ -16681,7 +15970,7 @@ class binary_writer oa->write_characters( reinterpret_cast(el.first.c_str()), el.first.size()); - write_ubjson(el.second, use_count, use_type, prefix_required, use_bjdata, bjdata_version); + write_ubjson(el.second, use_count, use_type, prefix_required, use_bjdata); } if (!use_count) @@ -16837,8 +16126,7 @@ class binary_writer } else { - write_bson_entry_header(name, 0x11 /* uint64 */); - write_number(static_cast(j.m_data.m_value.number_unsigned), true); + JSON_THROW(out_of_range::create(407, concat("integer number ", std::to_string(j.m_data.m_value.number_unsigned), " cannot be represented by BSON as it does not fit int64"), &j)); } } @@ -17366,11 +16654,10 @@ class binary_writer /*! 
@return false if the object is successfully converted to a bjdata ndarray, true if the type or size is invalid */ - bool write_bjdata_ndarray(const typename BasicJsonType::object_t& value, const bool use_count, const bool use_type, const bjdata_version_t bjdata_version) + bool write_bjdata_ndarray(const typename BasicJsonType::object_t& value, const bool use_count, const bool use_type) { std::map bjdtype = {{"uint8", 'U'}, {"int8", 'i'}, {"uint16", 'u'}, {"int16", 'I'}, - {"uint32", 'm'}, {"int32", 'l'}, {"uint64", 'M'}, {"int64", 'L'}, {"single", 'd'}, {"double", 'D'}, - {"char", 'C'}, {"byte", 'B'} + {"uint32", 'm'}, {"int32", 'l'}, {"uint64", 'M'}, {"int64", 'L'}, {"single", 'd'}, {"double", 'D'}, {"char", 'C'} }; string_t key = "_ArrayType_"; @@ -17400,10 +16687,10 @@ class binary_writer oa->write_character('#'); key = "_ArraySize_"; - write_ubjson(value.at(key), use_count, use_type, true, true, bjdata_version); + write_ubjson(value.at(key), use_count, use_type, true, true); key = "_ArrayData_"; - if (dtype == 'U' || dtype == 'C' || dtype == 'B') + if (dtype == 'U' || dtype == 'C') { for (const auto& el : value.at(key)) { @@ -17594,11 +16881,11 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2008 - 2009 Björn Hoehrmann -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2008-2009 Björn Hoehrmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -17619,11 +16906,11 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // // SPDX-FileCopyrightText: 2009 Florian Loitsch -// 
SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -17859,10 +17146,10 @@ boundaries compute_boundaries(FloatType value) // v- m- v m+ v+ const bool lower_boundary_is_closer = F == 0 && E > 1; - const diyfp m_plus = diyfp((2 * v.f) + 1, v.e - 1); + const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); const diyfp m_minus = lower_boundary_is_closer - ? diyfp((4 * v.f) - 1, v.e - 2) // (B) - : diyfp((2 * v.f) - 1, v.e - 1); // (A) + ? diyfp(4 * v.f - 1, v.e - 2) // (B) + : diyfp(2 * v.f - 1, v.e - 1); // (A) // Determine the normalized w+ = m+. const diyfp w_plus = diyfp::normalize(m_plus); @@ -18092,7 +17379,7 @@ inline cached_power get_cached_power_for_binary_exponent(int e) JSON_ASSERT(e >= -1500); JSON_ASSERT(e <= 1500); const int f = kAlpha - e - 1; - const int k = ((f * 78913) / (1 << 18)) + static_cast(f > 0); + const int k = (f * 78913) / (1 << 18) + static_cast(f > 0); const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / kCachedPowersDecStep; JSON_ASSERT(index >= 0); @@ -18570,15 +17857,15 @@ inline char* append_exponent(char* buf, int e) } else if (k < 100) { - *buf++ = static_cast('0' + (k / 10)); + *buf++ = static_cast('0' + k / 10); k %= 10; *buf++ = static_cast('0' + k); } else { - *buf++ = static_cast('0' + (k / 100)); + *buf++ = static_cast('0' + k / 100); k %= 100; - *buf++ = static_cast('0' + (k / 10)); + *buf++ = static_cast('0' + k / 10); k %= 10; *buf++ = static_cast('0' + k); } @@ -19364,7 +18651,7 @@ class serializer @param[in] x unsigned integer number to count its digits @return number of decimal digits */ - unsigned int count_digits(number_unsigned_t x) noexcept + inline unsigned int count_digits(number_unsigned_t x) noexcept { unsigned int n_digits = 1; for (;;) @@ -19647,7 +18934,7 @@ class serializer ? 
(byte & 0x3fu) | (codep << 6u) : (0xFFu >> type) & (byte); - const std::size_t index = 256u + (static_cast(state) * 16u) + static_cast(type); + const std::size_t index = 256u + static_cast(state) * 16u + static_cast(type); JSON_ASSERT(index < utf8d.size()); state = utf8d[index]; return state; @@ -19673,7 +18960,7 @@ class serializer * absolute values of INT_MIN and INT_MAX are usually not the same. See * #1708 for details. */ - number_unsigned_t remove_sign(number_integer_t x) noexcept + inline number_unsigned_t remove_sign(number_integer_t x) noexcept { JSON_ASSERT(x < 0 && x < (std::numeric_limits::max)()); // NOLINT(misc-redundant-expression) return static_cast(-(x + 1)) + 1; @@ -19715,10 +19002,10 @@ NLOHMANN_JSON_NAMESPACE_END // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -19743,7 +19030,7 @@ NLOHMANN_JSON_NAMESPACE_BEGIN /// for use within nlohmann::basic_json template , class Allocator = std::allocator>> - struct ordered_map : std::vector, Allocator> + struct ordered_map : std::vector, Allocator> { using key_type = Key; using mapped_type = T; @@ -20058,7 +19345,7 @@ template , template using require_input_iter = typename std::enable_if::iterator_category, - std::input_iterator_tag>::value>::type; + std::input_iterator_tag>::value>::type; template> void insert(InputIt first, InputIt last) @@ -20129,9 +19416,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; - template + template friend class ::nlohmann::detail::json_sax_dom_parser; - template + template friend class ::nlohmann::detail::json_sax_dom_callback_parser; 
friend class ::nlohmann::detail::exception; @@ -20152,7 +19439,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec ) { return ::nlohmann::detail::parser(std::move(adapter), - std::move(cb), allow_exceptions, ignore_comments); + std::move(cb), allow_exceptions, ignore_comments); } private: @@ -20185,8 +19472,6 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using error_handler_t = detail::error_handler_t; /// how to treat CBOR tags using cbor_tag_handler_t = detail::cbor_tag_handler_t; - /// how to encode BJData - using bjdata_version_t = detail::bjdata_version_t; /// helper type for initializer lists of basic_json values using initializer_list_t = std::initializer_list>; @@ -20266,7 +19551,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec { basic_json result; - result["copyright"] = "(C) 2013-2025 Niels Lohmann"; + result["copyright"] = "(C) 2013-2023 Niels Lohmann"; result["name"] = "JSON for Modern C++"; result["url"] = "https://github.com/nlohmann/json"; result["version"]["string"] = @@ -20531,7 +19816,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec object = nullptr; // silence warning, see #821 if (JSON_HEDLEY_UNLIKELY(t == value_t::null)) { - JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.12.0", nullptr)); // LCOV_EXCL_LINE + JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.11.3", nullptr)); // LCOV_EXCL_LINE } break; } @@ -20767,10 +20052,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec return it; } - reference set_parent(reference j, std::size_t old_capacity = detail::unknown_size()) + reference set_parent(reference j, std::size_t old_capacity = static_cast(-1)) { #if JSON_DIAGNOSTICS - if (old_capacity != detail::unknown_size()) + if (old_capacity != static_cast(-1)) { // see 
https://github.com/nlohmann/json/issues/2838 JSON_ASSERT(type() == value_t::array); @@ -20850,8 +20135,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::enable_if_t < !detail::is_basic_json::value && detail::is_compatible_type::value, int > = 0 > basic_json(CompatibleType && val) noexcept(noexcept( // NOLINT(bugprone-forwarding-reference-overload,bugprone-exception-escape) - JSONSerializer::to_json(std::declval(), - std::forward(val)))) + JSONSerializer::to_json(std::declval(), + std::forward(val)))) { JSONSerializer::to_json(*this, std::forward(val)); set_parents(); @@ -20864,10 +20149,6 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::enable_if_t < detail::is_basic_json::value&& !std::is_same::value, int > = 0 > basic_json(const BasicJsonType& val) -#if JSON_DIAGNOSTIC_POSITIONS - : start_position(val.start_pos()), - end_position(val.end_pos()) -#endif { using other_boolean_t = typename BasicJsonType::boolean_t; using other_number_float_t = typename BasicJsonType::number_float_t; @@ -20914,7 +20195,6 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE } JSON_ASSERT(m_data.m_type == val.type()); - set_parents(); assert_invariant(); } @@ -21051,7 +20331,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec template < class InputIT, typename std::enable_if < std::is_same::value || std::is_same::value, int >::type = 0 > - basic_json(InputIT first, InputIT last) // NOLINT(performance-unnecessary-value-param) + basic_json(InputIT first, InputIT last) { JSON_ASSERT(first.m_object != nullptr); JSON_ASSERT(last.m_object != nullptr); @@ -21166,10 +20446,6 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(const basic_json& 
other) : json_base_class_t(other) -#if JSON_DIAGNOSTIC_POSITIONS - , start_position(other.start_position) - , end_position(other.end_position) -#endif { m_data.m_type = other.m_data.m_type; // check of passed value is valid @@ -21239,24 +20515,15 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(basic_json&& other) noexcept : json_base_class_t(std::forward(other)), - m_data(std::move(other.m_data)) // cppcheck-suppress[accessForwarded] TODO check -#if JSON_DIAGNOSTIC_POSITIONS - , start_position(other.start_position) // cppcheck-suppress[accessForwarded] TODO check - , end_position(other.end_position) // cppcheck-suppress[accessForwarded] TODO check -#endif + m_data(std::move(other.m_data)) { // check that passed value is valid - other.assert_invariant(false); // cppcheck-suppress[accessForwarded] + other.assert_invariant(false); // invalidate payload other.m_data.m_type = value_t::null; other.m_data.m_value = {}; -#if JSON_DIAGNOSTIC_POSITIONS - other.start_position = std::string::npos; - other.end_position = std::string::npos; -#endif - set_parents(); assert_invariant(); } @@ -21277,12 +20544,6 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using std::swap; swap(m_data.m_type, other.m_data.m_type); swap(m_data.m_value, other.m_data.m_value); - -#if JSON_DIAGNOSTIC_POSITIONS - swap(start_position, other.start_position); - swap(end_position, other.end_position); -#endif - json_base_class_t::operator=(std::move(other)); set_parents(); @@ -21504,13 +20765,13 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// get a pointer to the value (integer number) number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept { - return m_data.m_type == value_t::number_integer ? &m_data.m_value.number_integer : nullptr; + return is_number_integer() ? 
&m_data.m_value.number_integer : nullptr; } /// get a pointer to the value (integer number) constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /*unused*/) const noexcept { - return m_data.m_type == value_t::number_integer ? &m_data.m_value.number_integer : nullptr; + return is_number_integer() ? &m_data.m_value.number_integer : nullptr; } /// get a pointer to the value (unsigned number) @@ -21645,7 +20906,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::has_from_json::value, int > = 0 > ValueType get_impl(detail::priority_tag<0> /*unused*/) const noexcept(noexcept( - JSONSerializer::from_json(std::declval(), std::declval()))) + JSONSerializer::from_json(std::declval(), std::declval()))) { auto ret = ValueType(); JSONSerializer::from_json(*this, ret); @@ -21687,7 +20948,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::has_non_default_from_json::value, int > = 0 > ValueType get_impl(detail::priority_tag<1> /*unused*/) const noexcept(noexcept( - JSONSerializer::from_json(std::declval()))) + JSONSerializer::from_json(std::declval()))) { return JSONSerializer::from_json(*this); } @@ -21837,7 +21098,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec detail::has_from_json::value, int > = 0 > ValueType & get_to(ValueType& v) const noexcept(noexcept( - JSONSerializer::from_json(std::declval(), v))) + JSONSerializer::from_json(std::declval(), v))) { JSONSerializer::from_json(*this, v); return v; @@ -21989,7 +21250,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec { // create better exception explanation JSON_THROW(out_of_range::create(401, detail::concat("array index ", std::to_string(idx), " is out of range"), this)); - } // cppcheck-suppress[missingReturn] + } } else { @@ -22012,7 +21273,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec { // create better 
exception explanation JSON_THROW(out_of_range::create(401, detail::concat("array index ", std::to_string(idx), " is out of range"), this)); - } // cppcheck-suppress[missingReturn] + } } else { @@ -22157,7 +21418,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief access specified object element /// @sa https://json.nlohmann.me/api/basic_json/operator%5B%5D/ - reference operator[](typename object_t::key_type key) // NOLINT(performance-unnecessary-value-param) + reference operator[](typename object_t::key_type key) { // implicitly convert null value to an empty object if (is_null()) @@ -22467,7 +21728,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec template < class IteratorType, detail::enable_if_t < std::is_same::value || std::is_same::value, int > = 0 > - IteratorType erase(IteratorType pos) // NOLINT(performance-unnecessary-value-param) + IteratorType erase(IteratorType pos) { // make sure iterator fits the current value if (JSON_HEDLEY_UNLIKELY(this != pos.m_object)) @@ -22537,7 +21798,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec template < class IteratorType, detail::enable_if_t < std::is_same::value || std::is_same::value, int > = 0 > - IteratorType erase(IteratorType first, IteratorType last) // NOLINT(performance-unnecessary-value-param) + IteratorType erase(IteratorType first, IteratorType last) { // make sure iterator fits the current value if (JSON_HEDLEY_UNLIKELY(this != first.m_object || this != last.m_object)) @@ -23304,7 +22565,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @note: This uses std::distance to support GCC 4.8, /// see https://github.com/nlohmann/json/pull/1257 template - iterator insert_iterator(const_iterator pos, Args&& ... args) // NOLINT(performance-unnecessary-value-param) + iterator insert_iterator(const_iterator pos, Args&& ... 
args) { iterator result(this); JSON_ASSERT(m_data.m_value.array != nullptr); @@ -23323,7 +22584,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts element into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, const basic_json& val) // NOLINT(performance-unnecessary-value-param) + iterator insert(const_iterator pos, const basic_json& val) { // insert only works for arrays if (JSON_HEDLEY_LIKELY(is_array())) @@ -23343,14 +22604,14 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts element into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, basic_json&& val) // NOLINT(performance-unnecessary-value-param) + iterator insert(const_iterator pos, basic_json&& val) { return insert(pos, val); } /// @brief inserts copies of element into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, size_type cnt, const basic_json& val) // NOLINT(performance-unnecessary-value-param) + iterator insert(const_iterator pos, size_type cnt, const basic_json& val) { // insert only works for arrays if (JSON_HEDLEY_LIKELY(is_array())) @@ -23370,7 +22631,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts range of elements into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - iterator insert(const_iterator pos, const_iterator first, const_iterator last) // NOLINT(performance-unnecessary-value-param) + iterator insert(const_iterator pos, const_iterator first, const_iterator last) { // insert only works for arrays if (JSON_HEDLEY_UNLIKELY(!is_array())) @@ -23401,7 +22662,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts elements from initializer list into array /// @sa https://json.nlohmann.me/api/basic_json/insert/ - 
iterator insert(const_iterator pos, initializer_list_t ilist) // NOLINT(performance-unnecessary-value-param) + iterator insert(const_iterator pos, initializer_list_t ilist) { // insert only works for arrays if (JSON_HEDLEY_UNLIKELY(!is_array())) @@ -23421,7 +22682,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief inserts range of elements into object /// @sa https://json.nlohmann.me/api/basic_json/insert/ - void insert(const_iterator first, const_iterator last) // NOLINT(performance-unnecessary-value-param) + void insert(const_iterator first, const_iterator last) { // insert only works for objects if (JSON_HEDLEY_UNLIKELY(!is_object())) @@ -23442,7 +22703,6 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec } m_data.m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); - set_parents(); } /// @brief updates a JSON object from another object, overwriting existing keys @@ -23454,7 +22714,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @brief updates a JSON object from another object, overwriting existing keys /// @sa https://json.nlohmann.me/api/basic_json/update/ - void update(const_iterator first, const_iterator last, bool merge_objects = false) // NOLINT(performance-unnecessary-value-param) + void update(const_iterator first, const_iterator last, bool merge_objects = false) { // implicitly convert null value to an empty object if (is_null()) @@ -24055,12 +23315,12 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec template JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json parse(InputType&& i, - parser_callback_t cb = nullptr, + const parser_callback_t cb = nullptr, const bool allow_exceptions = true, const bool ignore_comments = false) { basic_json result; - parser(detail::input_adapter(std::forward(i)), std::move(cb), allow_exceptions, ignore_comments).parse(true, result); // 
cppcheck-suppress[accessMoved,accessForwarded] + parser(detail::input_adapter(std::forward(i)), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } @@ -24070,24 +23330,24 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json parse(IteratorType first, IteratorType last, - parser_callback_t cb = nullptr, + const parser_callback_t cb = nullptr, const bool allow_exceptions = true, const bool ignore_comments = false) { basic_json result; - parser(detail::input_adapter(std::move(first), std::move(last)), std::move(cb), allow_exceptions, ignore_comments).parse(true, result); // cppcheck-suppress[accessMoved] + parser(detail::input_adapter(std::move(first), std::move(last)), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DEPRECATED_FOR(3.8.0, parse(ptr, ptr + len)) static basic_json parse(detail::span_input_adapter&& i, - parser_callback_t cb = nullptr, + const parser_callback_t cb = nullptr, const bool allow_exceptions = true, const bool ignore_comments = false) { basic_json result; - parser(i.get(), std::move(cb), allow_exceptions, ignore_comments).parse(true, result); // cppcheck-suppress[accessMoved] + parser(i.get(), cb, allow_exceptions, ignore_comments).parse(true, result); return result; } @@ -24266,23 +23526,6 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec basic_json* m_parent = nullptr; #endif -#if JSON_DIAGNOSTIC_POSITIONS - /// the start position of the value - std::size_t start_position = std::string::npos; - /// the end position of the value - std::size_t end_position = std::string::npos; - public: - constexpr std::size_t start_pos() const noexcept - { - return start_position; - } - - constexpr std::size_t end_pos() const noexcept - { - return end_position; - } -#endif - ////////////////////////////////////////// // binary 
serialization/deserialization // ////////////////////////////////////////// @@ -24368,30 +23611,27 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/to_bjdata/ static std::vector to_bjdata(const basic_json& j, const bool use_size = false, - const bool use_type = false, - const bjdata_version_t version = bjdata_version_t::draft2) + const bool use_type = false) { std::vector result; - to_bjdata(j, result, use_size, use_type, version); + to_bjdata(j, result, use_size, use_type); return result; } /// @brief create a BJData serialization of a given JSON value /// @sa https://json.nlohmann.me/api/basic_json/to_bjdata/ static void to_bjdata(const basic_json& j, detail::output_adapter o, - const bool use_size = false, const bool use_type = false, - const bjdata_version_t version = bjdata_version_t::draft2) + const bool use_size = false, const bool use_type = false) { - binary_writer(o).write_ubjson(j, use_size, use_type, true, true, version); + binary_writer(o).write_ubjson(j, use_size, use_type, true, true); } /// @brief create a BJData serialization of a given JSON value /// @sa https://json.nlohmann.me/api/basic_json/to_bjdata/ static void to_bjdata(const basic_json& j, detail::output_adapter o, - const bool use_size = false, const bool use_type = false, - const bjdata_version_t version = bjdata_version_t::draft2) + const bool use_size = false, const bool use_type = false) { - binary_writer(o).write_ubjson(j, use_size, use_type, true, true, version); + binary_writer(o).write_ubjson(j, use_size, use_type, true, true); } /// @brief create a BSON serialization of a given JSON value @@ -24427,9 +23667,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - 
detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? result : basic_json(value_t::discarded); } @@ -24443,9 +23683,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? result : basic_json(value_t::discarded); } @@ -24468,10 +23708,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); - detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? 
result : basic_json(value_t::discarded); } @@ -24484,9 +23724,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -24499,9 +23739,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -24522,10 +23762,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); - detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -24538,9 +23778,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -24553,9 +23793,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -24576,10 +23816,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); - detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -24592,9 +23832,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -24607,9 +23847,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -24622,9 +23862,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -24637,9 +23877,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); - detail::json_sax_dom_parser sdp(result, allow_exceptions); - const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? 
result : basic_json(value_t::discarded); } @@ -24660,10 +23900,10 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); - detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) - const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); // cppcheck-suppress[accessMoved] + const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } /// @} @@ -24764,7 +24004,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec // the valid JSON Patch operations enum class patch_operations {add, remove, replace, move, copy, test, invalid}; - const auto get_op = [](const string_t& op) + const auto get_op = [](const std::string & op) { if (op == "add") { @@ -24795,7 +24035,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec }; // wrapper for "add" operation; add value at ptr - const auto operation_add = [&result](json_pointer & ptr, const basic_json & val) + const auto operation_add = [&result](json_pointer & ptr, basic_json val) { // adding to the root of the target document means replacing it if (ptr.empty()) @@ -24901,15 +24141,15 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec for (const auto& val : json_patch) { // wrapper to get a value for an operation - const auto get_value = [&val](const string_t& op, - const string_t& member, + const auto get_value = [&val](const std::string & op, + const std::string & member, bool string_type) -> basic_json & { // find value auto it = val.m_data.m_value.object->find(member); // context-sensitive error message - const auto error_msg = (op == 
"op") ? "operation" : detail::concat("operation '", op, '\''); // NOLINT(bugprone-unused-local-non-trivial-variable) + const auto error_msg = (op == "op") ? "operation" : detail::concat("operation '", op, '\''); // check if desired value is present if (JSON_HEDLEY_UNLIKELY(it == val.m_data.m_value.object->end())) @@ -24936,8 +24176,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec } // collect mandatory members - const auto op = get_value("op", "op", true).template get(); - const auto path = get_value(op, "path", true).template get(); + const auto op = get_value("op", "op", true).template get(); + const auto path = get_value(op, "path", true).template get(); json_pointer ptr(path); switch (get_op(op)) @@ -24963,7 +24203,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec case patch_operations::move: { - const auto from_path = get_value("move", "from", true).template get(); + const auto from_path = get_value("move", "from", true).template get(); json_pointer from_ptr(from_path); // the "from" location must exist - use at() @@ -24980,7 +24220,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec case patch_operations::copy: { - const auto from_path = get_value("copy", "from", true).template get(); + const auto from_path = get_value("copy", "from", true).template get(); const json_pointer from_ptr(from_path); // the "from" location must exist - use at() @@ -25040,7 +24280,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/diff/ JSON_HEDLEY_WARN_UNUSED_RESULT static basic_json diff(const basic_json& source, const basic_json& target, - const string_t& path = "") + const std::string& path = "") { // the patch basic_json result(value_t::array); @@ -25070,7 +24310,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec while (i < source.size() && i < 
target.size()) { // recursive call to compare array values at index i - auto temp_diff = diff(source[i], target[i], detail::concat(path, '/', detail::to_string(i))); + auto temp_diff = diff(source[i], target[i], detail::concat(path, '/', std::to_string(i))); result.insert(result.end(), temp_diff.begin(), temp_diff.end()); ++i; } @@ -25087,7 +24327,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec result.insert(result.begin() + end_index, object( { {"op", "remove"}, - {"path", detail::concat(path, '/', detail::to_string(i))} + {"path", detail::concat(path, '/', std::to_string(i))} })); ++i; } @@ -25098,7 +24338,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec result.push_back( { {"op", "add"}, - {"path", detail::concat(path, "/-")}, + {"path", detail::concat(path, "/-")}, {"value", target[i]} }); ++i; @@ -25113,7 +24353,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec for (auto it = source.cbegin(); it != source.cend(); ++it) { // escape the key name to be used in a JSON patch - const auto path_key = detail::concat(path, '/', detail::escape(it.key())); + const auto path_key = detail::concat(path, '/', detail::escape(it.key())); if (target.find(it.key()) != target.end()) { @@ -25137,7 +24377,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec if (source.find(it.key()) == source.end()) { // found a key that is not in this -> add it - const auto path_key = detail::concat(path, '/', detail::escape(it.key())); + const auto path_key = detail::concat(path, '/', detail::escape(it.key())); result.push_back( { {"op", "add"}, {"path", path_key}, @@ -25318,10 +24558,10 @@ inline void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL& j1, nlohmann::NLOHMANN_BASIC // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| 
https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT @@ -25352,7 +24592,6 @@ inline void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL& j1, nlohmann::NLOHMANN_BASIC #undef JSON_HAS_CPP_14 #undef JSON_HAS_CPP_17 #undef JSON_HAS_CPP_20 - #undef JSON_HAS_CPP_23 #undef JSON_HAS_FILESYSTEM #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM #undef JSON_HAS_THREE_WAY_COMPARISON @@ -25364,10 +24603,10 @@ inline void swap(nlohmann::NLOHMANN_BASIC_JSON_TPL& j1, nlohmann::NLOHMANN_BASIC // #include // __ _____ _____ _____ // __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.12.0 +// | | |__ | | | | | | version 3.11.3 // |_____|_____|_____|_|___| https://github.com/nlohmann/json // -// SPDX-FileCopyrightText: 2013 - 2025 Niels Lohmann +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann // SPDX-License-Identifier: MIT From 11e0492561eb44026a86addf5c56decb790be385 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Tue, 15 Jul 2025 15:24:54 +0000 Subject: [PATCH 09/51] chore(build): replace contains method of map with count for gcc 8.5 --- writeengine/bulk/we_cmdargs.cpp | 18 ++++++++--------- writeengine/splitter/we_cmdargs.cpp | 30 ++++++++++++++--------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/writeengine/bulk/we_cmdargs.cpp b/writeengine/bulk/we_cmdargs.cpp index 640e42867..f0e91d664 100644 --- a/writeengine/bulk/we_cmdargs.cpp +++ b/writeengine/bulk/we_cmdargs.cpp @@ -235,13 +235,13 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) po::store(po::command_line_parser(argc, argv).options(*fOptions).positional(pos_opt).run(), vm); po::notify(vm); - if (vm.contains("help")) + if (vm.count("help")) { fHelp = true; usage(); return; } - if (vm.contains("separator")) + if (vm.count("separator")) { auto value = vm["separator"].as(); if (value == "\\t") @@ -253,7 +253,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** 
argv) fColDelim = value[0]; } } - if (vm.contains("binary-mode")) + if (vm.count("binary-mode")) { int value = vm["binary-mode"].as(); if (value == 1) @@ -269,7 +269,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) startupError("Invalid Binary mode; value can be 1 or 2"); } } - if (vm.contains("tz")) + if (vm.count("tz")) { auto tz = vm["tz"].as(); long offset; @@ -279,7 +279,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) } fTimeZone = tz; } - if (vm.contains("job-id")) + if (vm.count("job-id")) { errno = 0; string optarg = vm["job-id"].as(); @@ -296,7 +296,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) startupError("Wrong JobID Value"); } } - if (vm.contains("allow-missing-columns")) + if (vm.count("allow-missing-columns")) { if (vm["allow-missing-columns"].as() == "AllowMissingColumn") { @@ -328,15 +328,15 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) if (2 == fArgMode && fPmFilePath.empty()) throw runtime_error("-f option is mandatory with mode 2."); - if (vm.contains("dbname")) + if (vm.count("dbname")) { fSchema = vm["dbname"].as(); } - if (vm.contains("table")) + if (vm.count("table")) { fTable = vm["table"].as(); } - if (vm.contains("load-file")) + if (vm.count("load-file")) { fLocFile = vm["load-file"].as(); } diff --git a/writeengine/splitter/we_cmdargs.cpp b/writeengine/splitter/we_cmdargs.cpp index 87ed30e1d..ffdfcf285 100644 --- a/writeengine/splitter/we_cmdargs.cpp +++ b/writeengine/splitter/we_cmdargs.cpp @@ -645,17 +645,17 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) po::store(po::command_line_parser(argc, argv).options(*fOptions).positional(pos_opt).run(), vm); po::notify(vm); - if (vm.contains("silent")) + if (vm.count("silent")) { fConsoleOutput = !vm["silent"].as(); } - if (vm.contains("help")) + if (vm.count("help")) { fHelp = true; usage(); return; } - if (vm.contains("separator")) + if (vm.count("separator")) { auto value = vm["separator"].as(); if (value == "\\t") @@ -675,7 
+675,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) } } } - if (vm.contains("binary-mode")) + if (vm.count("binary-mode")) { int value = vm["binary-mode"].as(); if (value == 1) @@ -691,7 +691,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) throw runtime_error("Invalid Binary mode; value can be 1 or 2"); } } - if (vm.contains("tz")) + if (vm.count("tz")) { auto tz = vm["tz"].as(); long offset; @@ -701,7 +701,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) } fTimeZone = tz; } - if (vm.contains("job-id")) + if (vm.count("job-id")) { errno = 0; string optarg = vm["job-id"].as(); @@ -720,12 +720,12 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) aJobType = true; } - if (vm.contains("verbose")) + if (vm.count("verbose")) { string optarg = vm["verbose"].as(); fVerbose = fDebugLvl = optarg.length(); } - if (vm.contains("batch-quantity")) + if (vm.count("batch-quantity")) { if (fBatchQty < 10000) { @@ -770,12 +770,12 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) if (0 == fArgMode) throw runtime_error("Incompatible mode and option types"); - if (vm.contains("dbname")) + if (vm.count("dbname")) { fSchema = vm["dbname"].as(); - if (!vm.contains("table")) + if (!vm.count("table")) { // if schema is there, table name should be there throw runtime_error("No table name specified with schema."); @@ -783,7 +783,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) fTable = vm["table"].as(); // 2nd pos parm - if (vm.contains("load-file")) // see if input file name is given + if (vm.count("load-file")) // see if input file name is given { // 3rd pos parm fLocFile = vm["load-file"].as(); @@ -865,7 +865,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) // 1. no positional parameters - Mode 0 & stdin // 2. Two positional parameters (schema and table names) - Mode 1/2, stdin // 3. 
Three positional parameters (schema, table, and import file name) - else if (vm.contains("dbname")) // see if db schema name is given + else if (vm.count("dbname")) // see if db schema name is given { if (fArgMode == 0) { @@ -882,7 +882,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) fLocFile = vm["dbname"].as(); } - if (vm.contains("table")) // dest filename provided + if (vm.count("table")) // dest filename provided { fPmFile = vm["table"].as(); @@ -936,11 +936,11 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) else fSchema = vm["dbname"].as(); // 1st pos parm - if (vm.contains("table")) // see if table name is given + if (vm.count("table")) // see if table name is given { fTable = vm["table"].as(); // 2nd pos parm - if (vm.contains("load-file")) // see if input file name is given + if (vm.count("load-file")) // see if input file name is given { // 3rd pos parm fLocFile = vm["load-file"].as(); From 902805d5a307df473ddbd0540f60c5439bd249c1 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Tue, 15 Jul 2025 16:41:31 +0000 Subject: [PATCH 10/51] GCC8.5 warning fixed --- dbcon/mysql/ha_mcs.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/dbcon/mysql/ha_mcs.cpp b/dbcon/mysql/ha_mcs.cpp index c1c7456ac..5ecdc07bf 100644 --- a/dbcon/mysql/ha_mcs.cpp +++ b/dbcon/mysql/ha_mcs.cpp @@ -1828,7 +1828,7 @@ static int columnstore_init_func(void* p) fprintf(stderr, "Columnstore: Started; Version: %s-%s\n", columnstore_version.c_str(), columnstore_release.c_str()); - plugin_ref plugin_innodb; + plugin_ref plugin_innodb = nullptr; LEX_CSTRING name = {STRING_WITH_LEN("INNODB")}; if (get_innodb_queries_uses_mcs()) @@ -1841,7 +1841,7 @@ static int columnstore_init_func(void* p) DBUG_RETURN(HA_ERR_RETRY_INIT); } } - + strncpy(cs_version, columnstore_version.c_str(), sizeof(cs_version) - 1); cs_version[sizeof(cs_version) - 1] = 0; @@ -1857,7 +1857,7 @@ static int columnstore_init_func(void* p) (my_hash_get_key)mcs_get_key, 0, 
0); std::cerr << "Columnstore: init mcs_hton attributes" << std::endl; - + mcs_hton->create = ha_mcs_cache_create_handler; mcs_hton->panic = 0; mcs_hton->flags = HTON_CAN_RECREATE | HTON_NO_PARTITION; @@ -1873,13 +1873,15 @@ static int columnstore_init_func(void* p) if (get_innodb_queries_uses_mcs()) { - std::cerr << "Columnstore: innodb_queries_uses_mcs is set, redirecting all InnoDB queries to Columnstore." << std::endl; + std::cerr << "Columnstore: innodb_queries_uses_mcs is set, redirecting all InnoDB queries to Columnstore." + << std::endl; auto* innodb_hton = plugin_hton(plugin_innodb); int error = innodb_hton == nullptr; // Engine must exists! if (error) { - std::cerr << "Columnstore: innodb_queries_uses_mcs is set, but could not find InnoDB plugin." << std::endl; + std::cerr << "Columnstore: innodb_queries_uses_mcs is set, but could not find InnoDB plugin." + << std::endl; my_error(HA_ERR_INITIALIZATION, MYF(0), "Could not find storage engine %s", name.str); } innodb_hton->create_select = create_columnstore_select_handler; From 05bbfcae57b1e7e03af7e769ebd3b2297eda48ce Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Tue, 15 Jul 2025 22:15:27 +0000 Subject: [PATCH 11/51] One more try to fix autobake build --- storage-manager/CMakeLists.txt | 1 - storage-manager/src/S3Storage.h | 4 ++-- utils/regr/modamysql.cpp | 7 ------- writeengine/bulk/we_cmdargs.cpp | 2 +- writeengine/splitter/we_cmdargs.cpp | 2 +- 5 files changed, 4 insertions(+), 12 deletions(-) diff --git a/storage-manager/CMakeLists.txt b/storage-manager/CMakeLists.txt index b89012804..c72171cbd 100755 --- a/storage-manager/CMakeLists.txt +++ b/storage-manager/CMakeLists.txt @@ -82,7 +82,6 @@ target_include_directories(storagemanager PRIVATE ${Boost_INCLUDE_DIRS}) columnstore_executable(StorageManager src/main.cpp) columnstore_link(StorageManager storagemanager) -set_property(TARGET StorageManager PROPERTY CXX_STANDARD 20) set(TMPDIR ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) 
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/storage-manager/src/S3Storage.h b/storage-manager/src/S3Storage.h index df63afc5b..4b50e5854 100644 --- a/storage-manager/src/S3Storage.h +++ b/storage-manager/src/S3Storage.h @@ -19,12 +19,12 @@ #include #include -#include #include #include +#include + #include "CloudStorage.h" #include "libmarias3/marias3.h" -#include "Config.h" #include namespace storagemanager diff --git a/utils/regr/modamysql.cpp b/utils/regr/modamysql.cpp index 3d8434be4..c3f5ba4d5 100644 --- a/utils/regr/modamysql.cpp +++ b/utils/regr/modamysql.cpp @@ -1,14 +1,7 @@ #include #include -#include -#include #include #include -#include -#include -#include -#include -#include "boost/lexical_cast.hpp" #include "idb_mysql.h" namespace diff --git a/writeengine/bulk/we_cmdargs.cpp b/writeengine/bulk/we_cmdargs.cpp index f0e91d664..7d811dfaf 100644 --- a/writeengine/bulk/we_cmdargs.cpp +++ b/writeengine/bulk/we_cmdargs.cpp @@ -303,7 +303,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) fAllowMissingColumn = true; } } - if (vm.contains("max-errors")) + if (vm.count("max-errors")) { auto optarg= vm["max-errors"].as(); if (optarg == "all") diff --git a/writeengine/splitter/we_cmdargs.cpp b/writeengine/splitter/we_cmdargs.cpp index ffdfcf285..dbf2166a0 100644 --- a/writeengine/splitter/we_cmdargs.cpp +++ b/writeengine/splitter/we_cmdargs.cpp @@ -736,7 +736,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) fBatchQty = 10000; } } - if (vm.contains("max-errors")) + if (vm.count("max-errors")) { auto optarg = vm["max-errors"].as(); if (optarg == "all") From 9a606f11dd5ded30b77be0fa8b418b96bbceba56 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Tue, 15 Jul 2025 23:12:18 +0000 Subject: [PATCH 12/51] using .drone.jsonnet from msan branch --- .drone.jsonnet | 95 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 33 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 
6e687b951..6d28db57d 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -16,33 +16,43 @@ local platforms_arm = { }; local builddir = "verylongdirnameforverystrangecpackbehavior"; + +local get_build_command(command) = "bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/" + command + " "; + +local clang(version) = [get_build_command("install_clang_deb.sh " + version), + get_build_command("update-clang-version.sh " + version + " 100"), + get_build_command("install_libc++.sh " + version), + "export CC=/usr/bin/clang", + "export CXX=/usr/bin/clang++" + ]; + local customEnvCommandsMap = { - // 'clang-18': ["bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/install_clang_deb.sh 18"], - "clang-20": ["bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/install_clang_deb.sh 20"], + "clang-20": clang("20"), }; local customEnvCommands(envkey, builddir) = - local updateAlternatives = { - "clang-20": ["bash /mdb/" + builddir + - "/storage/columnstore/columnstore/build/update-clang-version.sh 20 100"], - }; (if (std.objectHas(customEnvCommandsMap, envkey)) - then customEnvCommandsMap[envkey] + updateAlternatives[envkey] else []); + then customEnvCommandsMap[envkey] else []); local customBootstrapParamsForExisitingPipelines(envkey) = local customBootstrapMap = { - "ubuntu:24.04": "--custom-cmake-flags '-DCOLUMNSTORE_ASAN_FOR_UNITTESTS=YES'", +// "ubuntu:24.04": "--custom-cmake-flags '-DCOLUMNSTORE_ASAN_FOR_UNITTESTS=YES'", }; (if (std.objectHas(customBootstrapMap, envkey)) then customBootstrapMap[envkey] else ""); local customBootstrapParamsForAdditionalPipelinesMap = { - ASAN: "--asan", + ASan: "--asan", TSAN: "--tsan", - UBSAN: "--ubsan", + UBSan: "--ubsan", + MSan: "--msan", + "libcpp": "--libcpp", }; +local customBuildFlags(buildKey) = + (if (std.objectHas(customBootstrapParamsForAdditionalPipelinesMap, buildKey)) + then customBootstrapParamsForAdditionalPipelinesMap[buildKey] else ""); local any_branch = "**"; @@ -83,7 +93,7 @@ local 
make_clickable_link(link) = "echo -e '\\e]8;;" + link + "\\e\\\\" + link local echo_running_on = ["echo running on ${DRONE_STAGE_MACHINE}", make_clickable_link("https://us-east-1.console.aws.amazon.com/ec2/home?region=us-east-1#Instances:search=:${DRONE_STAGE_MACHINE};v=3;$case=tags:true%5C,client:false;$regex=tags:false%5C,client:false;sort=desc:launchTime")]; -local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", customBootstrapParams="", customBuildEnvCommandsMapKey="") = { +local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", customBootstrapParamsKey="", customBuildEnvCommandsMapKey="") = { local pkg_format = if (std.split(platform, ":")[0] == "rockylinux") then "rpm" else "deb", local img = if (platform == "rockylinux:8") then platform else "detravi/" + std.strReplace(platform, "/", "-"), local branch_ref = if (branch == any_branch) then current_branch else branch, @@ -91,7 +101,9 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", local branchp = if (branch == "**") then "" else branch + "/", local brancht = if (branch == "**") then "" else branch + "-", local platformKey = std.strReplace(std.strReplace(platform, ":", ""), "/", "-"), - local result = platformKey + if customBuildEnvCommandsMapKey != "" then "_" + customBuildEnvCommandsMapKey else "", + local result = platformKey + + (if customBuildEnvCommandsMapKey != "" then "_" + customBuildEnvCommandsMapKey else "") + + (if customBootstrapParamsKey != "" then "_" + customBootstrapParamsKey else ""), local packages_url = "https://cspkg.s3.amazonaws.com/" + branchp + event + "/${DRONE_BUILD_NUMBER}/" + server, local publish_pkg_url = "https://cspkg.s3.amazonaws.com/index.html?prefix=" + branchp + event + "/${DRONE_BUILD_NUMBER}/" + server + "/" + arch + "/" + result + "/", @@ -187,15 +199,17 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", local getContainerName(stepname) = stepname + 
"$${DRONE_BUILD_NUMBER}", local prepareTestContainer(containerName, result, do_setup) = - 'sh -c "apk add bash && bash /mdb/' + builddir + "/storage/columnstore/columnstore/build/prepare_test_container.sh" + + 'sh -c "apk add bash && ' + get_build_command("prepare_test_container.sh") + " --container-name " + containerName + " --docker-image " + img + " --result-path " + result + " --packages-url " + packages_url + - " --do-setup " + std.toString(do_setup) + '"', + " --do-setup " + std.toString(do_setup) + + if result=="ubuntu24.04_clang-20_libcpp" then "" else " --install-libcpp " + //FIX THIS HACK + '"', local reportTestStage(containerName, result, stage) = - 'sh -c "apk add bash && bash /mdb/' + builddir + '/storage/columnstore/columnstore/build/report_test_stage.sh' + + 'sh -c "apk add bash && ' + get_build_command("report_test_stage.sh") + ' --container-name ' + containerName + ' --result-path ' + result + ' --stage ' + stage + '"', @@ -218,7 +232,7 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", volumes: [pipeline._volumes.mdb, pipeline._volumes.docker], commands: [ prepareTestContainer(getContainerName("smoke"), result, true), - "bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/run_smoke.sh" + + get_build_command("run_smoke.sh") + ' --container-name ' + getContainerName("smoke"), ], }, @@ -290,7 +304,8 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", prepareTestContainer(getContainerName("mtr"), result, true), 'MTR_SUITE_LIST=$([ "$MTR_FULL_SUITE" == true ] && echo "' + mtr_full_set + '" || echo "$MTR_SUITE_LIST")', - 'apk add bash && bash /mdb/' + builddir + '/storage/columnstore/columnstore/build/run_mtr.sh' + + 'apk add bash &&' + + get_build_command("run_mtr.sh") + ' --container-name ' + getContainerName("mtr") + ' --distro ' + platform + ' --suite-list $${MTR_SUITE_LIST}' + @@ -334,7 +349,8 @@ local Pipeline(branch, platform, event, arch="amd64", 
server="10.6-enterprise", "export REGRESSION_REF=$${REGRESSION_REF:-$$REGRESSION_REF_AUX}", 'echo "$$REGRESSION_REF"', - "apk add bash && bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/run_regression.sh" + + "apk add bash && " + + get_build_command("run_regression.sh") + " --container-name " + getContainerName("regression") + " --test-name " + name + " --distro " + platform + @@ -415,8 +431,8 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", }, commands: [ prepareTestContainer(getContainerName("cmapi"), result, true), - - "apk add bash && bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/run_cmapi_test.sh" + + "apk add bash && " + + get_build_command("run_cmapi_test.sh") + " --container-name " + getContainerName("cmapi") + " --pkg-format " + pkg_format, ], @@ -449,16 +465,16 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", }, commands: [ "echo $$DOCKER_PASSWORD | docker login --username $$DOCKER_LOGIN --password-stdin", - - "apk add bash && bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/run_multi_node_mtr.sh " + - "--columnstore-image-name $${MCS_IMAGE_NAME} " + - "--distro " + platform, + "apk add bash && " + + get_build_command("run_multi_node_mtr.sh") + + " --columnstore-image-name $${MCS_IMAGE_NAME} " + + " --distro " + platform, ], }, kind: "pipeline", type: "docker", - name: std.join(" ", [branch, platform, event, arch, server, customBootstrapParams, customBuildEnvCommandsMapKey]), + name: std.join(" ", [branch, platform, event, arch, server, customBootstrapParamsKey, customBuildEnvCommandsMapKey]), platform: { arch: arch }, clone: { depth: 10 }, steps: [ @@ -519,15 +535,16 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", ] + customEnvCommands(customBuildEnvCommandsMapKey, builddir) + [ - 'bash -c "set -o pipefail && bash /mdb/' + builddir + "/storage/columnstore/columnstore/build/bootstrap_mcs.sh " + + 'bash 
-c "set -o pipefail && ' + + get_build_command("bootstrap_mcs.sh") + "--build-type RelWithDebInfo " + "--distro " + platform + " " + + "--build-packages --install-deps --sccache " + "--build-path " + "/mdb/" + builddir + "/builddir " + - "--build-packages --install-deps --sccache" + - " " + customBootstrapParams + - " " + customBootstrapParamsForExisitingPipelines(platform) + " | " + - "/mdb/" + builddir + "/storage/columnstore/columnstore/build/ansi2txt.sh " + - "/mdb/" + builddir + "/" + result + '/build.log "', + " " + customBootstrapParamsForExisitingPipelines(platform) + + " " + customBuildFlags(customBootstrapParamsKey) + + " | " + get_build_command("ansi2txt.sh") + + "/mdb/" + builddir + "/" + result + '/build.log "', ], }, { @@ -539,7 +556,7 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", DEBIAN_FRONTEND: "noninteractive", }, commands: [ - "bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/build_cmapi.sh --distro " + platform, + get_build_command("build_cmapi.sh") + " --distro " + platform, ], }, { @@ -551,7 +568,7 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", }, volumes: [pipeline._volumes.mdb], commands: [ - "bash /mdb/" + builddir + "/storage/columnstore/columnstore/build/createrepo.sh --result " + result, + get_build_command("createrepo.sh") + " --result " + result, ], }, { @@ -672,3 +689,15 @@ local FinalPipeline(branch, event) = { for triggeringEvent in events for server in servers[current_branch] ] ++ +[ + Pipeline(b, platform, triggeringEvent, a, server, flag, envcommand) + for a in ["amd64"] + for b in std.objectFields(platforms) + for platform in ["ubuntu:24.04"] + for flag in ["libcpp"] + for envcommand in ["clang-20"] +// for flag in std.objectFields(customBootstrapParamsForAdditionalPipelinesMap) + for triggeringEvent in events + for server in servers[current_branch] +] From ee3830f4597296f40bd4d66b16c5a85383352f5c Mon Sep 17 00:00:00 2001 From: Leonid 
Fedorov Date: Tue, 15 Jul 2025 23:18:56 +0000 Subject: [PATCH 13/51] awful_hack(ci): add vanilla rockylinux and gcc-toolset rockylinux --- .drone.jsonnet | 20 ++++++++++---------- build/bootstrap_mcs.sh | 19 ++++++++++++------- build/install_libc++.sh | 19 +++++++++++++++++++ 3 files changed, 41 insertions(+), 17 deletions(-) create mode 100755 build/install_libc++.sh diff --git a/.drone.jsonnet b/.drone.jsonnet index 6d28db57d..732ae9cae 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -36,8 +36,9 @@ local customEnvCommands(envkey, builddir) = local customBootstrapParamsForExisitingPipelines(envkey) = + # errorprone if we pass --custom-cmake-flags twice, the last one will win local customBootstrapMap = { -// "ubuntu:24.04": "--custom-cmake-flags '-DCOLUMNSTORE_ASAN_FOR_UNITTESTS=YES'", + "ubuntu:24.04": "--custom-cmake-flags '-DCOLUMNSTORE_ASAN_FOR_UNITTESTS=YES'", }; (if (std.objectHas(customBootstrapMap, envkey)) then customBootstrapMap[envkey] else ""); @@ -48,6 +49,7 @@ local customBootstrapParamsForAdditionalPipelinesMap = { UBSan: "--ubsan", MSan: "--msan", "libcpp": "--libcpp", + "gcc-toolset": "--gcc-toolset-for-rocky-8" }; local customBuildFlags(buildKey) = @@ -204,9 +206,7 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", " --docker-image " + img + " --result-path " + result + " --packages-url " + packages_url + - " --do-setup " + std.toString(do_setup) + - if result=="ubuntu24.04_clang-20_libcpp" then "" else " --install-libcpp " + //FIX THIS HACK - '"', + " --do-setup " + std.toString(do_setup) + '"', local reportTestStage(containerName, result, stage) = 'sh -c "apk add bash && ' + get_build_command("report_test_stage.sh") + @@ -530,6 +530,8 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", SCCACHE_S3_USE_SSL: "true", SCCACHE_S3_KEY_PREFIX: result + branch + server + arch, }, + + # errorprone if we pass --custom-cmake-flags twice, the last one will win commands: [ "mkdir /mdb/" + 
builddir + "/" + result, ] @@ -609,7 +611,7 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", [pipeline.cmapitest] + [pipeline.cmapilog] + [pipeline.publish("cmapilog")] + - (if (platform == "rockylinux:8" && arch == "amd64") then [pipeline.dockerfile] + [pipeline.dockerhub] + [pipeline.multi_node_mtr] else [pipeline.mtr] + [pipeline.mtrlog] + [pipeline.publish("mtrlog")]) + + (if (platform == "rockylinux:8" && arch == "amd64" && customBootstrapParamsKey == "gcc-toolset") then [pipeline.dockerfile] + [pipeline.dockerhub] + [pipeline.multi_node_mtr] else [pipeline.mtr] + [pipeline.mtrlog] + [pipeline.publish("mtrlog")]) + [pipeline.regression(regression_tests[i], if (i == 0) then ["mtr", "publish pkg", "publish cmapi build"] else [regression_tests[i - 1]]) for i in indexes(regression_tests)] + [pipeline.regressionlog] + // [pipeline.upgrade(mdb_server_versions[i]) for i in indexes(mdb_server_versions)] + @@ -691,13 +693,11 @@ local FinalPipeline(branch, event) = { ] + [ - Pipeline(b, platform, triggeringEvent, a, server, flag, envcommand) + Pipeline(b, platform, triggeringEvent, a, server, flag, "") for a in ["amd64"] for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04"] - for flag in ["libcpp"] - for envcommand in ["clang-20"] -// for flag in std.objectFields(customBootstrapParamsForAdditionalPipelinesMap) + for platform in ["rockylinux:8"] + for flag in ["gcc-toolset"] for triggeringEvent in events for server in servers[current_branch] ] diff --git a/build/bootstrap_mcs.sh b/build/bootstrap_mcs.sh index 250cb245f..a7846c933 100755 --- a/build/bootstrap_mcs.sh +++ b/build/bootstrap_mcs.sh @@ -52,6 +52,7 @@ optparse.define short=O long=static desc="Build all with static libraries" varia optparse.define short=p long=build-packages desc="Build packages" variable=BUILD_PACKAGES default=false value=true optparse.define short=P long=report-path desc="Path for storing reports and profiles" variable=REPORT_PATH 
default="/core" optparse.define short=r long=restart-services variable=RESTART_SERVICES default=true value=false +optparse.define short=R long=gcc-toolset-for-rocky-8 variable=GCC_TOOLSET default=false value=true optparse.define short=S long=skip-columnstore-submodules desc="Skip columnstore submodules initialization" variable=SKIP_SUBMODULES default=false value=true optparse.define short=t long=build-type desc="Build Type: ${BUILD_TYPE_OPTIONS[*]}" variable=MCS_BUILD_TYPE optparse.define short=T long=tsan desc="Build with TSAN" variable=TSAN default=false value=true @@ -109,12 +110,12 @@ install_deps() { fi message_split prereq="" - RPM_BUILD_DEPS="dnf install -y lz4 lz4-devel systemd-devel git make libaio-devel openssl-devel boost-devel bison \ + RPM_BUILD_DEPS="lz4 lz4-devel systemd-devel git make libaio-devel openssl-devel boost-devel bison \ snappy-devel flex libcurl-devel libxml2-devel ncurses-devel automake libtool policycoreutils-devel \ rpm-build lsof iproute pam-devel perl-DBI cracklib-devel expect createrepo python3 checkpolicy \ cppunit-devel cmake3 libxcrypt-devel xz-devel zlib-devel libzstd-devel glibc-devel" - DEB_BUILD_DEPS="apt-get -y update && apt-get -y install build-essential automake libboost-all-dev \ + DEB_BUILD_DEPS="build-essential automake libboost-all-dev \ bison cmake libncurses5-dev python3 libaio-dev libsystemd-dev libpcre2-dev libperl-dev libssl-dev libxml2-dev \ libkrb5-dev flex libpam-dev git libsnappy-dev libcurl4-openssl-dev libgtest-dev libcppunit-dev googletest \ libjemalloc-dev liblz-dev liblzo2-dev liblzma-dev liblz4-dev libbz2-dev libbenchmark-dev libdistro-info-perl \ @@ -122,16 +123,20 @@ install_deps() { if [[ "$OS" == *"rockylinux:8"* || "$OS" == *"rocky:8"* ]]; then command="dnf install -y curl 'dnf-command(config-manager)' && dnf config-manager --set-enabled powertools && \ - dnf install -y gcc-toolset-${GCC_VERSION} libarchive cmake && . 
/opt/rh/gcc-toolset-${GCC_VERSION}/enable && \ - ${RPM_BUILD_DEPS}" + dnf install -y libarchive cmake ${RPM_BUILD_DEPS}" + if [[ $GCC_TOOLSET = false ]]; then + command="$command && dnf group install -y \"Development Tools\"" + else + command="$command && dnf install -y gcc-toolset-${GCC_VERSION} && . /opt/rh/gcc-toolset-${GCC_VERSION}/enable" + fi elif [[ "$OS" == "rockylinux:9"* || "$OS" == "rocky:9"* ]] then command="dnf install -y 'dnf-command(config-manager)' && dnf config-manager --set-enabled crb && \ - dnf install -y pcre2-devel gcc gcc-c++ curl-minimal && ${RPM_BUILD_DEPS}" + dnf install -y pcre2-devel gcc gcc-c++ curl-minimal ${RPM_BUILD_DEPS}" elif [[ "$OS" == "debian:11"* ]] || [[ "$OS" == "debian:12"* ]] || [[ "$OS" == "ubuntu:20.04"* ]] || [[ "$OS" == "ubuntu:22.04"* ]] || [[ "$OS" == "ubuntu:24.04"* ]]; then - command="${DEB_BUILD_DEPS}" + command="apt-get -y update && apt-get -y install ${DEB_BUILD_DEPS}" else echo "Unsupported OS: $OS" exit 17 @@ -768,7 +773,7 @@ init_submodules if [[ $BUILD_PACKAGES = true ]]; then modify_packaging - ( build_package && run_unit_tests ) + (build_package && run_unit_tests) exit_code=$? 
if [[ $SCCACHE = true ]]; then diff --git a/build/install_libc++.sh b/build/install_libc++.sh new file mode 100755 index 000000000..c2c5a32ac --- /dev/null +++ b/build/install_libc++.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -eo pipefail + +SCRIPT_LOCATION=$(dirname "$0") +source "$SCRIPT_LOCATION"/utils.sh + +VERSION="$1" + +if [[ $# -ne 1 ]]; then + echo "Please pass clang-version as a first parameter" + exit 1 +fi + +change_ubuntu_mirror us + +message "Installing libc++-${VERSION}" + +retry_eval 5 apt-get clean && apt-get update && apt-get install -y libc++-${VERSION}-dev libc++abi-${VERSION}-dev From d0ee5dae32ae69041166e36efba7bd7d21462e8c Mon Sep 17 00:00:00 2001 From: Pavol Sloboda Date: Wed, 16 Jul 2025 10:42:08 +0200 Subject: [PATCH 14/51] Added the support for pkgconf when finding system thrift as fedora uses pkgconf to help locate the thrift files during compilation instead of a Thrift.cmake file. I have added this logic in such a way that the existing logic should not be affected. Therefore enabling the use of system thrift even without pkgconf. 
--- cmake/thrift.cmake | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cmake/thrift.cmake b/cmake/thrift.cmake index 8a8088676..10763e8ac 100644 --- a/cmake/thrift.cmake +++ b/cmake/thrift.cmake @@ -12,7 +12,17 @@ if(WITH_THRIFT STREQUAL "system" OR WITH_THRIFT STREQUAL "auto") set(THRIFT_LIBRARY "${THRIFT_LIBRARIES}") return() elseif(WITH_THRIFT STREQUAL "system") - message(FATAL_ERROR "System Thrift requested but not found!") + FIND_PACKAGE(PkgConfig REQUIRED) + pkg_check_modules(THRIFT REQUIRED thrift) + + if(THRIFT_FOUND) + add_custom_target(external_thrift) + set(THRIFT_INCLUDE_DIR "${THRIFT_INCLUDE_DIR}") + set(THRIFT_LIBRARY "${THRIFT_LIBRARIES}") + return() + else() + message(FATAL_ERROR "System Thrift requested but not found!") + endif() endif() endif() From 449029a827e7813886c12d5b78b2f6da342db3cb Mon Sep 17 00:00:00 2001 From: Leonid Fedorov <79837786+mariadb-LeonidFedorov@users.noreply.github.com> Date: Thu, 17 Jul 2025 16:14:10 +0400 Subject: [PATCH 15/51] Deep build refactoring phase 2 (#3564) * configcpp refactored * chore(build): massive removals, auto add files to debian install file * chore(build): configure before autobake * chore(build): use custom cmake commands for components, mariadb-plugin-columnstore.install generated * chore(build): install deps as separate step for build-packages * more deps * chore(codemanagement, build): build refactoring stage2 * chore(safety): Locked Map for MessageqCpp with a simpler way Please enter the commit message for your changes. 
Lines starting * chore(codemanagement, ci): better coredumps handling, deps fixed * Delete build/bootstrap_mcs.py * Update charset.cpp (add license) --- CMakeLists.txt | 5 - cmake/ColumnstoreLibrary.cmake | 19 + datatypes/mcs_datatype.h | 21 +- datatypes/mcs_string.h | 4 +- dbcon/ddlpackage/CMakeLists.txt | 2 +- dbcon/ddlpackage/sqlparser.h | 4 +- dbcon/ddlpackageproc/CMakeLists.txt | 2 +- dbcon/dmlpackage/CMakeLists.txt | 2 +- dbcon/dmlpackageproc/CMakeLists.txt | 2 +- dbcon/execplan/CMakeLists.txt | 10 +- dbcon/execplan/calpontsystemcatalog.h | 22 +- dbcon/execplan/clientrotator.cpp | 2 - dbcon/execplan/predicateoperator.h | 2 +- dbcon/execplan/pseudocolumn.cpp | 2 +- dbcon/joblist/CMakeLists.txt | 10 +- dbcon/joblist/jlf_tuplejoblist.cpp | 2 +- dbcon/mysql/CMakeLists.txt | 4 +- dbcon/mysql/ha_mcs_impl.cpp | 2 +- dbcon/mysql/idb_mysql.h | 15 +- ddlproc/CMakeLists.txt | 2 +- dmlproc/CMakeLists.txt | 1 - oam/oamcpp/CMakeLists.txt | 2 +- oamapps/columnstoreSupport/CMakeLists.txt | 16 - oamapps/columnstoreSupport/alarmReport.sh | 39 - .../columnstoreSupport/approximateRowCount.sh | 60 -- oamapps/columnstoreSupport/bulklogReport.sh | 30 - .../columnstoreSupport/columnstoreSupport.cpp | 897 ------------------ oamapps/columnstoreSupport/configReport.sh | 76 -- oamapps/columnstoreSupport/dbmsReport.sh | 64 -- oamapps/columnstoreSupport/getMinMax.sh | 130 --- oamapps/columnstoreSupport/hardwareReport.sh | 79 -- oamapps/columnstoreSupport/logReport.sh | 52 - oamapps/columnstoreSupport/mcsSupportUtil.cpp | 621 ------------ oamapps/columnstoreSupport/mcsSupportUtil.h | 34 - oamapps/columnstoreSupport/resourceReport.sh | 66 -- oamapps/columnstoreSupport/softwareReport.sh | 30 - oamapps/replayTransactionLog/CMakeLists.txt | 34 - .../replaytransactionlog.cpp | 156 --- oamapps/replayTransactionLog/tdriver.cpp | 66 -- oamapps/sessionWalker/CMakeLists.txt | 34 - oamapps/sessionWalker/sessionwalker.cpp | 135 --- oamapps/sessionWalker/tdriver.cpp | 189 ---- 
primitives/blockcache/CMakeLists.txt | 2 +- primitives/linux-port/CMakeLists.txt | 2 +- primitives/linux-port/column.cpp | 1 - primitives/primproc/CMakeLists.txt | 13 +- .../primproc}/samenodepseudosocket.cpp | 0 .../primproc}/samenodepseudosocket.h | 0 primitives/primproc/serviceexemgr.cpp | 2 +- primitives/primproc/serviceexemgr.h | 4 +- primitives/primproc/sqlfrontsessionthread.h | 125 +-- storage-manager/CMakeLists.txt | 2 +- tools/CMakeLists.txt | 23 +- tools/cleartablelock/CMakeLists.txt | 2 +- tools/configMgt/CMakeLists.txt | 2 +- tools/dbbuilder/CMakeLists.txt | 2 +- tools/ddlcleanup/CMakeLists.txt | 2 +- tools/editem/CMakeLists.txt | 2 +- tools/rgprint/CMakeLists.txt | 2 +- tools/setConfig/CMakeLists.txt | 2 +- utils/CMakeLists.txt | 44 +- utils/{common => basic}/conststring.h | 0 utils/batchloader/CMakeLists.txt | 2 +- utils/cacheutils/CMakeLists.txt | 2 +- utils/cloudio/CMakeLists.txt | 2 +- utils/common/CMakeLists.txt | 13 +- utils/common/bit_cast.h | 39 - utils/common/branchpred.h | 6 - utils/common/columnwidth.h | 3 + utils/common/genericparser.h | 2 +- utils/common/hashfamily.h | 2 +- utils/common/nullstring.h | 2 +- utils/common/string_prefixes.cpp | 2 +- utils/common/syncstream.h | 157 --- utils/common/utils_utf8.h | 8 +- utils/ddlcleanup/CMakeLists.txt | 2 +- utils/funcexp/CMakeLists.txt | 5 +- utils/funcexp/functor_json.h | 2 +- utils/funcexp/jsonhelpers.h | 4 +- utils/idbdatafile/CMakeLists.txt | 2 +- utils/joiner/CMakeLists.txt | 2 +- utils/mariadb_charset/CMakeLists.txt | 7 + .../charset.cpp} | 17 +- utils/{common => mariadb_charset}/collation.h | 2 +- utils/messageqcpp/CMakeLists.txt | 1 - utils/messageqcpp/messagequeuepool.cpp | 92 +- utils/messageqcpp/messagequeuepool.h | 10 +- utils/querystats/CMakeLists.txt | 2 +- utils/querytele/CMakeLists.txt | 4 +- utils/regr/CMakeLists.txt | 5 +- utils/regr/moda.h | 6 +- utils/rowgroup/CMakeLists.txt | 2 +- utils/rowgroup/rowgroup.h | 2 +- utils/statistics_manager/CMakeLists.txt | 6 + 
.../statistics.cpp | 5 +- .../statistics.h | 0 utils/threadpool/CMakeLists.txt | 4 +- utils/udfsdk/CMakeLists.txt | 4 +- versioning/BRM/CMakeLists.txt | 28 +- writeengine/bulk/CMakeLists.txt | 6 +- writeengine/client/CMakeLists.txt | 5 +- writeengine/redistribute/CMakeLists.txt | 5 +- writeengine/server/CMakeLists.txt | 7 +- writeengine/shared/we_type.h | 13 +- writeengine/splitter/CMakeLists.txt | 4 +- writeengine/wrapper/CMakeLists.txt | 5 +- writeengine/xml/CMakeLists.txt | 5 +- 107 files changed, 354 insertions(+), 3327 deletions(-) delete mode 100644 oamapps/columnstoreSupport/CMakeLists.txt delete mode 100755 oamapps/columnstoreSupport/alarmReport.sh delete mode 100755 oamapps/columnstoreSupport/approximateRowCount.sh delete mode 100755 oamapps/columnstoreSupport/bulklogReport.sh delete mode 100644 oamapps/columnstoreSupport/columnstoreSupport.cpp delete mode 100755 oamapps/columnstoreSupport/configReport.sh delete mode 100755 oamapps/columnstoreSupport/dbmsReport.sh delete mode 100755 oamapps/columnstoreSupport/getMinMax.sh delete mode 100755 oamapps/columnstoreSupport/hardwareReport.sh delete mode 100755 oamapps/columnstoreSupport/logReport.sh delete mode 100644 oamapps/columnstoreSupport/mcsSupportUtil.cpp delete mode 100644 oamapps/columnstoreSupport/mcsSupportUtil.h delete mode 100755 oamapps/columnstoreSupport/resourceReport.sh delete mode 100755 oamapps/columnstoreSupport/softwareReport.sh delete mode 100644 oamapps/replayTransactionLog/CMakeLists.txt delete mode 100644 oamapps/replayTransactionLog/replaytransactionlog.cpp delete mode 100644 oamapps/replayTransactionLog/tdriver.cpp delete mode 100644 oamapps/sessionWalker/CMakeLists.txt delete mode 100644 oamapps/sessionWalker/sessionwalker.cpp delete mode 100644 oamapps/sessionWalker/tdriver.cpp rename {utils/messageqcpp => primitives/primproc}/samenodepseudosocket.cpp (100%) rename {utils/messageqcpp => primitives/primproc}/samenodepseudosocket.h (100%) rename utils/{common => basic}/conststring.h 
(100%) delete mode 100644 utils/common/bit_cast.h delete mode 100644 utils/common/syncstream.h create mode 100644 utils/mariadb_charset/CMakeLists.txt rename utils/{common/utils_utf8.cpp => mariadb_charset/charset.cpp} (72%) rename utils/{common => mariadb_charset}/collation.h (99%) create mode 100644 utils/statistics_manager/CMakeLists.txt rename utils/{common => statistics_manager}/statistics.cpp (99%) rename utils/{common => statistics_manager}/statistics.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index b86fc76e1..bf1da3a6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,8 +88,3 @@ set(COMPONENTS foreach(component ${COMPONENTS}) add_subdirectory(${component}) endforeach() - -add_dependencies(udf_mysql GenError) -add_dependencies(funcexp GenError) -add_dependencies(oamcpp GenError) -add_dependencies(regr_mysql GenError) diff --git a/cmake/ColumnstoreLibrary.cmake b/cmake/ColumnstoreLibrary.cmake index 36dbc6813..13a6097d6 100644 --- a/cmake/ColumnstoreLibrary.cmake +++ b/cmake/ColumnstoreLibrary.cmake @@ -82,6 +82,16 @@ macro(columnstore_shared_library libname) columnstore_install_target(${libname} ${ENGINE_LIBDIR}) endmacro() +macro(columnstore_static_library libname) + add_definitions(-fPIC -DPIC) + add_library(${libname} STATIC ${ARGN}) +endmacro() + +macro(columnstore_shared_library libname) + add_library(${libname} SHARED ${ARGN}) + columnstore_install_target(${libname} ${ENGINE_LIBDIR}) +endmacro() + macro(columnstore_library libname) if(COLUMNSTORE_STATIC_LIBRARIES) columnstore_static_library(${libname} ${ARGN}) @@ -99,6 +109,15 @@ macro(columnstore_link libname) target_link_libraries(${libname} ${ARGN}) endmacro() +macro(columnstore_mysql_plugin_library libname) + add_library(${libname} SHARED ${ARGN}) + columnstore_install_target(${libname} ${MARIADB_PLUGINDIR}) +endmacro() + +macro(columnstore_link libname) + target_link_libraries(${libname} ${ARGN}) +endmacro() + macro(columnstore_executable executable_name) 
add_executable(${executable_name} ${ARGN}) columnstore_install_target(${executable_name} ${ENGINE_BINDIR}) diff --git a/datatypes/mcs_datatype.h b/datatypes/mcs_datatype.h index 2f01010f3..e5abe1112 100644 --- a/datatypes/mcs_datatype.h +++ b/datatypes/mcs_datatype.h @@ -20,7 +20,7 @@ #include #include #include "exceptclasses.h" -#include "conststring.h" +#include "basic/conststring.h" #include "mcs_datatype_basic.h" #include "mcs_numeric_limits.h" #include "mcs_data_condition.h" @@ -796,7 +796,7 @@ class MinMaxPartitionInfo : public MinMaxInfo uint64_t m_status; public: - MinMaxPartitionInfo() : m_status(0){}; + MinMaxPartitionInfo() : m_status(0) {}; explicit MinMaxPartitionInfo(const BRM::EMEntry& entry); void set_invalid() { @@ -1102,7 +1102,8 @@ class TypeHandlerBit : public TypeHandler idbassert(0); // QQ return 1; } - std::string format(const SimpleValue& /*v*/, const SystemCatalog::TypeAttributesStd& /*attr*/) const override + std::string format(const SimpleValue& /*v*/, + const SystemCatalog::TypeAttributesStd& /*attr*/) const override { return "0"; // QQ } @@ -1135,7 +1136,7 @@ class TypeHandlerBit : public TypeHandler const ConvertFromStringParam& /*prm*/, const std::string& /*str*/, bool& /*pushWarning*/) const override; -const uint8_t* getEmptyValueForType(const SystemCatalog::TypeAttributesStd& /*attr*/) const override + const uint8_t* getEmptyValueForType(const SystemCatalog::TypeAttributesStd& /*attr*/) const override { idbassert(0); return nullptr; @@ -2033,7 +2034,8 @@ class TypeHandlerReal : public TypeHandler { return {}; // QQ: real types were not handled in IDB_format() } - std::string format(const SimpleValue& /*v*/, const SystemCatalog::TypeAttributesStd& /*attr*/) const override + std::string format(const SimpleValue& /*v*/, + const SystemCatalog::TypeAttributesStd& /*attr*/) const override { return "0"; // QQ } @@ -2329,7 +2331,8 @@ class TypeHandlerBlob : public TypeHandlerStr { return storeValueToFieldBlobText(row, pos, f); } - 
std::string format(const SimpleValue& /*v*/, const SystemCatalog::TypeAttributesStd& /*attr*/) const override + std::string format(const SimpleValue& /*v*/, + const SystemCatalog::TypeAttributesStd& /*attr*/) const override { return "0"; // QQ } @@ -2355,7 +2358,8 @@ class TypeHandlerText : public TypeHandlerStr { return storeValueToFieldBlobText(row, pos, f); } - std::string format(const SimpleValue& /*v*/, const SystemCatalog::TypeAttributesStd& /*attr*/) const override + std::string format(const SimpleValue& /*v*/, + const SystemCatalog::TypeAttributesStd& /*attr*/) const override { return "0"; // QQ } @@ -2386,7 +2390,8 @@ class TypeHandlerClob : public TypeHandlerStr idbassert(0); // QQ return 1; } - std::string format(const SimpleValue& /*v*/, const SystemCatalog::TypeAttributesStd& /*attr*/) const override + std::string format(const SimpleValue& /*v*/, + const SystemCatalog::TypeAttributesStd& /*attr*/) const override { return "0"; // QQ } diff --git a/datatypes/mcs_string.h b/datatypes/mcs_string.h index dbd8feb92..0714945aa 100644 --- a/datatypes/mcs_string.h +++ b/datatypes/mcs_string.h @@ -17,8 +17,8 @@ #pragma once -#include "conststring.h" -#include "collation.h" // class Charset +#include "basic/conststring.h" +#include "mariadb_charset/collation.h" // class Charset namespace datatypes { diff --git a/dbcon/ddlpackage/CMakeLists.txt b/dbcon/ddlpackage/CMakeLists.txt index 4ed7d69cc..d4fdf6280 100644 --- a/dbcon/ddlpackage/CMakeLists.txt +++ b/dbcon/ddlpackage/CMakeLists.txt @@ -42,4 +42,4 @@ columnstore_library( ${FLEX_ddl_scan_OUTPUTS} ) -columnstore_link(ddlpackage loggingcpp) +columnstore_link(ddlpackage loggingcpp messageqcpp) diff --git a/dbcon/ddlpackage/sqlparser.h b/dbcon/ddlpackage/sqlparser.h index 460433fc6..c45b903aa 100644 --- a/dbcon/ddlpackage/sqlparser.h +++ b/dbcon/ddlpackage/sqlparser.h @@ -27,7 +27,7 @@ */ #include -#include "collation.h" // CHARSET_INFO +#include "mariadb_charset/collation.h" // CHARSET_INFO #include "ddlpkg.h" 
#include "mariadb_my_sys.h" // myf, MYF() @@ -87,7 +87,7 @@ struct pass_to_bison myf utf8_flag; pass_to_bison(ParseTree* pt) - : fParseTree(pt), scanner(NULL), default_table_charset(NULL), utf8_flag(MYF(0)){}; + : fParseTree(pt), scanner(NULL), default_table_charset(NULL), utf8_flag(MYF(0)) {}; }; class SqlParser diff --git a/dbcon/ddlpackageproc/CMakeLists.txt b/dbcon/ddlpackageproc/CMakeLists.txt index 3eed93356..151d039fb 100644 --- a/dbcon/ddlpackageproc/CMakeLists.txt +++ b/dbcon/ddlpackageproc/CMakeLists.txt @@ -13,4 +13,4 @@ set(ddlpackageproc_LIB_SRCS ) columnstore_library(ddlpackageproc ${ddlpackageproc_LIB_SRCS}) -columnstore_link(ddlpackageproc ${NETSNMP_LIBRARIES} loggingcpp) +columnstore_link(ddlpackageproc loggingcpp oamcpp messageqcpp) diff --git a/dbcon/dmlpackage/CMakeLists.txt b/dbcon/dmlpackage/CMakeLists.txt index a0e4ddffa..0a138cca5 100644 --- a/dbcon/dmlpackage/CMakeLists.txt +++ b/dbcon/dmlpackage/CMakeLists.txt @@ -41,4 +41,4 @@ columnstore_library( ${FLEX_dml_scan_OUTPUTS} ) -columnstore_link(dmlpackage loggingcpp) +columnstore_link(dmlpackage loggingcpp messageqcpp) diff --git a/dbcon/dmlpackageproc/CMakeLists.txt b/dbcon/dmlpackageproc/CMakeLists.txt index 177bb799b..8a862ccb5 100644 --- a/dbcon/dmlpackageproc/CMakeLists.txt +++ b/dbcon/dmlpackageproc/CMakeLists.txt @@ -13,4 +13,4 @@ set(dmlpackageproc_LIB_SRCS ) columnstore_library(dmlpackageproc ${dmlpackageproc_LIB_SRCS}) -columnstore_link(dmlpackageproc ${NETSNMP_LIBRARIES} loggingcpp) +columnstore_link(dmlpackageproc loggingcpp oamcpp messageqcpp) diff --git a/dbcon/execplan/CMakeLists.txt b/dbcon/execplan/CMakeLists.txt index 6e1acbb98..4edaf97d0 100755 --- a/dbcon/execplan/CMakeLists.txt +++ b/dbcon/execplan/CMakeLists.txt @@ -46,4 +46,12 @@ set(execplan_LIB_SRCS ) columnstore_library(execplan ${execplan_LIB_SRCS}) -columnstore_link(execplan messageqcpp ${NETSNMP_LIBRARIES} ${ENGINE_DT_LIB} pron loggingcpp) +columnstore_link( + execplan + messageqcpp + ${ENGINE_DT_LIB} + pron + 
oamcpp + loggingcpp + querytele +) diff --git a/dbcon/execplan/calpontsystemcatalog.h b/dbcon/execplan/calpontsystemcatalog.h index dee640c3e..cfb748c54 100644 --- a/dbcon/execplan/calpontsystemcatalog.h +++ b/dbcon/execplan/calpontsystemcatalog.h @@ -49,7 +49,7 @@ #undef max #include "mcs_datatype.h" -#include "collation.h" // CHARSET_INFO, class Charset +#include "mariadb_charset/collation.h" // CHARSET_INFO, class Charset #include "nullstring.h" class ExecPlanTest; @@ -75,7 +75,8 @@ const int32_t IDB_VTABLE_ID = CNX_VTABLE_ID; /** * A struct to hold a list of table partitions. */ -struct Partitions { +struct Partitions +{ std::vector fPartNames; void serialize(messageqcpp::ByteStream& b) const { @@ -96,12 +97,11 @@ struct Partitions { b >> t; fPartNames.push_back(t); } - } }; -bool operator <(const Partitions& a, const Partitions& b); -bool operator ==(const Partitions& a, const Partitions& b); -bool operator !=(const Partitions& a, const Partitions& b); // for GCC9 +bool operator<(const Partitions& a, const Partitions& b); +bool operator==(const Partitions& a, const Partitions& b); +bool operator!=(const Partitions& a, const Partitions& b); // for GCC9 /** The CalpontSystemCatalog class * @@ -259,9 +259,9 @@ class CalpontSystemCatalog : public datatypes::SystemCatalog public: ColType() = default; ColType(const ColType& rhs); - ColType(int32_t colWidth_, int32_t scale_, int32_t precision_, - const ConstraintType& constraintType_, const DictOID& ddn_, int32_t colPosition_, - int32_t compressionType_, OID columnOID_, const ColDataType& colDataType_); + ColType(int32_t colWidth_, int32_t scale_, int32_t precision_, const ConstraintType& constraintType_, + const DictOID& ddn_, int32_t colPosition_, int32_t compressionType_, OID columnOID_, + const ColDataType& colDataType_); ColType& operator=(const ColType& rhs); CHARSET_INFO* getCharset() const; @@ -1301,8 +1301,8 @@ const int OID_SYSCOLUMN_MINVALUE = SYSCOLUMN_BASE + 19; /** @brief min va const int 
OID_SYSCOLUMN_MAXVALUE = SYSCOLUMN_BASE + 20; /** @brief max value col */ const int OID_SYSCOLUMN_COMPRESSIONTYPE = SYSCOLUMN_BASE + 21; /** @brief compression type */ const int OID_SYSCOLUMN_NEXTVALUE = SYSCOLUMN_BASE + 22; /** @brief next value */ -const int OID_SYSCOLUMN_CHARSETNUM = SYSCOLUMN_BASE + 23; /** @brief character set number for the column */ -const int SYSCOLUMN_MAX = SYSCOLUMN_BASE + 24; // be sure this is one more than the highest # +const int OID_SYSCOLUMN_CHARSETNUM = SYSCOLUMN_BASE + 23; /** @brief character set number for the column */ +const int SYSCOLUMN_MAX = SYSCOLUMN_BASE + 24; // be sure this is one more than the highest # /***************************************************** * SYSTABLE columns dictionary OID definition diff --git a/dbcon/execplan/clientrotator.cpp b/dbcon/execplan/clientrotator.cpp index 395d76482..4e0d1303a 100644 --- a/dbcon/execplan/clientrotator.cpp +++ b/dbcon/execplan/clientrotator.cpp @@ -47,8 +47,6 @@ using namespace logging; #include "clientrotator.h" -//#include "idb_mysql.h" - /** Debug macro */ #ifdef INFINIDB_DEBUG #define IDEBUG(x) \ diff --git a/dbcon/execplan/predicateoperator.h b/dbcon/execplan/predicateoperator.h index 434c2c768..b6e976d46 100644 --- a/dbcon/execplan/predicateoperator.h +++ b/dbcon/execplan/predicateoperator.h @@ -39,7 +39,7 @@ #include "returnedcolumn.h" #include "dataconvert.h" -#include "collation.h" // CHARSET_INFO +#include "mariadb_charset/collation.h" // CHARSET_INFO namespace messageqcpp { diff --git a/dbcon/execplan/pseudocolumn.cpp b/dbcon/execplan/pseudocolumn.cpp index a6055489e..38f2be10f 100644 --- a/dbcon/execplan/pseudocolumn.cpp +++ b/dbcon/execplan/pseudocolumn.cpp @@ -28,7 +28,7 @@ using namespace std; #include "basic/string_utils.h" #include "bytestream.h" -#include "collation.h" +#include "mariadb_charset/collation.h" using namespace messageqcpp; diff --git a/dbcon/joblist/CMakeLists.txt b/dbcon/joblist/CMakeLists.txt index 3783ccf14..a63e5c82c 100644 --- 
a/dbcon/joblist/CMakeLists.txt +++ b/dbcon/joblist/CMakeLists.txt @@ -68,7 +68,15 @@ columnstore_library(joblist ${joblist_LIB_SRCS}) target_include_directories( joblist BEFORE PUBLIC ${OPENSSL_INCLUDE_DIR} ${LIBMARIADB_BININC_DIR} ${LIBMARIADB_SRCINC_DIR} ) -columnstore_link(joblist loggingcpp) +columnstore_link( + joblist + loggingcpp + boost_thread + oamcpp + querytele + messageqcpp + statistics_manager +) if(WITH_ORDERBY_UT) columnstore_executable(job_orderby_tests orderby-tests.cpp) diff --git a/dbcon/joblist/jlf_tuplejoblist.cpp b/dbcon/joblist/jlf_tuplejoblist.cpp index 96add786b..7ba090460 100644 --- a/dbcon/joblist/jlf_tuplejoblist.cpp +++ b/dbcon/joblist/jlf_tuplejoblist.cpp @@ -71,7 +71,7 @@ using namespace dataconvert; #include "jlf_tuplejoblist.h" using namespace joblist; -#include "statistics.h" +#include "statistics_manager/statistics.h" #ifdef __clang__ #pragma clang diagnostic push diff --git a/dbcon/mysql/CMakeLists.txt b/dbcon/mysql/CMakeLists.txt index 711c0a188..488943704 100644 --- a/dbcon/mysql/CMakeLists.txt +++ b/dbcon/mysql/CMakeLists.txt @@ -63,7 +63,7 @@ if(COMMAND mysql_add_plugin) ${PLUGIN_EXEC_LIBS} ${PLUGIN_WRITE_LIBS} joblist_server - ${NETSNMP_LIBRARIES} + statistics_manager ${MARIADB_CLIENT_LIBS} ${S3API_DEPS} threadpool @@ -89,10 +89,10 @@ else() ${S3API_DEPS} ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} - ${NETSNMP_LIBRARIES} ${SERVER_BUILD_DIR}/libservices/libmysqlservices.a threadpool loggingcpp + statistics_manager marias3 ) # define this dummy target for standalone builds (ie, when mysql_add_plugin doesn't exist) diff --git a/dbcon/mysql/ha_mcs_impl.cpp b/dbcon/mysql/ha_mcs_impl.cpp index 797ac40c4..a7e974159 100644 --- a/dbcon/mysql/ha_mcs_impl.cpp +++ b/dbcon/mysql/ha_mcs_impl.cpp @@ -128,9 +128,9 @@ using namespace funcexp; #include "ha_mcs_sysvars.h" #include "ha_mcs_datatype.h" -#include "statistics.h" #include "ha_mcs_logging.h" #include "ha_subquery.h" +#include "statistics_manager/statistics.h" namespace cal_impl_if { diff 
--git a/dbcon/mysql/idb_mysql.h b/dbcon/mysql/idb_mysql.h index ba700ff93..cc71e9425 100644 --- a/dbcon/mysql/idb_mysql.h +++ b/dbcon/mysql/idb_mysql.h @@ -14,10 +14,17 @@ along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -// One include file to deal with all the MySQL pollution of the -// global namespace -// -// Don't include ANY mysql headers anywhere except here! + +/* One include file to deal with all the MySQL pollution of the + global namespace + + Don't include ANY mysql headers anywhere except here! + + WARN: if any cmake build target uses this include file, + GenError from server must be added to the target dependencies + to generate mysqld_error.h used below +*/ + #pragma once #ifdef TEST_MCSCONFIG_H diff --git a/ddlproc/CMakeLists.txt b/ddlproc/CMakeLists.txt index 64924fb79..0da0d740b 100644 --- a/ddlproc/CMakeLists.txt +++ b/ddlproc/CMakeLists.txt @@ -6,4 +6,4 @@ set(DDLProc_SRCS ddlproc.cpp ddlprocessor.cpp ../utils/common/crashtrace.cpp) columnstore_executable(DDLProc ${DDLProc_SRCS}) -columnstore_link(DDLProc ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} ${NETSNMP_LIBRARIES} threadpool loggingcpp) +columnstore_link(DDLProc ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} threadpool loggingcpp) diff --git a/dmlproc/CMakeLists.txt b/dmlproc/CMakeLists.txt index af9ed6492..036d7b0ea 100644 --- a/dmlproc/CMakeLists.txt +++ b/dmlproc/CMakeLists.txt @@ -12,7 +12,6 @@ columnstore_link( DMLProc ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} - ${NETSNMP_LIBRARIES} threadpool ddlcleanuputil batchloader diff --git a/oam/oamcpp/CMakeLists.txt b/oam/oamcpp/CMakeLists.txt index d5b4ac307..cdce7a55b 100644 --- a/oam/oamcpp/CMakeLists.txt +++ b/oam/oamcpp/CMakeLists.txt @@ -5,6 +5,6 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(oamcpp_LIB_SRCS liboamcpp.cpp oamcache.cpp) columnstore_library(oamcpp ${oamcpp_LIB_SRCS}) -columnstore_link(oamcpp loggingcpp) +columnstore_link(oamcpp loggingcpp 
idbboot) target_compile_options(oamcpp PRIVATE -Wno-unused-result) diff --git a/oamapps/columnstoreSupport/CMakeLists.txt b/oamapps/columnstoreSupport/CMakeLists.txt deleted file mode 100644 index feaf17aa2..000000000 --- a/oamapps/columnstoreSupport/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -include_directories(${ENGINE_COMMON_INCLUDES}) - -# ########## next target ############### - -set(columnstoreSupport_SRCS columnstoreSupport.cpp mcsSupportUtil.cpp) - -columnstore_executable(columnstoreSupport ${columnstoreSupport_SRCS}) -target_compile_options(columnstoreSupport PRIVATE -Wno-unused-result) -columnstore_link(columnstoreSupport ${ENGINE_LDFLAGS} ncurses ${ENGINE_EXEC_LIBS}) - -columnstore_install_program(dbmsReport.sh, ${ENGINE_BINDIR}) -columnstore_install_program(bulklogReport.sh, ${ENGINE_BINDIR}) -columnstore_install_program(configReport.sh, ${ENGINE_BINDIR}) -columnstore_install_program(hardwareReport.sh, ${ENGINE_BINDIR}) -columnstore_install_program(logReport.sh, ${ENGINE_BINDIR}) -columnstore_install_program(resourceReport.sh, ${ENGINE_BINDIR}) diff --git a/oamapps/columnstoreSupport/alarmReport.sh b/oamapps/columnstoreSupport/alarmReport.sh deleted file mode 100755 index ec81e2228..000000000 --- a/oamapps/columnstoreSupport/alarmReport.sh +++ /dev/null @@ -1,39 +0,0 @@ -#! 
/bin/sh -# -# $Id: logReport.sh 421 2007-04-05 15:46:55Z dhill $ -# -if [ $1 ] ; then - SERVER=$1 -else - SERVER="localhost" -fi - -if [ $2 ] ; then - DATE=$2 -else - DATE=" " -fi - -#get temp directory -tmpDir=`mcsGetConfig SystemConfig SystemTempFileDir` - -rm -f ${tmpDir}/logReport.log - -{ -echo " " -echo "******************** Alarm Report for $SERVER ********************" -echo " " - -echo "-- Today's Alarms --" -echo " " -cat /var/log/mariadb/columnstore/alarm.log 2>/dev/null - -if test -f /var/log/mariadb/columnstore/archive/alarm.log-$DATE ; then - echo "-- Archived Alarms --" - echo " " - cat /var/log/mariadb/columnstore/archive/alarm.log-$DATE 2>/dev/null -fi - -} > ${tmpDir}/logReport.log - -exit 0 diff --git a/oamapps/columnstoreSupport/approximateRowCount.sh b/oamapps/columnstoreSupport/approximateRowCount.sh deleted file mode 100755 index 26f3e41aa..000000000 --- a/oamapps/columnstoreSupport/approximateRowCount.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -# -# Estimates the row count for a given table. Uses number of extents * 8M for the estimate. -# - -# -# Initialize variables. -# - -if [ -z "$MYSQLCMD" ]; then - MYSQLCMD="mysql -u root" -fi - -# -# Validate that there are two parameters - schema and table. -# -if [ $# -ne 2 ]; then - echo "" - echo "Reports the approximate row count for the given table." - echo "" - echo "Parameters:" - echo " Schema" - echo " Table" -fi -db=$1 -table=$2 - -# -# Validate that the table exists. -# -sql="select count(*) from systable where \`schema\`='$db' and tablename='$table';" -count=`$MYSQLCMD calpontsys --skip-column-names -e "$sql;"` -if [ $count -le 0 ]; then - echo "" - echo "$db.$table does not exist in Columnstore." - echo "" - exit 1 -fi - -# -# Grab the objectid and column width for a column in the table. 
-# -sql="select objectid from syscolumn where \`schema\`='$db' and tablename='$table' limit 1;" -objectid=`$MYSQLCMD calpontsys --skip-column-names -e "$sql"` -sql="select columnlength from syscolumn where objectid=$objectid;" -colWidth=`$MYSQLCMD calpontsys --skip-column-names -e "$sql"` - -# -# Use editem to count the extents. -# -extentCount=`editem -o $objectid | wc -l` -let extentCount-=2 # Take out the 2 extra rows for header and blank line at end. -let approximateRowCount=$extentCount*8192*1024; - -echo "" -echo "Approximate row count for $db.$table is $approximateRowCount." -echo "" - -exit 0 diff --git a/oamapps/columnstoreSupport/bulklogReport.sh b/oamapps/columnstoreSupport/bulklogReport.sh deleted file mode 100755 index 83076f4eb..000000000 --- a/oamapps/columnstoreSupport/bulklogReport.sh +++ /dev/null @@ -1,30 +0,0 @@ -#! /bin/sh -# -# $Id: logReport.sh 421 2007-04-05 15:46:55Z dhill $ -# -if [ $1 ] ; then - MODULE=$1 -else - MODULE="pm1" -fi - -if [ $2 ] ; then - OUT_FILE=$2 -else - OUT_FILE=${MODULE}_logReport.txt -fi - -{ - -if test -d /var/lib/columnstore/data/bulk ; then - echo " " - echo "-- Check for Errors in Bulk Logs --" - echo " " - echo "################# egrep '(ERR|CRIT)' /var/lib/columnstore/data/bulk/log/*.err #################" - echo " " - egrep '(ERR|CRIT)' /var/lib/columnstore/data/bulk/log/*.err 2>/dev/null -fi - -} >> $OUT_FILE - -exit 0 diff --git a/oamapps/columnstoreSupport/columnstoreSupport.cpp b/oamapps/columnstoreSupport/columnstoreSupport.cpp deleted file mode 100644 index 51bd670b9..000000000 --- a/oamapps/columnstoreSupport/columnstoreSupport.cpp +++ /dev/null @@ -1,897 +0,0 @@ -/* Copyright (C) 2013 Calpont Corp. 
*/ -/* Copyright (C) 2016 MariaDB Corporation */ - -/****************************************************************************************** - * $Id: columnstoreSupport.cpp 64 2006-10-12 22:21:51Z dhill $ - * - * - * - ******************************************************************************************/ -/** - * @file - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "stdio.h" -#include "ctype.h" -#include -#include -#include - -#include "mcsconfig.h" -#include "liboamcpp.h" -#include "configcpp.h" -#include "installdir.h" -#include "mcsSupportUtil.h" -#include "columnstoreversion.h" - -using namespace std; -using namespace oam; -using namespace config; - -typedef struct Child_Module_struct -{ - std::string moduleName; - std::string moduleIP; - std::string hostName; -} ChildModule; - -typedef std::vector ChildModuleList; - -string currentDate; -string systemName; -string localModule; -string localModuleHostName; -ChildModuleList childmodulelist; -ChildModuleList parentmodulelist; -ChildModule childmodule; - -string rootPassword = ""; -string debug_flag = "0"; -string mysqlpw = " "; -string tmpDir; - -int runningThreads = 0; -pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER; - -typedef boost::tuple threadInfo_t; - -bool LOCAL = false; - -void* childReportThread(threadInfo_t* st) -{ - assert(st); - ChildModuleList::iterator& list = boost::get<0>(*st); - string reportType = boost::get<1>(*st); - - string remoteModuleName = (*list).moduleName; - string remoteModuleIP = (*list).moduleIP; - string remoteHostName = (*list).hostName; - - pthread_mutex_lock(&mutex1); - runningThreads++; - // cout << "++ " << runningThreads << endl; - pthread_mutex_unlock(&mutex1); - - string outputFile; - - if (reportType == "log") - { - outputFile = remoteModuleName + "_" + reportType + "Report.tar.gz"; - } - else - { - outputFile = remoteModuleName + "_" + reportType + "Report.txt"; 
- - FILE* pOutputFile = fopen(outputFile.c_str(), "a"); - if (pOutputFile == NULL) - { - printf("Could not open file: %s", outputFile.c_str()); - exit(1); - } - - fprintf(pOutputFile, - "********************************************************************************\n" - "\n" - " System %s\n" - " columnstoreSupportReport script ran from Module %s on %s\n" - " SoftwareVersion = %s-%s" - "\n" - "********************************************************************************\n" - "\n" - " %s report\n" - "\n" - "********************************************************************************\n", - systemName.c_str(), localModule.c_str(), currentDate.c_str(), columnstore_version.c_str(), - columnstore_release.c_str(), reportType.c_str()); - } - - cout << "Get " + reportType + " report data for " + remoteModuleName + " " << endl; - - string cmd = "remote_command.sh " + remoteModuleIP + " " + rootPassword + ";" + reportType + "Report.sh " + - remoteModuleName + "' " + debug_flag + " - forcetty"; - int rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 0) - { - cout << "Error with running remote_command.sh, exiting..." 
<< endl; - } - - cmd = "remote_scp_get.sh " + remoteModuleIP + " " + rootPassword + " " + tmpDir + "/" + outputFile + - " > /dev/null 2>&1"; - rtnCode = system(cmd.c_str()); - - if (WEXITSTATUS(rtnCode) != 0) - cout << "ERROR: failed to retrieve " << tmpDir << "/" << outputFile << " from " + remoteHostName << endl; - - pthread_mutex_lock(&mutex1); - runningThreads--; - // cout << "-- " << runningThreads << endl; - pthread_mutex_unlock(&mutex1); - - // exit thread - pthread_exit(0); -} - -void* reportThread(string* reporttype) -{ - assert(reporttype); - string reportType = *reporttype; - - Oam oam; - - pthread_mutex_lock(&mutex1); - runningThreads++; - // cout << "++ " << runningThreads << endl; - pthread_mutex_unlock(&mutex1); - - string outputFile = localModule + "_" + reportType + "Report.txt"; - - FILE* pOutputFile = fopen(outputFile.c_str(), "a"); - if (pOutputFile == NULL) - { - printf("Could not open file: %s", outputFile.c_str()); - exit(1); - } - // get local report - fprintf(pOutputFile, - "********************************************************************************\n" - "\n" - " System %s\n" - " columnstoreSupportReport script ran from Module %s on %s\n" - " SoftwareVersion = %s-%s" - "\n" - "********************************************************************************\n" - "\n" - " %s report\n" - "\n" - "********************************************************************************\n", - systemName.c_str(), localModule.c_str(), currentDate.c_str(), columnstore_version.c_str(), - columnstore_release.c_str(), reportType.c_str()); - - fclose(pOutputFile); - // run on child servers and get report - if (!LOCAL) - { - ChildModuleList::iterator list1 = childmodulelist.begin(); - - for (; list1 != childmodulelist.end(); list1++) - { - threadInfo_t* st = new threadInfo_t; - *st = boost::make_tuple(list1, reportType); - - pthread_t childreportthread; - int status = pthread_create(&childreportthread, NULL, (void* (*)(void*)) & childReportThread, st); - - 
if (status != 0) - { - cout << "ERROR: childreportthread: pthread_create failed, return status = " + oam.itoa(status) - << endl; - } - - sleep(1); - } - } - - if (reportType == "log") - { - // run log config on local server - cout << "Get log config data for " + localModule << endl; - - string cmd = "logReport.sh " + localModule + " " + outputFile; - system(cmd.c_str()); - } - else - { - string cmd = reportType + "Report.sh " + localModule + " " + outputFile; - system(cmd.c_str()); - - if (reportType == "config") - { - pOutputFile = fopen(outputFile.c_str(), "a"); - if (pOutputFile == NULL) - { - printf("Could not open file: %s", outputFile.c_str()); - exit(1); - } - - fprintf(pOutputFile, - "\n******************** System Network Configuration ******************************\n\n"); - getSystemNetworkConfig(pOutputFile); - - fprintf(pOutputFile, - "\n******************** System Module Configure **********************************\n\n"); - getModuleTypeConfig(pOutputFile); - - fprintf(pOutputFile, - "\n******************** System Storage Configuration *****************************\n\n"); - getStorageConfig(pOutputFile); - - fprintf(pOutputFile, - "\n******************** System Storage Status ************************************\n\n"); - getStorageStatus(pOutputFile); - - // BT: most of this is tedious to collect and can be manually looked up in the debug.log file - // fprintf(pOutputFile,"\n******************** System Status - // ********************************************\n\n"); printSystemStatus(pOutputFile); - // printProcessStatus(pOutputFile); - // printAlarmSummary(pOutputFile); - // - // fprintf(pOutputFile,"\n******************** System Directories - // ***************************************\n\n"); getSystemDirectories(pOutputFile); - - boost::filesystem::path configFile = - std::string(MCSSYSCONFDIR) + std::string("/columnstore/Columnstore.xml"); - boost::filesystem::copy_file(configFile, "./Columnstore.xml", - 
boost::filesystem::copy_options::overwrite_existing); - boost::filesystem::path SMconfigFile = - std::string(MCSSYSCONFDIR) + std::string("/columnstore/storagemanager.cnf"); - boost::filesystem::copy_file(SMconfigFile, "./storagemanager.cnf", - boost::filesystem::copy_options::overwrite_existing); - system("sed -i 's/.*aws_access_key_id.*/aws_access_key_id={PRIVATE}/' ./storagemanager.cnf"); - system("sed -i 's/.*aws_secret_access_key.*/aws_secret_access_key={PRIVATE}/' ./storagemanager.cnf"); - fclose(pOutputFile); - } - - /* - // TODO: This can be ported from mcsadmin if needed most info included does not seem useful at this time - if (reportType == "resource" ) - { - if (LOCAL) - { - fprintf(pOutputFile,"\n******************** mcsadmin getModuleResourceUsage - **************************\n\n"); string cmd = "mcsadmin getModuleResourceUsage " + localModule + " >> " + - outputFile; system(cmd.c_str()); - } - else - { - fprintf(pOutputFile,"\n******************** mcsadmin getSystemResourceUsage - **************************\n\n"); string cmd = "mcsadmin getSystemResourceUsage >> " + outputFile; - system(cmd.c_str()); - } - }*/ - } - - // exit thread - pthread_mutex_lock(&mutex1); - runningThreads--; - // cout << "-- " << runningThreads << endl; - pthread_mutex_unlock(&mutex1); - - pthread_exit(0); -} - -int main(int argc, char* argv[]) -{ - Oam oam; - - Config* sysConfig = Config::makeConfig(); - string SystemSection = "SystemConfig"; - string InstallSection = "Installation"; - - bool HARDWARE = false; - bool CONFIG = false; - bool DBMS = false; - bool RESOURCE = false; - bool LOG = false; - bool BULKLOG = false; - bool HADOOP = false; - - // get current time and date - time_t now; - now = time(NULL); - struct tm tm; - localtime_r(&now, &tm); - char timestamp[200]; - strftime(timestamp, 200, "%m:%d:%y-%H:%M:%S", &tm); - currentDate = timestamp; - - char helpArg[3] = "-h"; - - // Get System Name - try - { - oam.getSystemConfig("SystemName", systemName); - } - catch 
(...) - { - systemName = "unassigned"; - } - - // get Local Module Name and Server Install Indicator - string singleServerInstall = "n"; - - oamModuleInfo_t st; - - try - { - st = oam.getModuleInfo(); - localModule = boost::get<0>(st); - } - catch (...) - { - cout << endl << "**** Failed : Failed to read Local Module Name" << endl; - exit(-1); - } - - if (argc == 1) - { - argv[1] = &helpArg[0]; - argc = 2; - } - - string DataFilePlugin; - - try - { - DataFilePlugin = sysConfig->getConfig(SystemSection, "DataFilePlugin"); - } - catch (...) - { - cout << "ERROR: Problem accessing Columnstore configuration file" << endl; - exit(-1); - } - - tmpDir = startup::StartUp::tmpDir(); - - for (int i = 1; i < argc; i++) - { - if (string("-h") == argv[i]) - { - cout << endl; - cout << "'columnstoreSupport' generates a Set of System Support Report Files in a tar file" << endl; - cout << "called columnstoreSupportReport.'system-name'.tar.gz in the local directory." << endl; - cout << "It should be run on the server with the DBRM front-end." << endl; - cout << "Check the Admin Guide for additional information." 
<< endl; - cout << endl; - cout << "Usage: columnstoreSupport [-h][-a][-hw][-s][-c][-db][-r][-l][-bl][-lc][-p " - "'root-password'][-de]"; - - cout << endl; - cout << " -h help" << endl; - cout << " -a Output all Reports (excluding Bulk Logs Reports)" << endl; - cout << " -hw Output Hardware Reports only" << endl; - cout << " -c Output Configuration/Status Reports only" << endl; - cout << " -db Output DBMS Reports only" << endl; - cout << " -r Output Resource Reports only" << endl; - cout << " -l Output Columnstore Log/Alarms Reports only" << endl; - cout << " -bl Output Columnstore Bulk Log Reports only" << endl; - cout << " -lc Output Reports for Local Server only" << endl; - cout << " -p password (multi-server systems), root-password or 'ssh' to use 'ssh keys'" - << endl; - cout << " -de Debug Flag" << endl; - - exit(0); - } - else - { - if (string("-a") == argv[i]) - { - HARDWARE = true; - CONFIG = true; - DBMS = true; - RESOURCE = true; - LOG = true; - HADOOP = (DataFilePlugin.empty() ? 
false : true); - } - else if (string("-hw") == argv[i]) - HARDWARE = true; - else if (string("-c") == argv[i]) - CONFIG = true; - else if (string("-db") == argv[i]) - DBMS = true; - else if (string("-r") == argv[i]) - RESOURCE = true; - else if (string("-l") == argv[i]) - LOG = true; - else if (string("-bl") == argv[i]) - BULKLOG = true; - else if (string("-lc") == argv[i]) - LOCAL = true; - else if (string("-p") == argv[i]) - { - i++; - - if (argc == i) - { - cout << "ERROR: missing root password argument" << endl; - exit(-1); - } - - rootPassword = argv[i]; - - // add single quote for special characters - if (rootPassword != "ssh") - { - rootPassword = "'" + rootPassword + "'"; - } - } - else if (string("-mp") == argv[i]) - { - i++; - - if (argc == i) - { - cout << "ERROR: missing MariaDB Columnstore root user password argument" << endl; - exit(-1); - } - - mysqlpw = argv[i]; - mysqlpw = "'" + mysqlpw + "'"; - } - else if (string("-de") == argv[i]) - debug_flag = "1"; - else if (string("-hd") == argv[i]) - { - HADOOP = (DataFilePlugin.empty() ? false : true); - } - else - { - cout << "Invalid Option of '" << argv[i] << "', run with '-h' for help" << endl; - exit(1); - } - } - } - - // default to -a if nothing is set - if (!HARDWARE && !CONFIG && !DBMS && !RESOURCE && !LOG && !BULKLOG && !HADOOP) - { - HARDWARE = true; - CONFIG = true; - DBMS = true; - RESOURCE = true; - LOG = true; - HADOOP = (DataFilePlugin.empty() ? false : true); - } - - // get Parent OAM Module Name and setup of it's Custom OS files - string PrimaryUMModuleName; - - try - { - PrimaryUMModuleName = sysConfig->getConfig(SystemSection, "PrimaryUMModuleName"); - } - catch (...) 
- { - cout << "ERROR: Problem getting Parent OAM Module Name" << endl; - exit(-1); - } - - if (PrimaryUMModuleName == "unassigned") - PrimaryUMModuleName = localModule; - - if ((localModule != PrimaryUMModuleName) && DBMS) - { - char* pcommand = 0; - char* p; - string argument = "n"; - - while (true) - { - cout << endl << "You selected to get the DBMS data." << endl; - cout << "You need to run the columnstoreSupport command on module '" << PrimaryUMModuleName - << "' to get that information." << endl; - cout << "Or you can proceed on to get all data except the DBMS." << endl; - - pcommand = readline(" Do you want to proceed: (y or n) [n]: "); - - if (pcommand && *pcommand) - { - p = strtok(pcommand, " "); - argument = p; - free(pcommand); - pcommand = 0; - } - - if (pcommand) - { - free(pcommand); - pcommand = 0; - } - - if (argument == "y") - { - cout << endl; - break; - } - else if (argument == "n") - exit(1); - } - } - - // get number of worker-nodes, will tell us if a single server system - // get Parent OAM Module Name and setup of it's Custom OS files - try - { - string NumWorkers = sysConfig->getConfig("DBRM_Controller", "NumWorkers"); - - if (NumWorkers == "1") - singleServerInstall = "y"; - } - catch (...) - { - } - - if (singleServerInstall == "n" && !LOCAL) - if (HARDWARE || CONFIG || RESOURCE || LOG || HADOOP) - if (rootPassword.empty()) - { - cout << "ERROR: Multi-Module System, Password Argument required or use '-lc' option, check help for " - "more information" - << endl; - exit(-1); - } - - // get Parent OAM Module Name and setup of it's Custom OS files - // string parentOAMModuleName; - ChildModule parentOAMModule; - - try - { - parentOAMModule.moduleName = sysConfig->getConfig(SystemSection, "ParentOAMModuleName"); - } - catch (...) 
- { - cout << "ERROR: Problem getting Parent OAM Module Name" << endl; - exit(-1); - } - - // Get list of configured system modules - SystemModuleTypeConfig sysModuleTypeConfig; - - try - { - oam.getSystemConfig(sysModuleTypeConfig); - } - catch (...) - { - cout << "ERROR: Problem reading the Columnstore System Configuration file" << endl; - exit(-1); - } - - string ModuleSection = "SystemModuleConfig"; - - for (unsigned int i = 0; i < sysModuleTypeConfig.moduletypeconfig.size(); i++) - { - string moduleType = sysModuleTypeConfig.moduletypeconfig[i].ModuleType; - int moduleCount = sysModuleTypeConfig.moduletypeconfig[i].ModuleCount; - - if (moduleCount == 0) - // no modules equipped for this Module Type, skip - continue; - - // get IP addresses and Host Names - DeviceNetworkList::iterator listPT = sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for (; listPT != sysModuleTypeConfig.moduletypeconfig[i].ModuleNetworkList.end(); listPT++) - { - string moduleName = (*listPT).DeviceName; - HostConfigList::iterator pt1 = (*listPT).hostConfigList.begin(); - string moduleIPAddr = (*pt1).IPAddr; - string moduleHostName = (*pt1).HostName; - - if (moduleName == localModule) - { - localModuleHostName = moduleHostName; - } - - // save Child modules - if (moduleName != localModule && moduleType != "xm") - { - childmodule.moduleName = moduleName; - childmodule.moduleIP = moduleIPAddr; - childmodule.hostName = moduleHostName; - childmodulelist.push_back(childmodule); - } - - if (moduleName == parentOAMModule.moduleName) - { - parentOAMModule.moduleIP = moduleIPAddr; - parentOAMModule.hostName = moduleHostName; - parentOAMModule.moduleName = moduleName; - } - } - } // end of i for loop - - // create a clean Columnstore Support Report - system("rm -f *_configReport.txt"); - system("rm -f *_dbmsReport.txt"); - system("rm -f *_hardwareReport.txt"); - system("rm -f *_logReport.txt"); - system("rm -f *_bulklogReport.txt"); - system("rm -f *_resourceReport.txt"); - 
- // - // Configuration - // - if (CONFIG) - { - string reportType = "config"; - cout << "Get " + reportType + " report data for " + localModule << endl; - pthread_t reportthread; - int status = pthread_create(&reportthread, NULL, (void* (*)(void*)) & reportThread, &reportType); - if (status != 0) - { - cout << "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status); - } - sleep(1); - } - - // - // Alarms and Columnstore Logs - // - if (LOG) - { - string reportType = "log"; - cout << "Get " + reportType + " report data for " + localModule << endl; - pthread_t reportthread; - int status = pthread_create(&reportthread, NULL, (void* (*)(void*)) & reportThread, &reportType); - if (status != 0) - { - cout << "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status); - } - sleep(1); - } - - // - // Bulk Logs - // - if (BULKLOG) - { - string reportType = "bulklog"; - cout << "Get " + reportType + " report data for " + localModule << endl; - pthread_t reportthread; - int status = pthread_create(&reportthread, NULL, (void* (*)(void*)) & reportThread, &reportType); - if (status != 0) - { - cout << "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status); - } - sleep(1); - } - - // - // Hardware - // - if (HARDWARE) - { - string reportType = "hardware"; - cout << "Get " + reportType + " report data for " + localModule << endl; - pthread_t reportthread; - int status = pthread_create(&reportthread, NULL, (void* (*)(void*)) & reportThread, &reportType); - if (status != 0) - { - cout << "ERROR: reportthread: pthread_create failed, return status = " + oam.itoa(status); - } - sleep(1); - } - - // - // Resources - // - if (RESOURCE) - { - string reportType = "resource"; - cout << "Get " + reportType + " report data for " + localModule << endl; - pthread_t reportthread; - int status = pthread_create(&reportthread, NULL, (void* (*)(void*)) & reportThread, &reportType); - if (status != 0) - { - cout << "ERROR: 
reportthread: pthread_create failed, return status = " + oam.itoa(status); - } - sleep(1); - } - - // - // DBMS - // - if (DBMS) - { - cout << "Get dbms report data for " << localModule << endl; - - string outputFile = localModule + "_dbmsReport.txt"; - - FILE* pOutputFile = fopen(outputFile.c_str(), "w"); - if (pOutputFile == NULL) - { - cout << "Could not open file: " + outputFile << endl; - exit(1); - } - - fprintf(pOutputFile, - "********************************************************************************\n" - "\n" - " System %s\n" - " columnstoreSupportReport script ran from Module %s on %s\n" - " SoftwareVersion = %s-%s" - "\n" - "********************************************************************************\n" - "\n" - " DBMS report\n" - "\n" - "********************************************************************************\n", - systemName.c_str(), localModule.c_str(), currentDate.c_str(), columnstore_version.c_str(), - columnstore_release.c_str()); - - fclose(pOutputFile); - - // run DBMS report on local server - bool FAILED = false; - - if (localModule != PrimaryUMModuleName) - { - cout << " FAILED: run columnstoreSupport on '" << PrimaryUMModuleName << "' to get the dbrm report" - << endl; - FAILED = true; - } - else - { - // check if mysql is supported and get info - string logFile = tmpDir + "/idbmysql.log"; - string columnstoreMysql = "mysql -u root "; - string cmd = columnstoreMysql + " -e 'status' > " + logFile + " 2>&1"; - system(cmd.c_str()); - - // check for mysql password set - string pwprompt = " "; - - if (checkLogStatus(logFile, "ERROR 1045")) - { - cout << "NOTE: MariaDB Columnstore root user password is set" << endl; - - // needs a password, was password entered on command line - if (mysqlpw == " ") - { - // go check columnstore.cnf - string file = std::string(MCSMYCNFDIR) + "/columnstore.cnf"; - ifstream oldFile(file.c_str()); - - vector lines; - char line[200]; - string buf; - - while (oldFile.getline(line, 200)) - { - buf = line; 
- string::size_type pos = buf.find("password", 0); - - if (pos != string::npos) - { - string::size_type pos1 = buf.find("=", 0); - - if (pos1 != string::npos) - { - pos = buf.find("#", 0); - - if (pos == string::npos) - { - // password arg in columnstore.cnf, go get password - cout << "NOTE: Using password from columnstore.cnf" << endl; - mysqlpw = buf.substr(pos1 + 1, 80); - cout << mysqlpw << endl; - break; - } - } - } - } - - oldFile.close(); - - if (mysqlpw == " ") - { - cout << "NOTE: No password provide on command line or found uncommented in columnstore.cnf" - << endl; - cout << endl; - string prompt = " *** Enter MariaDB Columnstore password > "; - mysqlpw = getpass(prompt.c_str()); - } - } - - // check for mysql password set - pwprompt = "--password=" + mysqlpw; - - string cmd = columnstoreMysql + pwprompt + " -e 'status' > " + logFile + " 2>&1"; - system(cmd.c_str()); - - if (checkLogStatus(logFile, "ERROR 1045")) - { - cout << "FAILED: Failed login using MariaDB Columnstore root user password '" << mysqlpw << "'" - << endl; - FAILED = true; - } - } - - if (!FAILED) - { - string cmd = "dbmsReport.sh " + localModule + " " + outputFile + " " + std::string(MCSSUPPORTDIR) + - " " + pwprompt; - system(cmd.c_str()); - } - } - - /* - BT: This doesn't appear to do anything - fprintf(pOutputFile,"\n******************** Database Size Report - *************************************\n\n"); getStorageStatus(pOutputFile); - - string file = "databaseSizeReport"; - ifstream File (file.c_str()); - - if (File) - { - string cmd = "databaseSizeReport >> " + outputFile; - system(cmd.c_str()); - } - */ - - boost::filesystem::path configFile = std::string(MCSMYCNFDIR) + "/columnstore.cnf"; - boost::filesystem::copy_file(configFile, "./columnstore.cnf", - boost::filesystem::copy_options::overwrite_existing); - } - - int wait = 0; - - while (true) - { - // cout << "check " << runningThreads << endl; - if (runningThreads < 1) - break; - - sleep(2); - wait++; - - // give it 60 
minutes to complete - if (wait >= 3600 * 5) - { - cout << "Timed out (60 minutes) waiting for Requests to complete" << endl; - } - } - - system("unix2dos *Report.txt > /dev/null 2>&1"); - system( - "rm -rf columnstoreSupportReport;" - "mkdir columnstoreSupportReport;" - "mv *Report.txt columnstoreSupportReport/. > /dev/null 2>&1;" - "mv Columnstore.xml columnstoreSupportReport/. > /dev/null 2>&1;" - "mv columnstore.cnf columnstoreSupportReport/. > /dev/null 2>&1;" - "mv storagemanager.cnf columnstoreSupportReport/. > /dev/null 2>&1;" - "mv *Report.tar.gz columnstoreSupportReport/. > /dev/null 2>&1"); - string cmd = "tar -zcf columnstoreSupportReport." + systemName + ".tar.gz columnstoreSupportReport/*"; - system(cmd.c_str()); - - cout << endl - << "Columnstore Support Script Successfully completed, files located in columnstoreSupportReport." + - systemName + ".tar.gz" - << endl; -} diff --git a/oamapps/columnstoreSupport/configReport.sh b/oamapps/columnstoreSupport/configReport.sh deleted file mode 100755 index cb642f137..000000000 --- a/oamapps/columnstoreSupport/configReport.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash -# -# $Id: hardwareReport.sh 421 2007-04-05 15:46:55Z dhill $ -# -if [ $1 ] ; then - MODULE=$1 -else - MODULE="pm1" -fi - -if [ $2 ] ; then - OUT_FILE=$2 -else - OUT_FILE=${MODULE}_logReport.txt -fi - -{ -echo " " -echo "******************** Configuration/Status Report for ${MODULE} ********************" -echo " " - -chkconfig=`which chkconfig 2>/dev/null` -if [ -n "$chkconfig" ]; then - echo "-- chkconfig configuration --" - echo " " - echo "################# chkconfig --list | grep columnstore #################" - echo " " - chkconfig --list | grep columnstore 2>/dev/null -fi - -systemctl=`which systemctl 2>/dev/null` -if [ -n "$systemctl" ]; then - echo "-- systemctl configuration --" - echo " " - echo "################# systemctl list-unit-files --type=service | grep columnstore #################" - echo " " - systemctl list-unit-files 
--type=service | grep columnstore 2>/dev/null - echo "################# systemctl list-unit-files --type=service | grep mariadb #################" - echo " " - systemctl list-unit-files --type=service | grep mariadb 2>/dev/null -fi - -updaterc=`which update-rc.d 2>/dev/null` -if [ -n "$updaterc" ]; then - echo "-- services configuration --" - echo " " - echo "################# service --status-all | grep columnstore #################" - echo " " - service --status-all | grep columnstore 2>/dev/null -fi - - -echo " " -echo "-- fstab Configuration --" -echo " " -echo "################# cat /etc/fstab #################" -echo " " -cat /etc/fstab 2>/dev/null - -echo " " -echo "-- Server Processes --" -echo " " -echo "################# ps axu #################" -echo " " -ps axu - -echo " " -echo "-- Server Processes with resource usage --" -echo " " -echo "################# top -b -n 1 #################" -echo " " -top -b -n 1 - -} >> $OUT_FILE - -exit 0 diff --git a/oamapps/columnstoreSupport/dbmsReport.sh b/oamapps/columnstoreSupport/dbmsReport.sh deleted file mode 100755 index caac0cfd9..000000000 --- a/oamapps/columnstoreSupport/dbmsReport.sh +++ /dev/null @@ -1,64 +0,0 @@ -#! 
/bin/sh -# -# $Id: dbmsReport.sh -# -if [ $1 ] ; then - MODULE=$1 -else - MODULE="pm1" -fi - -if [ $2 ] ; then - OUT_FILE=$2 -else - OUT_FILE=${MODULE}_logReport.txt -fi - -if [ $3 ] ; then - MCSSUPPORTDIR=$3 -else - MCSSUPPORTDIR="/usr/share/columnstore" -fi - -if [ $4 ] ; then - PW_PROMPT=$4 -else - PW_PROMPT="" -fi - -{ - -columnstoreMysql="mysql -u root ${PW_PROMPT} " - -if ${columnstoreMysql} -V > /dev/null 2>&1; then - echo " " - echo "******************** DBMS Columnstore Version *********************************" - echo " " - ${columnstoreMysql} -e 'status' - echo " " - echo "******************** DBMS Columnstore System Column ***************************" - echo " " - ${columnstoreMysql} -e 'desc calpontsys.syscolumn;' - echo " " - echo "******************** DBMS Columnstore System Table ****************************" - echo " " - ${columnstoreMysql} -e 'desc calpontsys.systable;' - echo " " - echo "******************** DBMS Columnstore System Catalog Data *********************" - echo " " - ${columnstoreMysql} calpontsys < $MCSSUPPORTDIR/dumpcat_mysql.sql - echo " " - echo "******************** DBMS Columnstore System Table Data ***********************" - echo "******************** DBMS Columnstore Databases *******************************" - echo " " - ${columnstoreMysql} -e 'show databases;' - echo " " - echo "******************** DBMS Columnstore variables *******************************" - echo " " - ${columnstoreMysql} -e 'show variables;' - echo " " -fi -} >> $OUT_FILE - -exit 0 - diff --git a/oamapps/columnstoreSupport/getMinMax.sh b/oamapps/columnstoreSupport/getMinMax.sh deleted file mode 100755 index 0e4a0aa2f..000000000 --- a/oamapps/columnstoreSupport/getMinMax.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/bash - -# -# Reports the max value from the extent map for the given column. -# - -# -# Initialize variables. 
-# - -if [ -z "$MYSQLCMD" ]; then - MYSQLCMD="mysql -u root" -fi - -# -# Validate that there are three parameters - schema and table and columnname. -# -if [ $# -ne 3 ]; then - echo "" - echo "Reports the max value for the given column." - echo "" - echo "Parameters:" - echo " Schema" - echo " Table" - echo " Column" - exit 1 -fi -db=$1 -table=$2 -column=$3 - -# -# Validate that the column exists. -# -sql="select count(*) from syscolumn where \`schema\`='$db' and tablename='$table' and columnname='$column';" -count=`$MYSQLCMD calpontsys --skip-column-names -e "$sql;"` -if [ $count -le 0 ]; then - echo "" - echo "$db.$table.$column does not exist in Columnstore." - echo "" - exit 1 -fi - -# -# Validate that the column type is one that this script supports. -# Supported Types: -# 6 int -# 8 date -# 9 bigint -# 11 datetime -sql="select datatype from syscolumn where \`schema\`='$db' and tablename='$table' and columnname='$column';" -dataType=`$MYSQLCMD calpontsys --skip-column-names -e "$sql"` -if [ $dataType -ne 6 ] && [ $dataType -ne 8 ] && [ $dataType -ne 9 ] && [ $dataType -ne 11 ]; then - echo "" - echo "The column data type must be an int, bigint, date, or datetime." - echo "" - exit 1 -fi - -# -# Grab the objectid for the column. -# -sql="select objectid from syscolumn where \`schema\`='$db' and tablename='$table' and columnname='$column';" -objectid=`$MYSQLCMD calpontsys --skip-column-names -e "$sql"` - -# -# Set the editem specific parameter if the column is a date or datetime. -# -if [ $dataType -eq 8 ]; then - parm="-t" -elif [ $dataType -eq 11 ]; then - parm="-s" -fi - -# -# Use the editem utility to get the min and max value. -# -editem -o $objectid $parm | grep max | awk -v dataType=$dataType ' - BEGIN { - allValid=1; - foundValidExtent=0; - } - { - if(dataType == 11) { - state=substr($14, 1, length($14)-1); # Datetime has date and time as two fields. 
- thisMin=$6 " " substr($7, 1, length($7)-1); - thisMax=$9 " " substr($10, 1, length($10)-1); - } - else { - state=substr($12, 1, length($12)-1); - thisMin=substr($6, 1, length($6)-1); - thisMax=substr($8, 1, length($8)-1); - } - if(state == "valid") { - if(!foundValidExtent) { - min=thisMin; - max=thisMax; - foundValidExtent=1; - } - else { - if(thisMin < min) { - min=thisMin; - } - if(thisMax > max) { - max=thisMax; - } - } - } - else { - allValid=0; - } - } - END { - if(foundValidExtent == 1) { - print ""; - print "Min=" min; - print "Max=" max; - print ""; - if(allValid == 0) { - print "Not all extents had min and max values set. Answer is incomplete." - } - } - else { - print ""; - print "There were not any extents with valid min/max values. Unable to provide answer."; - print ""; - } - }' - -exit 0 diff --git a/oamapps/columnstoreSupport/hardwareReport.sh b/oamapps/columnstoreSupport/hardwareReport.sh deleted file mode 100755 index d0bc92d5c..000000000 --- a/oamapps/columnstoreSupport/hardwareReport.sh +++ /dev/null @@ -1,79 +0,0 @@ -#! 
/bin/sh -# -# $Id: hardwareReport.sh 421 2007-04-05 15:46:55Z dhill $ -# -if [ $1 ] ; then - MODULE=$1 -else - MODULE="pm1" -fi - -if [ $2 ] ; then - OUT_FILE=$2 -else - OUT_FILE=${MODULE}_logReport.txt -fi - -{ -echo " " -echo "******************** Hardware Report for ${MODULE} ********************" -echo " " - -echo "-- Server OS Version --" -echo " " -echo "################# cat /proc/version #################" -echo " " -cat /proc/version 2>/dev/null -echo " " -echo "################# uname -a #################" -echo " " -uname -a -echo " " -echo "################# cat /etc/issue #################" -echo " " -cat /etc/issue 2>/dev/null -echo " " -echo "run columnstore_os_check.sh" -echo " " -echo "################# /bin/columnstore_os_check.sh #################" -echo " " -columnstore_os_check.sh 2>/dev/null - -echo " " -echo "-- Server Uptime --" -echo " " -echo "################# uptime #################" -echo " " -uptime - -echo " " -echo "-- Server cpu-info --" -echo " " -echo "################# cat /proc/cpuinfo #################" -echo " " -cat /proc/cpuinfo 2>/dev/null - -echo " " -echo "-- Server memory-info --" -echo " " -echo "################# cat /proc/meminfo #################" -echo " " -$cat /proc/meminfo 2>/dev/null - -echo " " -echo "-- Server mounts --" -echo " " -echo "################# cat /proc/mounts #################" -echo " " -cat /proc/mounts 2>/dev/null - -echo " " -echo "-- Server Ethernet Configuration --" -echo " " -echo "################# ifconfig -a #################" -echo " " -ifconfig -a 2>/dev/null - -} >> $OUT_FILE - -exit 0 diff --git a/oamapps/columnstoreSupport/logReport.sh b/oamapps/columnstoreSupport/logReport.sh deleted file mode 100755 index 35b3e9a5f..000000000 --- a/oamapps/columnstoreSupport/logReport.sh +++ /dev/null @@ -1,52 +0,0 @@ -#! 
/bin/sh -# -# $Id: logReport.sh 421 2007-04-05 15:46:55Z dhill $ -# -if [ $1 ] ; then - MODULE=$1 -else - MODULE="pm1" -fi - -if [ $2 ] ; then - OUT_FILE=$2 -else - OUT_FILE=${MODULE}_logReport.txt -fi - -#get temp directory -tmpDir=`mcsGetConfig SystemConfig SystemTempFileDir` - -rm -f ${tmpDir}/${MODULE}_logReport.tar.gz -tar -zcf ${tmpDir}/${MODULE}_logReport.tar.gz /var/log/mariadb/columnstore > /dev/null 2>&1 -cp ${tmpDir}/${MODULE}_logReport.tar.gz . -tar -zcf ${MODULE}_mysqllogReport.tar.gz /var/log/mysql/*.err 2>/dev/null - -echo '******************** Log Configuration ********************' >> $OUT_FILE -echo '' >> $OUT_FILE -echo 'MariaDB ColumnStore System Log Configuration Data' >> $OUT_FILE -echo '' >> $OUT_FILE -configFileName=`mcsGetConfig Installation SystemLogConfigFile` -echo 'System Logging Configuration File being used: '${configFileName} >> $OUT_FILE -echo '' >> $OUT_FILE -echo -e 'Module\tConfigured Log Levels' >> $OUT_FILE -echo -e '------\t---------------------------------------' >> $OUT_FILE -moduleConfig='' -if grep -q '/var/log/mariadb/columnstore/crit.log' ${configFileName}; then - moduleConfig=${moduleConfig}' CRITICAL' -fi -if grep -q '/var/log/mariadb/columnstore/err.log' ${configFileName}; then - moduleConfig=${moduleConfig}' ERROR' -fi -if grep -q '/var/log/mariadb/columnstore/warning.log' ${configFileName}; then - moduleConfig=${moduleConfig}' WARNING' -fi -if grep -q '/var/log/mariadb/columnstore/info.log' ${configFileName}; then - moduleConfig=${moduleConfig}' INFO' -fi -if grep -q '/var/log/mariadb/columnstore/debug.log' ${configFileName}; then - moduleConfig=${moduleConfig}' DEBUG' -fi -echo -e ${MODULE}'\t'${moduleConfig} >> $OUT_FILE -exit 0 - diff --git a/oamapps/columnstoreSupport/mcsSupportUtil.cpp b/oamapps/columnstoreSupport/mcsSupportUtil.cpp deleted file mode 100644 index 586a342e5..000000000 --- a/oamapps/columnstoreSupport/mcsSupportUtil.cpp +++ /dev/null @@ -1,621 +0,0 @@ -/* Copyright (C) 2019 MariaDB Corporation - 
- This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#include "mcsSupportUtil.h" - -using namespace std; -using namespace oam; -using namespace config; - -void getSystemNetworkConfig(FILE* pOutputFile) -{ - Oam oam; - // get and display Module Network Config - SystemModuleTypeConfig systemmoduletypeconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - - // get max length of a host name for header formatting - - int maxSize = 9; - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for (unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if (systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty()) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - string moduletypedesc = systemmoduletypeconfig.moduletypeconfig[i].ModuleDesc; - - if (moduleCount > 0) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for (; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for (; pt1 != (*pt).hostConfigList.end(); pt1++) - { - if (maxSize < (int)(*pt1).HostName.size()) - maxSize = (*pt1).HostName.size(); - } - } - } - } - } - catch 
(exception& e) - { - fprintf(pOutputFile, "**** getNetworkConfig Failed = %s\n\n", e.what()); - } - - fprintf(pOutputFile, "%-15s%-30s%-10s%-14s%-20s\n", "Module Name", "Module Description", "NIC ID", - "Host Name", "IP Address"); - fprintf(pOutputFile, "%-15s%-30s%-10s%-14s%-20s\n", "-----------", "-------------------------", "------", - "---------", "---------------"); - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - for (unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if (systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty()) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - string moduletypedesc = systemmoduletypeconfig.moduletypeconfig[i].ModuleDesc; - - if (moduleCount > 0) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for (; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - string modulename = (*pt).DeviceName; - string moduleID = modulename.substr(MAX_MODULE_TYPE_SIZE, MAX_MODULE_ID_SIZE); - string modulenamedesc = moduletypedesc + " #" + moduleID; - - fprintf(pOutputFile, "%-15s%-30s", modulename.c_str(), modulenamedesc.c_str()); - - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for (; pt1 != (*pt).hostConfigList.end(); pt1++) - { - /* MCOL-1607. IPAddr may be a host name here b/c it is read straight - from the config file. 
*/ - string tmphost = getIPAddress(pt1->IPAddr); - string ipAddr; - if (tmphost.empty()) - ipAddr = pt1->IPAddr; - else - ipAddr = tmphost; - string hostname = (*pt1).HostName; - string nicID = oam.itoa((*pt1).NicID); - - if (nicID != "1") - { - fprintf(pOutputFile, "%-45s", ""); - } - fprintf(pOutputFile, "%-13s%-14s%-20s\n", nicID.c_str(), hostname.c_str(), ipAddr.c_str()); - } - } - } - } - } - catch (exception& e) - { - fprintf(pOutputFile, "**** getNetworkConfig Failed = %s\n\n", e.what()); - } -} - -void getModuleTypeConfig(FILE* pOutputFile) -{ - Oam oam; - SystemModuleTypeConfig systemmoduletypeconfig; - ModuleTypeConfig moduletypeconfig; - ModuleConfig moduleconfig; - systemmoduletypeconfig.moduletypeconfig.clear(); - - try - { - oam.getSystemConfig(systemmoduletypeconfig); - - fprintf(pOutputFile, "Module Type Configuration\n\n"); - - for (unsigned int i = 0; i < systemmoduletypeconfig.moduletypeconfig.size(); i++) - { - if (systemmoduletypeconfig.moduletypeconfig[i].ModuleType.empty()) - // end of list - break; - - int moduleCount = systemmoduletypeconfig.moduletypeconfig[i].ModuleCount; - - if (moduleCount < 1) - continue; - - string moduletype = systemmoduletypeconfig.moduletypeconfig[i].ModuleType; - - fprintf(pOutputFile, "ModuleType '%s' Configuration information\n", moduletype.c_str()); - fprintf(pOutputFile, "ModuleDesc = %s\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleDesc.c_str()); - fprintf(pOutputFile, "ModuleCount = %i\n", moduleCount); - - if (moduleCount > 0) - { - DeviceNetworkList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.begin(); - - for (; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleNetworkList.end(); pt++) - { - string modulename = (*pt).DeviceName; - HostConfigList::iterator pt1 = (*pt).hostConfigList.begin(); - - for (; pt1 != (*pt).hostConfigList.end(); pt1++) - { - string ipAddr = (*pt1).IPAddr; - string servername = (*pt1).HostName; - fprintf(pOutputFile, "ModuleHostName and 
ModuleIPAddr for NIC ID %u on module '%s' = %s , %s\n", - (*pt1).NicID, modulename.c_str(), servername.c_str(), ipAddr.c_str()); - } - } - } - - DeviceDBRootList::iterator pt = systemmoduletypeconfig.moduletypeconfig[i].ModuleDBRootList.begin(); - - for (; pt != systemmoduletypeconfig.moduletypeconfig[i].ModuleDBRootList.end(); pt++) - { - if ((*pt).dbrootConfigList.size() > 0) - { - fprintf(pOutputFile, "DBRootIDs assigned to module 'pm%u' = ", (*pt).DeviceID); - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for (; pt1 != (*pt).dbrootConfigList.end();) - { - fprintf(pOutputFile, "%u", *pt1); - pt1++; - - if (pt1 != (*pt).dbrootConfigList.end()) - fprintf(pOutputFile, ", "); - } - } - fprintf(pOutputFile, "\n"); - } - - fprintf(pOutputFile, "ModuleCPUCriticalThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUCriticalThreshold); - fprintf(pOutputFile, "ModuleCPUMajorThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMajorThreshold); - fprintf(pOutputFile, "ModuleCPUMinorThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMinorThreshold); - fprintf(pOutputFile, "ModuleCPUMinorClearThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleCPUMinorClearThreshold); - fprintf(pOutputFile, "ModuleDiskCriticalThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleDiskCriticalThreshold); - fprintf(pOutputFile, "ModuleDiskMajorThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleDiskMajorThreshold); - fprintf(pOutputFile, "ModuleDiskMinorThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleDiskMinorThreshold); - fprintf(pOutputFile, "ModuleMemCriticalThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleMemCriticalThreshold); - fprintf(pOutputFile, "ModuleMemMajorThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleMemMajorThreshold); - fprintf(pOutputFile, 
"ModuleMemMinorThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleMemMinorThreshold); - fprintf(pOutputFile, "ModuleSwapCriticalThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleSwapCriticalThreshold); - fprintf(pOutputFile, "ModuleSwapMajorThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleSwapMajorThreshold); - fprintf(pOutputFile, "ModuleSwapMinorThreshold = %u%%\n", - systemmoduletypeconfig.moduletypeconfig[i].ModuleSwapMinorThreshold); - - DiskMonitorFileSystems::iterator pt2 = systemmoduletypeconfig.moduletypeconfig[i].FileSystems.begin(); - int id = 1; - - for (; pt2 != systemmoduletypeconfig.moduletypeconfig[i].FileSystems.end(); pt2++) - { - string fs = *pt2; - fprintf(pOutputFile, "ModuleDiskMonitorFileSystem#%i = %s\n", id, fs.c_str()); - ++id; - } - fprintf(pOutputFile, "\n"); - } - } - catch (exception& e) - { - cout << endl << "**** getModuleTypeConfig Failed = " << e.what() << endl; - } -} - -void getStorageConfig(FILE* pOutputFile) -{ - Oam oam; - try - { - systemStorageInfo_t t; - t = oam.getStorageConfig(); - - string cloud; - - try - { - oam.getSystemConfig("Cloud", cloud); - } - catch (...) 
- { - } - - string::size_type pos = cloud.find("amazon", 0); - - if (pos != string::npos) - cloud = "amazon"; - - fprintf(pOutputFile, "System Storage Configuration\n"); - - fprintf(pOutputFile, "Performance Module (DBRoot) Storage Type = %s\n", boost::get<0>(t).c_str()); - - if (cloud == "amazon") - fprintf(pOutputFile, "User Module Storage Type = %s\n", boost::get<3>(t).c_str()); - - fprintf(pOutputFile, "System Assigned DBRoot Count = %i\n", boost::get<1>(t)); - - DeviceDBRootList moduledbrootlist = boost::get<2>(t); - - typedef std::vector dbrootList; - dbrootList dbrootlist; - - DeviceDBRootList::iterator pt = moduledbrootlist.begin(); - - for (; pt != moduledbrootlist.end(); pt++) - { - fprintf(pOutputFile, "DBRoot IDs assigned to 'pm%u' = ", (*pt).DeviceID); - DBRootConfigList::iterator pt1 = (*pt).dbrootConfigList.begin(); - - for (; pt1 != (*pt).dbrootConfigList.end();) - { - fprintf(pOutputFile, "%u", *pt1); - dbrootlist.push_back(*pt1); - pt1++; - - if (pt1 != (*pt).dbrootConfigList.end()) - fprintf(pOutputFile, ", "); - } - - fprintf(pOutputFile, "\n"); - } - - // get any unassigned DBRoots - /*DBRootConfigList undbrootlist; - - try - { - oam.getUnassignedDbroot(undbrootlist); - } - catch (...) 
{} - - if ( !undbrootlist.empty() ) - { - fprintf(pOutputFile,"DBRoot IDs unassigned = "); - DBRootConfigList::iterator pt1 = undbrootlist.begin(); - - for ( ; pt1 != undbrootlist.end() ;) - { - fprintf(pOutputFile,"%u",*pt1); - pt1++; - - if (pt1 != undbrootlist.end()) - fprintf(pOutputFile,", "); - } - - fprintf(pOutputFile,"\n"); - }*/ - - fprintf(pOutputFile, "\n"); - - // um volumes - if (cloud == "amazon" && boost::get<3>(t) == "external") - { - ModuleTypeConfig moduletypeconfig; - oam.getSystemConfig("um", moduletypeconfig); - - for (int id = 1; id < moduletypeconfig.ModuleCount + 1; id++) - { - string volumeNameID = "UMVolumeName" + oam.itoa(id); - string volumeName = oam::UnassignedName; - string deviceNameID = "UMVolumeDeviceName" + oam.itoa(id); - string deviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig(volumeNameID, volumeName); - oam.getSystemConfig(deviceNameID, deviceName); - } - catch (...) - { - } - - fprintf(pOutputFile, "Amazon EC2 Volume Name/Device Name for 'um%i': %s, %s", id, volumeName.c_str(), - deviceName.c_str()); - } - } - - // pm volumes - if (cloud == "amazon" && boost::get<0>(t) == "external") - { - fprintf(pOutputFile, "\n"); - - DBRootConfigList dbrootConfigList; - - try - { - oam.getSystemDbrootConfig(dbrootConfigList); - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for (; pt != dbrootConfigList.end(); pt++) - { - string volumeNameID = "PMVolumeName" + oam.itoa(*pt); - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + oam.itoa(*pt); - string deviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig(volumeNameID, volumeName); - oam.getSystemConfig(deviceNameID, deviceName); - } - catch (...) 
- { - continue; - } - } - } - catch (exception& e) - { - cout << endl << "**** getSystemDbrootConfig Failed : " << e.what() << endl; - } - - // print un-assigned dbroots - /*DBRootConfigList::iterator pt1 = undbrootlist.begin(); - - for ( ; pt1 != undbrootlist.end() ; pt1++) - { - string volumeNameID = "PMVolumeName" + oam.itoa(*pt1); - string volumeName = oam::UnassignedName; - string deviceNameID = "PMVolumeDeviceName" + oam.itoa(*pt1); - string deviceName = oam::UnassignedName; - - try - { - oam.getSystemConfig( volumeNameID, volumeName); - oam.getSystemConfig( deviceNameID, deviceName); - } - catch (...) - { - continue; - } - }*/ - } - - string DataRedundancyConfig; - int DataRedundancyCopies; - string DataRedundancyStorageType; - - try - { - oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); - oam.getSystemConfig("DataRedundancyCopies", DataRedundancyCopies); - oam.getSystemConfig("DataRedundancyStorageType", DataRedundancyStorageType); - } - catch (...) - { - } - - if (DataRedundancyConfig == "y") - { - fprintf(pOutputFile, "\nData Redundant Configuration\n\n"); - fprintf(pOutputFile, "Copies Per DBroot = %i", DataRedundancyCopies); - - oamModuleInfo_t st; - string moduleType; - - try - { - st = oam.getModuleInfo(); - moduleType = boost::get<1>(st); - } - catch (...) - { - } - - if (moduleType != "pm") - return; - - try - { - DBRootConfigList dbrootConfigList; - oam.getSystemDbrootConfig(dbrootConfigList); - - DBRootConfigList::iterator pt = dbrootConfigList.begin(); - - for (; pt != dbrootConfigList.end(); pt++) - { - fprintf(pOutputFile, "DBRoot #%u has copies on PMs = ", *pt); - - string pmList = ""; - - try - { - string errmsg; - // oam.glusterctl(oam::GLUSTER_WHOHAS, oam.itoa(*pt), pmList, errmsg); - } - catch (...) 
- { - } - - boost::char_separator sep(" "); - boost::tokenizer > tokens(pmList, sep); - - for (boost::tokenizer >::iterator it = tokens.begin(); - it != tokens.end(); ++it) - { - fprintf(pOutputFile, "%s ", (*it).c_str()); - } - - fprintf(pOutputFile, "\n"); - } - - fprintf(pOutputFile, "\n"); - } - catch (exception& e) - { - cout << endl << "**** getSystemDbrootConfig Failed : " << e.what() << endl; - } - } - } - catch (exception& e) - { - cout << endl << "**** getStorageConfig Failed : " << e.what() << endl; - } -} - -void getStorageStatus(FILE* pOutputFile) -{ - Oam oam; - - fprintf(pOutputFile, "System External DBRoot Storage Statuses\n\n"); - fprintf(pOutputFile, "Component Status Last Status Change\n"); - fprintf(pOutputFile, "------------ -------------------------- ------------------------\n"); - - /*try - { - oam.getSystemStatus(systemstatus, false); - - if ( systemstatus.systemdbrootstatus.dbrootstatus.size() == 0 ) - { - fprintf(pOutputFile," No External DBRoot Storage Configured\n\n"); - return; - } - - for ( unsigned int i = 0 ; i < systemstatus.systemdbrootstatus.dbrootstatus.size(); i++) - { - if ( systemstatus.systemdbrootstatus.dbrootstatus[i].Name.empty() ) - // end of list - break; - - int state = systemstatus.systemdbrootstatus.dbrootstatus[i].OpState; - string stime = systemstatus.systemdbrootstatus.dbrootstatus[i].StateChangeDate ; - stime = stime.substr (0, 24); - fprintf(pOutputFile,"DBRoot%s%-29s%-24s\n", - systemstatus.systemdbrootstatus.dbrootstatus[i].Name.c_str(), - oamState[state].c_str(), - stime.c_str()); - } - fprintf(pOutputFile,"\n"); - } - catch (exception& e) - { - cout << endl << "**** getSystemStatus Failed = " << e.what() << endl; - }*/ - - string DataRedundancyConfig; - int DataRedundancyCopies; - string DataRedundancyStorageType; - - try - { - oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); - oam.getSystemConfig("DataRedundancyCopies", DataRedundancyCopies); - 
oam.getSystemConfig("DataRedundancyStorageType", DataRedundancyStorageType); - } - catch (...) - { - } -} - -/******************************************************************** - * - * checkLogStatus - Check for a phrase in a log file and return status - * - ********************************************************************/ -bool checkLogStatus(std::string fileName, std::string phrase) -{ - ifstream file(fileName.c_str()); - - if (!file.is_open()) - { - return false; - } - - string buf; - - while (getline(file, buf)) - { - string::size_type pos = buf.find(phrase, 0); - - if (pos != string::npos) - // found phrase - return true; - } - - if (file.bad()) - { - return false; - } - - file.close(); - return false; -} - -/****************************************************************************************** - * @brief Get Network IP Address for Host Name - * - * purpose: Get Network IP Address for Host Name - * - ******************************************************************************************/ -string getIPAddress(string hostName) -{ - static uint32_t my_bind_addr; - struct hostent* ent; - string IPAddr = ""; - Oam oam; - - ent = gethostbyname(hostName.c_str()); - - if (ent != 0) - { - my_bind_addr = (uint32_t)((in_addr*)ent->h_addr_list[0])->s_addr; - - uint8_t split[4]; - uint32_t ip = my_bind_addr; - split[0] = (ip & 0xff000000) >> 24; - split[1] = (ip & 0x00ff0000) >> 16; - split[2] = (ip & 0x0000ff00) >> 8; - split[3] = (ip & 0x000000ff); - - IPAddr = - oam.itoa(split[3]) + "." + oam.itoa(split[2]) + "." + oam.itoa(split[1]) + "." 
+ oam.itoa(split[0]); - } - - return IPAddr; -} diff --git a/oamapps/columnstoreSupport/mcsSupportUtil.h b/oamapps/columnstoreSupport/mcsSupportUtil.h deleted file mode 100644 index dda881255..000000000 --- a/oamapps/columnstoreSupport/mcsSupportUtil.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright (C) 2020 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include "configcpp.h" -#include "liboamcpp.h" - -void getSystemNetworkConfig(FILE* pOutputFile); -void getModuleTypeConfig(FILE* pOutputFile); -void getStorageConfig(FILE* pOutputFile); -void getStorageStatus(FILE* pOutputFile); -bool checkLogStatus(std::string filename, std::string phase); -std::string getIPAddress(std::string hostName); diff --git a/oamapps/columnstoreSupport/resourceReport.sh b/oamapps/columnstoreSupport/resourceReport.sh deleted file mode 100755 index b0a092563..000000000 --- a/oamapps/columnstoreSupport/resourceReport.sh +++ /dev/null @@ -1,66 +0,0 @@ -#! 
/bin/sh -# -# $Id: resourceReport.sh 421 2007-04-05 15:46:55Z dhill $ -# -if [ $1 ] ; then - MODULE=$1 -else - MODULE="pm1" -fi - -if [ $2 ] ; then - OUT_FILE=$2 -else - OUT_FILE=${MODULE}_logReport.txt -fi - -{ -echo " " -echo "******************** Resource Usage Report for ${MODULE} ********************" -echo " " - -echo " " -echo "-- Shared Memory --" -echo " " -echo "################# ipcs -l #################" -echo " " -ipcs -l - -echo "################# clearShm -n #################" -echo " " -clearShm -n - -echo " " -echo "-- Disk Usage --" -echo " " -echo "################# df -k #################" -echo " " -df -k - -echo " " -echo "-- Disk BRM Data files --" -echo " " -ls -l /var/lib/columnstore/data1/systemFiles/dbrm 2> /dev/null -ls -l /var/lib/columnstore/dbrm 2> /dev/null - -echo "################# cat /var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves_current #################" -echo " " -cat /var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves_current 2> /dev/null - -echo " " -echo "-- View Table Locks --" -echo " " -echo "################# cat bin/viewtablelock #################" -echo " " -viewtablelock 2> /dev/null - -echo " " -echo "-- BRM Extent Map --" -echo " " -echo "################# bin/editem -i #################" -echo " " -editem -i 2>/dev/null - -} >> $OUT_FILE - -exit 0 diff --git a/oamapps/columnstoreSupport/softwareReport.sh b/oamapps/columnstoreSupport/softwareReport.sh deleted file mode 100755 index 24a93274c..000000000 --- a/oamapps/columnstoreSupport/softwareReport.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# -# $Id: hardwareReport.sh 421 2007-04-05 15:46:55Z dhill $ -# -if [ $1 ] ; then - MODULE=$1 -else - MODULE="pm1" -fi - -if [ $2 ] ; then - OUT_FILE=$2 -else - OUT_FILE=${MODULE}_logReport.txt -fi - -{ -echo " " -echo "******************** Software Report for ${MODULE} ********************" -echo " " - -echo " " -echo "-- Columnstore Package Details --" -echo " " -rpm -qi MariaDB-columnstore-engine -echo " " - -} 
>> $OUT_FILE - -exit 0 diff --git a/oamapps/replayTransactionLog/CMakeLists.txt b/oamapps/replayTransactionLog/CMakeLists.txt deleted file mode 100644 index 678ebba7a..000000000 --- a/oamapps/replayTransactionLog/CMakeLists.txt +++ /dev/null @@ -1,34 +0,0 @@ -# -# Not used -# - -# original Makefile.am contents follow: - -# Copyright (C) 2014 InfiniDB, Inc. -# -# This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public -# License as published by the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied -# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with this program; if not, write to the Free -# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -# -# $Id: Makefile.am 333 2009-04-03 20:35:04Z rdempsey $ Process this file with automake to produce Makefile.in -# -# AM_CPPFLAGS = $(idb_cppflags) AM_CFLAGS = $(idb_cflags) AM_CXXFLAGS = $(idb_cxxflags) AM_LDFLAGS = $(idb_ldflags) -# bin_PROGRAMS = ReplayTransactionLog ReplayTransactionLog_SOURCES = replaytransactionlog.cpp -# ReplayTransactionLog_CPPFLAGS = @idb_common_includes@ $(AM_CPPFLAGS) ReplayTransactionLog_LDFLAGS = -# @idb_common_ldflags@ @idb_exec_libs@ -lreplaytxnlog $(AM_LDFLAGS) -# -# test: -# -# coverage: -# -# leakcheck: -# -# docs: -# -# bootstrap: install-data-am -# diff --git a/oamapps/replayTransactionLog/replaytransactionlog.cpp b/oamapps/replayTransactionLog/replaytransactionlog.cpp deleted file mode 100644 index 0a662510b..000000000 --- a/oamapps/replayTransactionLog/replaytransactionlog.cpp +++ /dev/null @@ -1,156 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. 
- - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -// WWW - Add header comment. -#include -#include -#include -#include "liboamcpp.h" - -using namespace std; -using namespace oam; - -#include "replaytxnlog.h" -namespace -{ -void usage(char* prog) -{ - cout << endl; - cout << "Usage: " << prog << " [options]" << endl; - - cout << endl; - cout << "This utility can be used after a backup is restored to report transactions that " << endl; - cout << "occurred after the backup. It begins with the first transaction that was committed " << endl; - cout << "after the backup and reports DDL and DML statements as well as imports." << endl; - cout << endl; - - cout << "Options:" << endl; - /* - cout << "-u Database user id." << endl << endl; - - cout << "-p Password." << endl << endl; - */ - cout << "-d Stop date and time as mm/dd/yy@hh:mm:ss or 'Now'." << endl; - cout << " Only transactions committed before this date and time will be reported." << endl; - cout << " The current date and time will be used if 'Now'." << endl << endl; - - /* - cout << "-i Ignore bulk load log entries." << endl; - cout << " The program will pause and prompt at bulk load entries by default." << endl << - endl; - - cout << "-e Report mode. The sql statements will be displayed to the console only. No" << - endl; cout << " transactions will be processed. The user and password will be ignored." 
<< - endl << endl; - */ - - cout << "-h Display this help." << endl << endl; -} - -bool isRunningOnPm() -{ - Oam oam; - oamModuleInfo_t t; - string moduleType; - int installType = -1; - - char* csc_ident = getenv("CALPONT_CSC_IDENT"); - - if (csc_ident == 0 || *csc_ident == 0) - { - // get local module info valdiate running on a pm - try - { - t = oam.getModuleInfo(); - moduleType = boost::get<1>(t); - installType = boost::get<5>(t); - } - catch (exception& e) - { - moduleType = "pm"; - } - } - else - moduleType = csc_ident; - - if (installType != oam::INSTALL_COMBINE_DM_UM_PM) - { - if (moduleType != "pm") - { - cerr << "Exiting, ReplayTransactionLog can only be run on a performance module (pm)" << endl; - return false; - } - } - - return true; -} -} // namespace - -int main(int argc, char** argv) -{ - string user; - string password; - string stopDate; - bool ignoreBulk = false; - bool reportMode = false; - char c; - - // Invokes member function `int operator ()(void);' - while ((c = getopt(argc, argv, "u:p:d:ihe")) != -1) - { - switch (c) - { - /* - case 'u': - user = optarg; - break; - case 'p': - password = optarg; - break; - */ - case 'd': stopDate = optarg; break; - - /* - case 'i': - ignoreBulk = true; - break; - case 'e': - reportMode = true; - break; - */ - case 'h': - usage(argv[0]); - return 0; - break; - - default: - usage(argv[0]); - return 1; - break; - } - } - - if (!isRunningOnPm()) - { - return 0; - } - - ReplayTxnLog replayTxnLog(user, password, stopDate, ignoreBulk, reportMode); - replayTxnLog.process(); - - return 0; -} diff --git a/oamapps/replayTransactionLog/tdriver.cpp b/oamapps/replayTransactionLog/tdriver.cpp deleted file mode 100644 index 809786d7d..000000000 --- a/oamapps/replayTransactionLog/tdriver.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. 
- - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * wweeks@calpont.com * - * * - ***************************************************************************/ - -using namespace std; -#include -#include -#include -#include -#include "sessionmanager.h" - -#include - -using namespace execplan; - -int maxNewTxns = 1000; -int maxTxns = 1000; - -class ExecPlanTest : public CppUnit::TestFixture -{ - CPPUNIT_TEST_SUITE(ExecPlanTest); - - CPPUNIT_TEST_SUITE_END(); - - private: - public: - void setUp() - { - } - - void tearDown() - { - } -}; // test suite - -CPPUNIT_TEST_SUITE_REGISTRATION(ExecPlanTest); - -#include -#include - -int main(int argc, char* argv[]) -{ - CppUnit::TextUi::TestRunner runner; - CppUnit::TestFactoryRegistry& registry = CppUnit::TestFactoryRegistry::getRegistry(); - runner.addTest(registry.makeTest()); - bool wasSuccessful = runner.run("", false); - return (wasSuccessful ? 0 : 1); -} diff --git a/oamapps/sessionWalker/CMakeLists.txt b/oamapps/sessionWalker/CMakeLists.txt deleted file mode 100644 index fe6fe4fea..000000000 --- a/oamapps/sessionWalker/CMakeLists.txt +++ /dev/null @@ -1,34 +0,0 @@ -# -# Not used -# - -# original Makefile.am contents follow: - -# Copyright (C) 2014 InfiniDB, Inc. 
-# -# This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public -# License as published by the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied -# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with this program; if not, write to the Free -# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -# -# $Id: Makefile.am 333 2009-04-03 20:35:04Z rdempsey $ Process this file with automake to produce Makefile.in -# -# AM_CPPFLAGS = $(idb_cppflags) AM_CFLAGS = $(idb_cflags) AM_CXXFLAGS = $(idb_cxxflags) AM_LDFLAGS = $(idb_ldflags) -# bin_PROGRAMS = sessionWalker sessionWalker_SOURCES = sessionwalker.cpp sessionWalker_CPPFLAGS = @idb_common_includes@ -# $(AM_CPPFLAGS) sessionWalker_LDFLAGS = @idb_common_ldflags@ @idb_common_libs@ @idb_write_libs@ @netsnmp_libs@ -# $(AM_LDFLAGS) -# -# test: -# -# coverage: -# -# leakcheck: -# -# docs: -# -# bootstrap: install-data-am -# diff --git a/oamapps/sessionWalker/sessionwalker.cpp b/oamapps/sessionWalker/sessionwalker.cpp deleted file mode 100644 index 06cf08bd0..000000000 --- a/oamapps/sessionWalker/sessionwalker.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -/*************************************************************************** - * $Id: sessionwalker.cpp 3072 2013-04-04 19:04:45Z rdempsey $ - * - * jrodriguez@calpont.com - * * - ***************************************************************************/ - -#include -using namespace std; - -#include "sessionmonitor.h" -using namespace execplan; - -#include "vendordmlstatement.h" -#include "calpontdmlpackage.h" -#include "calpontdmlfactory.h" -using namespace dmlpackage; - -#include "bytestream.h" -#include "messagequeue.h" -using namespace messageqcpp; - -namespace -{ -void usage() -{ - cout << "sessionwalker [-d|-h]" << endl - << " -r rollback all transactions found" << endl - << " -h display this help" << endl; -} - -void rollback(const SessionMonitor::MonSIDTIDEntry& txn) -{ - VendorDMLStatement dmlStmt("ROLLBACK;", txn.sessionid); - CalpontDMLPackage* pDMLPackage = CalpontDMLFactory::makeCalpontDMLPackage(dmlStmt); - - if (pDMLPackage == 0) - { - return; - } - - ByteStream bytestream; - pDMLPackage->write(bytestream); - delete pDMLPackage; - MessageQueueClient mq("DMLProc"); - - try - { - cout << "sending ROLLBACK for sessionID " << txn.sessionid << endl; - mq.write(bytestream); - bytestream = mq.read(); - } - catch (...) 
- { - } -} - -} // namespace - -int main(int argc, char** argv) -{ - bool rflg = false; - opterr = 0; - int c; - - while ((c = getopt(argc, argv, "rh")) != EOF) - switch (c) - { - case 'r': rflg = true; break; - - case 'h': - usage(); - return 0; - break; - - default: - usage(); - return 1; - break; - } - - vector toTxns; - SessionMonitor* monitor = new SessionMonitor(); - - toTxns.clear(); - toTxns = monitor->timedOutTxns(); // get timed out txns - - vector::iterator iter = toTxns.begin(); - vector::iterator end = toTxns.end(); - - vector tmp; - - while (iter != end) - { - if ((*iter)->sessionid > 0) - tmp.push_back(*iter); - - ++iter; - } - - toTxns.swap(tmp); - - cout << toTxns.size() << " timed out transactions." << endl; - - for (unsigned idx = 0; idx < toTxns.size(); idx++) - { - monitor->printTxns(*toTxns[idx]); - - if (rflg) - { - rollback(*toTxns[idx]); - } - } - - delete monitor; - - return 0; -} diff --git a/oamapps/sessionWalker/tdriver.cpp b/oamapps/sessionWalker/tdriver.cpp deleted file mode 100644 index 29e1771fd..000000000 --- a/oamapps/sessionWalker/tdriver.cpp +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. 
*/ - -/*************************************************************************** - * jrodriguez@calpont.com * - * * - ***************************************************************************/ - -using namespace std; -#include -#include -#include -#include -#include "sessionmonitor.h" -#include "sessionmanager.h" - -#include - -using namespace execplan; - -int maxNewTxns = 1000; -int maxTxns = 1000; - -class ExecPlanTest : public CppUnit::TestFixture -{ - CPPUNIT_TEST_SUITE(ExecPlanTest); - - CPPUNIT_TEST(MonitorTestPlan_1); - CPPUNIT_TEST_SUITE_END(); - - private: - public: - void setUp() - { - } - - void tearDown() - { - } - - int verifyLen; - SessionManager* manager; - SessionManager::TxnID managerTxns[1000]; - - int createTxns(const int& start, const int& end) - { - int first = start; - int last = end; - int newTxns = 0; - - verifyLen = manager->verifySize(); - - for (int idx = first; idx < last && verifyLen < maxNewTxns; idx++) - { - managerTxns[idx] = manager->newTxnID((uint32_t)idx + 1000); - CPPUNIT_ASSERT(managerTxns[idx].id > 0); - CPPUNIT_ASSERT(managerTxns[idx].valid == true); - verifyLen = manager->verifySize(); - CPPUNIT_ASSERT(verifyLen > 0); - newTxns++; - } - - CPPUNIT_ASSERT(newTxns == last - first); - return newTxns; - } - - int closeTxns(const int& start, const int& end) - { - int first = start; - int last = end; - int totalClosed = 0; - - for (int idx = first; idx < last; idx++) - { - try - { - SessionManager::TxnID tmp = manager->getTxnID(idx + 1000); - - if (tmp.valid == true) - { - manager->committed(tmp); - CPPUNIT_ASSERT(tmp.valid == false); - totalClosed++; - } - } - catch (exception& e) - { - cerr << e.what() << endl; - continue; - } - } - - return totalClosed; - - } // closeTxns - - void MonitorTestPlan_1() - { - int currStartTxn = 0; - int currEndTxn = 5; - int txnCntIncr = 5; - const int sleepTime = 1; - const int iterMax = 1; - vector toTxns; - - manager = new SessionManager(); - // CPPUNIT_ASSERT(manager->verifySize()==0); - - 
SessionMonitor* monitor = NULL; - - for (int jdx = 0; jdx < iterMax; jdx++) - { - // store the current state of the SessionManager - monitor = new SessionMonitor(); - monitor->AgeLimit(sleepTime); - delete monitor; - int idx = 0; - int grpStart = currStartTxn; - - for (idx = 0; idx < 3; idx++) - { - createTxns(currStartTxn, currEndTxn); - // CPPUNIT_ASSERT(manager->verifySize()==(idx+1)*txnCntIncr); - - currStartTxn += txnCntIncr; - currEndTxn += txnCntIncr; - sleep(sleepTime + 1); // make sessions time out - - monitor = new SessionMonitor(); // read Monitor data - monitor->AgeLimit(sleepTime); - toTxns.clear(); - toTxns = monitor->timedOutTxns(); // get timed out txns - CPPUNIT_ASSERT(toTxns.size() == (uint32_t)txnCntIncr * idx); - - delete monitor; - } - - int grpEnd = currEndTxn; - monitor = new SessionMonitor(); - monitor->AgeLimit(sleepTime); - closeTxns(grpStart, grpEnd); // close this iteration of txns - // CPPUNIT_ASSERT(manager->verifySize()==0); - toTxns = monitor->timedOutTxns(); // get timed out txns - CPPUNIT_ASSERT(toTxns.size() == 0); - - delete monitor; - } - - monitor = new SessionMonitor(); // readload Monitor data - monitor->AgeLimit(sleepTime - 1); - - toTxns.clear(); - toTxns = monitor->timedOutTxns(); // get timed out txns - CPPUNIT_ASSERT(toTxns.size() == 0); - delete monitor; - - // CPPUNIT_ASSERT(manager->verifySize()==0); - delete manager; - } - -}; // test suite - -CPPUNIT_TEST_SUITE_REGISTRATION(ExecPlanTest); - -#include -#include - -int main(int argc, char* argv[]) -{ - CppUnit::TextUi::TestRunner runner; - CppUnit::TestFactoryRegistry& registry = CppUnit::TestFactoryRegistry::getRegistry(); - runner.addTest(registry.makeTest()); - bool wasSuccessful = runner.run("", false); - return (wasSuccessful ? 
0 : 1); -} diff --git a/primitives/blockcache/CMakeLists.txt b/primitives/blockcache/CMakeLists.txt index 9ca0c950f..60deb6c52 100644 --- a/primitives/blockcache/CMakeLists.txt +++ b/primitives/blockcache/CMakeLists.txt @@ -14,4 +14,4 @@ set(dbbc_STAT_SRCS fsutils.cpp ) columnstore_static_library(dbbc ${dbbc_STAT_SRCS}) -columnstore_link(dbbc ${NETSNMP_LIBRARIES} loggingcpp) +columnstore_link(dbbc loggingcpp) diff --git a/primitives/linux-port/CMakeLists.txt b/primitives/linux-port/CMakeLists.txt index 1a0032707..254a2d786 100644 --- a/primitives/linux-port/CMakeLists.txt +++ b/primitives/linux-port/CMakeLists.txt @@ -5,4 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES} ../blockcache ../primproc) set(processor_STAT_SRCS primitiveprocessor.cpp dictionary.cpp column.cpp) columnstore_static_library(processor ${processor_STAT_SRCS}) -columnstore_link(processor ${NETSNMP_LIBRARIES} loggingcpp) +columnstore_link(processor loggingcpp) diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index dbb82f54e..b8f1df2fb 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -40,7 +40,6 @@ using namespace boost; #include "simd_sse.h" #include "simd_arm.h" #include "utils/common/columnwidth.h" -#include "utils/common/bit_cast.h" #include "exceptclasses.h" diff --git a/primitives/primproc/CMakeLists.txt b/primitives/primproc/CMakeLists.txt index cd1232812..017f8bea3 100644 --- a/primitives/primproc/CMakeLists.txt +++ b/primitives/primproc/CMakeLists.txt @@ -3,25 +3,26 @@ include_directories(${ENGINE_COMMON_INCLUDES} ../blockcache ../linux-port) # ########## next target ############### set(PrimProc_SRCS - primproc.cpp + activestatementcounter.cpp batchprimitiveprocessor.cpp bppseeder.cpp bppsendthread.cpp columncommand.cpp command.cpp dictstep.cpp + femsghandler.cpp filtercommand.cpp logger.cpp passthrucommand.cpp primitiveserver.cpp + primproc.cpp pseudocc.cpp + rssmonfcn.cpp rtscommand.cpp - umsocketselector.cpp + 
samenodepseudosocket.cpp serviceexemgr.cpp sqlfrontsessionthread.cpp - rssmonfcn.cpp - activestatementcounter.cpp - femsghandler.cpp + umsocketselector.cpp ../../utils/common/crashtrace.cpp ) @@ -31,11 +32,11 @@ target_include_directories(PrimProc PRIVATE ${Boost_INCLUDE_DIRS}) columnstore_link( PrimProc ${ENGINE_LDFLAGS} - ${NETSNMP_LIBRARIES} ${ENGINE_WRITE_LIBS} threadpool cacheutils dbbc processor loggingcpp + statistics_manager ) diff --git a/utils/messageqcpp/samenodepseudosocket.cpp b/primitives/primproc/samenodepseudosocket.cpp similarity index 100% rename from utils/messageqcpp/samenodepseudosocket.cpp rename to primitives/primproc/samenodepseudosocket.cpp diff --git a/utils/messageqcpp/samenodepseudosocket.h b/primitives/primproc/samenodepseudosocket.h similarity index 100% rename from utils/messageqcpp/samenodepseudosocket.h rename to primitives/primproc/samenodepseudosocket.h diff --git a/primitives/primproc/serviceexemgr.cpp b/primitives/primproc/serviceexemgr.cpp index 2de371f21..6e759cb78 100644 --- a/primitives/primproc/serviceexemgr.cpp +++ b/primitives/primproc/serviceexemgr.cpp @@ -78,7 +78,7 @@ #include "dbrm.h" #include "mariadb_my_sys.h" -#include "statistics.h" +#include "statistics_manager/statistics.h" #include "serviceexemgr.h" #include "sqlfrontsessionthread.h" diff --git a/primitives/primproc/serviceexemgr.h b/primitives/primproc/serviceexemgr.h index a94f88c89..9da16ce0a 100644 --- a/primitives/primproc/serviceexemgr.h +++ b/primitives/primproc/serviceexemgr.h @@ -58,7 +58,7 @@ #include "dbrm.h" #include "mariadb_my_sys.h" -#include "statistics.h" +#include "statistics_manager/statistics.h" namespace exemgr { @@ -69,7 +69,7 @@ class Opt int m_debug; bool m_e; bool m_fg; - Opt() : m_debug(0), m_e(false), m_fg(false){}; + Opt() : m_debug(0), m_e(false), m_fg(false) {}; Opt(int argc, char* argv[]) : m_debug(0), m_e(false), m_fg(false) { int c; diff --git a/primitives/primproc/sqlfrontsessionthread.h 
b/primitives/primproc/sqlfrontsessionthread.h index 92464a733..4e07ac3fb 100644 --- a/primitives/primproc/sqlfrontsessionthread.h +++ b/primitives/primproc/sqlfrontsessionthread.h @@ -56,76 +56,77 @@ #include "dbrm.h" #include "mariadb_my_sys.h" -#include "statistics.h" +#include "statistics_manager/statistics.h" #include "serviceexemgr.h" namespace exemgr { - class SQLFrontSessionThread +class SQLFrontSessionThread +{ + public: + SQLFrontSessionThread(const messageqcpp::IOSocket& ios, joblist::DistributedEngineComm* ec, + joblist::ResourceManager* rm) + : fIos(ios) + , fEc(ec) + , fRm(rm) + , fStatsRetrieved(false) + , fTeleClient(globServiceExeMgr->getTeleServerParms()) + , fOamCachePtr(oam::OamCache::makeOamCache()) { - public: - SQLFrontSessionThread(const messageqcpp::IOSocket& ios, joblist::DistributedEngineComm* ec, - joblist::ResourceManager* rm) - : fIos(ios) - , fEc(ec) - , fRm(rm) - , fStatsRetrieved(false) - , fTeleClient(globServiceExeMgr->getTeleServerParms()) - , fOamCachePtr(oam::OamCache::makeOamCache()) - { - } + } - private: - messageqcpp::IOSocket fIos; - joblist::DistributedEngineComm* fEc; - joblist::ResourceManager* fRm; - querystats::QueryStats fStats; + private: + messageqcpp::IOSocket fIos; + joblist::DistributedEngineComm* fEc; + joblist::ResourceManager* fRm; + querystats::QueryStats fStats; - // Variables used to store return stats - bool fStatsRetrieved; + // Variables used to store return stats + bool fStatsRetrieved; - querytele::QueryTeleClient fTeleClient; + querytele::QueryTeleClient fTeleClient; - oam::OamCache* fOamCachePtr; // this ptr is copyable... + oam::OamCache* fOamCachePtr; // this ptr is copyable... 
- //...Reinitialize stats for start of a new query - void initStats(uint32_t sessionId, std::string& sqlText) - { - initMaxMemPct(sessionId); + //...Reinitialize stats for start of a new query + void initStats(uint32_t sessionId, std::string& sqlText) + { + initMaxMemPct(sessionId); - fStats.reset(); - fStats.setStartTime(); - fStats.fSessionID = sessionId; - fStats.fQuery = sqlText; - fStatsRetrieved = false; - } - //...Get % memory usage during latest query for sesssionId. - //...SessionId >= 0x80000000 is system catalog query we can ignore. - static uint64_t getMaxMemPct(uint32_t sessionId); - //...Delete sessionMemMap entry for the specified session's memory % use. - //...SessionId >= 0x80000000 is system catalog query we can ignore. - static void deleteMaxMemPct(uint32_t sessionId); - //...Get and log query stats to specified output stream - const std::string formatQueryStats( - joblist::SJLP& jl, // joblist associated with query - const std::string& label, // header label to print in front of log output - bool includeNewLine, // include line breaks in query stats std::string - bool vtableModeOn, bool wantExtendedStats, uint64_t rowsReturned); - static void incThreadCntPerSession(uint32_t sessionId); - static void decThreadCntPerSession(uint32_t sessionId); - //...Init sessionMemMap entry for specified session to 0 memory %. - //...SessionId >= 0x80000000 is system catalog query we can ignore. - static void initMaxMemPct(uint32_t sessionId); - //... Round off to human readable format (KB, MB, or GB). 
- const std::string roundBytes(uint64_t value) const; - void setRMParms(const execplan::CalpontSelectExecutionPlan::RMParmVec& parms); - void buildSysCache(const execplan::CalpontSelectExecutionPlan& csep, - boost::shared_ptr csc); - void writeCodeAndError(messageqcpp::ByteStream::quadbyte code, const std::string emsg); - void analyzeTableExecute(messageqcpp::ByteStream& bs, joblist::SJLP& jl, bool& stmtCounted); - void analyzeTableHandleStats(messageqcpp::ByteStream& bs); - uint64_t roundMB(uint64_t value) const; - public: - void operator()(); - }; -} + fStats.reset(); + fStats.setStartTime(); + fStats.fSessionID = sessionId; + fStats.fQuery = sqlText; + fStatsRetrieved = false; + } + //...Get % memory usage during latest query for sesssionId. + //...SessionId >= 0x80000000 is system catalog query we can ignore. + static uint64_t getMaxMemPct(uint32_t sessionId); + //...Delete sessionMemMap entry for the specified session's memory % use. + //...SessionId >= 0x80000000 is system catalog query we can ignore. + static void deleteMaxMemPct(uint32_t sessionId); + //...Get and log query stats to specified output stream + const std::string formatQueryStats( + joblist::SJLP& jl, // joblist associated with query + const std::string& label, // header label to print in front of log output + bool includeNewLine, // include line breaks in query stats std::string + bool vtableModeOn, bool wantExtendedStats, uint64_t rowsReturned); + static void incThreadCntPerSession(uint32_t sessionId); + static void decThreadCntPerSession(uint32_t sessionId); + //...Init sessionMemMap entry for specified session to 0 memory %. + //...SessionId >= 0x80000000 is system catalog query we can ignore. + static void initMaxMemPct(uint32_t sessionId); + //... Round off to human readable format (KB, MB, or GB). 
+ const std::string roundBytes(uint64_t value) const; + void setRMParms(const execplan::CalpontSelectExecutionPlan::RMParmVec& parms); + void buildSysCache(const execplan::CalpontSelectExecutionPlan& csep, + boost::shared_ptr csc); + void writeCodeAndError(messageqcpp::ByteStream::quadbyte code, const std::string emsg); + void analyzeTableExecute(messageqcpp::ByteStream& bs, joblist::SJLP& jl, bool& stmtCounted); + void analyzeTableHandleStats(messageqcpp::ByteStream& bs); + uint64_t roundMB(uint64_t value) const; + + public: + void operator()(); +}; +} // namespace exemgr diff --git a/storage-manager/CMakeLists.txt b/storage-manager/CMakeLists.txt index c72171cbd..1b01b403f 100755 --- a/storage-manager/CMakeLists.txt +++ b/storage-manager/CMakeLists.txt @@ -64,7 +64,7 @@ configure_file( link_directories(${CMAKE_BINARY_DIR}/lib) set(CMAKE_INSTALL_RPATH $ORIGIN $ORIGIN/../lib) -columnstore_library(storagemanager SHARED ${storagemanager_SRCS}) +columnstore_library(storagemanager ${storagemanager_SRCS}) add_dependencies(storagemanager marias3 external_boost) target_compile_definitions(storagemanager PUBLIC BOOST_NO_CXX11_SCOPED_ENUMS) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 600b3607e..a0d6971c2 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,14 +1,15 @@ -add_subdirectory(dbbuilder) -add_subdirectory(editem) -add_subdirectory(dbloadxml) -add_subdirectory(getConfig) -add_subdirectory(cplogger) add_subdirectory(clearShm) +add_subdirectory(cleartablelock) +add_subdirectory(configMgt) +add_subdirectory(cplogger) +add_subdirectory(dbbuilder) +add_subdirectory(dbloadxml) +add_subdirectory(ddlcleanup) +add_subdirectory(editem) +add_subdirectory(getConfig) +add_subdirectory(idbmeminfo) +add_subdirectory(passwd) +add_subdirectory(rebuildEM) +add_subdirectory(rgprint) add_subdirectory(setConfig) add_subdirectory(viewtablelock) -add_subdirectory(cleartablelock) -add_subdirectory(ddlcleanup) -add_subdirectory(idbmeminfo) 
-add_subdirectory(rebuildEM) -add_subdirectory(passwd) -add_subdirectory(configMgt) diff --git a/tools/cleartablelock/CMakeLists.txt b/tools/cleartablelock/CMakeLists.txt index 30dc54edd..01dcc211e 100644 --- a/tools/cleartablelock/CMakeLists.txt +++ b/tools/cleartablelock/CMakeLists.txt @@ -6,4 +6,4 @@ set(cleartablelock_SRCS cleartablelock.cpp cleartablelockthread.cpp) columnstore_executable(cleartablelock ${cleartablelock_SRCS}) -columnstore_link(cleartablelock ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${ENGINE_WRITE_LIBS}) +columnstore_link(cleartablelock ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS}) diff --git a/tools/configMgt/CMakeLists.txt b/tools/configMgt/CMakeLists.txt index 9e6b3f3e9..15d6d7693 100644 --- a/tools/configMgt/CMakeLists.txt +++ b/tools/configMgt/CMakeLists.txt @@ -5,4 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(autoConfigure_SRCS autoConfigure.cpp) add_executable(autoConfigure ${autoConfigure_SRCS}) -columnstore_link(autoConfigure ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${ENGINE_EXEC_LIBS}) +columnstore_link(autoConfigure ${ENGINE_LDFLAGS} ${ENGINE_EXEC_LIBS}) diff --git a/tools/dbbuilder/CMakeLists.txt b/tools/dbbuilder/CMakeLists.txt index 9d687f912..d6bceb767 100644 --- a/tools/dbbuilder/CMakeLists.txt +++ b/tools/dbbuilder/CMakeLists.txt @@ -6,4 +6,4 @@ set(dbbuilder_SRCS dbbuilder.cpp systemcatalog.cpp) columnstore_executable(dbbuilder ${dbbuilder_SRCS}) -columnstore_link(dbbuilder ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${ENGINE_WRITE_LIBS}) +columnstore_link(dbbuilder ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS}) diff --git a/tools/ddlcleanup/CMakeLists.txt b/tools/ddlcleanup/CMakeLists.txt index 9ba55097e..4e6016c04 100644 --- a/tools/ddlcleanup/CMakeLists.txt +++ b/tools/ddlcleanup/CMakeLists.txt @@ -5,4 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(ddlcleanup_SRCS ddlcleanup.cpp) columnstore_executable(ddlcleanup ${ddlcleanup_SRCS}) -columnstore_link(ddlcleanup ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} 
${ENGINE_WRITE_LIBS} ddlcleanuputil) +columnstore_link(ddlcleanup ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} ddlcleanuputil) diff --git a/tools/editem/CMakeLists.txt b/tools/editem/CMakeLists.txt index b0dc993c6..ba6dde613 100644 --- a/tools/editem/CMakeLists.txt +++ b/tools/editem/CMakeLists.txt @@ -5,4 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(editem_SRCS editem.cpp) columnstore_executable(editem ${editem_SRCS}) -columnstore_link(editem ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${ENGINE_EXEC_LIBS}) +columnstore_link(editem ${ENGINE_LDFLAGS} ${ENGINE_EXEC_LIBS}) diff --git a/tools/rgprint/CMakeLists.txt b/tools/rgprint/CMakeLists.txt index 788079df9..699cef3e9 100644 --- a/tools/rgprint/CMakeLists.txt +++ b/tools/rgprint/CMakeLists.txt @@ -6,4 +6,4 @@ set(rgprint_SRCS rgprint.cpp) columnstore_executable(rgprint ${rgprint_SRCS}) -columnstore_link(rgprint ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${ENGINE_WRITE_LIBS}) +columnstore_link(rgprint ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS}) diff --git a/tools/setConfig/CMakeLists.txt b/tools/setConfig/CMakeLists.txt index b43566721..85c16b0a8 100644 --- a/tools/setConfig/CMakeLists.txt +++ b/tools/setConfig/CMakeLists.txt @@ -5,4 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(setConfig_SRCS main.cpp) columnstore_executable(mcsSetConfig ${setConfig_SRCS}) -columnstore_link(mcsSetConfig ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${ENGINE_EXEC_LIBS}) +columnstore_link(mcsSetConfig ${ENGINE_LDFLAGS} ${ENGINE_EXEC_LIBS}) diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 9e7b74f4c..cae710dea 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -1,26 +1,28 @@ # add_subdirectory(boost_idb) -add_subdirectory(startup) -add_subdirectory(common) -add_subdirectory(configcpp) -add_subdirectory(loggingcpp) -add_subdirectory(messageqcpp) -add_subdirectory(threadpool) -add_subdirectory(rwlock) -add_subdirectory(dataconvert) -add_subdirectory(joiner) -add_subdirectory(rowgroup) 
-add_subdirectory(cacheutils) -add_subdirectory(funcexp) -add_subdirectory(udfsdk) -add_subdirectory(compress) add_subdirectory(batchloader) -add_subdirectory(ddlcleanup) -add_subdirectory(querystats) -add_subdirectory(windowfunction) -add_subdirectory(idbdatafile) -add_subdirectory(querytele) -add_subdirectory(libmysql_client) -add_subdirectory(regr) +add_subdirectory(cacheutils) add_subdirectory(cloudio) +add_subdirectory(common) +add_subdirectory(compress) +add_subdirectory(configcpp) +add_subdirectory(dataconvert) +add_subdirectory(ddlcleanup) +add_subdirectory(funcexp) +add_subdirectory(idbdatafile) +add_subdirectory(joiner) add_subdirectory(libmarias3) +add_subdirectory(libmysql_client) +add_subdirectory(loggingcpp) +add_subdirectory(mariadb_charset) +add_subdirectory(messageqcpp) add_subdirectory(pron) +add_subdirectory(querystats) +add_subdirectory(querytele) +add_subdirectory(regr) +add_subdirectory(rowgroup) +add_subdirectory(rwlock) +add_subdirectory(startup) +add_subdirectory(statistics_manager) +add_subdirectory(threadpool) +add_subdirectory(udfsdk) +add_subdirectory(windowfunction) diff --git a/utils/common/conststring.h b/utils/basic/conststring.h similarity index 100% rename from utils/common/conststring.h rename to utils/basic/conststring.h diff --git a/utils/batchloader/CMakeLists.txt b/utils/batchloader/CMakeLists.txt index 25f58197e..694e88ef0 100644 --- a/utils/batchloader/CMakeLists.txt +++ b/utils/batchloader/CMakeLists.txt @@ -6,4 +6,4 @@ set(batchloader_LIB_SRCS batchloader.cpp) columnstore_library(batchloader ${batchloader_LIB_SRCS}) -columnstore_link(batchloader ${NETSNMP_LIBRARIES} loggingcpp) +columnstore_link(batchloader oamcpp loggingcpp) diff --git a/utils/cacheutils/CMakeLists.txt b/utils/cacheutils/CMakeLists.txt index 0c9c5d63c..666834835 100644 --- a/utils/cacheutils/CMakeLists.txt +++ b/utils/cacheutils/CMakeLists.txt @@ -5,4 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(cacheutils_LIB_SRCS cacheutils.cpp) 
columnstore_library(cacheutils ${cacheutils_LIB_SRCS}) -columnstore_link(cacheutils messageqcpp) +columnstore_link(cacheutils messageqcpp boost_thread) diff --git a/utils/cloudio/CMakeLists.txt b/utils/cloudio/CMakeLists.txt index c5bb954d0..131030ace 100755 --- a/utils/cloudio/CMakeLists.txt +++ b/utils/cloudio/CMakeLists.txt @@ -14,7 +14,7 @@ columnstore_library(cloudio ${cloudio_LIB_SRCS}) # IDBDataFile currently depends on cloudio, which is backward. Once cloudio has been turned into a proper plugin for # idbdatafile, we should be able to reverse the dependency like so: -columnstore_link(cloudio idbdatafile messageqcpp) +columnstore_link(cloudio idbdatafile messageqcpp boost_thread) add_executable(cloudio_component_test component_test.cpp) add_executable(cloudio_end_to_end_test end_to_end_test.cpp) diff --git a/utils/common/CMakeLists.txt b/utils/common/CMakeLists.txt index dc19cd77d..d99ee7590 100644 --- a/utils/common/CMakeLists.txt +++ b/utils/common/CMakeLists.txt @@ -9,10 +9,17 @@ set(common_LIB_SRCS MonitorProcMem.cpp nullvaluemanip.cpp threadnaming.cpp - utils_utf8.cpp - statistics.cpp string_prefixes.cpp ) columnstore_library(common ${common_LIB_SRCS}) -columnstore_link(common boost_filesystem configcpp loggingcpp messageqcpp) +columnstore_link( + common + PRIVATE + boost_filesystem + configcpp + loggingcpp + messageqcpp + idbdatafile + mariadb_charset +) diff --git a/utils/common/bit_cast.h b/utils/common/bit_cast.h deleted file mode 100644 index e2a99e21a..000000000 --- a/utils/common/bit_cast.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright (C) 2020 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. */ - -#pragma once - -#include -#include - -namespace utils -{ -template -std::enable_if_t< - sizeof(To) == sizeof(From) && std::is_trivially_copyable_v && std::is_trivially_copyable_v, To> -// constexpr support needs compiler magic -bitCast(const From& src) noexcept -{ - static_assert(std::is_trivially_constructible_v, - "This implementation additionally requires " - "destination type to be trivially constructible"); - - To dst; - std::memcpy(&dst, &src, sizeof(To)); - return dst; -} -} // namespace utils diff --git a/utils/common/branchpred.h b/utils/common/branchpred.h index b0c4feb08..2553c1390 100644 --- a/utils/common/branchpred.h +++ b/utils/common/branchpred.h @@ -19,12 +19,6 @@ #pragma once -#if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) -#ifndef __builtin_expect -#define __builtin_expect(x, expected_value) (x) -#endif -#endif - #ifndef LIKELY #define LIKELY(x) __builtin_expect((x), 1) #define UNLIKELY(x) __builtin_expect((x), 0) diff --git a/utils/common/columnwidth.h b/utils/common/columnwidth.h index 0c060ae89..1a9ca6635 100644 --- a/utils/common/columnwidth.h +++ b/utils/common/columnwidth.h @@ -17,6 +17,9 @@ #pragma once +#include +#include + #include "branchpred.h" namespace utils diff --git a/utils/common/genericparser.h b/utils/common/genericparser.h index d2bce2d26..8f4c67101 100644 --- a/utils/common/genericparser.h +++ b/utils/common/genericparser.h @@ -17,7 +17,7 @@ #pragma once -#include "conststring.h" +#include "basic/conststring.h" namespace genericparser { diff --git 
a/utils/common/hashfamily.h b/utils/common/hashfamily.h index 4b09e01a8..954779e40 100644 --- a/utils/common/hashfamily.h +++ b/utils/common/hashfamily.h @@ -18,7 +18,7 @@ #pragma once #include "hasher.h" -#include "collation.h" +#include "mariadb_charset/collation.h" namespace utils { diff --git a/utils/common/nullstring.h b/utils/common/nullstring.h index 313a5bcb9..0b4bd62a9 100644 --- a/utils/common/nullstring.h +++ b/utils/common/nullstring.h @@ -25,7 +25,7 @@ #include #include #include "exceptclasses.h" -#include "conststring.h" +#include "basic/conststring.h" #include "mcs_datatype_basic.h" namespace utils diff --git a/utils/common/string_prefixes.cpp b/utils/common/string_prefixes.cpp index 9f9c5c2e7..a8c45c84e 100644 --- a/utils/common/string_prefixes.cpp +++ b/utils/common/string_prefixes.cpp @@ -18,7 +18,7 @@ /* handling of the conversion of string prefixes to int64_t for quick range checking */ -#include "collation.h" +#include "mariadb_charset/collation.h" #include "joblisttypes.h" #include "string_prefixes.h" diff --git a/utils/common/syncstream.h b/utils/common/syncstream.h deleted file mode 100644 index d00b41150..000000000 --- a/utils/common/syncstream.h +++ /dev/null @@ -1,157 +0,0 @@ -/* Copyright (C) 2014 InfiniDB, Inc. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. 
*/ - -/** @file */ - -/* - * classes isyncstream and osyncstream provide a C++ iostream interface - * for C stdio FILE* streams. The current implementation does not provide - * the necessary methods to support seeking. The I/O buffering of the - * input FILE* is used. The C++ iostream library calls syncbuf::sync() - * for every line, so output buffering is line-by-line. - * */ - -/* -#include "syncstream.h" - -void copyStream(istream& iss, ostream& oss) -{ - string line; - getline(iss, line); - while (iss.good()) - { - oss << line << endl; - getline(iss, line); - } -} - -main() -{ - FILE* ifp; - FILE* ofp; - - ... - - isyncstream iss(ifp); - osyncstream oss(ofp); - - copyStream(iss, oss); - - ... -} -*/ - -#pragma once - -#include -#include - -namespace syncstream -{ -/** A streambuf implementation for C stdio FILE* streams. - * - * Adapted from http://www.drdobbs.com/184401305 - */ -class syncbuf : public std::streambuf -{ - public: - /** ctor */ - syncbuf(FILE* f) : std::streambuf(), fptr(f) - { - } - - protected: - /** Write character in the case of overflow */ - virtual int overflow(int c = EOF) - { - return (c != EOF ? fputc(c, fptr) : EOF); - } - /** Get character in the case of overflow */ - virtual int underflow() - { - int c = getc(fptr); - - if (c != EOF) - ungetc(c, fptr); - - return c; - } - /** Get character in the case of overflow and advance get pointer */ - virtual int uflow() - { - return getc(fptr); - } - /** put character back in the case of backup underflow */ - virtual int pbackfail(int c = EOF) - { - return (c != EOF ? 
ungetc(c, fptr) : EOF); - } - /** Synchronize stream buffer */ - virtual int sync() - { - return fflush(fptr); - } - - private: - FILE* fptr; -}; - -/** An istream adaptor for input FILE* streams */ -class isyncstream : public std::istream -{ - public: - /** ctor */ - isyncstream() : istream(&buf), buf(0) - { - } - /** ctor */ - isyncstream(FILE* fptr) : istream(&buf), buf(fptr) - { - } - /** const streambuf accessor */ - const syncbuf* rdbuf() const - { - return &buf; - } - - private: - syncbuf buf; -}; - -/** An ostream adaptor for output FILE* streams */ -class osyncstream : public std::ostream -{ - public: - /** ctor */ - osyncstream() : ostream(&buf), buf(0) - { - } - /** ctor */ - osyncstream(FILE* fptr) : ostream(&buf), buf(fptr) - { - } - /** const streambuf accessor */ - const syncbuf* rdbuf() const - { - return &buf; - } - - private: - syncbuf buf; -}; - -} // namespace syncstream diff --git a/utils/common/utils_utf8.h b/utils/common/utils_utf8.h index 352bbb4f7..49f856bb5 100644 --- a/utils/common/utils_utf8.h +++ b/utils/common/utils_utf8.h @@ -20,16 +20,10 @@ #pragma once +#include #include -#if defined(__FreeBSD__) -//#include -#else -#include -#endif #include - #include -#include "liboamcpp.h" // Change the name from utf8. 
Even change the file name to something resembling char helper namespace utf8 diff --git a/utils/ddlcleanup/CMakeLists.txt b/utils/ddlcleanup/CMakeLists.txt index c813fe428..600e73e84 100644 --- a/utils/ddlcleanup/CMakeLists.txt +++ b/utils/ddlcleanup/CMakeLists.txt @@ -2,4 +2,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(ddlcleanuputil_LIB_SRCS ddlcleanuputil.cpp) columnstore_library(ddlcleanuputil ${ddlcleanuputil_LIB_SRCS}) -columnstore_link(ddlcleanuputil PRIVATE loggingcpp ${NETSNMP_LIBRARIES}) +columnstore_link(ddlcleanuputil PRIVATE loggingcpp) diff --git a/utils/funcexp/CMakeLists.txt b/utils/funcexp/CMakeLists.txt index 6f1c758a7..752221fed 100644 --- a/utils/funcexp/CMakeLists.txt +++ b/utils/funcexp/CMakeLists.txt @@ -150,6 +150,7 @@ columnstore_link( pron loggingcpp dataconvert - ${MARIADB_STRING_LIBS} - ${NETSNMP_LIBRARIES} + mariadb_charset ) + +columnstore_link(funcexp PRIVATE ${MARIADB_STRING_LIBS}) diff --git a/utils/funcexp/functor_json.h b/utils/funcexp/functor_json.h index 55c4d1b5f..f6939a4c5 100644 --- a/utils/funcexp/functor_json.h +++ b/utils/funcexp/functor_json.h @@ -7,7 +7,7 @@ #include #include -#include "collation.h" +#include "mariadb_charset/collation.h" #include "functor_bool.h" #include "functor_int.h" #include "functor_str.h" diff --git a/utils/funcexp/jsonhelpers.h b/utils/funcexp/jsonhelpers.h index 2756925b8..8f5fde560 100644 --- a/utils/funcexp/jsonhelpers.h +++ b/utils/funcexp/jsonhelpers.h @@ -10,10 +10,10 @@ #include // #include -#include "collation.h" +#include "mariadb_charset/collation.h" #include "functor_json.h" #include "functor_str.h" -#include "collation.h" +#include "mariadb_charset/collation.h" #include "rowgroup.h" #include "treenode.h" #include "functioncolumn.h" diff --git a/utils/idbdatafile/CMakeLists.txt b/utils/idbdatafile/CMakeLists.txt index 06a1ebf44..8e8d9ab4d 100644 --- a/utils/idbdatafile/CMakeLists.txt +++ b/utils/idbdatafile/CMakeLists.txt @@ -14,4 +14,4 @@ set(idbdatafile_LIB_SRCS ) 
columnstore_library(idbdatafile ${idbdatafile_LIB_SRCS}) -columnstore_link(idbdatafile PRIVATE ${NETSNMP_LIBRARIES} ${ENGINE_OAM_LIBS} boost_filesystem boost_system) +columnstore_link(idbdatafile PRIVATE ${ENGINE_OAM_LIBS} boost_filesystem boost_system compress) diff --git a/utils/joiner/CMakeLists.txt b/utils/joiner/CMakeLists.txt index 0df5c60f7..1d636162c 100644 --- a/utils/joiner/CMakeLists.txt +++ b/utils/joiner/CMakeLists.txt @@ -5,4 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(joiner_LIB_SRCS tuplejoiner.cpp joinpartition.cpp) columnstore_library(joiner ${joiner_LIB_SRCS}) -columnstore_link(joiner PRIVATE loggingcpp) +columnstore_link(joiner PRIVATE loggingcpp rowgroup datatypes compress) diff --git a/utils/mariadb_charset/CMakeLists.txt b/utils/mariadb_charset/CMakeLists.txt new file mode 100644 index 000000000..9734bf1c5 --- /dev/null +++ b/utils/mariadb_charset/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories(${ENGINE_COMMON_INCLUDES}) + +# ########## next target ############### + +columnstore_static_library(mariadb_charset charset.cpp) +columnstore_link(mariadb_charset PRIVATE ${MARIADB_STRING_LIBS}) +add_dependencies(mariadb_charset loggingcpp) diff --git a/utils/common/utils_utf8.cpp b/utils/mariadb_charset/charset.cpp similarity index 72% rename from utils/common/utils_utf8.cpp rename to utils/mariadb_charset/charset.cpp index f0471f7d3..9a101592b 100644 --- a/utils/common/utils_utf8.cpp +++ b/utils/mariadb_charset/charset.cpp @@ -1,8 +1,9 @@ -/* Copyright (C) 2020 MariaDB Corporation. +/* Copyright (C) 2025 MariaDB Corporation - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -11,11 +12,12 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ - -#include "utils_utf8.h" + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ +#include "mariadb_charset/collation.h" #include "mariadb_my_sys.h" +#include "mcs_datatype.h" namespace datatypes { @@ -35,4 +37,3 @@ void Charset::setCharset(uint32_t charsetNumber) } } // namespace datatypes - diff --git a/utils/common/collation.h b/utils/mariadb_charset/collation.h similarity index 99% rename from utils/common/collation.h rename to utils/mariadb_charset/collation.h index e5781b8d9..411eca6aa 100644 --- a/utils/common/collation.h +++ b/utils/mariadb_charset/collation.h @@ -35,7 +35,7 @@ #include "mcsconfig.h" #include "exceptclasses.h" -#include "conststring.h" +#include "basic/conststring.h" /* Redefine definitions used by MariaDB m_ctype.h. diff --git a/utils/messageqcpp/CMakeLists.txt b/utils/messageqcpp/CMakeLists.txt index 527e9395a..c41a88168 100644 --- a/utils/messageqcpp/CMakeLists.txt +++ b/utils/messageqcpp/CMakeLists.txt @@ -8,7 +8,6 @@ set(messageqcpp_LIB_SRCS iosocket.cpp messagequeue.cpp messagequeuepool.cpp - samenodepseudosocket.cpp socketparms.cpp ) diff --git a/utils/messageqcpp/messagequeuepool.cpp b/utils/messageqcpp/messagequeuepool.cpp index 25af41a5c..2debf46cd 100644 --- a/utils/messageqcpp/messagequeuepool.cpp +++ b/utils/messageqcpp/messagequeuepool.cpp @@ -25,40 +25,57 @@ #include #include - namespace messageqcpp { using ClientMapType = std::multimap>; -struct LockedClientMap +template +struct Immortal { - LockedClientMap() + template + Immortal(Args&&... 
args) { + ::new (space) T(std::forward(args)...); } - ~LockedClientMap() + + operator T&() & noexcept { + return reinterpret_cast(space); } - ClientMapType clientMap; - std::mutex queueMutex; + + private: + alignas(T) unsigned char space[sizeof(T)]; }; -static int clientMapNiftyCounter; - -static typename std::aligned_storage::type clientMapBuf; - -auto& lockedMap = reinterpret_cast(clientMapBuf); - - -LockedClientMapInitilizer::LockedClientMapInitilizer () +class LockedClientMap { - if (clientMapNiftyCounter++ == 0) new (&lockedMap) LockedClientMap (); // placement new -} -LockedClientMapInitilizer::~LockedClientMapInitilizer () -{ - if (--clientMapNiftyCounter == 0) (&lockedMap)->~LockedClientMap(); -} + struct KeyToUsePrivateCtor + { + explicit KeyToUsePrivateCtor() = default; + }; + public: + LockedClientMap(const LockedClientMap&) = delete; + LockedClientMap& operator=(const LockedClientMap&) = delete; + ~LockedClientMap() = delete; + + static LockedClientMap& getInstance() + { + static Immortal instance(KeyToUsePrivateCtor{}); + return instance; + } + + LockedClientMap(KeyToUsePrivateCtor) + { + } + + private: + ClientMapType clientMap; + std::mutex queueMutex; + + friend class MessageQueueClientPool; +}; // 300 seconds idle until cleanup #define MAX_IDLE_TIME 300 @@ -70,7 +87,7 @@ static uint64_t TimeSpecToSeconds(struct timespec* ts) MessageQueueClient* MessageQueueClientPool::getInstance(const std::string& dnOrIp, uint64_t port) { - auto lock = std::scoped_lock(lockedMap.queueMutex); + auto lock = std::scoped_lock(LockedClientMap::getInstance().queueMutex); std::ostringstream oss; oss << dnOrIp << "_" << port; @@ -93,14 +110,13 @@ MessageQueueClient* MessageQueueClientPool::getInstance(const std::string& dnOrI newClientObject->client.reset(new MessageQueueClient(dnOrIp, port)); newClientObject->inUse = true; newClientObject->lastUsed = nowSeconds; - lockedMap.clientMap.emplace(std::move(searchString), std::move(newClientObject)); + 
LockedClientMap::getInstance().clientMap.emplace(std::move(searchString), std::move(newClientObject)); return newClientObject->client.get(); } MessageQueueClient* MessageQueueClientPool::getInstance(const std::string& module) { - auto lock = std::scoped_lock(lockedMap.queueMutex); - + auto lock = std::scoped_lock(LockedClientMap::getInstance().queueMutex); MessageQueueClient* returnClient = MessageQueueClientPool::findInPool(module); @@ -116,13 +132,11 @@ MessageQueueClient* MessageQueueClientPool::getInstance(const std::string& modul clock_gettime(CLOCK_MONOTONIC, &now); uint64_t nowSeconds = TimeSpecToSeconds(&now); - - newClientObject->client.reset(new MessageQueueClient(module)); newClientObject->inUse = true; newClientObject->lastUsed = nowSeconds; auto result = newClientObject->client.get(); - lockedMap.clientMap.emplace(std::move(module), std::move(newClientObject)); + LockedClientMap::getInstance().clientMap.emplace(std::move(module), std::move(newClientObject)); return result; } @@ -133,11 +147,10 @@ MessageQueueClient* MessageQueueClientPool::findInPool(const std::string& search uint64_t nowSeconds = TimeSpecToSeconds(&now); MessageQueueClient* returnClient = NULL; - auto it = lockedMap.clientMap.begin(); - + auto it = LockedClientMap::getInstance().clientMap.begin(); // Scan pool - while (it != lockedMap.clientMap.end()) + while (it != LockedClientMap::getInstance().clientMap.end()) { ClientObject* clientObject = it->second.get(); uint64_t elapsedTime = nowSeconds - clientObject->lastUsed; @@ -149,7 +162,7 @@ MessageQueueClient* MessageQueueClientPool::findInPool(const std::string& search // Do this so we don't invalidate current interator auto toDelete = it; it++; - lockedMap.clientMap.erase(toDelete); + LockedClientMap::getInstance().clientMap.erase(toDelete); continue; } @@ -163,7 +176,7 @@ MessageQueueClient* MessageQueueClientPool::findInPool(const std::string& search // Do this so we don't invalidate current interator auto toDelete = it; it++; - 
lockedMap.clientMap.erase(toDelete); + LockedClientMap::getInstance().clientMap.erase(toDelete); continue; } } @@ -193,10 +206,10 @@ void MessageQueueClientPool::releaseInstance(MessageQueueClient* client) if (client == NULL) return; - auto lock = std::scoped_lock(lockedMap.queueMutex); - auto it = lockedMap.clientMap.begin(); + auto lock = std::scoped_lock(LockedClientMap::getInstance().queueMutex); + auto it = LockedClientMap::getInstance().clientMap.begin(); - while (it != lockedMap.clientMap.end()) + while (it != LockedClientMap::getInstance().clientMap.end()) { if (it->second->client.get() == client) { @@ -221,15 +234,14 @@ void MessageQueueClientPool::deleteInstance(MessageQueueClient* client) if (client == NULL) return; + auto lock = std::scoped_lock(LockedClientMap::getInstance().queueMutex); + auto it = LockedClientMap::getInstance().clientMap.begin(); - auto lock = std::scoped_lock(lockedMap.queueMutex); - auto it = lockedMap.clientMap.begin(); - - while (it != lockedMap.clientMap.end()) + while (it != LockedClientMap::getInstance().clientMap.end()) { if (it->second->client.get() == client) { - lockedMap.clientMap.erase(it); + LockedClientMap::getInstance().clientMap.erase(it); return; } diff --git a/utils/messageqcpp/messagequeuepool.h b/utils/messageqcpp/messagequeuepool.h index 17899b76a..6af5e1cc6 100644 --- a/utils/messageqcpp/messagequeuepool.h +++ b/utils/messageqcpp/messagequeuepool.h @@ -26,12 +26,6 @@ namespace messageqcpp { - -static struct LockedClientMapInitilizer { - LockedClientMapInitilizer (); - ~LockedClientMapInitilizer (); -} clientMapInitilizer; // static initializer for every translation unit - struct ClientObject { std::unique_ptr client; @@ -49,8 +43,8 @@ class MessageQueueClientPool static MessageQueueClient* findInPool(const std::string& search); private: - MessageQueueClientPool(){}; - ~MessageQueueClientPool(){}; + MessageQueueClientPool() {}; + ~MessageQueueClientPool() {}; }; } // namespace messageqcpp diff --git 
a/utils/querystats/CMakeLists.txt b/utils/querystats/CMakeLists.txt index 0d91ead9e..ff6b13f91 100644 --- a/utils/querystats/CMakeLists.txt +++ b/utils/querystats/CMakeLists.txt @@ -5,4 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(querystats_LIB_SRCS querystats.cpp) columnstore_library(querystats ${querystats_LIB_SRCS}) -columnstore_link(querystats PRIVATE loggingcpp) +columnstore_link(querystats PRIVATE loggingcpp messageqcpp) diff --git a/utils/querytele/CMakeLists.txt b/utils/querytele/CMakeLists.txt index f5e58ae60..9fd9af6a9 100644 --- a/utils/querytele/CMakeLists.txt +++ b/utils/querytele/CMakeLists.txt @@ -5,7 +5,7 @@ set(querytele_LIB_SRCS querytele.cpp queryteleclient.cpp querytele_constants.cpp ) columnstore_library(querytele ${querytele_LIB_SRCS}) -columnstore_link(querytele ${THRIFT_LIBRARY}) +columnstore_link(querytele ${THRIFT_LIBRARY} boost_thread) target_include_directories(querytele PRIVATE ${THRIFT_INCLUDE_DIRS}) -add_dependencies(querytele external_boost external_thrift) +add_dependencies(querytele external_thrift) diff --git a/utils/regr/CMakeLists.txt b/utils/regr/CMakeLists.txt index f2f378f34..e4ce77d95 100755 --- a/utils/regr/CMakeLists.txt +++ b/utils/regr/CMakeLists.txt @@ -21,11 +21,12 @@ set(regr_LIB_SRCS add_definitions(-DMYSQL_DYNAMIC_PLUGIN) columnstore_library(regr ${regr_LIB_SRCS}) -columnstore_link(regr PRIVATE loggingcpp) +columnstore_link(regr PRIVATE loggingcpp messageqcpp) set(regr_mysql_LIB_SRCS regrmysql.cpp modamysql.cpp) +# Do anyone use it? 
columnstore_mysql_plugin_library(regr_mysql SHARED ${regr_mysql_LIB_SRCS}) -add_dependencies(regr_mysql external_boost) +add_dependencies(regr_mysql external_boost GenError) # for "idb_mysql.h" that uses generated mysqld_error.h set_target_properties(regr_mysql PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../../) diff --git a/utils/regr/moda.h b/utils/regr/moda.h index aaa7fc771..5d3bb244e 100644 --- a/utils/regr/moda.h +++ b/utils/regr/moda.h @@ -45,7 +45,7 @@ #include "calpontsystemcatalog.h" #include "windowfunctioncolumn.h" #include "hasher.h" -#include "collation.h" +#include "mariadb_charset/collation.h" #define EXPORT @@ -277,7 +277,7 @@ class Moda_impl_T : public mcsv1_UDAF { public: // Defaults OK - Moda_impl_T() : cs(8){}; + Moda_impl_T() : cs(8) {}; ~Moda_impl_T() override = default; mcsv1_UDAF::ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes) override; @@ -305,7 +305,7 @@ class moda : public mcsv1_UDAF { public: // Defaults OK - moda() : mcsv1_UDAF(){}; + moda() : mcsv1_UDAF() {}; ~moda() override = default; mcsv1_UDAF::ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes) override; diff --git a/utils/rowgroup/CMakeLists.txt b/utils/rowgroup/CMakeLists.txt index 2ba4c2d46..b22e93b43 100644 --- a/utils/rowgroup/CMakeLists.txt +++ b/utils/rowgroup/CMakeLists.txt @@ -5,5 +5,5 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(rowgroup_LIB_SRCS rowaggregation.cpp rowgroup.cpp rowstorage.cpp) columnstore_library(rowgroup ${rowgroup_LIB_SRCS}) -columnstore_link(rowgroup PRIVATE ${NETSNMP_LIBRARIES} funcexp loggingcpp) +columnstore_link(rowgroup PRIVATE funcexp loggingcpp compress) add_dependencies(rowgroup external_boost) diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 50baf681c..5917885b3 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -53,7 +53,7 @@ #include "branchpred.h" #include "datatypes/mcs_int128.h" -#include "collation.h" +#include 
"mariadb_charset/collation.h" #include "common/hashfamily.h" #include "buffertypes.h" diff --git a/utils/statistics_manager/CMakeLists.txt b/utils/statistics_manager/CMakeLists.txt new file mode 100644 index 000000000..8bcdfd8ca --- /dev/null +++ b/utils/statistics_manager/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories(${ENGINE_COMMON_INCLUDES}) + +# ########## next target ############### + +columnstore_static_library(statistics_manager statistics.cpp) +columnstore_link(statistics_manager PRIVATE rowgroup) diff --git a/utils/common/statistics.cpp b/utils/statistics_manager/statistics.cpp similarity index 99% rename from utils/common/statistics.cpp rename to utils/statistics_manager/statistics.cpp index ea0dd29ff..ff68eb381 100644 --- a/utils/common/statistics.cpp +++ b/utils/statistics_manager/statistics.cpp @@ -116,9 +116,8 @@ void StatisticsManager::analyzeSample(bool traceOn) // MCV statistics. std::vector> mcvList(columnMCV.begin(), columnMCV.end()); std::sort(mcvList.begin(), mcvList.end(), - [](const std::pair& a, const std::pair& b) { - return a.second > b.second; - }); + [](const std::pair& a, const std::pair& b) + { return a.second > b.second; }); // 200 buckets as Microsoft does. 
const auto mcvSize = std::min(columnMCV.size(), static_cast(200)); diff --git a/utils/common/statistics.h b/utils/statistics_manager/statistics.h similarity index 100% rename from utils/common/statistics.h rename to utils/statistics_manager/statistics.h diff --git a/utils/threadpool/CMakeLists.txt b/utils/threadpool/CMakeLists.txt index 1502bb4b3..496fba57a 100644 --- a/utils/threadpool/CMakeLists.txt +++ b/utils/threadpool/CMakeLists.txt @@ -2,6 +2,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(threadpool_LIB_SRCS weightedthreadpool.cpp threadpool.cpp prioritythreadpool.cpp fair_threadpool.cpp) columnstore_library(threadpool ${threadpool_LIB_SRCS}) -columnstore_link(threadpool PRIVATE boost_chrono loggingcpp) - -add_dependencies(threadpool external_boost) +columnstore_link(threadpool PRIVATE boost_chrono boost_thread loggingcpp messageqcpp) diff --git a/utils/udfsdk/CMakeLists.txt b/utils/udfsdk/CMakeLists.txt index e13f07ab2..25e5020ff 100755 --- a/utils/udfsdk/CMakeLists.txt +++ b/utils/udfsdk/CMakeLists.txt @@ -11,9 +11,11 @@ set(udfsdk_LIB_SRCS ) columnstore_library(udfsdk ${udfsdk_LIB_SRCS}) -columnstore_link(udfsdk PRIVATE loggingcpp) +columnstore_link(udfsdk PRIVATE loggingcpp messageqcpp) # Do anyone use it? 
add_definitions(-DMYSQL_DYNAMIC_PLUGIN) set(udf_mysql_LIB_SRCS udfmysql.cpp) columnstore_mysql_plugin_library(udf_mysql SHARED ${udf_mysql_LIB_SRCS}) + +add_dependencies(udf_mysql GenError) # for "idb_mysql.h" that uses generated mysqld_error.h diff --git a/versioning/BRM/CMakeLists.txt b/versioning/BRM/CMakeLists.txt index 4468c1c05..8b62dc2b2 100644 --- a/versioning/BRM/CMakeLists.txt +++ b/versioning/BRM/CMakeLists.txt @@ -34,7 +34,7 @@ set(brm_LIB_SRCS ) columnstore_library(brm ${brm_LIB_SRCS}) -columnstore_link(brm loggingcpp datatypes) +columnstore_link(brm loggingcpp datatypes oamcpp boost_thread messageqcpp) # ########## next target ############### @@ -62,53 +62,41 @@ columnstore_link(dbrmctl ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS set(reset_locks_SRCS reset_locks.cpp) columnstore_executable(reset_locks ${reset_locks_SRCS}) -columnstore_link(reset_locks ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS} ${NETSNMP_LIBRARIES}) +columnstore_link(reset_locks ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS}) # ########## next target ############### set(rollback_SRCS rollback.cpp) columnstore_executable(rollback ${rollback_SRCS}) -columnstore_link(rollback ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS} ${NETSNMP_LIBRARIES}) +columnstore_link(rollback ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS}) # ########## next target ############### set(save_brm_SRCS save_brm.cpp) columnstore_executable(save_brm ${save_brm_SRCS}) -columnstore_link(save_brm ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS} ${NETSNMP_LIBRARIES}) +columnstore_link(save_brm ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS}) # ########## next target ############### set(load_brm_SRCS load_brm.cpp) columnstore_executable(load_brm ${load_brm_SRCS}) -columnstore_link(load_brm ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS} ${NETSNMP_LIBRARIES}) +columnstore_link(load_brm ${ENGINE_LDFLAGS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS}) 
columnstore_executable(mcs-load-em load_em.cpp) -columnstore_link( - mcs-load-em ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS} ${NETSNMP_LIBRARIES} -) +columnstore_link(mcs-load-em ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS}) columnstore_executable(mcs-load-brm-from-file load_brm_from_file.cpp) columnstore_link( - mcs-load-brm-from-file - ${ENGINE_LDFLAGS} - ${MARIADB_CLIENT_LIBS} - ${ENGINE_OAM_LIBS} - ${ENGINE_EXEC_LIBS} - ${NETSNMP_LIBRARIES} + mcs-load-brm-from-file ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS} boost_program_options ) columnstore_executable(mcs-shmem-locks shmem_locks.cpp) columnstore_link( - mcs-shmem-locks - ${ENGINE_LDFLAGS} - ${MARIADB_CLIENT_LIBS} - ${ENGINE_OAM_LIBS} - ${ENGINE_EXEC_LIBS} - ${NETSNMP_LIBRARIES} + mcs-shmem-locks ${ENGINE_LDFLAGS} ${MARIADB_CLIENT_LIBS} ${ENGINE_OAM_LIBS} ${ENGINE_EXEC_LIBS} boost_program_options ) diff --git a/writeengine/bulk/CMakeLists.txt b/writeengine/bulk/CMakeLists.txt index 0bb90a586..229fa19c2 100644 --- a/writeengine/bulk/CMakeLists.txt +++ b/writeengine/bulk/CMakeLists.txt @@ -29,7 +29,7 @@ set(we_bulk_STAT_SRCS add_definitions(-D_FILE_OFFSET_BITS=64) columnstore_static_library(we_bulk ${we_bulk_STAT_SRCS}) -columnstore_link(we_bulk ${NETSNMP_LIBRARIES} loggingcpp boost_program_options) +columnstore_link(we_bulk loggingcpp boost_program_options) remove_definitions(-D_FILE_OFFSET_BITS=64) @@ -38,13 +38,13 @@ remove_definitions(-D_FILE_OFFSET_BITS=64) set(cpimport.bin_SRCS cpimport.cpp) columnstore_executable(cpimport.bin ${cpimport.bin_SRCS}) -add_dependencies(cpimport.bin marias3) + columnstore_link( cpimport.bin ${ENGINE_LDFLAGS} - ${NETSNMP_LIBRARIES} ${ENGINE_WRITE_LIBS} ${S3API_DEPS} we_bulk we_xml + marias3 ) diff --git a/writeengine/client/CMakeLists.txt b/writeengine/client/CMakeLists.txt index 52633ceca..910680f2c 100644 --- a/writeengine/client/CMakeLists.txt +++ 
b/writeengine/client/CMakeLists.txt @@ -5,7 +5,4 @@ include_directories(${ENGINE_COMMON_INCLUDES}) set(writeengineclient_LIB_SRCS we_clients.cpp we_ddlcommandclient.cpp we_dmlcommandclient.cpp) columnstore_library(writeengineclient ${writeengineclient_LIB_SRCS}) - -add_dependencies(writeengineclient loggingcpp) - -columnstore_link(writeengineclient ${NETSNMP_LIBRARIES}) +columnstore_link(writeengineclient boost_thread oamcpp messageqcpp loggingcpp) diff --git a/writeengine/redistribute/CMakeLists.txt b/writeengine/redistribute/CMakeLists.txt index 29b8b94ee..5ca49b172 100644 --- a/writeengine/redistribute/CMakeLists.txt +++ b/writeengine/redistribute/CMakeLists.txt @@ -7,9 +7,6 @@ set(writeengineredistribute_LIB_SRCS we_redistribute.cpp we_redistributecontrol. ) columnstore_library(writeengineredistribute ${writeengineredistribute_LIB_SRCS}) - -add_dependencies(writeengineredistribute loggingcpp) - -columnstore_link(writeengineredistribute ${NETSNMP_LIBRARIES}) +columnstore_link(writeengineredistribute loggingcpp oamcpp boost_thread messageqcpp) target_compile_definitions(writeengineredistribute PUBLIC BOOST_NO_CXX11_SCOPED_ENUMS) diff --git a/writeengine/server/CMakeLists.txt b/writeengine/server/CMakeLists.txt index 9ebc5cd72..0843f246f 100644 --- a/writeengine/server/CMakeLists.txt +++ b/writeengine/server/CMakeLists.txt @@ -18,9 +18,4 @@ set(WriteEngineServer_SRCS ) columnstore_executable(WriteEngineServer ${WriteEngineServer_SRCS}) - -add_dependencies(WriteEngineServer loggingcpp) - -columnstore_link( - WriteEngineServer ${ENGINE_LDFLAGS} ${NETSNMP_LIBRARIES} ${ENGINE_WRITE_LIBS} threadpool writeengineredistribute -) +columnstore_link(WriteEngineServer ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS} threadpool writeengineredistribute loggingcpp) diff --git a/writeengine/shared/we_type.h b/writeengine/shared/we_type.h index 7772351ab..158011e6e 100644 --- a/writeengine/shared/we_type.h +++ b/writeengine/shared/we_type.h @@ -40,7 +40,7 @@ #include "IDBDataFile.h" 
#include "IDBPolicy.h" #include "nullstring.h" -#include "collation.h" // For CHARSET_INFO struct +#include "mariadb_charset/collation.h" // For CHARSET_INFO struct #undef EXPORT #undef DELETE @@ -408,8 +408,8 @@ struct JobColumn /** @brief Job Column Structure */ int compressionType; /** @brief compression type */ bool autoIncFlag; /** @brief auto increment flag */ DctnryStruct dctnry; /** @brief dictionary structure */ - int128_t fMinIntSat; /** @brief For integer type, the min saturation value */ - uint128_t fMaxIntSat; /** @brief For integer type, the max saturation value */ + int128_t fMinIntSat; /** @brief For integer type, the min saturation value */ + uint128_t fMaxIntSat; /** @brief For integer type, the max saturation value */ double fMinDblSat; /** @brief for float/double, the min saturation value */ double fMaxDblSat; /** @brief for float/double, the max saturation value */ bool fWithDefault; /** @brief With default */ @@ -447,10 +447,9 @@ struct JobColumn /** @brief Job Column Structure */ , cs(nullptr) { } - JobColumn(const std::string& colName_, OID mapOid_, const std::string& typeName_, - int width_, int definedWidth_, int compressionType_, int dctnryCompressionType_, - int64_t minIntSat_, uint64_t maxIntSat_, bool withDefault_, - unsigned long long defaultUInt_) + JobColumn(const std::string& colName_, OID mapOid_, const std::string& typeName_, int width_, + int definedWidth_, int compressionType_, int dctnryCompressionType_, int64_t minIntSat_, + uint64_t maxIntSat_, bool withDefault_, unsigned long long defaultUInt_) : colName(colName_) , mapOid(mapOid_) , dataType(execplan::CalpontSystemCatalog::INT) diff --git a/writeengine/splitter/CMakeLists.txt b/writeengine/splitter/CMakeLists.txt index 7b05928f6..8d2d12251 100644 --- a/writeengine/splitter/CMakeLists.txt +++ b/writeengine/splitter/CMakeLists.txt @@ -16,15 +16,13 @@ set(cpimport_SRCS columnstore_executable(cpimport ${cpimport_SRCS}) -add_dependencies(cpimport loggingcpp) - columnstore_link( 
cpimport ${ENGINE_LDFLAGS} - ${NETSNMP_LIBRARIES} ${ENGINE_WRITE_LIBS} batchloader threadpool marias3 boost_program_options + loggingcpp ) diff --git a/writeengine/wrapper/CMakeLists.txt b/writeengine/wrapper/CMakeLists.txt index 28a7b50f9..2a1e71325 100644 --- a/writeengine/wrapper/CMakeLists.txt +++ b/writeengine/wrapper/CMakeLists.txt @@ -37,7 +37,4 @@ set(writeengine_LIB_SRCS add_definitions(-D_FILE_OFFSET_BITS=64) columnstore_library(writeengine ${writeengine_LIB_SRCS}) - -add_dependencies(writeengine loggingcpp) - -columnstore_link(writeengine ${NETSNMP_LIBRARIES}) +columnstore_link(writeengine loggingcpp oamcpp boost_thread compress messageqcpp) diff --git a/writeengine/xml/CMakeLists.txt b/writeengine/xml/CMakeLists.txt index 98ab2fbb3..2ef5b0e51 100644 --- a/writeengine/xml/CMakeLists.txt +++ b/writeengine/xml/CMakeLists.txt @@ -6,6 +6,5 @@ set(we_xml_STAT_SRCS we_xmlop.cpp we_xmljob.cpp we_xmlgendata.cpp we_xmlgenproc. add_definitions(-D_FILE_OFFSET_BITS=64) -add_library(we_xml STATIC ${we_xml_STAT_SRCS}) - -add_dependencies(we_xml loggingcpp) +columnstore_static_library(we_xml ${we_xml_STAT_SRCS}) +columnstore_link(we_xml loggingcpp) From a27f1a1f98b1d6665c8446dae6ae270bebd269b8 Mon Sep 17 00:00:00 2001 From: "aleksei.bukhalov" Date: Thu, 17 Jul 2025 14:40:06 +0200 Subject: [PATCH 16/51] chore(ci): fix arch --- build/build_cmapi.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/build_cmapi.sh b/build/build_cmapi.sh index 84c6ca5b0..da0e64d94 100755 --- a/build/build_cmapi.sh +++ b/build/build_cmapi.sh @@ -28,7 +28,7 @@ fi select_pkg_format ${OS} -if [[ "$(arch)" == "arm64" ]]; then +if [[ "$(arch)" == "arm64" || "$(arch)" == "aarch64" ]]; then export CC=gcc fi @@ -59,7 +59,7 @@ install_deps() { if [ "$(arch)" == "x86_64" ]; then PYTHON_URL="https://github.com/indygreg/python-build-standalone/releases/download/20220802/cpython-3.9.13+20220802-x86_64_v2-unknown-linux-gnu-pgo+lto-full.tar.zst" - elif [ "$(arch)" == "arm64" ]; 
then + elif [[ "$(arch)" == "arm64" || "$(arch)" == "aarch64" ]]; then PYTHON_URL="https://github.com/indygreg/python-build-standalone/releases/download/20220802/cpython-3.9.13+20220802-aarch64-unknown-linux-gnu-noopt-full.tar.zst" else echo "Unsupported architecture: $(arch)" From ef126c43198bc6b76fb9cc92b39cf911dfd1cfe2 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 27 Jun 2025 13:45:41 +0000 Subject: [PATCH 17/51] feat(optimizer): add fixed bounds for TPC-H 1GB from regr --- dbcon/mysql/rulebased_optimizer.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/dbcon/mysql/rulebased_optimizer.cpp b/dbcon/mysql/rulebased_optimizer.cpp index b2107c497..4af61396c 100644 --- a/dbcon/mysql/rulebased_optimizer.cpp +++ b/dbcon/mysql/rulebased_optimizer.cpp @@ -23,6 +23,8 @@ #include "predicateoperator.h" #include "simplefilter.h" #include "rulebased_optimizer.h" +#include +#include namespace optimizer { @@ -128,7 +130,8 @@ bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep) // This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand) // TODO add engine-independent statistics-derived ranges -execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep) +execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, + std::pair& bound) { // INV this is SimpleColumn we supply as an argument // TODO find the suitable column using EI statistics. @@ -139,10 +142,11 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep) tableKeyColumnLeftOp->resultType(column->resultType()); // TODO Nobody owns this allocation and cleanup only depends on delete in ParseTree nodes' dtors. 
- auto* filterColLeftOp = new execplan::ConstantColumnUInt(42ULL, 0, 0); + auto* filterColLeftOp = new execplan::ConstantColumnUInt(bound.second, 0, 0); // set TZ // There is a question with ownership of the const column - execplan::SOP ltOp = boost::make_shared(execplan::PredicateOperator("<=")); + // WIP here we lost upper bound value if predicate is not changed to weak lt + execplan::SOP ltOp = boost::make_shared(execplan::PredicateOperator("<")); ltOp->setOpType(filterColLeftOp->resultType(), tableKeyColumnLeftOp->resultType()); ltOp->resultType(ltOp->operationType()); @@ -151,7 +155,7 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep) auto tableKeyColumnRightOp = new execplan::SimpleColumn(*column); tableKeyColumnRightOp->resultType(column->resultType()); // TODO hardcoded column type and value - auto* filterColRightOp = new execplan::ConstantColumnUInt(30ULL, 0, 0); + auto* filterColRightOp = new execplan::ConstantColumnUInt(bound.first, 0, 0); execplan::SOP gtOp = boost::make_shared(execplan::PredicateOperator(">=")); gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType()); @@ -179,11 +183,13 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( { execplan::CalpontSelectExecutionPlan::SelectList unionVec; unionVec.reserve(numberOfLegs); - for (size_t i = 0; i < numberOfLegs; ++i) + std::vector> bounds({{0, 3000961}, + {3000961, std::numeric_limits::max()}}); + for (auto bound : bounds) { auto clonedCSEP = csep.cloneWORecursiveSelects(); // Add BETWEEN based on key column range - clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP)); + clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); unionVec.push_back(clonedCSEP); } @@ -236,7 +242,7 @@ void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep) newDerivedTableList.push_back(derivedSCEP); execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); 
newTableList.push_back(tn); - // Remove the filters as they were pushed down to union units + // Remove the filters as they were pushed down to union units derivedSCEP->filters(nullptr); } } From c4fd150453e3abde6b60e075cdc8c562f9f8dd76 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 27 Jun 2025 17:43:20 +0000 Subject: [PATCH 18/51] chore(execplan): name PT walker function types --- dbcon/execplan/parsetree.h | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/dbcon/execplan/parsetree.h b/dbcon/execplan/parsetree.h index 5c190fdc0..aaa417e5d 100644 --- a/dbcon/execplan/parsetree.h +++ b/dbcon/execplan/parsetree.h @@ -42,7 +42,15 @@ class Row; namespace execplan { -// class Operator; + +using ParseTreeWalker = void (*)(ParseTree* n); +using ParseTreeConstWalker = void (*)(const ParseTree* n); + +using ParseTreePrinter = void (*)(const ParseTree* n, std::ostream& output); + +using ParseTreeWalkerWithContext = void (*)(ParseTree* n, void* obj); +using ParseTreeConstWalkerWithContext = void (*)(const ParseTree* n, void* obj); +// class ParseTree; /** * @brief A template class template to represent an expression tree * @@ -133,31 +141,31 @@ class ParseTree * * postfix walking of a const tree */ - inline void walk(void (*fn)(ParseTree* n)) const; + inline void walk(ParseTreeWalker fn) const; /** walk the tree * * postfix walking of a non-const tree. 
This is for deleting the tree */ - inline void walk(void (*fn)(const ParseTree* n)) const; + inline void walk(ParseTreeConstWalker fn) const; /** output the tree * * take ostream argument to walk and output the tree */ - inline void walk(void (*fn)(const ParseTree* n, std::ostream& output), std::ostream& output) const; + inline void walk(ParseTreePrinter fn, std::ostream& output) const; /** output the tree * * take user argument to walk and output the tree */ - inline void walk(void (*fn)(const ParseTree* n, void* obj), void* object) const; + inline void walk(ParseTreeConstWalkerWithContext fn, void* object) const; /** output the tree * * take user argument to walk and output the tree */ - inline void walk(void (*fn)(ParseTree* n, void* obj), void* object) const; + inline void walk(ParseTreeWalkerWithContext fn, void* object) const; /** output the tree to string * for debug purpose @@ -448,7 +456,7 @@ inline ParseTree::~ParseTree() } } -inline void ParseTree::walk(void (*fn)(ParseTree* n)) const +inline void ParseTree::walk(ParseTreeWalker fn) const { DFSStack stack; stack.emplace_back(const_cast(this)); @@ -477,7 +485,7 @@ inline void ParseTree::walk(void (*fn)(ParseTree* n)) const } } -inline void ParseTree::walk(void (*fn)(const ParseTree* n)) const +inline void ParseTree::walk(ParseTreeConstWalker fn) const { DFSStack stack; stack.emplace_back(const_cast(this)); @@ -506,7 +514,7 @@ inline void ParseTree::walk(void (*fn)(const ParseTree* n)) const } } -inline void ParseTree::walk(void (*fn)(const ParseTree* n, std::ostream& output), std::ostream& output) const +inline void ParseTree::walk(ParseTreePrinter fn, std::ostream& output) const { DFSStack stack; stack.emplace_back(const_cast(this)); @@ -535,7 +543,7 @@ inline void ParseTree::walk(void (*fn)(const ParseTree* n, std::ostream& output) } } -inline void ParseTree::walk(void (*fn)(const ParseTree* n, void* obj), void* obj) const +inline void ParseTree::walk(ParseTreeConstWalkerWithContext fn, void* obj) const 
{ DFSStack stack; stack.emplace_back(const_cast(this)); @@ -571,7 +579,7 @@ inline std::string ParseTree::toString() const return oss.str(); } -inline void ParseTree::walk(void (*fn)(ParseTree* n, void* obj), void* obj) const +inline void ParseTree::walk(ParseTreeWalkerWithContext fn, void* obj) const { DFSStack stack; stack.emplace_back(const_cast(this)); From e21200da198e1bc39866fdeb9f1a45e0eba548af Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 27 Jun 2025 18:28:36 +0000 Subject: [PATCH 19/51] feat(optimizer): stack-based filters walker --- dbcon/mysql/rulebased_optimizer.cpp | 32 +++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/dbcon/mysql/rulebased_optimizer.cpp b/dbcon/mysql/rulebased_optimizer.cpp index 4af61396c..ee674cfba 100644 --- a/dbcon/mysql/rulebased_optimizer.cpp +++ b/dbcon/mysql/rulebased_optimizer.cpp @@ -18,6 +18,7 @@ #include "constantcolumn.h" #include "execplan/calpontselectexecutionplan.h" #include "execplan/simplecolumn.h" +#include "existsfilter.h" #include "logicoperator.h" #include "operator.h" #include "predicateoperator.h" @@ -95,6 +96,37 @@ bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep) const rewrite |= walk(unionUnitLocal); } + if (csep.filters() != nullptr) + { + bool rewriteLocal = false; + std::vector stack; + stack.push_back(csep.filters()); + while (!stack.empty()) + { + execplan::ParseTree* node = stack.back(); + stack.pop_back(); + if (node == nullptr) + continue; + + auto* existsFilter = dynamic_cast(node->data()); + if (existsFilter) + { + if (matchRule(*existsFilter->sub())) + { + applyRule(*existsFilter->sub()); + rewriteLocal = true; + } + } + + if (node->right()) + stack.push_back(node->right()); + if (node->left()) + stack.push_back(node->left()); + } + if (rewriteLocal) + rewrite |= rewriteLocal; + } + if (matchRule(csep)) { applyRule(csep); From 9f5ddd33bc101679d799c72e891e59fdf5d98887 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Sat, 12 Jul 2025 14:13:21 +0000 Subject: [PATCH 
20/51] chore(): a note about unprotected crit section --- dbcon/joblist/tuplehashjoin.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbcon/joblist/tuplehashjoin.cpp b/dbcon/joblist/tuplehashjoin.cpp index dffc17ec9..368a15fdd 100644 --- a/dbcon/joblist/tuplehashjoin.cpp +++ b/dbcon/joblist/tuplehashjoin.cpp @@ -595,6 +595,8 @@ void TupleHashJoinStep::djsReaderFcn(int index) for (auto& diskJoinStep : djs) { + // TODO add and verify mutex + // boost::mutex::scoped_lock lk(*fStatsMutexPtr); fExtendedInfo += diskJoinStep->extendedInfo(); fMiniInfo += diskJoinStep->miniInfo(); } From dfddfedfe5632375d11270e828129ee28c2baa28 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Sat, 12 Jul 2025 14:16:52 +0000 Subject: [PATCH 21/51] feat(optimizer): collect EI statistics for a first column in existing tables indexes --- dbcon/mysql/ha_mcs_execplan.cpp | 45 ++++++++++++++++++++++++++++----- dbcon/mysql/ha_mcs_impl_if.h | 2 ++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 6e19dda44..aa3b604ca 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -49,6 +49,7 @@ using namespace logging; #define PREFER_MY_CONFIG_H #include #include "idb_mysql.h" +#include "opt_histogram_json.h" #include "partition_element.h" #include "partition_info.h" @@ -6287,6 +6288,40 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep return 0; } +void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) +{ + if (!ifp->field->part_of_key.is_clear_all()) + { + return; + } + std::cout << "Processing field item: " << ifp->field_name.str << std::endl; + // std::cout << "part of a key: " << buf << std::endl; + std::cout << "ifp->field->field_index " << ifp->field->field_index << std::endl; + + for (uint j = 0; j < ifp->field->table->s->keys; j++) + { + for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++) + { + std::cout << "key 
fieldnr " << i << " " + << ifp->field->table->s->key_info[j].key_part[i].field->field_name.str << " " + << ifp->field->table->s->key_info[j].key_part[i].fieldnr << std::endl; + if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1) + { + std::cout << "key_info " << j << " key_part " << i << " matched " << std::endl; + if (i == 0 && ifp->field->read_stats) + { + assert(ifp->field->table->s); + // assert(ifp->field->table->s->db); + // assert(ifp->field->table->s->table_name); + // FQCN fqcn({ifp->field->table->s->db.str}, {ifp->field->table->s->table_name.str}, {ifp->field->field_name.str}); + //TODO use FQCN as a key type + gwi.columnStatisticsMap[ifp->field->field_name.str] = ifp->field->read_stats->histogram->get_histogram(); + } + } + } + } +} + /*@brief Process SELECT part of a query or sub-query */ /*********************************************************** * DESCRIPTION: @@ -6376,21 +6411,20 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector case Item::FIELD_ITEM: { Item_field* ifp = (Item_field*)item; - SimpleColumn* sc = NULL; + extractColumnStatistics(ifp, gwi); if (ifp->field_name.length && string(ifp->field_name.str) == "*") { collectAllCols(gwi, ifp); break; } - sc = buildSimpleColumn(ifp, gwi); + SimpleColumn* sc = buildSimpleColumn(ifp, gwi); if (sc) { - string fullname; String str; ifp->print(&str, QT_ORDINARY); - fullname = str.c_ptr(); + string fullname(str.c_ptr()); if (!ifp->is_explicit_name()) // no alias { @@ -7413,7 +7447,6 @@ int cs_get_derived_plan(ha_columnstore_derived_handler* handler, THD* /*thd*/, S return 0; } - int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP& csep, gp_walk_info& gwi, bool isSelectLexUnit) { @@ -7442,7 +7475,7 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP& cerr << *csep << endl; cerr << "-------------- EXECUTION PLAN END --------------\n" << endl; } - + // Derived table projection and 
filter optimization. derivedTableOptimization(&gwi, csep); diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index 5dfa77aa4..46050713d 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -28,6 +28,7 @@ #include #include "basic/string_utils.h" #include "idb_mysql.h" +#include "sql_statistics.h" #include "ha_mcs_sysvars.h" #include "dmlpkg.h" @@ -110,6 +111,7 @@ struct gp_walk_info execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols; std::vector extSelAggColsItems; execplan::CalpontSelectExecutionPlan::ColumnMap columnMap; + std::unordered_map> columnStatisticsMap; // This vector temporarily hold the projection columns to be added // to the returnedCols vector for subquery processing. It will be appended // to the end of returnedCols when the processing is finished. From 3f9ce7779e8535fb67458b12829c3652a6d3e144 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Mon, 14 Jul 2025 18:52:46 +0000 Subject: [PATCH 22/51] feat(optimizer): PoC for EI stats retrieval in getSelectPlan() --- dbcon/mysql/ha_from_sub.cpp | 3 + dbcon/mysql/ha_mcs_execplan.cpp | 39 +++-- dbcon/mysql/ha_mcs_impl.cpp | 5 + dbcon/mysql/ha_mcs_impl_if.h | 3 + dbcon/mysql/ha_select_sub.cpp | 6 + dbcon/mysql/idb_mysql.h | 1 + dbcon/mysql/rulebased_optimizer.cpp | 245 +++++++++++++++++++--------- dbcon/mysql/rulebased_optimizer.h | 33 +++- 8 files changed, 240 insertions(+), 95 deletions(-) diff --git a/dbcon/mysql/ha_from_sub.cpp b/dbcon/mysql/ha_from_sub.cpp index 177887323..66e4d8648 100644 --- a/dbcon/mysql/ha_from_sub.cpp +++ b/dbcon/mysql/ha_from_sub.cpp @@ -444,6 +444,9 @@ SCSEP FromSubQuery::transform() return csep; } + // Insert column statistics + fGwip.mergeColumnStatisticsMap(gwi.columnStatisticsMap); + fGwip.subselectList.push_back(csep); return csep; } diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index aa3b604ca..486bb7979 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ 
b/dbcon/mysql/ha_mcs_execplan.cpp @@ -49,7 +49,7 @@ using namespace logging; #define PREFER_MY_CONFIG_H #include #include "idb_mysql.h" -#include "opt_histogram_json.h" + #include "partition_element.h" #include "partition_info.h" @@ -6290,24 +6290,25 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) { - if (!ifp->field->part_of_key.is_clear_all()) - { - return; - } - std::cout << "Processing field item: " << ifp->field_name.str << std::endl; + // TODO find clear way to check if the field is part of a key + // if (!ifp->field->part_of_key.is_clear_all()) + // { + // return; + // } + // std::cout << "Processing field item: " << ifp->field_name.str << std::endl; // std::cout << "part of a key: " << buf << std::endl; - std::cout << "ifp->field->field_index " << ifp->field->field_index << std::endl; + // std::cout << "ifp->field->field_index " << ifp->field->field_index << std::endl; for (uint j = 0; j < ifp->field->table->s->keys; j++) { for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++) { - std::cout << "key fieldnr " << i << " " - << ifp->field->table->s->key_info[j].key_part[i].field->field_name.str << " " - << ifp->field->table->s->key_info[j].key_part[i].fieldnr << std::endl; + // std::cout << "key fieldnr " << i << " " + // << ifp->field->table->s->key_info[j].key_part[i].field->field_name.str << " " + // << ifp->field->table->s->key_info[j].key_part[i].fieldnr << std::endl; if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1) { - std::cout << "key_info " << j << " key_part " << i << " matched " << std::endl; + // std::cout << "key_info " << j << " key_part " << i << " matched " << std::endl; if (i == 0 && ifp->field->read_stats) { assert(ifp->field->table->s); @@ -6315,7 +6316,15 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) // assert(ifp->field->table->s->table_name); // FQCN 
fqcn({ifp->field->table->s->db.str}, {ifp->field->table->s->table_name.str}, {ifp->field->field_name.str}); //TODO use FQCN as a key type - gwi.columnStatisticsMap[ifp->field->field_name.str] = ifp->field->read_stats->histogram->get_histogram(); + std::cout << "Adding column statistics for " << ifp->field->field_name.str << std::endl; + auto* histogram = dynamic_cast(ifp->field->read_stats->histogram); + if (histogram) + { + std::cout << "Type of histogram object: " << typeid(*histogram).name() << std::endl; + std::vector histogramBuckets = histogram->get_histogram(); + std::cout << "gwi.columnStatisticsMap[ifp->field->field_name.str].size() " << histogramBuckets.size() << std::endl; + gwi.columnStatisticsMap[ifp->field->field_name.str] = histogramBuckets; + } } } } @@ -6412,7 +6421,7 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector { Item_field* ifp = (Item_field*)item; extractColumnStatistics(ifp, gwi); - + std::cout << "gwi.columnStatisticsMap 1 size " << gwi.columnStatisticsMap.size() << std::endl; if (ifp->field_name.length && string(ifp->field_name.str) == "*") { collectAllCols(gwi, ifp); @@ -7464,6 +7473,7 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP& int status = getSelectPlan(gwi, select_lex, csep, false, true, isSelectLexUnit); + std::cout << "cs_get_select_plan columnStatisticsMap size " << gwi.columnStatisticsMap.size() << std::endl; if (status > 0) return ER_INTERNAL_ERROR; else if (status < 0) @@ -7481,7 +7491,8 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP& if (get_unstable_optimizer(thd)) { - bool csepWasOptimized = optimizer::optimizeCSEP(*csep); + optimizer::RBOptimizerContext ctx(gwi); + bool csepWasOptimized = optimizer::optimizeCSEP(*csep, ctx); if (csep->traceOn() && csepWasOptimized) { cerr << "---------------- cs_get_select_plan optimized EXECUTION PLAN ----------------" << endl; diff --git a/dbcon/mysql/ha_mcs_impl.cpp 
b/dbcon/mysql/ha_mcs_impl.cpp index a7e974159..d1fae3006 100644 --- a/dbcon/mysql/ha_mcs_impl.cpp +++ b/dbcon/mysql/ha_mcs_impl.cpp @@ -135,6 +135,11 @@ using namespace funcexp; namespace cal_impl_if { extern bool nonConstFunc(Item_func* ifp); + +void gp_walk_info::mergeColumnStatisticsMap(const ColumnStatisticsMap& aColumnStatisticsMap) +{ + columnStatisticsMap.insert(aColumnStatisticsMap.begin(), aColumnStatisticsMap.end()); +} } namespace diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index 46050713d..95335b7eb 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -102,6 +102,7 @@ typedef dmlpackage::TableValuesMap TableValuesMap; typedef std::map> TableMap; typedef std::tr1::unordered_map> TableOnExprList; typedef std::tr1::unordered_map TableOuterJoinMap; +using ColumnStatisticsMap = std::unordered_map>; struct gp_walk_info { @@ -232,6 +233,8 @@ struct gp_walk_info { } ~gp_walk_info(); + + void mergeColumnStatisticsMap(const std::unordered_map>& columnStatisticsMap); }; struct SubQueryChainHolder; diff --git a/dbcon/mysql/ha_select_sub.cpp b/dbcon/mysql/ha_select_sub.cpp index 3939c11b4..b2f39aa27 100644 --- a/dbcon/mysql/ha_select_sub.cpp +++ b/dbcon/mysql/ha_select_sub.cpp @@ -96,6 +96,12 @@ SCSEP SelectSubQuery::transform() return csep; } + // Insert column statistics + fGwip.mergeColumnStatisticsMap(gwi.columnStatisticsMap); + // std::cout << "fGwip.columnStatisticsMap 2 size " << fGwip.columnStatisticsMap.size() << std::endl; + // std::cout << "gwi.columnStatisticsMap 2 size " << gwi.columnStatisticsMap.size() << std::endl; + + // Insert subselect CSEP fGwip.subselectList.push_back(csep); // remove outer query tables diff --git a/dbcon/mysql/idb_mysql.h b/dbcon/mysql/idb_mysql.h index cc71e9425..89c0291c3 100644 --- a/dbcon/mysql/idb_mysql.h +++ b/dbcon/mysql/idb_mysql.h @@ -74,6 +74,7 @@ #include "rpl_rli.h" #include "my_dbug.h" #include "sql_show.h" +#include "opt_histogram_json.h" #pragma GCC 
diagnostic pop // Now clean up the pollution as best we can... diff --git a/dbcon/mysql/rulebased_optimizer.cpp b/dbcon/mysql/rulebased_optimizer.cpp index ee674cfba..e1b6acd07 100644 --- a/dbcon/mysql/rulebased_optimizer.cpp +++ b/dbcon/mysql/rulebased_optimizer.cpp @@ -15,6 +15,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include +#include +#include +#include + +#include "rulebased_optimizer.h" + #include "constantcolumn.h" #include "execplan/calpontselectexecutionplan.h" #include "execplan/simplecolumn.h" @@ -23,44 +30,45 @@ #include "operator.h" #include "predicateoperator.h" #include "simplefilter.h" -#include "rulebased_optimizer.h" -#include -#include namespace optimizer { +void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, const size_t id); + static const std::string RewrittenSubTableAliasPrefix = "$added_sub_"; // Apply a list of rules to a CSEP -bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std::vector& rules) +bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std::vector& rules, + optimizer::RBOptimizerContext& ctx) { bool changed = false; for (const auto& rule : rules) { - changed |= rule.apply(root); + changed |= rule.apply(root, ctx); } return changed; } // high level API call for optimizer -bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root) +bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& ctx) { optimizer::Rule parallelCES{"parallelCES", optimizer::matchParallelCES, optimizer::applyParallelCES}; - std::vector rules = {parallelCES}; + std::vector rules = {parallelCES}; - return optimizeCSEPWithRules(root, rules); + return optimizeCSEPWithRules(root, rules, ctx); } // Apply iteratively until CSEP is converged by rule -bool Rule::apply(execplan::CalpontSelectExecutionPlan& root) const +bool Rule::apply(execplan::CalpontSelectExecutionPlan& root, optimizer::RBOptimizerContext& 
ctx) const { bool changedThisRound = false; bool hasBeenApplied = false; + do { - changedThisRound = walk(root); + changedThisRound = walk(root, ctx); hasBeenApplied |= changedThisRound; } while (changedThisRound && !applyOnlyOnce); @@ -68,69 +76,42 @@ bool Rule::apply(execplan::CalpontSelectExecutionPlan& root) const } // DFS walk to match CSEP and apply rules if match -bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep) const +bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) const { bool rewrite = false; - for (auto& table : csep.derivedTableList()) + std::stack planStack; + planStack.push(&csep); + + while (!planStack.empty()) { - auto* csepPtr = dynamic_cast(table.get()); - if (!csepPtr) + execplan::CalpontSelectExecutionPlan* current = planStack.top(); + planStack.pop(); + + for (auto& table : current->derivedTableList()) { - continue; - } - - auto& csepLocal = *csepPtr; - rewrite |= walk(csepLocal); - } - - for (auto& unionUnit : csep.unionVec()) - { - auto* unionUnitPtr = dynamic_cast(unionUnit.get()); - if (!unionUnitPtr) - { - continue; - } - - auto& unionUnitLocal = *unionUnitPtr; - rewrite |= walk(unionUnitLocal); - } - - if (csep.filters() != nullptr) - { - bool rewriteLocal = false; - std::vector stack; - stack.push_back(csep.filters()); - while (!stack.empty()) - { - execplan::ParseTree* node = stack.back(); - stack.pop_back(); - if (node == nullptr) - continue; - - auto* existsFilter = dynamic_cast(node->data()); - if (existsFilter) + auto* csepPtr = dynamic_cast(table.get()); + if (csepPtr) { - if (matchRule(*existsFilter->sub())) - { - applyRule(*existsFilter->sub()); - rewriteLocal = true; - } + planStack.push(csepPtr); } - - if (node->right()) - stack.push_back(node->right()); - if (node->left()) - stack.push_back(node->left()); } - if (rewriteLocal) - rewrite |= rewriteLocal; - } - if (matchRule(csep)) - { - applyRule(csep); - rewrite = true; + for (auto& unionUnit : current->unionVec()) + { 
+ auto* unionUnitPtr = dynamic_cast(unionUnit.get()); + if (unionUnitPtr) + { + planStack.push(unionUnitPtr); + } + } + + if (matchRule(*current)) + { + applyRule(*current, ctx); + ++ctx.uniqueId; + rewrite = true; + } } return rewrite; @@ -177,7 +158,7 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, auto* filterColLeftOp = new execplan::ConstantColumnUInt(bound.second, 0, 0); // set TZ // There is a question with ownership of the const column - // WIP here we lost upper bound value if predicate is not changed to weak lt + // WIP here we lost upper bound value if predicate is not changed to weak lt execplan::SOP ltOp = boost::make_shared(execplan::PredicateOperator("<")); ltOp->setOpType(filterColLeftOp->resultType(), tableKeyColumnLeftOp->resultType()); ltOp->resultType(ltOp->operationType()); @@ -210,14 +191,42 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, return ptp; } +execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep) +{ + return dynamic_cast(csep.returnedCols().front().get()); +} + execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( - const size_t numberOfLegs, execplan::CalpontSelectExecutionPlan& csep) + execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) { execplan::CalpontSelectExecutionPlan::SelectList unionVec; - unionVec.reserve(numberOfLegs); - std::vector> bounds({{0, 3000961}, - {3000961, std::numeric_limits::max()}}); - for (auto bound : bounds) + // unionVec.reserve(numberOfLegs); + execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep); + std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.columnStatisticsMap " << " with size " << ctx.gwi.columnStatisticsMap.size() << std::endl; + for (auto& [k, v] : ctx.gwi.columnStatisticsMap) + { + std::cout << "key " << k << std::endl; + } + if (!keyColumn || + ctx.gwi.columnStatisticsMap.find(keyColumn->columnName()) == 
ctx.gwi.columnStatisticsMap.end()) + { + return unionVec; + } + + auto columnStatistics = ctx.gwi.columnStatisticsMap[keyColumn->columnName()]; + std::cout << "columnStatistics.size() " << columnStatistics.size() << std::endl; + // TODO char and other numerical types support + std::vector> bounds; + std::transform(columnStatistics.begin(), columnStatistics.end(), std::back_inserter(bounds), + [](const auto& bucket) + { + uint64_t lowerBound = std::stoul(bucket.start_value); + uint64_t upperBound = lowerBound + bucket.ndv; + return std::make_pair(lowerBound, upperBound); + }); + // std::vector> bounds({{0, 3000961}, + // // {3000961, std::numeric_limits::max()}}); + for (auto& bound : bounds) { auto clonedCSEP = csep.cloneWORecursiveSelects(); // Add BETWEEN based on key column range @@ -227,8 +236,7 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( return unionVec; } - -void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep) +void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) { auto tables = csep.tableList(); execplan::CalpontSelectExecutionPlan::TableList newTableList; @@ -242,16 +250,17 @@ void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep) { auto derivedSCEP = csep.cloneWORecursiveSelects(); // need to add a level here - std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table; + std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + + std::to_string(ctx.uniqueId); derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); derivedSCEP->derivedTbAlias(tableAlias); // TODO: hardcoded for now - size_t parallelFactor = 2; + // size_t parallelFactor = 2; // Create a copy of the current leaf CSEP with additional filters to partition the key column - auto additionalUnionVec = makeUnionFromTable(parallelFactor, csep); + auto 
additionalUnionVec = makeUnionFromTable(csep, ctx); derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), additionalUnionVec.end()); @@ -275,11 +284,95 @@ void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep) execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); newTableList.push_back(tn); // Remove the filters as they were pushed down to union units + // This is inappropriate for EXISTS filter and join conditions derivedSCEP->filters(nullptr); } } // Remove the filters as they were pushed down to union units - csep.filters(nullptr); + // This is inappropriate for EXISTS filter and join conditions + // csep.filters(nullptr); + // There must be no derived at this point. + csep.derivedTableList(newDerivedTableList); + // Replace table list with new table list populated with union units + csep.tableList(newTableList); + csep.returnedCols(newReturnedColumns); +} + +execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable_exists( + const size_t numberOfLegs, execplan::CalpontSelectExecutionPlan& csep) +{ + execplan::CalpontSelectExecutionPlan::SelectList unionVec; + unionVec.reserve(numberOfLegs); + std::vector> bounds( + {{0, 3000961}, {3000961, std::numeric_limits::max()}}); + for (auto bound : bounds) + { + auto clonedCSEP = csep.cloneWORecursiveSelects(); + clonedCSEP->filters(nullptr); + // Add BETWEEN based on key column range + clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); + unionVec.push_back(clonedCSEP); + } + + return unionVec; +} + +void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) +{ + auto tables = csep.tableList(); + execplan::CalpontSelectExecutionPlan::TableList newTableList; + execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList; + execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; + + // ATM Must be only 1 table + for (auto& table 
: tables) + { + if (!table.isColumnstore()) + { + auto derivedSCEP = csep.cloneWORecursiveSelects(); + // need to add a level here + std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + + std::to_string(ctx.uniqueId); + + derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); + derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); + derivedSCEP->derivedTbAlias(tableAlias); + + // TODO: hardcoded for now + size_t parallelFactor = 2; + // Create a copy of the current leaf CSEP with additional filters to partition the key column + auto additionalUnionVec = makeUnionFromTable_exists(parallelFactor, csep); + derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), + additionalUnionVec.end()); + + size_t colPosition = 0; + // change parent to derived table columns + for (auto& rc : csep.returnedCols()) + { + auto rcCloned = boost::make_shared(*rc); + // TODO timezone and result type are not copied + // TODO add specific ctor for this functionality + rcCloned->tableName(""); + rcCloned->schemaName(""); + rcCloned->tableAlias(tableAlias); + rcCloned->colPosition(colPosition++); + rcCloned->resultType(rc->resultType()); + + newReturnedColumns.push_back(rcCloned); + } + + newDerivedTableList.push_back(derivedSCEP); + execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); + newTableList.push_back(tn); + // Remove the filters as they were pushed down to union units + // This is inappropriate for EXISTS filter and join conditions + // TODO if needed + derivedSCEP->filters(nullptr); + } + } + // Remove the filters as they were pushed down to union units + // This is inappropriate for EXISTS filter and join conditions + // csep.filters(nullptr); // There must be no derived at this point. 
csep.derivedTableList(newDerivedTableList); // Replace table list with new table list populated with union units diff --git a/dbcon/mysql/rulebased_optimizer.h b/dbcon/mysql/rulebased_optimizer.h index df0fa8556..65a8adcc6 100644 --- a/dbcon/mysql/rulebased_optimizer.h +++ b/dbcon/mysql/rulebased_optimizer.h @@ -18,14 +18,31 @@ #pragma once #include + +#define PREFER_MY_CONFIG_H +#include +#include "idb_mysql.h" + +#include "ha_mcs_impl_if.h" + #include "execplan/calpontselectexecutionplan.h" namespace optimizer { +class RBOptimizerContext { +public: + RBOptimizerContext() = delete; + RBOptimizerContext(cal_impl_if::gp_walk_info& walk_info) : gwi(walk_info) {} + // gwi lifetime should be longer than optimizer context. + // In plugin runtime this is always true. + cal_impl_if::gp_walk_info& gwi; + uint64_t uniqueId {0}; +}; + struct Rule { using RuleMatcher = bool (*)(execplan::CalpontSelectExecutionPlan&); - using RuleApplier = void (*)(execplan::CalpontSelectExecutionPlan&); + using RuleApplier = void (*)(execplan::CalpontSelectExecutionPlan&, RBOptimizerContext&); Rule(std::string&& name, RuleMatcher matchRule, RuleApplier applyRule) : name(name), matchRule(matchRule), applyRule(applyRule) {}; @@ -39,15 +56,21 @@ struct Rule Rule() = default; Rule(const Rule&) = default; Rule(Rule&&) = default; + + std::string getName() const + { + return name; + } + Rule& operator=(const Rule&) = default; Rule& operator=(Rule&&) = default; - bool apply(execplan::CalpontSelectExecutionPlan& csep) const; - bool walk(execplan::CalpontSelectExecutionPlan& csep) const; + bool apply(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) const; + bool walk(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) const; }; bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep); -void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep); -bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root); +void 
applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx); +bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx); } \ No newline at end of file From 67295b43202172a2261acebfc97a8bee00b63cb5 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Wed, 16 Jul 2025 22:18:09 +0000 Subject: [PATCH 23/51] feat(optimizer,rules): use EI statistics for range filters --- dbcon/mysql/ha_mcs_execplan.cpp | 2 +- dbcon/mysql/rulebased_optimizer.cpp | 40 +++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 486bb7979..642606901 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -6321,7 +6321,7 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) if (histogram) { std::cout << "Type of histogram object: " << typeid(*histogram).name() << std::endl; - std::vector histogramBuckets = histogram->get_histogram(); + std::vector histogramBuckets = histogram->get_json_histogram(); std::cout << "gwi.columnStatisticsMap[ifp->field->field_name.str].size() " << histogramBuckets.size() << std::endl; gwi.columnStatisticsMap[ifp->field->field_name.str] = histogramBuckets; } diff --git a/dbcon/mysql/rulebased_optimizer.cpp b/dbcon/mysql/rulebased_optimizer.cpp index e1b6acd07..53f280e68 100644 --- a/dbcon/mysql/rulebased_optimizer.cpp +++ b/dbcon/mysql/rulebased_optimizer.cpp @@ -202,10 +202,11 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( execplan::CalpontSelectExecutionPlan::SelectList unionVec; // unionVec.reserve(numberOfLegs); execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep); - std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.columnStatisticsMap " << " with size " << ctx.gwi.columnStatisticsMap.size() << std::endl; + std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.columnStatisticsMap " + << " with 
size " << ctx.gwi.columnStatisticsMap.size() << std::endl; for (auto& [k, v] : ctx.gwi.columnStatisticsMap) { - std::cout << "key " << k << std::endl; + std::cout << "key " << k << " vector size " << v.size() << std::endl; } if (!keyColumn || ctx.gwi.columnStatisticsMap.find(keyColumn->columnName()) == ctx.gwi.columnStatisticsMap.end()) @@ -216,16 +217,33 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( auto columnStatistics = ctx.gwi.columnStatisticsMap[keyColumn->columnName()]; std::cout << "columnStatistics.size() " << columnStatistics.size() << std::endl; // TODO char and other numerical types support + size_t numberOfUnionUnits = 2; + size_t numberOfBucketsPerUnionUnit = columnStatistics.size() / numberOfUnionUnits; + std::vector> bounds; - std::transform(columnStatistics.begin(), columnStatistics.end(), std::back_inserter(bounds), - [](const auto& bucket) - { - uint64_t lowerBound = std::stoul(bucket.start_value); - uint64_t upperBound = lowerBound + bucket.ndv; - return std::make_pair(lowerBound, upperBound); - }); - // std::vector> bounds({{0, 3000961}, - // // {3000961, std::numeric_limits::max()}}); + + // TODO need to process tail if number of buckets is not divisible by number of union units + // TODO non-overlapping buckets if it is a problem at all + for (size_t i = 0; i < numberOfUnionUnits; ++i) + { + auto bucket = columnStatistics.begin() + i * numberOfBucketsPerUnionUnit; + auto endBucket = columnStatistics.begin() + (i + 1) * numberOfBucketsPerUnionUnit; + // TODO find a median b/w the current bucket start and the previous bucket end + uint64_t currentLowerBound = + (bounds.empty() ? 
*(uint32_t*)bucket->start_value.data() + : std::min((uint64_t)*(uint32_t*)bucket->start_value.data(), bounds.back().second)); + uint64_t currentUpperBound = currentLowerBound; + for (; bucket != endBucket; ++bucket) + { + uint64_t bucketLowerBound = *(uint32_t*)bucket->start_value.data(); + std::cout << "bucket.start_value " << bucketLowerBound << std::endl; + currentUpperBound = bucketLowerBound + bucket->ndv; + } + std::cout << "currentLowerBound " << currentLowerBound << " currentUpperBound " << currentUpperBound + << std::endl; + bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); + } + for (auto& bound : bounds) { auto clonedCSEP = csep.cloneWORecursiveSelects(); From 15be33fbc5c678255c538ef1d243f0b886327843 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Thu, 17 Jul 2025 15:37:20 +0000 Subject: [PATCH 24/51] feat(rbo,rules): refactored statistics storage in gwi and implemented statistics based UNION rewrite. --- dbcon/mysql/ha_from_sub.cpp | 2 +- dbcon/mysql/ha_mcs_execplan.cpp | 17 +++--- dbcon/mysql/ha_mcs_impl.cpp | 30 +++++++++- dbcon/mysql/ha_mcs_impl_if.h | 26 +++++++-- dbcon/mysql/ha_select_sub.cpp | 2 +- dbcon/mysql/rulebased_optimizer.cpp | 91 +++++++++++++++++++---------- 6 files changed, 122 insertions(+), 46 deletions(-) diff --git a/dbcon/mysql/ha_from_sub.cpp b/dbcon/mysql/ha_from_sub.cpp index 66e4d8648..fd35de219 100644 --- a/dbcon/mysql/ha_from_sub.cpp +++ b/dbcon/mysql/ha_from_sub.cpp @@ -445,7 +445,7 @@ SCSEP FromSubQuery::transform() } // Insert column statistics - fGwip.mergeColumnStatisticsMap(gwi.columnStatisticsMap); + fGwip.mergeTableStatistics(gwi.tableStatisticsMap); fGwip.subselectList.push_back(csep); return csep; diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 642606901..ab4eefe46 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -6314,16 +6314,20 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) 
assert(ifp->field->table->s); // assert(ifp->field->table->s->db); // assert(ifp->field->table->s->table_name); - // FQCN fqcn({ifp->field->table->s->db.str}, {ifp->field->table->s->table_name.str}, {ifp->field->field_name.str}); - //TODO use FQCN as a key type + // FQCN fqcn({ifp->field->table->s->db.str}, {ifp->field->table->s->table_name.str}, + // {ifp->field->field_name.str}); + // TODO use FQCN as a key type std::cout << "Adding column statistics for " << ifp->field->field_name.str << std::endl; auto* histogram = dynamic_cast(ifp->field->read_stats->histogram); if (histogram) { std::cout << "Type of histogram object: " << typeid(*histogram).name() << std::endl; - std::vector histogramBuckets = histogram->get_json_histogram(); - std::cout << "gwi.columnStatisticsMap[ifp->field->field_name.str].size() " << histogramBuckets.size() << std::endl; - gwi.columnStatisticsMap[ifp->field->field_name.str] = histogramBuckets; + // std::vector histogramBuckets = histogram->get_json_histogram(); + // std::cout << "gwi.tableStatisticsMap[{ifp->field->table->s->db.str, " + // "ifp->field->table->s->table_name.str}][ifp->field->field_name.str].size() " + // << histogramBuckets.size() << std::endl; + SchemaAndTableName tableName = {ifp->field->table->s->db.str, ifp->field->table->s->table_name.str}; + gwi.tableStatisticsMap[tableName][ifp->field->field_name.str] = *histogram; } } } @@ -6421,7 +6425,7 @@ int processSelect(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep, vector { Item_field* ifp = (Item_field*)item; extractColumnStatistics(ifp, gwi); - std::cout << "gwi.columnStatisticsMap 1 size " << gwi.columnStatisticsMap.size() << std::endl; + // Handle * case if (ifp->field_name.length && string(ifp->field_name.str) == "*") { collectAllCols(gwi, ifp); @@ -7473,7 +7477,6 @@ int cs_get_select_plan(ha_columnstore_select_handler* handler, THD* thd, SCSEP& int status = getSelectPlan(gwi, select_lex, csep, false, true, isSelectLexUnit); - std::cout << "cs_get_select_plan 
columnStatisticsMap size " << gwi.columnStatisticsMap.size() << std::endl; if (status > 0) return ER_INTERNAL_ERROR; else if (status < 0) diff --git a/dbcon/mysql/ha_mcs_impl.cpp b/dbcon/mysql/ha_mcs_impl.cpp index d1fae3006..c385be180 100644 --- a/dbcon/mysql/ha_mcs_impl.cpp +++ b/dbcon/mysql/ha_mcs_impl.cpp @@ -136,10 +136,36 @@ namespace cal_impl_if { extern bool nonConstFunc(Item_func* ifp); -void gp_walk_info::mergeColumnStatisticsMap(const ColumnStatisticsMap& aColumnStatisticsMap) +void gp_walk_info::mergeTableStatistics(const TableStatisticsMap& aTableStatisticsMap) { - columnStatisticsMap.insert(aColumnStatisticsMap.begin(), aColumnStatisticsMap.end()); + for (auto& [schemaAndTableName, aColumnStatisticsMap]: aTableStatisticsMap) + { + auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName); + if (tableStatisticsMapIt == tableStatisticsMap.end()) + { + tableStatisticsMap[schemaAndTableName] = aColumnStatisticsMap; + } + else + { + for (auto& [columnName, histogram]: aColumnStatisticsMap) + { + tableStatisticsMapIt->second[columnName] = histogram; + } + } + } } + +std::optional gp_walk_info::findStatisticsForATable(SchemaAndTableName& schemaAndTableName) +{ + auto tableStatisticsMapIt = tableStatisticsMap.find(schemaAndTableName); + if (tableStatisticsMapIt == tableStatisticsMap.end()) + { + return std::nullopt; + } + + return {tableStatisticsMapIt->second}; +} + } namespace diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index 95335b7eb..df53800c9 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -96,13 +96,29 @@ enum ClauseType ORDER_BY }; +struct SchemaAndTableName { + std::string schema; + std::string table; + bool operator==(const SchemaAndTableName& other) const { + return schema == other.schema && table == other.table; + } +}; + +struct SchemaAndTableNameHash { + std::size_t operator()(const SchemaAndTableName& k) const { + return std::hash()(k.schema + k.table); + } +}; + typedef 
std::vector JoinInfoVec; typedef dmlpackage::ColValuesList ColValuesList; typedef dmlpackage::TableValuesMap TableValuesMap; typedef std::map> TableMap; typedef std::tr1::unordered_map> TableOnExprList; typedef std::tr1::unordered_map TableOuterJoinMap; -using ColumnStatisticsMap = std::unordered_map>; +using ColumnName = std::string; +using ColumnStatisticsMap = std::unordered_map; +using TableStatisticsMap = std::unordered_map; struct gp_walk_info { @@ -112,7 +128,7 @@ struct gp_walk_info execplan::CalpontSelectExecutionPlan::ReturnedColumnList orderByCols; std::vector extSelAggColsItems; execplan::CalpontSelectExecutionPlan::ColumnMap columnMap; - std::unordered_map> columnStatisticsMap; + TableStatisticsMap tableStatisticsMap; // This vector temporarily hold the projection columns to be added // to the returnedCols vector for subquery processing. It will be appended // to the end of returnedCols when the processing is finished. @@ -203,7 +219,8 @@ struct gp_walk_info SubQuery** subQueriesChain; gp_walk_info(long timeZone_, SubQuery** subQueriesChain_) - : sessionid(0) + : tableStatisticsMap({}) + , sessionid(0) , fatalParseError(false) , condPush(false) , dropCond(false) @@ -234,7 +251,8 @@ struct gp_walk_info } ~gp_walk_info(); - void mergeColumnStatisticsMap(const std::unordered_map>& columnStatisticsMap); + void mergeTableStatistics(const TableStatisticsMap& tableStatisticsMap); + std::optional findStatisticsForATable(SchemaAndTableName& schemaAndTableName); }; struct SubQueryChainHolder; diff --git a/dbcon/mysql/ha_select_sub.cpp b/dbcon/mysql/ha_select_sub.cpp index b2f39aa27..caddce6af 100644 --- a/dbcon/mysql/ha_select_sub.cpp +++ b/dbcon/mysql/ha_select_sub.cpp @@ -97,7 +97,7 @@ SCSEP SelectSubQuery::transform() } // Insert column statistics - fGwip.mergeColumnStatisticsMap(gwi.columnStatisticsMap); + fGwip.mergeTableStatistics(gwi.tableStatisticsMap); // std::cout << "fGwip.columnStatisticsMap 2 size " << fGwip.columnStatisticsMap.size() << std::endl; 
// std::cout << "gwi.columnStatisticsMap 2 size " << gwi.columnStatisticsMap.size() << std::endl; diff --git a/dbcon/mysql/rulebased_optimizer.cpp b/dbcon/mysql/rulebased_optimizer.cpp index 53f280e68..f14635bbc 100644 --- a/dbcon/mysql/rulebased_optimizer.cpp +++ b/dbcon/mysql/rulebased_optimizer.cpp @@ -191,9 +191,24 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, return ptp; } -execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep) +// Looking for a projected column that comes first in an available index and has EI statistics +// INV nullptr signifies that no suitable column was found +execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) { - return dynamic_cast(csep.returnedCols().front().get()); + for (auto& rc : csep.returnedCols()) + { + auto* simpleColumn = dynamic_cast(rc.get()); + if (simpleColumn) + { + std::cout << "Found simple column " << simpleColumn->columnName() << std::endl; + cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->tableName(), simpleColumn->columnName()}; + auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam); + + return simpleColumn; + } + } + + return nullptr; } execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( @@ -201,49 +216,65 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( { execplan::CalpontSelectExecutionPlan::SelectList unionVec; // unionVec.reserve(numberOfLegs); - execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep); - std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.columnStatisticsMap " - << " with size " << ctx.gwi.columnStatisticsMap.size() << std::endl; - for (auto& [k, v] : ctx.gwi.columnStatisticsMap) - { - std::cout << "key " << k << " vector size " << v.size() << std::endl; - } - if (!keyColumn || - ctx.gwi.columnStatisticsMap.find(keyColumn->columnName()) == 
ctx.gwi.columnStatisticsMap.end()) + execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep, ctx); + if (!keyColumn) { return unionVec; } - auto columnStatistics = ctx.gwi.columnStatisticsMap[keyColumn->columnName()]; - std::cout << "columnStatistics.size() " << columnStatistics.size() << std::endl; + std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.tableStatisticsMap " + << " with size " << ctx.gwi.tableStatisticsMap.size() << std::endl; + for (auto& [k, v] : ctx.gwi.tableStatisticsMap) + { + std::cout << "SchemaAndTableName " << k.schema << "." << k.table << " column map size " << v.size() << std::endl; + } + + + + cal_impl_if::SchemaAndTableName schemaAndTableName = {keyColumn->schemaName(), keyColumn->tableName()}; + auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName); + if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end()) + { + return unionVec; + } + + auto columnStatisticsIt = tableColumnsStatisticsIt->second.find(keyColumn->columnName()); + if (columnStatisticsIt == tableColumnsStatisticsIt->second.end()) + { + return unionVec; + } + + auto columnStatistics = columnStatisticsIt->second; + std::cout << "Histogram_json_hb histogram size " << columnStatistics.get_json_histogram().size() << std::endl; // TODO char and other numerical types support - size_t numberOfUnionUnits = 2; - size_t numberOfBucketsPerUnionUnit = columnStatistics.size() / numberOfUnionUnits; + size_t numberOfUnionUnits = std::min(columnStatistics.get_json_histogram().size(), 16UL); + size_t numberOfBucketsPerUnionUnit = columnStatistics.get_json_histogram().size() / numberOfUnionUnits; std::vector> bounds; // TODO need to process tail if number of buckets is not divisible by number of union units // TODO non-overlapping buckets if it is a problem at all - for (size_t i = 0; i < numberOfUnionUnits; ++i) + for (size_t i = 0; i < numberOfUnionUnits - 1; ++i) { - auto bucket = columnStatistics.begin() + i * 
numberOfBucketsPerUnionUnit; - auto endBucket = columnStatistics.begin() + (i + 1) * numberOfBucketsPerUnionUnit; - // TODO find a median b/w the current bucket start and the previous bucket end - uint64_t currentLowerBound = - (bounds.empty() ? *(uint32_t*)bucket->start_value.data() - : std::min((uint64_t)*(uint32_t*)bucket->start_value.data(), bounds.back().second)); - uint64_t currentUpperBound = currentLowerBound; - for (; bucket != endBucket; ++bucket) - { - uint64_t bucketLowerBound = *(uint32_t*)bucket->start_value.data(); - std::cout << "bucket.start_value " << bucketLowerBound << std::endl; - currentUpperBound = bucketLowerBound + bucket->ndv; - } + auto bucket = columnStatistics.get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit; + auto endBucket = columnStatistics.get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit; + uint64_t currentLowerBound = *(uint32_t*)bucket->start_value.data(); + uint64_t currentUpperBound = *(uint32_t*)endBucket->start_value.data(); + std::cout << "currentLowerBound " << currentLowerBound << " currentUpperBound " << currentUpperBound << std::endl; bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); } + // Add last range + auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit; + uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data(); + uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); + + std::cout << "last currentLowerBound " << currentLowerBound << " last currentUpperBound " << currentUpperBound + << std::endl; + bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); + for (auto& bound : bounds) { auto clonedCSEP = csep.cloneWORecursiveSelects(); @@ -275,8 +306,6 @@ void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); 
derivedSCEP->derivedTbAlias(tableAlias); - // TODO: hardcoded for now - // size_t parallelFactor = 2; // Create a copy of the current leaf CSEP with additional filters to partition the key column auto additionalUnionVec = makeUnionFromTable(csep, ctx); derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), From 19fb5a89b74cd24932e148e450b4298da0acba16 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Thu, 17 Jul 2025 15:58:57 +0000 Subject: [PATCH 25/51] chore(rbo,rules): separate the rule from common rbo code --- dbcon/mysql/CMakeLists.txt | 1 + dbcon/mysql/rbo_apply_parallel_ces.cpp | 356 +++++++++++++++++++++++++ dbcon/mysql/rbo_apply_parallel_ces.h | 30 +++ dbcon/mysql/rulebased_optimizer.cpp | 315 +--------------------- dbcon/mysql/rulebased_optimizer.h | 3 - 5 files changed, 388 insertions(+), 317 deletions(-) create mode 100644 dbcon/mysql/rbo_apply_parallel_ces.cpp create mode 100644 dbcon/mysql/rbo_apply_parallel_ces.h diff --git a/dbcon/mysql/CMakeLists.txt b/dbcon/mysql/CMakeLists.txt index 488943704..168732a07 100644 --- a/dbcon/mysql/CMakeLists.txt +++ b/dbcon/mysql/CMakeLists.txt @@ -43,6 +43,7 @@ set(libcalmysql_SRCS is_columnstore_extents.cpp columnstore_dataload.cpp rulebased_optimizer.cpp + rbo_apply_parallel_ces.cpp ) set_source_files_properties(ha_mcs.cpp PROPERTIES COMPILE_FLAGS "-fno-implicit-templates") diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp new file mode 100644 index 000000000..7529308b9 --- /dev/null +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -0,0 +1,356 @@ +/* Copyright (C) 2025 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include + +#include "rulebased_optimizer.h" + +#include "constantcolumn.h" +#include "execplan/calpontselectexecutionplan.h" +#include "execplan/simplecolumn.h" +#include "existsfilter.h" +#include "logicoperator.h" +#include "operator.h" +#include "predicateoperator.h" +#include "simplefilter.h" + +namespace optimizer +{ + +void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, const size_t id); + +static const std::string RewrittenSubTableAliasPrefix = "$added_sub_"; +static const size_t MaxParallelFactor = 16; + +bool tableIsInUnion(const execplan::CalpontSystemCatalog::TableAliasName& table, + execplan::CalpontSelectExecutionPlan& csep) +{ + return std::any_of(csep.unionVec().begin(), csep.unionVec().end(), + [&table](const auto& unionUnit) + { + execplan::CalpontSelectExecutionPlan* unionUnitLocal = + dynamic_cast(unionUnit.get()); + bool tableIsPresented = + std::any_of(unionUnitLocal->tableList().begin(), unionUnitLocal->tableList().end(), + [&table](const auto& unionTable) { return unionTable == table; }); + return tableIsPresented; + }); +} + +bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep) +{ + auto tables = csep.tableList(); + // This is leaf and there are no other tables at this level in neither UNION, nor derived table. + // WIP filter out CSEPs with orderBy, groupBy, having + // Filter out tables that were re-written. 
+ return tables.size() == 1 && !tables[0].isColumnstore() && !tableIsInUnion(tables[0], csep); +} + +// This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand) +// TODO add engine-independent statistics-derived ranges +execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, + std::pair& bound) +{ + // INV this is SimpleColumn we supply as an argument + // TODO find the suitable column using EI statistics. + auto* column = dynamic_cast(csep->returnedCols().front().get()); + assert(column); + + auto tableKeyColumnLeftOp = new execplan::SimpleColumn(*column); + tableKeyColumnLeftOp->resultType(column->resultType()); + + // TODO Nobody owns this allocation and cleanup only depends on delete in ParseTree nodes' dtors. + auto* filterColLeftOp = new execplan::ConstantColumnUInt(bound.second, 0, 0); + // set TZ + // There is a question with ownership of the const column + // WIP here we lost upper bound value if predicate is not changed to weak lt + execplan::SOP ltOp = boost::make_shared(execplan::PredicateOperator("<")); + ltOp->setOpType(filterColLeftOp->resultType(), tableKeyColumnLeftOp->resultType()); + ltOp->resultType(ltOp->operationType()); + + auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp); + // auto tableKeyColumn = derivedSCEP->returnedCols().front(); + auto tableKeyColumnRightOp = new execplan::SimpleColumn(*column); + tableKeyColumnRightOp->resultType(column->resultType()); + // TODO hardcoded column type and value + auto* filterColRightOp = new execplan::ConstantColumnUInt(bound.first, 0, 0); + + execplan::SOP gtOp = boost::make_shared(execplan::PredicateOperator(">=")); + gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType()); + gtOp->resultType(gtOp->operationType()); + + auto* sfl = new execplan::SimpleFilter(gtOp, tableKeyColumnRightOp, filterColRightOp); + + execplan::ParseTree* ptp = new execplan::ParseTree(new 
execplan::LogicOperator("and")); + ptp->right(sfr); + ptp->left(sfl); + + auto* currentFilters = csep->filters(); + if (currentFilters) + { + execplan::ParseTree* andWithExistingFilters = + new execplan::ParseTree(new execplan::LogicOperator("and"), currentFilters, ptp); + return andWithExistingFilters; + } + + return ptp; +} + +// Looking for a projected column that comes first in an available index and has EI statistics +// INV nullptr signifies that no suitable column was found +execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) +{ + for (auto& rc : csep.returnedCols()) + { + auto* simpleColumn = dynamic_cast(rc.get()); + if (simpleColumn) + { + std::cout << "Found simple column " << simpleColumn->columnName() << std::endl; + cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->tableName(), simpleColumn->columnName()}; + auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam); + + return simpleColumn; + } + } + + return nullptr; +} + +execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( + execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) +{ + execplan::CalpontSelectExecutionPlan::SelectList unionVec; + + // SC type controls an integral type used to produce suitable filters. The continuation of this function + // should become a template function based on SC type. + execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep, ctx); + if (!keyColumn) + { + return unionVec; + } + + std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.tableStatisticsMap " + << " with size " << ctx.gwi.tableStatisticsMap.size() << std::endl; + for (auto& [k, v] : ctx.gwi.tableStatisticsMap) + { + std::cout << "SchemaAndTableName " << k.schema << "." 
<< k.table << " column map size " << v.size() << std::endl; + } + + cal_impl_if::SchemaAndTableName schemaAndTableName = {keyColumn->schemaName(), keyColumn->tableName()}; + auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName); + if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end()) + { + return unionVec; + } + + auto columnStatisticsIt = tableColumnsStatisticsIt->second.find(keyColumn->columnName()); + if (columnStatisticsIt == tableColumnsStatisticsIt->second.end()) + { + return unionVec; + } + + auto columnStatistics = columnStatisticsIt->second; + std::cout << "Histogram_json_hb histogram size " << columnStatistics.get_json_histogram().size() << std::endl; + + // TODO configurable parallel factor + // NB now histogram size is the way to control parallel factor with 16 being the maximum + size_t numberOfUnionUnits = std::min(columnStatistics.get_json_histogram().size(), MaxParallelFactor); + size_t numberOfBucketsPerUnionUnit = columnStatistics.get_json_histogram().size() / numberOfUnionUnits; + + // TODO char and other numerical types support + std::vector> bounds; + + // TODO need to process tail if number of buckets is not divisible by number of union units + // TODO non-overlapping buckets if it is a problem at all + for (size_t i = 0; i < numberOfUnionUnits - 1; ++i) + { + auto bucket = columnStatistics.get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit; + auto endBucket = columnStatistics.get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit; + uint64_t currentLowerBound = *(uint32_t*)bucket->start_value.data(); + uint64_t currentUpperBound = *(uint32_t*)endBucket->start_value.data(); + + std::cout << "currentLowerBound " << currentLowerBound << " currentUpperBound " << currentUpperBound + << std::endl; + bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); + } + + // Add last range + auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits 
- 1) * numberOfBucketsPerUnionUnit; + uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data(); + uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); + + std::cout << "last currentLowerBound " << currentLowerBound << " last currentUpperBound " << currentUpperBound + << std::endl; + bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); + + for (auto& bound : bounds) + { + auto clonedCSEP = csep.cloneWORecursiveSelects(); + // Add BETWEEN based on key column range + clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); + unionVec.push_back(clonedCSEP); + } + + return unionVec; +} +void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) +{ + auto tables = csep.tableList(); + execplan::CalpontSelectExecutionPlan::TableList newTableList; + execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList; + execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; + + // ATM Must be only 1 table + for (auto& table : tables) + { + if (!table.isColumnstore()) + { + auto derivedSCEP = csep.cloneWORecursiveSelects(); + // need to add a level here + std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + + std::to_string(ctx.uniqueId); + + derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); + derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); + derivedSCEP->derivedTbAlias(tableAlias); + + // Create a copy of the current leaf CSEP with additional filters to partition the key column + auto additionalUnionVec = makeUnionFromTable(csep, ctx); + derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), + additionalUnionVec.end()); + + size_t colPosition = 0; + // change parent to derived table columns + for (auto& rc : csep.returnedCols()) + { + auto rcCloned = boost::make_shared(*rc); + // TODO timezone and result type are 
not copied + // TODO add specific ctor for this functionality + rcCloned->tableName(""); + rcCloned->schemaName(""); + rcCloned->tableAlias(tableAlias); + rcCloned->colPosition(colPosition++); + rcCloned->resultType(rc->resultType()); + + newReturnedColumns.push_back(rcCloned); + } + + newDerivedTableList.push_back(derivedSCEP); + execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); + newTableList.push_back(tn); + // Remove the filters as they were pushed down to union units + // This is inappropriate for EXISTS filter and join conditions + derivedSCEP->filters(nullptr); + } + } + // Remove the filters as they were pushed down to union units + // This is inappropriate for EXISTS filter and join conditions + // csep.filters(nullptr); + // There must be no derived at this point. + csep.derivedTableList(newDerivedTableList); + // Replace table list with new table list populated with union units + csep.tableList(newTableList); + csep.returnedCols(newReturnedColumns); +} + +execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable_exists( + const size_t numberOfLegs, execplan::CalpontSelectExecutionPlan& csep) +{ + execplan::CalpontSelectExecutionPlan::SelectList unionVec; + unionVec.reserve(numberOfLegs); + std::vector> bounds( + {{0, 3000961}, {3000961, std::numeric_limits::max()}}); + for (auto bound : bounds) + { + auto clonedCSEP = csep.cloneWORecursiveSelects(); + clonedCSEP->filters(nullptr); + // Add BETWEEN based on key column range + clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); + unionVec.push_back(clonedCSEP); + } + + return unionVec; +} + +// TODO: remove applyParallelCES_exists +void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) +{ + auto tables = csep.tableList(); + execplan::CalpontSelectExecutionPlan::TableList newTableList; + execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList; + 
execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; + + // ATM Must be only 1 table + for (auto& table : tables) + { + if (!table.isColumnstore()) + { + auto derivedSCEP = csep.cloneWORecursiveSelects(); + // need to add a level here + std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + + std::to_string(ctx.uniqueId); + + derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); + derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); + derivedSCEP->derivedTbAlias(tableAlias); + + // TODO: hardcoded for now + size_t parallelFactor = 2; + // Create a copy of the current leaf CSEP with additional filters to partition the key column + auto additionalUnionVec = makeUnionFromTable_exists(parallelFactor, csep); + derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), + additionalUnionVec.end()); + + size_t colPosition = 0; + // change parent to derived table columns + for (auto& rc : csep.returnedCols()) + { + auto rcCloned = boost::make_shared(*rc); + // TODO timezone and result type are not copied + // TODO add specific ctor for this functionality + rcCloned->tableName(""); + rcCloned->schemaName(""); + rcCloned->tableAlias(tableAlias); + rcCloned->colPosition(colPosition++); + rcCloned->resultType(rc->resultType()); + + newReturnedColumns.push_back(rcCloned); + } + + newDerivedTableList.push_back(derivedSCEP); + execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); + newTableList.push_back(tn); + // Remove the filters as they were pushed down to union units + // This is inappropriate for EXISTS filter and join conditions + // TODO if needed + derivedSCEP->filters(nullptr); + } + } + // Remove the filters as they were pushed down to union units + // This is inappropriate for EXISTS filter and join conditions + // csep.filters(nullptr); + // There must be no derived at this point. 
+ csep.derivedTableList(newDerivedTableList); + // Replace table list with new table list populated with union units + csep.tableList(newTableList); + csep.returnedCols(newReturnedColumns); +} + +} // namespace optimizer diff --git a/dbcon/mysql/rbo_apply_parallel_ces.h b/dbcon/mysql/rbo_apply_parallel_ces.h new file mode 100644 index 000000000..038e71356 --- /dev/null +++ b/dbcon/mysql/rbo_apply_parallel_ces.h @@ -0,0 +1,30 @@ +/* Copyright (C) 2025 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#pragma once + +#define PREFER_MY_CONFIG_H +#include +#include "idb_mysql.h" + +#include "execplan/calpontselectexecutionplan.h" +#include "rulebased_optimizer.h" + +namespace optimizer { + bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep); + void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx); +} \ No newline at end of file diff --git a/dbcon/mysql/rulebased_optimizer.cpp b/dbcon/mysql/rulebased_optimizer.cpp index f14635bbc..f01f79321 100644 --- a/dbcon/mysql/rulebased_optimizer.cpp +++ b/dbcon/mysql/rulebased_optimizer.cpp @@ -30,14 +30,11 @@ #include "operator.h" #include "predicateoperator.h" #include "simplefilter.h" +#include "rbo_apply_parallel_ces.h" namespace optimizer { -void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, const size_t id); - -static const std::string RewrittenSubTableAliasPrefix = "$added_sub_"; - // Apply a list of rules to a CSEP bool optimizeCSEPWithRules(execplan::CalpontSelectExecutionPlan& root, const std::vector& rules, optimizer::RBOptimizerContext& ctx) @@ -117,314 +114,4 @@ bool Rule::walk(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimiz return rewrite; } -bool tableIsInUnion(const execplan::CalpontSystemCatalog::TableAliasName& table, - execplan::CalpontSelectExecutionPlan& csep) -{ - return std::any_of(csep.unionVec().begin(), csep.unionVec().end(), - [&table](const auto& unionUnit) - { - execplan::CalpontSelectExecutionPlan* unionUnitLocal = - dynamic_cast(unionUnit.get()); - bool tableIsPresented = - std::any_of(unionUnitLocal->tableList().begin(), unionUnitLocal->tableList().end(), - [&table](const auto& unionTable) { return unionTable == table; }); - return tableIsPresented; - }); -} - -bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep) -{ - auto tables = csep.tableList(); - // This is leaf and there are no other tables at this level in neither UNION, nor derived table. 
- // WIP filter out CSEPs with orderBy, groupBy, having - // Filter out tables that were re-written. - return tables.size() == 1 && !tables[0].isColumnstore() && !tableIsInUnion(tables[0], csep); -} - -// This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand) -// TODO add engine-independent statistics-derived ranges -execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, - std::pair& bound) -{ - // INV this is SimpleColumn we supply as an argument - // TODO find the suitable column using EI statistics. - auto* column = dynamic_cast(csep->returnedCols().front().get()); - assert(column); - - auto tableKeyColumnLeftOp = new execplan::SimpleColumn(*column); - tableKeyColumnLeftOp->resultType(column->resultType()); - - // TODO Nobody owns this allocation and cleanup only depends on delete in ParseTree nodes' dtors. - auto* filterColLeftOp = new execplan::ConstantColumnUInt(bound.second, 0, 0); - // set TZ - // There is a question with ownership of the const column - // WIP here we lost upper bound value if predicate is not changed to weak lt - execplan::SOP ltOp = boost::make_shared(execplan::PredicateOperator("<")); - ltOp->setOpType(filterColLeftOp->resultType(), tableKeyColumnLeftOp->resultType()); - ltOp->resultType(ltOp->operationType()); - - auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp); - // auto tableKeyColumn = derivedSCEP->returnedCols().front(); - auto tableKeyColumnRightOp = new execplan::SimpleColumn(*column); - tableKeyColumnRightOp->resultType(column->resultType()); - // TODO hardcoded column type and value - auto* filterColRightOp = new execplan::ConstantColumnUInt(bound.first, 0, 0); - - execplan::SOP gtOp = boost::make_shared(execplan::PredicateOperator(">=")); - gtOp->setOpType(filterColRightOp->resultType(), tableKeyColumnRightOp->resultType()); - gtOp->resultType(gtOp->operationType()); - - auto* sfl = new execplan::SimpleFilter(gtOp, 
tableKeyColumnRightOp, filterColRightOp); - - execplan::ParseTree* ptp = new execplan::ParseTree(new execplan::LogicOperator("and")); - ptp->right(sfr); - ptp->left(sfl); - - auto* currentFilters = csep->filters(); - if (currentFilters) - { - execplan::ParseTree* andWithExistingFilters = - new execplan::ParseTree(new execplan::LogicOperator("and"), currentFilters, ptp); - return andWithExistingFilters; - } - - return ptp; -} - -// Looking for a projected column that comes first in an available index and has EI statistics -// INV nullptr signifies that no suitable column was found -execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) -{ - for (auto& rc : csep.returnedCols()) - { - auto* simpleColumn = dynamic_cast(rc.get()); - if (simpleColumn) - { - std::cout << "Found simple column " << simpleColumn->columnName() << std::endl; - cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->tableName(), simpleColumn->columnName()}; - auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam); - - return simpleColumn; - } - } - - return nullptr; -} - -execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( - execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) -{ - execplan::CalpontSelectExecutionPlan::SelectList unionVec; - // unionVec.reserve(numberOfLegs); - execplan::SimpleColumn* keyColumn = findSuitableKeyColumn(csep, ctx); - if (!keyColumn) - { - return unionVec; - } - - std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.tableStatisticsMap " - << " with size " << ctx.gwi.tableStatisticsMap.size() << std::endl; - for (auto& [k, v] : ctx.gwi.tableStatisticsMap) - { - std::cout << "SchemaAndTableName " << k.schema << "." 
<< k.table << " column map size " << v.size() << std::endl; - } - - - - cal_impl_if::SchemaAndTableName schemaAndTableName = {keyColumn->schemaName(), keyColumn->tableName()}; - auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName); - if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end()) - { - return unionVec; - } - - auto columnStatisticsIt = tableColumnsStatisticsIt->second.find(keyColumn->columnName()); - if (columnStatisticsIt == tableColumnsStatisticsIt->second.end()) - { - return unionVec; - } - - auto columnStatistics = columnStatisticsIt->second; - std::cout << "Histogram_json_hb histogram size " << columnStatistics.get_json_histogram().size() << std::endl; - // TODO char and other numerical types support - size_t numberOfUnionUnits = std::min(columnStatistics.get_json_histogram().size(), 16UL); - size_t numberOfBucketsPerUnionUnit = columnStatistics.get_json_histogram().size() / numberOfUnionUnits; - - std::vector> bounds; - - // TODO need to process tail if number of buckets is not divisible by number of union units - // TODO non-overlapping buckets if it is a problem at all - for (size_t i = 0; i < numberOfUnionUnits - 1; ++i) - { - auto bucket = columnStatistics.get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit; - auto endBucket = columnStatistics.get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit; - uint64_t currentLowerBound = *(uint32_t*)bucket->start_value.data(); - uint64_t currentUpperBound = *(uint32_t*)endBucket->start_value.data(); - - std::cout << "currentLowerBound " << currentLowerBound << " currentUpperBound " << currentUpperBound - << std::endl; - bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); - } - - // Add last range - auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit; - uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data(); - uint64_t currentUpperBound 
= *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); - - std::cout << "last currentLowerBound " << currentLowerBound << " last currentUpperBound " << currentUpperBound - << std::endl; - bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); - - for (auto& bound : bounds) - { - auto clonedCSEP = csep.cloneWORecursiveSelects(); - // Add BETWEEN based on key column range - clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); - unionVec.push_back(clonedCSEP); - } - - return unionVec; -} -void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) -{ - auto tables = csep.tableList(); - execplan::CalpontSelectExecutionPlan::TableList newTableList; - execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList; - execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; - - // ATM Must be only 1 table - for (auto& table : tables) - { - if (!table.isColumnstore()) - { - auto derivedSCEP = csep.cloneWORecursiveSelects(); - // need to add a level here - std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + - std::to_string(ctx.uniqueId); - - derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); - derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); - derivedSCEP->derivedTbAlias(tableAlias); - - // Create a copy of the current leaf CSEP with additional filters to partition the key column - auto additionalUnionVec = makeUnionFromTable(csep, ctx); - derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), - additionalUnionVec.end()); - - size_t colPosition = 0; - // change parent to derived table columns - for (auto& rc : csep.returnedCols()) - { - auto rcCloned = boost::make_shared(*rc); - // TODO timezone and result type are not copied - // TODO add specific ctor for this functionality - rcCloned->tableName(""); - rcCloned->schemaName(""); - 
rcCloned->tableAlias(tableAlias); - rcCloned->colPosition(colPosition++); - rcCloned->resultType(rc->resultType()); - - newReturnedColumns.push_back(rcCloned); - } - - newDerivedTableList.push_back(derivedSCEP); - execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); - newTableList.push_back(tn); - // Remove the filters as they were pushed down to union units - // This is inappropriate for EXISTS filter and join conditions - derivedSCEP->filters(nullptr); - } - } - // Remove the filters as they were pushed down to union units - // This is inappropriate for EXISTS filter and join conditions - // csep.filters(nullptr); - // There must be no derived at this point. - csep.derivedTableList(newDerivedTableList); - // Replace table list with new table list populated with union units - csep.tableList(newTableList); - csep.returnedCols(newReturnedColumns); -} - -execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable_exists( - const size_t numberOfLegs, execplan::CalpontSelectExecutionPlan& csep) -{ - execplan::CalpontSelectExecutionPlan::SelectList unionVec; - unionVec.reserve(numberOfLegs); - std::vector> bounds( - {{0, 3000961}, {3000961, std::numeric_limits::max()}}); - for (auto bound : bounds) - { - auto clonedCSEP = csep.cloneWORecursiveSelects(); - clonedCSEP->filters(nullptr); - // Add BETWEEN based on key column range - clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); - unionVec.push_back(clonedCSEP); - } - - return unionVec; -} - -void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) -{ - auto tables = csep.tableList(); - execplan::CalpontSelectExecutionPlan::TableList newTableList; - execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList; - execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; - - // ATM Must be only 1 table - for (auto& table : tables) - { - if (!table.isColumnstore()) - { - auto 
derivedSCEP = csep.cloneWORecursiveSelects(); - // need to add a level here - std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + - std::to_string(ctx.uniqueId); - - derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); - derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); - derivedSCEP->derivedTbAlias(tableAlias); - - // TODO: hardcoded for now - size_t parallelFactor = 2; - // Create a copy of the current leaf CSEP with additional filters to partition the key column - auto additionalUnionVec = makeUnionFromTable_exists(parallelFactor, csep); - derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), - additionalUnionVec.end()); - - size_t colPosition = 0; - // change parent to derived table columns - for (auto& rc : csep.returnedCols()) - { - auto rcCloned = boost::make_shared(*rc); - // TODO timezone and result type are not copied - // TODO add specific ctor for this functionality - rcCloned->tableName(""); - rcCloned->schemaName(""); - rcCloned->tableAlias(tableAlias); - rcCloned->colPosition(colPosition++); - rcCloned->resultType(rc->resultType()); - - newReturnedColumns.push_back(rcCloned); - } - - newDerivedTableList.push_back(derivedSCEP); - execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); - newTableList.push_back(tn); - // Remove the filters as they were pushed down to union units - // This is inappropriate for EXISTS filter and join conditions - // TODO if needed - derivedSCEP->filters(nullptr); - } - } - // Remove the filters as they were pushed down to union units - // This is inappropriate for EXISTS filter and join conditions - // csep.filters(nullptr); - // There must be no derived at this point. 
- csep.derivedTableList(newDerivedTableList); - // Replace table list with new table list populated with union units - csep.tableList(newTableList); - csep.returnedCols(newReturnedColumns); -} - } // namespace optimizer diff --git a/dbcon/mysql/rulebased_optimizer.h b/dbcon/mysql/rulebased_optimizer.h index 65a8adcc6..c047a406e 100644 --- a/dbcon/mysql/rulebased_optimizer.h +++ b/dbcon/mysql/rulebased_optimizer.h @@ -69,8 +69,5 @@ struct Rule bool walk(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) const; }; -bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep); -void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx); bool optimizeCSEP(execplan::CalpontSelectExecutionPlan& root, RBOptimizerContext& ctx); - } \ No newline at end of file From 428be12b9b5e8231f37248dbe489e817d8c3ef0b Mon Sep 17 00:00:00 2001 From: drrtuy Date: Thu, 17 Jul 2025 19:13:56 +0000 Subject: [PATCH 26/51] feat(rbo,rules): find suitable indexed column to be used for range partitioning --- dbcon/mysql/rbo_apply_parallel_ces.cpp | 188 +++++++++++++------------ 1 file changed, 99 insertions(+), 89 deletions(-) diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp index 7529308b9..e9465dcfd 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.cpp +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -65,16 +65,16 @@ bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep) // This routine produces a new ParseTree that is AND(lowerBand <= column, column <= upperBand) // TODO add engine-independent statistics-derived ranges -execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, +execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column, std::pair& bound) { // INV this is SimpleColumn we supply as an argument // TODO find the suitable column using EI statistics. 
- auto* column = dynamic_cast(csep->returnedCols().front().get()); - assert(column); + // auto* column = dynamic_cast(csep->returnedCols().front().get()); + // assert(column); - auto tableKeyColumnLeftOp = new execplan::SimpleColumn(*column); - tableKeyColumnLeftOp->resultType(column->resultType()); + auto tableKeyColumnLeftOp = new execplan::SimpleColumn(column); + tableKeyColumnLeftOp->resultType(column.resultType()); // TODO Nobody owns this allocation and cleanup only depends on delete in ParseTree nodes' dtors. auto* filterColLeftOp = new execplan::ConstantColumnUInt(bound.second, 0, 0); @@ -87,8 +87,8 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp); // auto tableKeyColumn = derivedSCEP->returnedCols().front(); - auto tableKeyColumnRightOp = new execplan::SimpleColumn(*column); - tableKeyColumnRightOp->resultType(column->resultType()); + auto tableKeyColumnRightOp = new execplan::SimpleColumn(column); + tableKeyColumnRightOp->resultType(column.resultType()); // TODO hardcoded column type and value auto* filterColRightOp = new execplan::ConstantColumnUInt(bound.first, 0, 0); @@ -117,16 +117,24 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, // INV nullptr signifies that no suitable column was found execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) { + std::cout << "findSuitableKeyColumn " << csep.returnedCols().size() << std::endl; for (auto& rc : csep.returnedCols()) { auto* simpleColumn = dynamic_cast(rc.get()); if (simpleColumn) { std::cout << "Found simple column " << simpleColumn->columnName() << std::endl; - cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->tableName(), simpleColumn->columnName()}; + cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->schemaName(), simpleColumn->tableName()}; auto 
columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam); - - return simpleColumn; + if (!columnStatistics) + { + continue; + } + auto columnStatisticsIt = columnStatistics->find(simpleColumn->columnName()); + if (columnStatisticsIt != columnStatistics->end()) + { + return simpleColumn; + } } } @@ -167,9 +175,8 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( } auto columnStatistics = columnStatisticsIt->second; - std::cout << "Histogram_json_hb histogram size " << columnStatistics.get_json_histogram().size() << std::endl; - // TODO configurable parallel factor + // TODO configurable parallel factor via session variable // NB now histogram size is the way to control parallel factor with 16 being the maximum size_t numberOfUnionUnits = std::min(columnStatistics.get_json_histogram().size(), MaxParallelFactor); size_t numberOfBucketsPerUnionUnit = columnStatistics.get_json_histogram().size() / numberOfUnionUnits; @@ -188,23 +195,26 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( std::cout << "currentLowerBound " << currentLowerBound << " currentUpperBound " << currentUpperBound << std::endl; - bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); + bounds.push_back({currentLowerBound, currentUpperBound}); } // Add last range + // NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit; uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data(); + std::cout << "lastBucket start_value " << currentLowerBound << std::endl; uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); + std::cout << "Histogram end_value " << currentUpperBound << std::endl; std::cout << "last currentLowerBound " << currentLowerBound << " last currentUpperBound " << currentUpperBound << std::endl; - 
bounds.push_back(std::make_pair(currentLowerBound, currentUpperBound)); + bounds.push_back({currentLowerBound, currentUpperBound}); for (auto& bound : bounds) { auto clonedCSEP = csep.cloneWORecursiveSelects(); // Add BETWEEN based on key column range - clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); + clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, *keyColumn, bound)); unionVec.push_back(clonedCSEP); } @@ -270,87 +280,87 @@ void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon csep.returnedCols(newReturnedColumns); } -execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable_exists( - const size_t numberOfLegs, execplan::CalpontSelectExecutionPlan& csep) -{ - execplan::CalpontSelectExecutionPlan::SelectList unionVec; - unionVec.reserve(numberOfLegs); - std::vector> bounds( - {{0, 3000961}, {3000961, std::numeric_limits::max()}}); - for (auto bound : bounds) - { - auto clonedCSEP = csep.cloneWORecursiveSelects(); - clonedCSEP->filters(nullptr); - // Add BETWEEN based on key column range - clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); - unionVec.push_back(clonedCSEP); - } +// execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable_exists( +// const size_t numberOfLegs, execplan::CalpontSelectExecutionPlan& csep) +// { +// execplan::CalpontSelectExecutionPlan::SelectList unionVec; +// unionVec.reserve(numberOfLegs); +// std::vector> bounds( +// {{0, 3000961}, {3000961, std::numeric_limits::max()}}); +// for (auto bound : bounds) +// { +// auto clonedCSEP = csep.cloneWORecursiveSelects(); +// clonedCSEP->filters(nullptr); +// // Add BETWEEN based on key column range +// clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); +// unionVec.push_back(clonedCSEP); +// } - return unionVec; -} +// return unionVec; +// } -// TODO: remove applyParallelCES_exists -void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, 
RBOptimizerContext& ctx) -{ - auto tables = csep.tableList(); - execplan::CalpontSelectExecutionPlan::TableList newTableList; - execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList; - execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; +// // TODO: remove applyParallelCES_exists +// void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) +// { +// auto tables = csep.tableList(); +// execplan::CalpontSelectExecutionPlan::TableList newTableList; +// execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList; +// execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; - // ATM Must be only 1 table - for (auto& table : tables) - { - if (!table.isColumnstore()) - { - auto derivedSCEP = csep.cloneWORecursiveSelects(); - // need to add a level here - std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + - std::to_string(ctx.uniqueId); +// // ATM Must be only 1 table +// for (auto& table : tables) +// { +// if (!table.isColumnstore()) +// { +// auto derivedSCEP = csep.cloneWORecursiveSelects(); +// // need to add a level here +// std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + +// std::to_string(ctx.uniqueId); - derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); - derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); - derivedSCEP->derivedTbAlias(tableAlias); +// derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); +// derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); +// derivedSCEP->derivedTbAlias(tableAlias); - // TODO: hardcoded for now - size_t parallelFactor = 2; - // Create a copy of the current leaf CSEP with additional filters to partition the key column - auto additionalUnionVec = makeUnionFromTable_exists(parallelFactor, csep); - derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), 
additionalUnionVec.begin(), - additionalUnionVec.end()); +// // TODO: hardcoded for now +// size_t parallelFactor = 2; +// // Create a copy of the current leaf CSEP with additional filters to partition the key column +// auto additionalUnionVec = makeUnionFromTable_exists(parallelFactor, csep); +// derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), +// additionalUnionVec.end()); - size_t colPosition = 0; - // change parent to derived table columns - for (auto& rc : csep.returnedCols()) - { - auto rcCloned = boost::make_shared(*rc); - // TODO timezone and result type are not copied - // TODO add specific ctor for this functionality - rcCloned->tableName(""); - rcCloned->schemaName(""); - rcCloned->tableAlias(tableAlias); - rcCloned->colPosition(colPosition++); - rcCloned->resultType(rc->resultType()); +// size_t colPosition = 0; +// // change parent to derived table columns +// for (auto& rc : csep.returnedCols()) +// { +// auto rcCloned = boost::make_shared(*rc); +// // TODO timezone and result type are not copied +// // TODO add specific ctor for this functionality +// rcCloned->tableName(""); +// rcCloned->schemaName(""); +// rcCloned->tableAlias(tableAlias); +// rcCloned->colPosition(colPosition++); +// rcCloned->resultType(rc->resultType()); - newReturnedColumns.push_back(rcCloned); - } +// newReturnedColumns.push_back(rcCloned); +// } - newDerivedTableList.push_back(derivedSCEP); - execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); - newTableList.push_back(tn); - // Remove the filters as they were pushed down to union units - // This is inappropriate for EXISTS filter and join conditions - // TODO if needed - derivedSCEP->filters(nullptr); - } - } - // Remove the filters as they were pushed down to union units - // This is inappropriate for EXISTS filter and join conditions - // csep.filters(nullptr); - // There must be no derived at this point. 
- csep.derivedTableList(newDerivedTableList); - // Replace table list with new table list populated with union units - csep.tableList(newTableList); - csep.returnedCols(newReturnedColumns); -} +// newDerivedTableList.push_back(derivedSCEP); +// execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); +// newTableList.push_back(tn); +// // Remove the filters as they were pushed down to union units +// // This is inappropriate for EXISTS filter and join conditions +// // TODO if needed +// derivedSCEP->filters(nullptr); +// } +// } +// // Remove the filters as they were pushed down to union units +// // This is inappropriate for EXISTS filter and join conditions +// // csep.filters(nullptr); +// // There must be no derived at this point. +// csep.derivedTableList(newDerivedTableList); +// // Replace table list with new table list populated with union units +// csep.tableList(newTableList); +// csep.returnedCols(newReturnedColumns); +// } } // namespace optimizer From e600f11aa9eb8141883473dba88af57d9e9caf8f Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 18 Jul 2025 13:41:30 +0000 Subject: [PATCH 27/51] feat(rbo,rules): use EI statistics for filter ranges --- dbcon/mysql/ha_mcs_execplan.cpp | 27 +----- dbcon/mysql/ha_mcs_impl_if.h | 6 ++ dbcon/mysql/rbo_apply_parallel_ces.cpp | 115 ++----------------------- 3 files changed, 14 insertions(+), 134 deletions(-) diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index ab4eefe46..c3082ba5d 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -6288,44 +6288,23 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep return 0; } +// Loop over available indexes to find and extract corresponding EI column statistics +// for the first column of the index if any. +// Statistics is stored in GWI context. 
void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) { - // TODO find clear way to check if the field is part of a key - // if (!ifp->field->part_of_key.is_clear_all()) - // { - // return; - // } - // std::cout << "Processing field item: " << ifp->field_name.str << std::endl; - // std::cout << "part of a key: " << buf << std::endl; - // std::cout << "ifp->field->field_index " << ifp->field->field_index << std::endl; - for (uint j = 0; j < ifp->field->table->s->keys; j++) { for (uint i = 0; i < ifp->field->table->s->key_info[j].usable_key_parts; i++) { - // std::cout << "key fieldnr " << i << " " - // << ifp->field->table->s->key_info[j].key_part[i].field->field_name.str << " " - // << ifp->field->table->s->key_info[j].key_part[i].fieldnr << std::endl; if (ifp->field->table->s->key_info[j].key_part[i].fieldnr == ifp->field->field_index + 1) { - // std::cout << "key_info " << j << " key_part " << i << " matched " << std::endl; if (i == 0 && ifp->field->read_stats) { assert(ifp->field->table->s); - // assert(ifp->field->table->s->db); - // assert(ifp->field->table->s->table_name); - // FQCN fqcn({ifp->field->table->s->db.str}, {ifp->field->table->s->table_name.str}, - // {ifp->field->field_name.str}); - // TODO use FQCN as a key type - std::cout << "Adding column statistics for " << ifp->field->field_name.str << std::endl; auto* histogram = dynamic_cast(ifp->field->read_stats->histogram); if (histogram) { - std::cout << "Type of histogram object: " << typeid(*histogram).name() << std::endl; - // std::vector histogramBuckets = histogram->get_json_histogram(); - // std::cout << "gwi.tableStatisticsMap[{ifp->field->table->s->db.str, " - // "ifp->field->table->s->table_name.str}][ifp->field->field_name.str].size() " - // << histogramBuckets.size() << std::endl; SchemaAndTableName tableName = {ifp->field->table->s->db.str, ifp->field->table->s->table_name.str}; gwi.tableStatisticsMap[tableName][ifp->field->field_name.str] = *histogram; } diff --git 
a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index df53800c9..acb0b042e 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -120,6 +120,12 @@ using ColumnName = std::string; using ColumnStatisticsMap = std::unordered_map; using TableStatisticsMap = std::unordered_map; +// This structure is used to store MDB AST -> CSEP translation context. +// There is a column statistics for some columns in a query. +// As per 23.10.5 "some" means first column of the index in projection list of CSEP +// satisfies the condition of applyParallelCSEP RBO rule. +// Note that statistics must be merged from subquery/derived table +// to the statistics of the outer query. struct gp_walk_info { execplan::CalpontSelectExecutionPlan::ReturnedColumnList returnedCols; diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp index e9465dcfd..05e1d879b 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.cpp +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -68,10 +68,6 @@ bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep) execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, execplan::SimpleColumn& column, std::pair& bound) { - // INV this is SimpleColumn we supply as an argument - // TODO find the suitable column using EI statistics. 
- // auto* column = dynamic_cast(csep->returnedCols().front().get()); - // assert(column); auto tableKeyColumnLeftOp = new execplan::SimpleColumn(column); tableKeyColumnLeftOp->resultType(column.resultType()); @@ -117,13 +113,11 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, exe // INV nullptr signifies that no suitable column was found execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) { - std::cout << "findSuitableKeyColumn " << csep.returnedCols().size() << std::endl; for (auto& rc : csep.returnedCols()) { auto* simpleColumn = dynamic_cast(rc.get()); if (simpleColumn) { - std::cout << "Found simple column " << simpleColumn->columnName() << std::endl; cal_impl_if::SchemaAndTableName schemaAndTableNam = {simpleColumn->schemaName(), simpleColumn->tableName()}; auto columnStatistics = ctx.gwi.findStatisticsForATable(schemaAndTableNam); if (!columnStatistics) @@ -141,6 +135,7 @@ execplan::SimpleColumn* findSuitableKeyColumn(execplan::CalpontSelectExecutionPl return nullptr; } +// TODO char and other numerical types support execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( execplan::CalpontSelectExecutionPlan& csep, optimizer::RBOptimizerContext& ctx) { @@ -154,13 +149,6 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( return unionVec; } - std::cout << "looking for " << keyColumn->columnName() << " in ctx.gwi.tableStatisticsMap " - << " with size " << ctx.gwi.tableStatisticsMap.size() << std::endl; - for (auto& [k, v] : ctx.gwi.tableStatisticsMap) - { - std::cout << "SchemaAndTableName " << k.schema << "." 
<< k.table << " column map size " << v.size() << std::endl; - } - cal_impl_if::SchemaAndTableName schemaAndTableName = {keyColumn->schemaName(), keyColumn->tableName()}; auto tableColumnsStatisticsIt = ctx.gwi.tableStatisticsMap.find(schemaAndTableName); if (tableColumnsStatisticsIt == ctx.gwi.tableStatisticsMap.end()) @@ -184,17 +172,13 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( // TODO char and other numerical types support std::vector> bounds; - // TODO need to process tail if number of buckets is not divisible by number of union units - // TODO non-overlapping buckets if it is a problem at all + // Loop over buckets to produce filter ranges for (size_t i = 0; i < numberOfUnionUnits - 1; ++i) { auto bucket = columnStatistics.get_json_histogram().begin() + i * numberOfBucketsPerUnionUnit; auto endBucket = columnStatistics.get_json_histogram().begin() + (i + 1) * numberOfBucketsPerUnionUnit; uint64_t currentLowerBound = *(uint32_t*)bucket->start_value.data(); uint64_t currentUpperBound = *(uint32_t*)endBucket->start_value.data(); - - std::cout << "currentLowerBound " << currentLowerBound << " currentUpperBound " << currentUpperBound - << std::endl; bounds.push_back({currentLowerBound, currentUpperBound}); } @@ -202,12 +186,7 @@ execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable( // NB despite the fact that currently Histogram_json_hb has the last bucket that has end as its start auto lastBucket = columnStatistics.get_json_histogram().begin() + (numberOfUnionUnits - 1) * numberOfBucketsPerUnionUnit; uint64_t currentLowerBound = *(uint32_t*)lastBucket->start_value.data(); - std::cout << "lastBucket start_value " << currentLowerBound << std::endl; uint64_t currentUpperBound = *(uint32_t*)columnStatistics.get_last_bucket_end_endp().data(); - std::cout << "Histogram end_value " << currentUpperBound << std::endl; - - std::cout << "last currentLowerBound " << currentLowerBound << " last currentUpperBound " << 
currentUpperBound - << std::endl; bounds.push_back({currentLowerBound, currentUpperBound}); for (auto& bound : bounds) @@ -270,97 +249,13 @@ void applyParallelCES(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerCon derivedSCEP->filters(nullptr); } } - // Remove the filters as they were pushed down to union units - // This is inappropriate for EXISTS filter and join conditions - // csep.filters(nullptr); - // There must be no derived at this point. + // Remove the filters if necessary using csep.filters(nullptr) as they were pushed down to union units + // But this is inappropriate for EXISTS filter and join conditions + // There must be no derived at this point, so we can replace it with the new derived table list csep.derivedTableList(newDerivedTableList); // Replace table list with new table list populated with union units csep.tableList(newTableList); csep.returnedCols(newReturnedColumns); } -// execplan::CalpontSelectExecutionPlan::SelectList makeUnionFromTable_exists( -// const size_t numberOfLegs, execplan::CalpontSelectExecutionPlan& csep) -// { -// execplan::CalpontSelectExecutionPlan::SelectList unionVec; -// unionVec.reserve(numberOfLegs); -// std::vector> bounds( -// {{0, 3000961}, {3000961, std::numeric_limits::max()}}); -// for (auto bound : bounds) -// { -// auto clonedCSEP = csep.cloneWORecursiveSelects(); -// clonedCSEP->filters(nullptr); -// // Add BETWEEN based on key column range -// clonedCSEP->filters(filtersWithNewRangeAddedIfNeeded(clonedCSEP, bound)); -// unionVec.push_back(clonedCSEP); -// } - -// return unionVec; -// } - -// // TODO: remove applyParallelCES_exists -// void applyParallelCES_exists(execplan::CalpontSelectExecutionPlan& csep, RBOptimizerContext& ctx) -// { -// auto tables = csep.tableList(); -// execplan::CalpontSelectExecutionPlan::TableList newTableList; -// execplan::CalpontSelectExecutionPlan::SelectList newDerivedTableList; -// execplan::CalpontSelectExecutionPlan::ReturnedColumnList newReturnedColumns; - -// // 
ATM Must be only 1 table -// for (auto& table : tables) -// { -// if (!table.isColumnstore()) -// { -// auto derivedSCEP = csep.cloneWORecursiveSelects(); -// // need to add a level here -// std::string tableAlias = RewrittenSubTableAliasPrefix + table.schema + "_" + table.table + "_" + -// std::to_string(ctx.uniqueId); - -// derivedSCEP->location(execplan::CalpontSelectExecutionPlan::FROM); -// derivedSCEP->subType(execplan::CalpontSelectExecutionPlan::FROM_SUBS); -// derivedSCEP->derivedTbAlias(tableAlias); - -// // TODO: hardcoded for now -// size_t parallelFactor = 2; -// // Create a copy of the current leaf CSEP with additional filters to partition the key column -// auto additionalUnionVec = makeUnionFromTable_exists(parallelFactor, csep); -// derivedSCEP->unionVec().insert(derivedSCEP->unionVec().end(), additionalUnionVec.begin(), -// additionalUnionVec.end()); - -// size_t colPosition = 0; -// // change parent to derived table columns -// for (auto& rc : csep.returnedCols()) -// { -// auto rcCloned = boost::make_shared(*rc); -// // TODO timezone and result type are not copied -// // TODO add specific ctor for this functionality -// rcCloned->tableName(""); -// rcCloned->schemaName(""); -// rcCloned->tableAlias(tableAlias); -// rcCloned->colPosition(colPosition++); -// rcCloned->resultType(rc->resultType()); - -// newReturnedColumns.push_back(rcCloned); -// } - -// newDerivedTableList.push_back(derivedSCEP); -// execplan::CalpontSystemCatalog::TableAliasName tn = execplan::make_aliasview("", "", tableAlias, ""); -// newTableList.push_back(tn); -// // Remove the filters as they were pushed down to union units -// // This is inappropriate for EXISTS filter and join conditions -// // TODO if needed -// derivedSCEP->filters(nullptr); -// } -// } -// // Remove the filters as they were pushed down to union units -// // This is inappropriate for EXISTS filter and join conditions -// // csep.filters(nullptr); -// // There must be no derived at this point. 
-// csep.derivedTableList(newDerivedTableList); -// // Replace table list with new table list populated with union units -// csep.tableList(newTableList); -// csep.returnedCols(newReturnedColumns); -// } - } // namespace optimizer From a01c883e074cdd68d1155ec4280114a1f87cf7a7 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 18 Jul 2025 15:55:22 +0000 Subject: [PATCH 28/51] feat(rbo,rules): mock Histogram for ES < 11.4 --- dbcon/mysql/ha_mcs_execplan.cpp | 7 +++++++ dbcon/mysql/idb_mysql.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index c3082ba5d..148a9ccba 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -6291,6 +6291,8 @@ int processLimitAndOffset(SELECT_LEX& select_lex, gp_walk_info& gwi, SCSEP& csep // Loop over available indexes to find and extract corresponding EI column statistics // for the first column of the index if any. // Statistics is stored in GWI context. 
+// Mock for ES 10.6 +#if MYSQL_VERSION_ID >= 110401 void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) { for (uint j = 0; j < ifp->field->table->s->keys; j++) @@ -6313,6 +6315,11 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) } } } +#else +void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) +{ +} +#endif /*@brief Process SELECT part of a query or sub-query */ /*********************************************************** diff --git a/dbcon/mysql/idb_mysql.h b/dbcon/mysql/idb_mysql.h index 89c0291c3..efc4260e3 100644 --- a/dbcon/mysql/idb_mysql.h +++ b/dbcon/mysql/idb_mysql.h @@ -74,7 +74,37 @@ #include "rpl_rli.h" #include "my_dbug.h" #include "sql_show.h" +#if MYSQL_VERSION_ID >= 110401 #include "opt_histogram_json.h" +#else +// Mock Histogram_bucket for MySQL < 11.4 +struct Histogram_bucket +{ + std::string start_value; + + double cum_fract; + + longlong ndv; +}; + +class Histogram_json_hb final : public Histogram_base +{ + std::vector buckets; + + std::string last_bucket_end_endp; + +public: + const std::vector& get_json_histogram() const + { + return buckets; + } + + const std::string& get_last_bucket_end_endp() const + { + return last_bucket_end_endp; + } +}; +#endif #pragma GCC diagnostic pop // Now clean up the pollution as best we can... 
From f881bae4960344710a9d361518ba5456656eccd2 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 18 Jul 2025 16:32:08 +0000 Subject: [PATCH 29/51] chore(rbo,rules): fixes to compile MCS with ES 10.6 --- dbcon/mysql/ha_mcs_execplan.cpp | 2 +- dbcon/mysql/ha_mcs_impl_if.h | 1 - dbcon/mysql/ha_select_sub.cpp | 2 -- dbcon/mysql/idb_mysql.h | 2 +- dbcon/mysql/rbo_apply_parallel_ces.cpp | 5 ++--- 5 files changed, 4 insertions(+), 8 deletions(-) diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 148a9ccba..5c12480a1 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -6316,7 +6316,7 @@ void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) } } #else -void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) +void extractColumnStatistics(Item_field* /*ifp*/, gp_walk_info& /*gwi*/) { } #endif diff --git a/dbcon/mysql/ha_mcs_impl_if.h b/dbcon/mysql/ha_mcs_impl_if.h index acb0b042e..7bf0581ef 100644 --- a/dbcon/mysql/ha_mcs_impl_if.h +++ b/dbcon/mysql/ha_mcs_impl_if.h @@ -28,7 +28,6 @@ #include #include "basic/string_utils.h" #include "idb_mysql.h" -#include "sql_statistics.h" #include "ha_mcs_sysvars.h" #include "dmlpkg.h" diff --git a/dbcon/mysql/ha_select_sub.cpp b/dbcon/mysql/ha_select_sub.cpp index caddce6af..8f29b2e22 100644 --- a/dbcon/mysql/ha_select_sub.cpp +++ b/dbcon/mysql/ha_select_sub.cpp @@ -98,8 +98,6 @@ SCSEP SelectSubQuery::transform() // Insert column statistics fGwip.mergeTableStatistics(gwi.tableStatisticsMap); - // std::cout << "fGwip.columnStatisticsMap 2 size " << fGwip.columnStatisticsMap.size() << std::endl; - // std::cout << "gwi.columnStatisticsMap 2 size " << gwi.columnStatisticsMap.size() << std::endl; // Insert subselect CSEP fGwip.subselectList.push_back(csep); diff --git a/dbcon/mysql/idb_mysql.h b/dbcon/mysql/idb_mysql.h index efc4260e3..d18e2bcf3 100644 --- a/dbcon/mysql/idb_mysql.h +++ b/dbcon/mysql/idb_mysql.h @@ -87,7 +87,7 @@ struct Histogram_bucket 
longlong ndv; }; -class Histogram_json_hb final : public Histogram_base +class Histogram_json_hb { std::vector buckets; diff --git a/dbcon/mysql/rbo_apply_parallel_ces.cpp b/dbcon/mysql/rbo_apply_parallel_ces.cpp index 05e1d879b..1d710126f 100644 --- a/dbcon/mysql/rbo_apply_parallel_ces.cpp +++ b/dbcon/mysql/rbo_apply_parallel_ces.cpp @@ -58,7 +58,7 @@ bool matchParallelCES(execplan::CalpontSelectExecutionPlan& csep) { auto tables = csep.tableList(); // This is leaf and there are no other tables at this level in neither UNION, nor derived table. - // WIP filter out CSEPs with orderBy, groupBy, having + // TODO filter out CSEPs with orderBy, groupBy, having // Filter out tables that were re-written. return tables.size() == 1 && !tables[0].isColumnstore() && !tableIsInUnion(tables[0], csep); } @@ -76,13 +76,12 @@ execplan::ParseTree* filtersWithNewRangeAddedIfNeeded(execplan::SCSEP& csep, exe auto* filterColLeftOp = new execplan::ConstantColumnUInt(bound.second, 0, 0); // set TZ // There is a question with ownership of the const column - // WIP here we lost upper bound value if predicate is not changed to weak lt + // TODO here we lost upper bound value if predicate is not changed to weak lt execplan::SOP ltOp = boost::make_shared(execplan::PredicateOperator("<")); ltOp->setOpType(filterColLeftOp->resultType(), tableKeyColumnLeftOp->resultType()); ltOp->resultType(ltOp->operationType()); auto* sfr = new execplan::SimpleFilter(ltOp, tableKeyColumnLeftOp, filterColLeftOp); - // auto tableKeyColumn = derivedSCEP->returnedCols().front(); auto tableKeyColumnRightOp = new execplan::SimpleColumn(column); tableKeyColumnRightOp->resultType(column.resultType()); // TODO hardcoded column type and value From b0c08d3f4e9030efcac357051fe097ced70b1aaf Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 17 Jul 2025 17:20:20 +0200 Subject: [PATCH 30/51] don't build columnstore if the compiler doesn't support c++20 --- CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bf1da3a6d..69f8cdb77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,9 @@ include(compiler_flags) include(misc) include(cpack_manage) -add_subdirectory(dbcon/mysql) +if(NOT __msg1_CS_NO_CXX20) + add_subdirectory(dbcon/mysql) +endif() if(NOT TARGET columnstore) return() endif() From 1aee9ccfb1d602acca8600353a0b287455f6307e Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 17 Jul 2025 18:17:51 +0200 Subject: [PATCH 31/51] fix deb builds --- debian/mariadb-plugin-columnstore.install | 1 + 1 file changed, 1 insertion(+) diff --git a/debian/mariadb-plugin-columnstore.install b/debian/mariadb-plugin-columnstore.install index 0efba1cf8..89f78a604 100644 --- a/debian/mariadb-plugin-columnstore.install +++ b/debian/mariadb-plugin-columnstore.install @@ -45,6 +45,7 @@ usr/bin/mycnfUpgrade usr/bin/post-mysql-install usr/bin/post-mysqld-install usr/bin/reset_locks +usr/bin/rgprint usr/bin/rollback usr/bin/save_brm usr/bin/smcat From f5cf7ebf7bee2a93be27514120a625e8998f13f0 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Fri, 18 Jul 2025 18:20:37 +0200 Subject: [PATCH 32/51] fix boost compilation on aarch64-openeuler __float128 related issues, specific to aarch64-openeuler, don't happen on amd64-openeuler or aarch64-rhel first, there's only dynamic libquadmath.so, and config/has_float128.cpp for charconv doesn't specify shared; so charconv doesn't detect __float128. But even if this is fixed (patched) it still doesn't find __float128 later (may be missing includes?) and it's using __float128 also when BOOST_CHARCONV_HAS_QUADMATH is not defined. Considering that we don't actually need charconv, let's not patch boost to fix it and simply disable it instead. Also, let's disable mpi that we don't need and it also produces (likely non-fatal) errors during compilation. 
And don't redirect logs, let's have everything in CI stdout, --- cmake/boost.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/boost.cmake b/cmake/boost.cmake index 3e5d68eb8..f9b052f18 100644 --- a/cmake/boost.cmake +++ b/cmake/boost.cmake @@ -23,7 +23,7 @@ link_directories("${Boost_LIBRARY_DIRS}") set(_cxxargs "-fPIC -DBOOST_NO_AUTO_PTR -fvisibility=default") set(_b2args cxxflags=${_cxxargs};cflags=-fPIC;threading=multi;${_extra};toolset=${_toolset} - --without-python;--prefix=${INSTALL_LOCATION} + --without-mpi;--without-charconv;--without-python;--prefix=${INSTALL_LOCATION} ) set(byproducts) @@ -48,8 +48,8 @@ ExternalProject_Add( BUILD_COMMAND ./b2 -q ${_b2args} BUILD_IN_SOURCE TRUE INSTALL_COMMAND ./b2 -q install ${_b2args} - LOG_BUILD TRUE - LOG_INSTALL TRUE + #LOG_BUILD TRUE + #LOG_INSTALL TRUE EXCLUDE_FROM_ALL TRUE ${byproducts} ) From a11edae6013920b0b28dcca131e984a36791f98f Mon Sep 17 00:00:00 2001 From: drrtuy Date: Mon, 21 Jul 2025 12:33:19 +0000 Subject: [PATCH 33/51] chore(): removed test rgprint binary --- debian/mariadb-plugin-columnstore.install | 1 - tools/CMakeLists.txt | 1 - tools/rgprint/CMakeLists.txt | 9 --- tools/rgprint/rgprint.cpp | 95 ----------------------- 4 files changed, 106 deletions(-) delete mode 100644 tools/rgprint/CMakeLists.txt delete mode 100644 tools/rgprint/rgprint.cpp diff --git a/debian/mariadb-plugin-columnstore.install b/debian/mariadb-plugin-columnstore.install index 89f78a604..0efba1cf8 100644 --- a/debian/mariadb-plugin-columnstore.install +++ b/debian/mariadb-plugin-columnstore.install @@ -45,7 +45,6 @@ usr/bin/mycnfUpgrade usr/bin/post-mysql-install usr/bin/post-mysqld-install usr/bin/reset_locks -usr/bin/rgprint usr/bin/rollback usr/bin/save_brm usr/bin/smcat diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index a0d6971c2..cb4002ecb 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -10,6 +10,5 @@ add_subdirectory(getConfig) add_subdirectory(idbmeminfo) 
add_subdirectory(passwd) add_subdirectory(rebuildEM) -add_subdirectory(rgprint) add_subdirectory(setConfig) add_subdirectory(viewtablelock) diff --git a/tools/rgprint/CMakeLists.txt b/tools/rgprint/CMakeLists.txt deleted file mode 100644 index 699cef3e9..000000000 --- a/tools/rgprint/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -include_directories(${ENGINE_COMMON_INCLUDES}) - -# ########## next target ############### - -set(rgprint_SRCS rgprint.cpp) - -columnstore_executable(rgprint ${rgprint_SRCS}) - -columnstore_link(rgprint ${ENGINE_LDFLAGS} ${ENGINE_WRITE_LIBS}) diff --git a/tools/rgprint/rgprint.cpp b/tools/rgprint/rgprint.cpp deleted file mode 100644 index 49b91f59a..000000000 --- a/tools/rgprint/rgprint.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright (C) 2021 MariaDB Corporation - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; version 2 of - the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - MA 02110-1301, USA. 
*/ - -#include -#include -#include -#include -#include -#include - -int main(int argc, char* argv[]) -{ - if (argc < 2) - { - std::cerr << "Usage: " << argv[0] << " " << std::endl; - return 0; - } - rowgroup::RowGroup rg; - char* p = strrchr(argv[1], '/'); - int rfd = -1; - if (p == nullptr) - p = argv[1]; - unsigned pid; - void* agg; - auto c = sscanf(p, "Agg-p%u-t%p-", &pid, &agg); - if (c == 2) - { - char fname[1024]; - snprintf(fname, sizeof(fname), "META-p%u-t%p", pid, agg); - rfd = open(fname, O_RDONLY); - } - if (rfd < 0) - rfd = open("./META", O_RDONLY); - if (rfd >= 0) - { - struct stat rst; - fstat(rfd, &rst); - messageqcpp::ByteStream rbs; - rbs.needAtLeast(rst.st_size); - rbs.restart(); - auto r = read(rfd, rbs.getInputPtr(), rst.st_size); - if (r != rst.st_size) - abort(); - rbs.advanceInputPtr(r); - rg.deserialize(rbs); - close(rfd); - } - else - { - std::vector pos{2, 6, 22, 30, 46, 54}; // ? - std::vector oids{3011, 3011, 3011, 3011, 3011}; // ? - std::vector keys{1, 1, 1, 1, 1}; // ? 
- std::vector col_t{ - execplan::CalpontSystemCatalog::INT, execplan::CalpontSystemCatalog::LONGDOUBLE, - execplan::CalpontSystemCatalog::UBIGINT, execplan::CalpontSystemCatalog::LONGDOUBLE, - execplan::CalpontSystemCatalog::UBIGINT}; - std::vector csN{8, 8, 8, 8, 8}; - std::vector scale{0, 0, 0, 0, 0}; - std::vector prec{10, 4294967295, 9999, 4294967295, 19}; - rg = rowgroup::RowGroup(5, pos, oids, keys, col_t, csN, scale, prec, 20, false, std::vector{}); - } - - int fd = open(argv[1], O_RDONLY); - struct stat st; - fstat(fd, &st); - - messageqcpp::ByteStream bs; - bs.needAtLeast(st.st_size); - bs.restart(); - auto r = read(fd, bs.getInputPtr(), st.st_size); - if (r != st.st_size) - abort(); - bs.advanceInputPtr(r); - rowgroup::RGData rst; - rst.deserialize(bs); - - rg.setData(&rst); - close(fd); - std::cout << "RowGroup data:\n" << rg.toString() << std::endl; - return 0; -} \ No newline at end of file From cd4bf4fb9e4a04fe9a6ea318582f1aca9d9a31aa Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 18 Jul 2025 19:01:38 +0000 Subject: [PATCH 34/51] chore(CEJ): make error message more user-friendly --- utils/libmysql_client/libmysql_client.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/libmysql_client/libmysql_client.cpp b/utils/libmysql_client/libmysql_client.cpp index 540718183..3186bda27 100644 --- a/utils/libmysql_client/libmysql_client.cpp +++ b/utils/libmysql_client/libmysql_client.cpp @@ -1,4 +1,5 @@ /* Copyright (C) 2014 InfiniDB, Inc. 
+ Copyright (C) 2016-2025 MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -91,7 +92,7 @@ int LibMySQL::init(const char* h, unsigned int p, const char* u, const char* w, static const std::string extendNetTimeoutQuery = "SET SESSION net_write_timeout = 3600"; if (mysql_real_query(fCon, extendNetTimeoutQuery.c_str(), extendNetTimeoutQuery.length()) != 0) { - fErrStr = "fatal error setting net_write_timeout=3600 in libmysql_client lib"; + fErrStr = "Set or verify credentials in CrossEngineSupport section of Columnstore.xml."; ret = -1; return ret; } From f33e9ce86c73f606dbef2d50cac8b92e393344ef Mon Sep 17 00:00:00 2001 From: "aleksei.bukhalov" Date: Wed, 9 Jul 2025 14:21:28 +0200 Subject: [PATCH 35/51] chore(ci): MCOL-6071 fix regression results publish --- .drone.jsonnet | 3 ++- build/run_regression.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 732ae9cae..4971389ea 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -614,9 +614,10 @@ local Pipeline(branch, platform, event, arch="amd64", server="10.6-enterprise", (if (platform == "rockylinux:8" && arch == "amd64" && customBootstrapParamsKey == "gcc-toolset") then [pipeline.dockerfile] + [pipeline.dockerhub] + [pipeline.multi_node_mtr] else [pipeline.mtr] + [pipeline.mtrlog] + [pipeline.publish("mtrlog")]) + [pipeline.regression(regression_tests[i], if (i == 0) then ["mtr", "publish pkg", "publish cmapi build"] else [regression_tests[i - 1]]) for i in indexes(regression_tests)] + [pipeline.regressionlog] + + [pipeline.publish("regressionlog")] + // [pipeline.upgrade(mdb_server_versions[i]) for i in indexes(mdb_server_versions)] + // (if (std.length(mdb_server_versions) == 0) then [] else [pipeline.upgradelog] + [pipeline.publish("upgradelog")]) + - (if (event == "cron") then [pipeline.publish("regressionlog latest", "latest")] else 
[pipeline.publish("regressionlog")]), + (if (event == "cron") then [pipeline.publish("regressionlog latest", "latest")] else []), volumes: [pipeline._volumes.mdb { temp: {} }, pipeline._volumes.docker { host: { path: "/var/run/docker.sock" } }], trigger: { diff --git a/build/run_regression.sh b/build/run_regression.sh index 16ed1db59..eba4cc019 100755 --- a/build/run_regression.sh +++ b/build/run_regression.sh @@ -28,6 +28,7 @@ BUILD_DIR="verylongdirnameforverystrangecpackbehavior" prepare_regression() { if execInnerDocker "${CONTAINER_NAME}" "test -f /mariadb-columnstore-regression-test/mysql/queries/queryTester.cpp"; then + message "Preparation for regression tests is already done — skipping" return 0 fi @@ -68,7 +69,7 @@ prepare_regression() { run_test() { message "Running test: ${TEST_NAME:-}" - execInnerDocker "${CONTAINER_NAME}" "bash -c 'sleep 4800 && bash /save_stack.sh /mariadb-columnstore-regression-test/mysql/queries/nightly/alltest/reg-logs/' &" + execInnerDocker "${CONTAINER_NAME}" "sleep 4800 && bash /save_stack.sh /mariadb-columnstore-regression-test/mysql/queries/nightly/alltest/reg-logs/ &" execInnerDockerNoTTY "${CONTAINER_NAME}" \ "export PRESERVE_LOGS=true && cd /mariadb-columnstore-regression-test/mysql/queries/nightly/alltest && \ From bfdd581f6baa21a72d942ef4cf3c0f4d36c22d27 Mon Sep 17 00:00:00 2001 From: "aleksei.bukhalov" Date: Wed, 23 Jul 2025 18:50:35 +0200 Subject: [PATCH 36/51] chore(docs): update BUILD.md --- BUILD.md | 27 +++++++++++++++++----- install-deps.sh | 59 ------------------------------------------------- 2 files changed, 21 insertions(+), 65 deletions(-) delete mode 100755 install-deps.sh diff --git a/BUILD.md b/BUILD.md index 81d73e503..0d2486ba1 100644 --- a/BUILD.md +++ b/BUILD.md @@ -21,7 +21,7 @@ git clone https://github.com/MariaDB/server.git MariaDB server contains many git submodules that need to be checked out with: ```bash -git submodule update --init --recursive --depth=1 +git submodule update --init --recursive 
``` @@ -46,12 +46,27 @@ git config --global --add safe.directory `pwd` ## Build -Regarding dependencies: If this is the first time building MCS on your system you should either use the `./install-deps.sh` script or pass `--install-deps` to the `bootstrap_mcs.sh` script. +The `bootstrap_mcs.sh` script can now do **two** main things: -For development convenience, building the MariaDB server with MCS can be done with: +1. **Build & install** ColumnStore into your system +```bash +cd server/storage/columnstore/columnstore -``` -sudo -E build/bootstrap_mcs.sh +sudo build/bootstrap_mcs.sh --install-deps ``` -Tested for: Ubuntu:20.04/22.04, CentOS:7, Debian:10/11, RockyLinux:8 +2. **Build native OS packages** (RPM or DEB) + +```bash +cd server/storage/columnstore/columnstore +sudo build/bootstrap_mcs.sh --install-deps --build-packages +# → find your .rpm/.deb files in the build directory +``` +Note: Packages can be built only for the OS you’re on—for so for example if you are running --build-packages on Rocky Linux it will produce RPMs for Rocky. 
+You can see the full options list in the script itself + +> **Supported distros:** +> Ubuntu:20.04/22.04/24.04, Debian:11/12, Rocky Linux:8/9 + + + diff --git a/install-deps.sh b/install-deps.sh deleted file mode 100755 index 6ff46f9f2..000000000 --- a/install-deps.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env bash - -set -e -if test $(id -u) != 0 ; then - SUDO=sudo -fi -export LC_ALL=C - -source /etc/os-release - -case "$ID" in -ubuntu|debian) - echo "Using apt-get to install dependencies" - $SUDO apt-get update -y - $SUDO apt-get install -y build-essential automake libboost-all-dev bison \ - cmake libncurses5-dev libreadline-dev libperl-dev libssl-dev \ - libxml2-dev libkrb5-dev flex libpam-dev libreadline-dev libsnappy-dev \ - libcurl4-openssl-dev - $SUDO apt-get install -y libboost-dev libboost-all-dev - case "$VERSION" in - *Bionic*) - echo "Install dependencies specific to Ubuntu Bionic" - ;; - *Focal*) - echo "Install dependencies specific to Ubuntu Focal" - ;; - *) - echo "Unknown OS distribution" - ;; - esac - ;; -centos) - echo "Using yum to install dependencies" - $SUDO yum -y install epel-release - $SUDO yum -y groupinstall "Development Tools" - $SUDO yum -y install bison ncurses-devel readline-devel perl-devel \ - openssl-devel cmake libxml2-devel gperf libaio-devel libevent-devel \ - python-devel ruby-devel tree wget pam-devel snappy-devel libicu \ - wget strace ltrace gdb rsyslog net-tools openssh-server expect boost \ - perl-DBI libicu boost-devel initscripts jemalloc-devel libcurl-devel - ;; -opensuse*|suse|sles) - echo "Using zypper to install dependencies" - $SUDO zypper install -y bison ncurses-devel readline-devel \ - libopenssl-devel cmake libxml2-devel gperf libaio-devel \ - libevent-devel python-devel ruby-devel tree wget pam-devel \ - snappy-devel libicu-devel libboost_system-devel \ - libboost_filesystem-devel libboost_thread-devel libboost_regex-devel \ - libboost_date_time-devel libboost_chrono-devel wget strace ltrace gdb \ - 
rsyslog net-tools expect perl-DBI libicu boost-devel jemalloc-devel \ - libcurl-devel gcc gcc-c++ automake libtool - ;; -*) - echo "$ID is unknown, dependencies will have to be installed manually." - exit 1 - ;; -esac - -echo "Dependencies have been installed successfully" From f8a57b4412bf00e7d38456a791ab843182f99b9f Mon Sep 17 00:00:00 2001 From: Alexander Presnyakov Date: Wed, 23 Jul 2025 16:38:15 +0000 Subject: [PATCH 37/51] BRM journal is always singular, do not apply suffix logic to it --- cmapi/cmapi_server/controllers/endpoints.py | 14 ++++++++++++-- cmapi/cmapi_server/test/test_em_endpoints.py | 16 ++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/cmapi/cmapi_server/controllers/endpoints.py b/cmapi/cmapi_server/controllers/endpoints.py index 870e07b35..9df64fe0d 100644 --- a/cmapi/cmapi_server/controllers/endpoints.py +++ b/cmapi/cmapi_server/controllers/endpoints.py @@ -748,7 +748,12 @@ class ExtentMapController: retry_count += 1 continue elem_current_suffix = ret.stdout.decode("utf-8").rstrip() - elem_current_filename = f'{EM_PATH_SUFFIX}/{elem_current_suffix}_{element}' + + suffix_for_file = elem_current_suffix + # The journal is always in the current directory, strip trailing A/B from suffix + if element == 'journal' and suffix_for_file.endswith(('A', 'B')): + suffix_for_file = suffix_for_file[:-1] + elem_current_filename = f'{EM_PATH_SUFFIX}/{suffix_for_file}_{element}' # TODO: Remove conditional once container dispatcher # uses non-root by default @@ -774,8 +779,13 @@ class ExtentMapController: ) elem_current_name = Path(MCS_BRM_CURRENT_PATH) elem_current_filename = elem_current_name.read_text().rstrip() + + suffix_for_file = elem_current_filename + # The journal is always in the current directory, strip trailing A/B from suffix + if element == 'journal' and suffix_for_file.endswith(('A', 'B')): + suffix_for_file = suffix_for_file[:-1] elem_current_file = Path( - f'{MCS_EM_PATH}/{elem_current_filename}_{element}' + 
f'{MCS_EM_PATH}/{suffix_for_file}_{element}' ) result = elem_current_file.read_bytes() diff --git a/cmapi/cmapi_server/test/test_em_endpoints.py b/cmapi/cmapi_server/test/test_em_endpoints.py index a166cc6e8..738e9d269 100644 --- a/cmapi/cmapi_server/test/test_em_endpoints.py +++ b/cmapi/cmapi_server/test/test_em_endpoints.py @@ -66,7 +66,13 @@ class TestEMEndpoints(unittest.TestCase): ["smcat", S3_BRM_CURRENT_PATH], stdout=subprocess.PIPE ) element_current_suffix = ret.stdout.decode("utf-8").rstrip() - element_current_filename = f'{EM_PATH_SUFFIX}/{element_current_suffix}_{element}' + + suffix_for_file = element_current_suffix + # Journal is always singular, so strip trailing A/B from suffix + if element == 'journal' and suffix_for_file.endswith(('A', 'B')): + suffix_for_file = suffix_for_file[:-1] + + element_current_filename = f'{EM_PATH_SUFFIX}/{suffix_for_file}_{element}' ret = subprocess.run( ["smcat", element_current_filename], stdout=subprocess.PIPE ) @@ -74,8 +80,14 @@ class TestEMEndpoints(unittest.TestCase): else: element_current_name = Path(MCS_BRM_CURRENT_PATH) element_current_filename = element_current_name.read_text().rstrip() + + suffix_for_file = element_current_filename + # Journal is always singular, so strip trailing A/B from suffix + if element == 'journal' and suffix_for_file.endswith(('A', 'B')): + suffix_for_file = suffix_for_file[:-1] + element_current_file = Path( - f'{MCS_EM_PATH}/{element_current_filename}_{element}' + f'{MCS_EM_PATH}/{suffix_for_file}_{element}' ) result = element_current_file.read_bytes() return result From a92bd2d142655b0ae9d6b383371efc2fb85f3df1 Mon Sep 17 00:00:00 2001 From: drrtuy Date: Fri, 25 Jul 2025 13:07:48 +0100 Subject: [PATCH 38/51] fix(build): statistics that is not yet available in ES 11.4/11.8 does not break builds anymore --- dbcon/mysql/idb_mysql.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbcon/mysql/idb_mysql.h b/dbcon/mysql/idb_mysql.h index d18e2bcf3..204988d60 100644 --- 
a/dbcon/mysql/idb_mysql.h +++ b/dbcon/mysql/idb_mysql.h @@ -74,7 +74,7 @@ #include "rpl_rli.h" #include "my_dbug.h" #include "sql_show.h" -#if MYSQL_VERSION_ID >= 110401 +#if MYSQL_VERSION_ID >= 120401 #include "opt_histogram_json.h" #else // Mock Histogram_bucket for MySQL < 11.4 From 3418e68b7861d2b5b3d57fae08ff589d8eab1a00 Mon Sep 17 00:00:00 2001 From: Sergey Zefirov <72864488+mariadb-SergeyZefirov@users.noreply.github.com> Date: Sun, 27 Jul 2025 22:06:40 +0300 Subject: [PATCH 39/51] fix(ddl): MCOL-5974 TIMESTAMP columns are not IS NOT NULL by default in MCS (#3670) The old behavior was to assign TIMESTAMP columns an IS NOT NULL constraint if list of constraints was empty. Apparently, this particular invariant was changed in 11.4 and bugfixes' mcol-5480.test started to fail. This patch fixes behavioral difference and reenables mcol-5480 test in 11.4. --- dbcon/mysql/ha_mcs_ddl.cpp | 2 ++ dbcon/mysql/ha_mcs_execplan.cpp | 2 +- mysql-test/columnstore/bugfixes/mcol-5480.test | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_mcs_ddl.cpp b/dbcon/mysql/ha_mcs_ddl.cpp index cbd8b7430..0c7fece2f 100644 --- a/dbcon/mysql/ha_mcs_ddl.cpp +++ b/dbcon/mysql/ha_mcs_ddl.cpp @@ -867,12 +867,14 @@ int ProcessDDLStatement(string& ddlStatement, string& schema, const string& /*ta return rc; } +#if MYSQL_VERSION_ID < 110400 // For TIMESTAMP, if no constraint is given, default to NOT NULL if (createTable->fTableDef->fColumns[i]->fType->fType == ddlpackage::DDL_TIMESTAMP && createTable->fTableDef->fColumns[i]->fConstraints.empty()) { createTable->fTableDef->fColumns[i]->fConstraints.push_back(new ColumnConstraintDef(DDL_NOT_NULL)); } +#endif if (createTable->fTableDef->fColumns[i]->fDefaultValue) { diff --git a/dbcon/mysql/ha_mcs_execplan.cpp b/dbcon/mysql/ha_mcs_execplan.cpp index 5c12480a1..1807c97d7 100644 --- a/dbcon/mysql/ha_mcs_execplan.cpp +++ b/dbcon/mysql/ha_mcs_execplan.cpp @@ -6292,7 +6292,7 @@ int processLimitAndOffset(SELECT_LEX& 
select_lex, gp_walk_info& gwi, SCSEP& csep // for the first column of the index if any. // Statistics is stored in GWI context. // Mock for ES 10.6 -#if MYSQL_VERSION_ID >= 110401 +#if MYSQL_VERSION_ID >= 120401 void extractColumnStatistics(Item_field* ifp, gp_walk_info& gwi) { for (uint j = 0; j < ifp->field->table->s->keys; j++) diff --git a/mysql-test/columnstore/bugfixes/mcol-5480.test b/mysql-test/columnstore/bugfixes/mcol-5480.test index 37de6bb1c..ad195c357 100644 --- a/mysql-test/columnstore/bugfixes/mcol-5480.test +++ b/mysql-test/columnstore/bugfixes/mcol-5480.test @@ -2,7 +2,6 @@ # MCOL-5480 LDI loads values incorrectly for MEDIUMINT, TIME and TIMESTAMP # when cpimport is used for batch insert # ---source ../include/disable_11.4.inc --source ../include/have_columnstore.inc --source ../include/detect_maxscale.inc From 72eb0bbaf9fe8262d4f7113423a4d60aeb1afb41 Mon Sep 17 00:00:00 2001 From: Kristina Pavlova Date: Sun, 27 Jul 2025 22:10:27 +0300 Subject: [PATCH 40/51] chore(cpimport): MCOL-6033 Change batch max size (#3652) --- versioning/BRM/extentmap.h | 2 ++ writeengine/splitter/we_cmdargs.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/versioning/BRM/extentmap.h b/versioning/BRM/extentmap.h index 05d340b5a..8edc42915 100644 --- a/versioning/BRM/extentmap.h +++ b/versioning/BRM/extentmap.h @@ -100,6 +100,8 @@ using DBRootVec = std::vector; // assumed column width when calculating dictionary store extent size #define DICT_COL_WIDTH 8 +static const uint32_t MAX_EXTENT_SIZE = 8000000; + // valid values for EMEntry.status const int16_t EXTENTSTATUSMIN(0); // equal to minimum valid status value const int16_t EXTENTAVAILABLE(0); diff --git a/writeengine/splitter/we_cmdargs.cpp b/writeengine/splitter/we_cmdargs.cpp index dbf2166a0..8102f3448 100644 --- a/writeengine/splitter/we_cmdargs.cpp +++ b/writeengine/splitter/we_cmdargs.cpp @@ -733,7 +733,7 @@ void WECmdArgs::parseCmdLineArgs(int argc, char** argv) } else if (fBatchQty > 100000) { 
- fBatchQty = 10000; + fBatchQty = min(static_cast(fBatchQty), BRM::MAX_EXTENT_SIZE); } } if (vm.count("max-errors")) From 52cf091583bd0c953559d236712e15c9419498b0 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Thu, 26 Jun 2025 15:55:50 +0000 Subject: [PATCH 41/51] chore(ci): add ASAN and UBSAN to ci --- .drone.jsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 4971389ea..9f2fc7174 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -697,8 +697,8 @@ local FinalPipeline(branch, event) = { Pipeline(b, platform, triggeringEvent, a, server, flag, "") for a in ["amd64"] for b in std.objectFields(platforms) - for platform in ["rockylinux:8"] - for flag in ["gcc-toolset"] + for platform in ["ubuntu:24.04"] + for flag in ["ASan", "UBSan"] for triggeringEvent in events for server in servers[current_branch] ] From 0fb1599913b80630c37c5567090d311d82c551c3 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Thu, 26 Jun 2025 18:17:19 +0000 Subject: [PATCH 42/51] chore(ci): Patch autobake to avoid fakeroot and eatmydata for ASAN --- .drone.jsonnet | 18 ++++++++++-------- build/bootstrap_mcs.sh | 12 ++++++++++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 9f2fc7174..479d4315a 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -650,17 +650,19 @@ local FinalPipeline(branch, event) = { "failure", ], } + (if event == "cron" then { cron: ["nightly-" + std.strReplace(branch, ".", "-")] } else {}), - depends_on: std.map(function(p) std.join(" ", [branch, p, event, "amd64", "10.6-enterprise", "", ""]), platforms[current_branch]), +// depends_on: std.map(function(p) std.join(" ", [branch, p, event, "amd64", "10.6-enterprise", "", ""]), platforms[current_branch]), // +std.map(function(p) std.join(" ", [branch, p, event, "arm64", "10.6-enterprise", "", ""]), platforms_arm.develop), }; -[ - Pipeline(b, p, e, "amd64", s) - for b in std.objectFields(platforms) - for 
p in platforms[b] - for s in servers[b] - for e in events -] + +// [ +// Pipeline(b, p, e, "amd64", s) +// for b in std.objectFields(platforms) +// for p in platforms[b] +// for s in servers[b] +// for e in events +// ] + + + // [ // Pipeline(b, p, e, "arm64", s) // for b in std.objectFields(platforms_arm) diff --git a/build/bootstrap_mcs.sh b/build/bootstrap_mcs.sh index a7846c933..b87900efb 100755 --- a/build/bootstrap_mcs.sh +++ b/build/bootstrap_mcs.sh @@ -272,10 +272,19 @@ modify_packaging() { echo "Modifying_packaging..." cd $MDB_SOURCE_PATH + # Bypass of debian version list check in autobake if [[ $PKG_FORMAT == "deb" ]]; then sed -i 's|.*-d storage/columnstore.*|elif [[ -d storage/columnstore/columnstore/debian ]]|' debian/autobake-deb.sh fi + # patch to avoid fakeroot, which is using LD_PRELOAD for libfakeroot.so + # and eamtmydata which is using LD_PRELOAD for libeatmydata.so and this + # breaks intermediate build binaries to fail with "ASan runtime does not come first in initial library list + if [[ $PKG_FORMAT == "deb" && $ASAN = true ]]; then + sed -i 's|BUILDPACKAGE_DPKGCMD+=( "fakeroot" "--" )|echo "fakeroot was disabled for ASAN build"|' debian/autobake-deb.sh + sed -i 's|BUILDPACKAGE_DPKGCMD+=("eatmydata")|echo "eatmydata was disabled for ASAN build"|' debian/autobake-deb.sh + fi + #disable LTO for 22.04 for now if [[ $OS == 'ubuntu:22.04' || $OS == 'ubuntu:24.04' ]]; then for i in mariadb-plugin-columnstore mariadb-server mariadb-server-core mariadb mariadb-10.6; do @@ -316,7 +325,6 @@ modify_packaging() { } construct_cmake_flags() { - MDB_CMAKE_FLAGS=( -DBUILD_CONFIG=mysql_release -DCMAKE_BUILD_TYPE=$MCS_BUILD_TYPE @@ -777,7 +785,7 @@ if [[ $BUILD_PACKAGES = true ]]; then exit_code=$? 
if [[ $SCCACHE = true ]]; then - sccache --show-stats + sccache --show-adv-stats fi exit $exit_code From b53f831d8fad2556f00af7bd031f0b764b8784dc Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Wed, 9 Jul 2025 23:00:18 +0000 Subject: [PATCH 43/51] Add flag key prefix to $result --- .drone.jsonnet | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.drone.jsonnet b/.drone.jsonnet index 479d4315a..b91c42dde 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -45,11 +45,16 @@ local customBootstrapParamsForExisitingPipelines(envkey) = local customBootstrapParamsForAdditionalPipelinesMap = { ASan: "--asan", +<<<<<<< HEAD TSAN: "--tsan", UBSan: "--ubsan", MSan: "--msan", "libcpp": "--libcpp", "gcc-toolset": "--gcc-toolset-for-rocky-8" +======= + //TSAN: "--tsan", + UBSan: "--ubsan", +>>>>>>> a25d751cb (Add flag key prefix to $result) }; local customBuildFlags(buildKey) = From b1ecea09571714467fe861548b1db65258bc3ae5 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 11 Jul 2025 12:05:27 +0000 Subject: [PATCH 44/51] Fix config files in packages for sanitizers, do not abort on errors for packages add options for ASAN via code linkage --- build/asan_options.cpp.in | 4 ++++ build/bootstrap_mcs.sh | 29 +++++++++++++++++++++-------- cmake/ColumnstoreLibrary.cmake | 6 +++++- oam/install_scripts/CMakeLists.txt | 24 ++++++++++++++++++++---- 4 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 build/asan_options.cpp.in diff --git a/build/asan_options.cpp.in b/build/asan_options.cpp.in new file mode 100644 index 000000000..88b17442c --- /dev/null +++ b/build/asan_options.cpp.in @@ -0,0 +1,4 @@ +extern "C" const char* __asan_default_options() +{ + return "@COLUMNSTORE_STANDALONE_BINARIES_ASAN_COPTIONS@"; +} diff --git a/build/bootstrap_mcs.sh b/build/bootstrap_mcs.sh index b87900efb..d1ecadc53 100755 --- a/build/bootstrap_mcs.sh +++ b/build/bootstrap_mcs.sh @@ -346,6 +346,11 @@ construct_cmake_flags() { -DWITH_WSREP=NO ) + if [[ $BUILD_PACKAGES = true 
]]; then + MDB_CMAKE_FLAGS+=(-DCOLUMNSTORE_PACKAGES_BUILD=YES) + message "Building packages for Columnstore" + fi + if [[ $MAINTAINER_MODE = true ]]; then MDB_CMAKE_FLAGS+=(-DCOLUMNSTORE_MAINTAINER=YES) message "Columnstore maintainer mode on" @@ -622,12 +627,20 @@ enable_columnstore_back() { fix_config_files() { message Fixing config files - THREAD_STACK_SIZE="20M" - SYSTEMD_SERVICE_DIR="/usr/lib/systemd/system" - MDB_SERVICE_FILE=$SYSTEMD_SERVICE_DIR/mariadb.service - COLUMNSTORE_CONFIG=$CONFIG_DIR/columnstore.cnf + # while packaging we have to patch configs in the sources to get them in the packakges + # for local builds, we patch config after installation in the systemdirs + if [[ $BUILD_PACKAGES = true ]]; then + MDB_SERVICE_FILE=$MDB_SOURCE_PATH/support-files/mariadb.service.in + COLUMNSTORE_CONFIG=$COLUMSNTORE_SOURCE_PATH/dbcon/mysql/columnstore.cnf + SANITIZERS_ABORT_ON_ERROR='0' + else + SYSTEMD_SERVICE_DIR="/usr/lib/systemd/system" + MDB_SERVICE_FILE=$SYSTEMD_SERVICE_DIR/mariadb.service + COLUMNSTORE_CONFIG=$CONFIG_DIR/columnstore.cnf + SANITIZERS_ABORT_ON_ERROR='1' + fi if [[ $ASAN = true ]]; then if grep -q thread_stack $COLUMNSTORE_CONFIG; then @@ -641,7 +654,7 @@ fix_config_files() { if grep -q ASAN $MDB_SERVICE_FILE; then warn "MDB Server has ASAN options in $MDB_SERVICE_FILE, check it's compatibility" else - echo Environment="'ASAN_OPTIONS=abort_on_error=1:disable_coredump=0,print_stats=false,detect_odr_violation=0,check_initialization_order=1,detect_stack_use_after_return=1,atexit=false,log_path=${REPORT_PATH}/asan.mariadb'" >>$MDB_SERVICE_FILE + echo Environment="'ASAN_OPTIONS=abort_on_error=$SANITIZERS_ABORT_ON_ERROR:disable_coredump=0,print_stats=false,detect_odr_violation=0,check_initialization_order=1,detect_stack_use_after_return=1,atexit=false,log_path=${REPORT_PATH}/asan.mariadb'" >>$MDB_SERVICE_FILE message "ASAN options were added to $MDB_SERVICE_FILE" fi fi @@ -650,7 +663,7 @@ fix_config_files() { if grep -q TSAN $MDB_SERVICE_FILE; then 
warn "MDB Server has TSAN options in $MDB_SERVICE_FILE, check it's compatibility" else - echo Environment="'TSAN_OPTIONS=abort_on_error=0,log_path=${REPORT_PATH}/tsan.mariadb'" >>$MDB_SERVICE_FILE + echo Environment="'TSAN_OPTIONS=abort_on_error=$SANITIZERS_ABORT_ON_ERROR,log_path=${REPORT_PATH}/tsan.mariadb'" >>$MDB_SERVICE_FILE message "TSAN options were added to $MDB_SERVICE_FILE" fi fi @@ -659,7 +672,7 @@ fix_config_files() { if grep -q UBSAN $MDB_SERVICE_FILE; then warn "MDB Server has UBSAN options in $MDB_SERVICE_FILE, check it's compatibility" else - echo Environment="'UBSAN_OPTIONS=abort_on_error=0,print_stacktrace=true,log_path=${REPORT_PATH}/ubsan.mariadb'" >>$MDB_SERVICE_FILE + echo Environment="'UBSAN_OPTIONS=abort_on_error=$SANITIZERS_ABORT_ON_ERROR,print_stacktrace=true,log_path=${REPORT_PATH}/ubsan.mariadb'" >>$MDB_SERVICE_FILE message "UBSAN options were added to $MDB_SERVICE_FILE" fi fi @@ -780,7 +793,7 @@ init_submodules if [[ $BUILD_PACKAGES = true ]]; then modify_packaging - + fix_config_files (build_package && run_unit_tests) exit_code=$? 
diff --git a/cmake/ColumnstoreLibrary.cmake b/cmake/ColumnstoreLibrary.cmake index 13a6097d6..6e0b97b55 100644 --- a/cmake/ColumnstoreLibrary.cmake +++ b/cmake/ColumnstoreLibrary.cmake @@ -119,6 +119,10 @@ macro(columnstore_link libname) endmacro() macro(columnstore_executable executable_name) - add_executable(${executable_name} ${ARGN}) + if(WITH_COLUMNSTORE_ASAN) + add_executable(${executable_name} ${ARGN} ${CMAKE_BINARY_DIR}/asan_options.cpp) + else() + add_executable(${executable_name} ${ARGN}) + endif() columnstore_install_target(${executable_name} ${ENGINE_BINDIR}) endmacro() diff --git a/oam/install_scripts/CMakeLists.txt b/oam/install_scripts/CMakeLists.txt index aaabc3559..056dbc1a8 100644 --- a/oam/install_scripts/CMakeLists.txt +++ b/oam/install_scripts/CMakeLists.txt @@ -24,9 +24,17 @@ if(WITH_COLUMNSTORE_ASAN) endif(WITH_COLUMNSTORE_REPORT_PATH) set(LD_PRELOAD_STRING "") - set(ALLOC_CONFIG - "ASAN_OPTIONS=abort_on_error=1:disable_coredump=0,print_stats=false,detect_odr_violation=0,check_initialization_order=1,detect_stack_use_after_return=1,atexit=false" + set(ABORT_ON_ERROR "1") + + if(COLUMNSTORE_PACKAGES_BUILD) + set(ABORT_ON_ERROR "0") + endif() + + set(ASAN_OPTIONS + abort_on_error=${ABORT_ON_ERROR}:disable_coredump=0:print_stats=0:detect_odr_violation=0:check_initialization_order=1:detect_stack_use_after_return=1:atexit=0 ) + set(ALLOC_CONFIG ASAN_OPTIONS=${ASAN_OPTIONS}) + set(PRIMPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${ASAN_PATH}.primproc) set(DMLPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${ASAN_PATH}.dmlproc) set(DDLPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${ASAN_PATH}.ddlproc) @@ -34,6 +42,12 @@ if(WITH_COLUMNSTORE_ASAN) set(CONTROLLERNODE_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${ASAN_PATH}.controllernode) set(WORKERNODE_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${ASAN_PATH}.workernode) set(STORAGEMANAGER_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${ASAN_PATH}.storagemanager) + set(COLUMNSTORE_STANDALONE_BINARIES_ASAN_COPTIONS ${ASAN_OPTIONS}) + 
configure_file( + "${CMAKE_SOURCE_DIR}/storage/columnstore/columnstore/build/asan_options.cpp.in" + "${CMAKE_BINARY_DIR}/asan_options.cpp" @ONLY + ) + endif() if(WITH_UBSAN) @@ -43,7 +57,8 @@ if(WITH_UBSAN) endif(WITH_COLUMNSTORE_REPORT_PATH) set(LD_PRELOAD_STRING "") - set(ALLOC_CONFIG "UBSAN_OPTIONS=abort_on_error=0,print_stacktrace=true") + set(UBSAN_OPTIONS abort_on_error=0:print_stacktrace=1) + set(ALLOC_CONFIG "UBSAN_OPTIONS="${UBSAN_OPTIONS}) set(PRIMPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${UBSAN_PATH}.primproc) set(DMLPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${UBSAN_PATH}.dmlproc) set(DDLPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${UBSAN_PATH}.ddlproc) @@ -60,7 +75,8 @@ if(WITH_TSAN) endif(WITH_COLUMNSTORE_REPORT_PATH) set(LD_PRELOAD_STRING "") - set(ALLOC_CONFIG "TSAN_OPTIONS=abort_on_error=0:log_path=${TSAN_PATH}") + set(TSAN_OPTIONS abort_on_error=0:log_path=${TSAN_PATH}) + set(ALLOC_CONFIG TSAN_OPTIONS=${TSAN_OPTIONS}) set(PRIMPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${TSAN_PATH}.primproc) set(DMLPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${TSAN_PATH}.dmlproc) set(DDLPROC_ALLOC_CONFIG ${ALLOC_CONFIG},log_path=${TSAN_PATH}.ddlproc) From 30f92ea52b57d4a0301386c521fc4bdea7b4b5c9 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Fri, 11 Jul 2025 20:07:49 +0000 Subject: [PATCH 45/51] Adding ddeb packages for teststages and repos --- build/createrepo.sh | 8 +++- build/prepare_test_container.sh | 80 ++++++++++++++++----------------- 2 files changed, 47 insertions(+), 41 deletions(-) diff --git a/build/createrepo.sh b/build/createrepo.sh index 7bc2673c8..5911f1fa3 100755 --- a/build/createrepo.sh +++ b/build/createrepo.sh @@ -17,6 +17,8 @@ COLUMNSTORE_RPM_PACKAGES_PATH="/mdb/${BUILDDIR}/*.rpm" CMAPI_RPM_PACKAGES_PATH="/mdb/${BUILDDIR}/storage/columnstore/columnstore/cmapi/*.rpm" COLUMNSTORE_DEB_PACKAGES_PATH="/mdb/*.deb" +COLUMNSTORE_DEBUG_DEB_PACKAGES_PATH="/mdb/*.ddeb" + 
CMAPI_DEB_PACKAGES_PATH="/mdb/${BUILDDIR}/storage/columnstore/columnstore/cmapi/*.deb" if [ "$EUID" -ne 0 ]; then @@ -44,6 +46,7 @@ if [[ $(compgen -G "$COLUMNSTORE_RPM_PACKAGES_PATH") ]]; then mv -v $COLUMNSTORE_RPM_PACKAGES_PATH "./${RESULT}/" elif [[ $(compgen -G "$COLUMNSTORE_DEB_PACKAGES_PATH") ]]; then mv -v $COLUMNSTORE_DEB_PACKAGES_PATH "./${RESULT}/" + mv -v $COLUMNSTORE_DEBUG_DEB_PACKAGES_PATH "./${RESULT}/" else echo "Columnstore packages are not found!" fi @@ -62,7 +65,10 @@ if [[ $(compgen -G "./${RESULT}/*.rpm") ]]; then createrepo "./${RESULT}" else retry_eval 5 "apt update && apt install -y dpkg-dev" - dpkg-scanpackages "${RESULT}" | gzip >"./${RESULT}/Packages.gz" + + dpkg-scanpackages "${RESULT}" >Packages + dpkg-scanpackages --type ddeb "${RESULT}" >>Packages + gzip -c Packages >"./${RESULT}/Packages.gz" fi mkdir -p "/drone/src/${RESULT}" diff --git a/build/prepare_test_container.sh b/build/prepare_test_container.sh index 83abaeb5e..1f1258029 100755 --- a/build/prepare_test_container.sh +++ b/build/prepare_test_container.sh @@ -78,57 +78,57 @@ start_container() { } prepare_container() { -if [[ "$RESULT" != *rocky* ]]; then - execInnerDocker "$CONTAINER_NAME" 'sed -i "s/exit 101/exit 0/g" /usr/sbin/policy-rc.d' -fi + if [[ "$RESULT" != *rocky* ]]; then + execInnerDocker "$CONTAINER_NAME" 'sed -i "s/exit 101/exit 0/g" /usr/sbin/policy-rc.d' + fi -#list_cgroups -echo "Docker CGroups opts here" -ls -al /sys/fs/cgroup/cgroup.controllers || true -ls -al /sys/fs/cgroup/ || true -ls -al /sys/fs/cgroup/memory || true + #list_cgroups + echo "Docker CGroups opts here" + ls -al /sys/fs/cgroup/cgroup.controllers || true + ls -al /sys/fs/cgroup/ || true + ls -al /sys/fs/cgroup/memory || true -execInnerDocker "$CONTAINER_NAME" 'echo Inner Docker CGroups opts here' -execInnerDocker "$CONTAINER_NAME" 'ls -al /sys/fs/cgroup/cgroup.controllers || true' -execInnerDocker "$CONTAINER_NAME" 'ls -al /sys/fs/cgroup/ || true' -execInnerDocker "$CONTAINER_NAME" 'ls -al 
/sys/fs/cgroup/memory || true' + execInnerDocker "$CONTAINER_NAME" 'echo Inner Docker CGroups opts here' + execInnerDocker "$CONTAINER_NAME" 'ls -al /sys/fs/cgroup/cgroup.controllers || true' + execInnerDocker "$CONTAINER_NAME" 'ls -al /sys/fs/cgroup/ || true' + execInnerDocker "$CONTAINER_NAME" 'ls -al /sys/fs/cgroup/memory || true' -# Prepare core dump directory inside container -execInnerDocker "$CONTAINER_NAME" 'mkdir -p core && chmod 777 core' -docker cp "$COLUMNSTORE_SOURCE_PATH"/core_dumps/. "$CONTAINER_NAME":/ -docker cp "$COLUMNSTORE_SOURCE_PATH"/build/utils.sh "$CONTAINER_NAME":/ -docker cp "$COLUMNSTORE_SOURCE_PATH"/setup-repo.sh "$CONTAINER_NAME":/ + # Prepare core dump directory inside container + execInnerDocker "$CONTAINER_NAME" 'mkdir -p core && chmod 777 core' + docker cp "$COLUMNSTORE_SOURCE_PATH"/core_dumps/. "$CONTAINER_NAME":/ + docker cp "$COLUMNSTORE_SOURCE_PATH"/build/utils.sh "$CONTAINER_NAME":/ + docker cp "$COLUMNSTORE_SOURCE_PATH"/setup-repo.sh "$CONTAINER_NAME":/ -if [[ "$DO_SETUP" == "true" ]]; then - execInnerDocker "$CONTAINER_NAME" '/setup-repo.sh' -fi + if [[ "$DO_SETUP" == "true" ]]; then + execInnerDocker "$CONTAINER_NAME" '/setup-repo.sh' + fi -# install deps -if [[ "$RESULT" == *rocky* ]]; then - execInnerDockerWithRetry "$CONTAINER_NAME" 'yum --nobest update -y && yum --nobest install -y cracklib-dicts diffutils elfutils epel-release expect findutils iproute gawk gcc-c++ gdb hostname lz4 patch perl procps-ng rsyslog sudo tar wget which' -else - change_ubuntu_mirror_in_docker "$CONTAINER_NAME" "us" - execInnerDockerWithRetry "$CONTAINER_NAME" 'apt update -y && apt install -y elfutils expect findutils iproute2 g++ gawk gdb hostname liblz4-tool patch procps rsyslog sudo tar wget' -fi + # install deps + if [[ "$RESULT" == *rocky* ]]; then + execInnerDockerWithRetry "$CONTAINER_NAME" 'yum --nobest update -y && yum --nobest install -y cracklib-dicts diffutils elfutils epel-release expect findutils iproute gawk gcc-c++ gdb hostname 
lz4 patch perl procps-ng rsyslog sudo tar wget which' + else + change_ubuntu_mirror_in_docker "$CONTAINER_NAME" "us" + execInnerDockerWithRetry "$CONTAINER_NAME" 'apt update -y && apt install -y elfutils expect findutils iproute2 g++ gawk gdb hostname liblz4-tool patch procps rsyslog sudo tar wget' + fi -# Configure core dump naming pattern -execInnerDocker "$CONTAINER_NAME" 'sysctl -w kernel.core_pattern="/core/%E_${RESULT}_core_dump.%p"' + # Configure core dump naming pattern + execInnerDocker "$CONTAINER_NAME" 'sysctl -w kernel.core_pattern="/core/%E_${RESULT}_core_dump.%p"' -#Install columnstore in container -echo "Installing columnstore..." -if [[ "$RESULT" == *rocky* ]]; then - execInnerDockerWithRetry "$CONTAINER_NAME" 'yum install -y MariaDB-columnstore-engine MariaDB-test' -else - execInnerDockerWithRetry "$CONTAINER_NAME" 'apt update -y && apt install -y mariadb-plugin-columnstore mariadb-test mariadb-test-data' -fi + #Install columnstore in container + echo "Installing columnstore..." 
+ if [[ "$RESULT" == *rocky* ]]; then + execInnerDockerWithRetry "$CONTAINER_NAME" 'yum install -y MariaDB-columnstore-engine MariaDB-test' + else + execInnerDockerWithRetry "$CONTAINER_NAME" 'apt update -y && apt install -y mariadb-plugin-columnstore mariadb-test mariadb-test-data mariadb-plugin-columnstore-dbgsym' + fi -sleep 5 -echo "PrepareTestStage completed in $CONTAINER_NAME" + sleep 5 + echo "PrepareTestStage completed in $CONTAINER_NAME" } - if [[ -z $(docker ps -q --filter "name=${CONTAINER_NAME}") ]]; then start_container prepare_container -else message "Container ${CONTAINER_NAME} is already running, skipping prepare step" +else + message "Container ${CONTAINER_NAME} is already running, skipping prepare step" fi From 052f43cc9754d3d212316b652c9e20ccd10fb58d Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Sun, 13 Jul 2025 14:05:40 +0000 Subject: [PATCH 46/51] start regressions after mtr logs published --- tests/scripts/fullmtr.sh | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/tests/scripts/fullmtr.sh b/tests/scripts/fullmtr.sh index 7ca3a4558..bbda72ee2 100644 --- a/tests/scripts/fullmtr.sh +++ b/tests/scripts/fullmtr.sh @@ -4,48 +4,43 @@ SCRIPT_LOCATION=$(dirname "$0") MARIADB_SOURCE_PATH=$(realpath $SCRIPT_LOCATION/../../../../../) COLUMNSTORE_MTR_SOURCE=$(realpath $SCRIPT_LOCATION/../../mysql-test/columnstore) INSTALLED_MTR_PATH='/usr/share/mysql/mysql-test' -COLUMSNTORE_MTR_INSTALLED=${INSTALLED_MTR_PATH}/plugin/columnstore/columnstore/ +COLUMSNTORE_MTR_INSTALLED=${INSTALLED_MTR_PATH}/plugin/columnstore/ PATCHNAME=$(realpath $SCRIPT_LOCATION)/mtr_warn.patch -CURRENT_DIR=`pwd` +CURRENT_DIR=$(pwd) mysql -e "create database if not exists test;" -SOCKET=`mysql -e "show variables like 'socket';" | grep socket | cut -f2` +SOCKET=$(mysql -e "show variables like 'socket';" | grep socket | cut -f2) export 
ASAN_OPTIONS=abort_on_error=1:disable_coredump=0,print_stats=false,detect_odr_violation=0,check_initialization_order=1,detect_stack_use_after_return=1,atexit=false,log_path=/core/asan.hz - # needed when run MTR tests locally, see mariadb-test-run.pl:417, mtr functions # are added to the database mtr only when --extern is not specified -add_mtr_warn_functions() -{ +add_mtr_warn_functions() { echo "Adding mtr warnings functions..." cd /tmp - mysql -e "drop database if exists mtr"; + mysql -e "drop database if exists mtr" cp ${MARIADB_SOURCE_PATH}/mysql-test/include/mtr_warnings.sql mtr_warnings.sql - patch -p1 < ${PATCHNAME} + patch -p1 <${PATCHNAME} mysql -e "create database if not exists mtr;" - mysql mtr < mtr_warnings.sql + mysql mtr $CURRENT_DIR/mtr.$1.cores-before ./mtr --force --extern=socket=${SOCKET} --max-test-fail=0 --testcase-timeout=60 --suite=columnstore/$1 $2 | tee $CURRENT_DIR/mtr.$1.log 2>&1 # dump analyses. @@ -57,15 +52,14 @@ run_suite() rm $CURRENT_DIR/mtr.$1.cores-before $CURRENT_DIR/mtr.$1.cores-after } - add_mtr_warn_functions -if (( $# == 2 )); then +if (($# == 2)); then run_suite $1 $2 exit 1 fi -if (( $# == 1 )); then +if (($# == 1)); then run_suite $1 exit 1 fi From 84e2dde86515b8c945131b51cb6ec6fbca67e081 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Wed, 16 Jul 2025 02:43:13 +0000 Subject: [PATCH 47/51] simpler cores --- build/bootstrap_mcs.sh | 2 +- core_dumps/core_dump_format.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build/bootstrap_mcs.sh b/build/bootstrap_mcs.sh index d1ecadc53..902a14455 100755 --- a/build/bootstrap_mcs.sh +++ b/build/bootstrap_mcs.sh @@ -654,7 +654,7 @@ fix_config_files() { if grep -q ASAN $MDB_SERVICE_FILE; then warn "MDB Server has ASAN options in $MDB_SERVICE_FILE, check it's compatibility" else - echo 
Environment="'ASAN_OPTIONS=abort_on_error=$SANITIZERS_ABORT_ON_ERROR:disable_coredump=0,print_stats=false,detect_odr_violation=0,check_initialization_order=1,detect_stack_use_after_return=1,atexit=false,log_path=${REPORT_PATH}/asan.mariadb'" >>$MDB_SERVICE_FILE + echo Environment="'ASAN_OPTIONS=abort_on_error=$SANITIZERS_ABORT_ON_ERROR:disable_coredump=0,print_stats=false,detect_odr_violation=0,check_initialization_order=0,detect_stack_use_after_return=1,atexit=false,log_path=${REPORT_PATH}/asan.mariadb'" >>$MDB_SERVICE_FILE message "ASAN options were added to $MDB_SERVICE_FILE" fi fi diff --git a/core_dumps/core_dump_format.sh b/core_dumps/core_dump_format.sh index 17691f86b..0e9e0ee1f 100755 --- a/core_dumps/core_dump_format.sh +++ b/core_dumps/core_dump_format.sh @@ -15,7 +15,7 @@ save_ansi_to_html() { cat "$DUMPNAME" | bash "${SCRIPT_LOCATION}"/ansi2html.sh --palette=solarized >>"${FILENAME}" } invoke_gdb_command() { - unbuffer gdb -x "${SCRIPT_LOCATION}"/gdbinit -q ${BINARY} --core ${COREDUMP} -ex "$1" -ex quit >>"$DUMPNAME" + gdb -x "${SCRIPT_LOCATION}"/gdbinit -q ${BINARY} --core ${COREDUMP} -ex "$1" -ex quit >>"$DUMPNAME" } echo "

Step: ${STEP_NAME}
Binary name: ${BINARY}

" >>"${FILENAME}" From bbff8b89a89e2175f4983b393696fa26ba520aa9 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Wed, 16 Jul 2025 13:02:22 +0000 Subject: [PATCH 48/51] colored coredumps --- .drone.jsonnet | 5 ----- build/createrepo.sh | 2 +- core_dumps/core_dump_format.sh | 4 ++-- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index b91c42dde..479d4315a 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -45,16 +45,11 @@ local customBootstrapParamsForExisitingPipelines(envkey) = local customBootstrapParamsForAdditionalPipelinesMap = { ASan: "--asan", -<<<<<<< HEAD TSAN: "--tsan", UBSan: "--ubsan", MSan: "--msan", "libcpp": "--libcpp", "gcc-toolset": "--gcc-toolset-for-rocky-8" -======= - //TSAN: "--tsan", - UBSan: "--ubsan", ->>>>>>> a25d751cb (Add flag key prefix to $result) }; local customBuildFlags(buildKey) = diff --git a/build/createrepo.sh b/build/createrepo.sh index 5911f1fa3..03c1b89c1 100755 --- a/build/createrepo.sh +++ b/build/createrepo.sh @@ -46,7 +46,7 @@ if [[ $(compgen -G "$COLUMNSTORE_RPM_PACKAGES_PATH") ]]; then mv -v $COLUMNSTORE_RPM_PACKAGES_PATH "./${RESULT}/" elif [[ $(compgen -G "$COLUMNSTORE_DEB_PACKAGES_PATH") ]]; then mv -v $COLUMNSTORE_DEB_PACKAGES_PATH "./${RESULT}/" - mv -v $COLUMNSTORE_DEBUG_DEB_PACKAGES_PATH "./${RESULT}/" + mv -v $COLUMNSTORE_DEBUG_DEB_PACKAGES_PATH "./${RESULT}/" || true else echo "Columnstore packages are not found!" fi diff --git a/core_dumps/core_dump_format.sh b/core_dumps/core_dump_format.sh index 0e9e0ee1f..2e5e63a46 100755 --- a/core_dumps/core_dump_format.sh +++ b/core_dumps/core_dump_format.sh @@ -12,10 +12,10 @@ STEP_NAME=$5 save_ansi_to_html() { echo "

$1

" >>"${FILENAME}" - cat "$DUMPNAME" | bash "${SCRIPT_LOCATION}"/ansi2html.sh --palette=solarized >>"${FILENAME}" + cat "$DUMPNAME" | bash "${SCRIPT_LOCATION}"/ansi2html.sh --bg=dark --palette=tango >>"${FILENAME}" } invoke_gdb_command() { - gdb -x "${SCRIPT_LOCATION}"/gdbinit -q ${BINARY} --core ${COREDUMP} -ex "$1" -ex quit >>"$DUMPNAME" + script -q -c "gdb -x \"${SCRIPT_LOCATION}\"/gdbinit -q ${BINARY} --core ${COREDUMP} -ex \"$1\" -ex \"quit\"" $DUMPNAME } echo "

Step: ${STEP_NAME}
Binary name: ${BINARY}

" >>"${FILENAME}" From 980c3acaa8bf74b65af12186c26a3acfa2c383b5 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Thu, 17 Jul 2025 10:49:42 +0000 Subject: [PATCH 49/51] noncolored cores --- core_dumps/core_dump_format.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core_dumps/core_dump_format.sh b/core_dumps/core_dump_format.sh index 2e5e63a46..95c792904 100755 --- a/core_dumps/core_dump_format.sh +++ b/core_dumps/core_dump_format.sh @@ -10,12 +10,12 @@ DUMPNAME=$4 STEP_NAME=$5 save_ansi_to_html() { - echo "

$1

" >>"${FILENAME}" - cat "$DUMPNAME" | bash "${SCRIPT_LOCATION}"/ansi2html.sh --bg=dark --palette=tango >>"${FILENAME}" + cat "$DUMPNAME" | bash "${SCRIPT_LOCATION}"/ansi2html.sh --palette=solarized >>"${FILENAME}" } + invoke_gdb_command() { - script -q -c "gdb -x \"${SCRIPT_LOCATION}\"/gdbinit -q ${BINARY} --core ${COREDUMP} -ex \"$1\" -ex \"quit\"" $DUMPNAME + gdb -x "${SCRIPT_LOCATION}"/gdbinit -q ${BINARY} --core ${COREDUMP} -ex "$1" -ex quit >>"$DUMPNAME" } echo "

Step: ${STEP_NAME}
Binary name: ${BINARY}

" >>"${FILENAME}" From d4d87ad07ff70b5d61a70f4cd0bf1df8cdb861aa Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Mon, 28 Jul 2025 13:18:01 +0000 Subject: [PATCH 50/51] revert ci --- .drone.jsonnet | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/.drone.jsonnet b/.drone.jsonnet index 479d4315a..4971389ea 100644 --- a/.drone.jsonnet +++ b/.drone.jsonnet @@ -650,19 +650,17 @@ local FinalPipeline(branch, event) = { "failure", ], } + (if event == "cron" then { cron: ["nightly-" + std.strReplace(branch, ".", "-")] } else {}), -// depends_on: std.map(function(p) std.join(" ", [branch, p, event, "amd64", "10.6-enterprise", "", ""]), platforms[current_branch]), + depends_on: std.map(function(p) std.join(" ", [branch, p, event, "amd64", "10.6-enterprise", "", ""]), platforms[current_branch]), // +std.map(function(p) std.join(" ", [branch, p, event, "arm64", "10.6-enterprise", "", ""]), platforms_arm.develop), }; -// [ -// Pipeline(b, p, e, "amd64", s) -// for b in std.objectFields(platforms) -// for p in platforms[b] -// for s in servers[b] -// for e in events -// ] + - - +[ + Pipeline(b, p, e, "amd64", s) + for b in std.objectFields(platforms) + for p in platforms[b] + for s in servers[b] + for e in events +] + // [ // Pipeline(b, p, e, "arm64", s) // for b in std.objectFields(platforms_arm) @@ -699,8 +697,8 @@ local FinalPipeline(branch, event) = { Pipeline(b, platform, triggeringEvent, a, server, flag, "") for a in ["amd64"] for b in std.objectFields(platforms) - for platform in ["ubuntu:24.04"] - for flag in ["ASan", "UBSan"] + for platform in ["rockylinux:8"] + for flag in ["gcc-toolset"] for triggeringEvent in events for server in servers[current_branch] ] From 2e2b4b3f82a5ca1638a7bb783fc758cc14de2116 Mon Sep 17 00:00:00 2001 From: Leonid Fedorov Date: Wed, 30 Jul 2025 13:57:40 +0000 Subject: [PATCH 51/51] chore(tests): make local mtr runnner smarter --- tests/scripts/fullmtr.sh | 77 ---------------------------- 
tests/scripts/run_mtr.sh | 107 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 77 deletions(-) delete mode 100644 tests/scripts/fullmtr.sh create mode 100755 tests/scripts/run_mtr.sh diff --git a/tests/scripts/fullmtr.sh b/tests/scripts/fullmtr.sh deleted file mode 100644 index bbda72ee2..000000000 --- a/tests/scripts/fullmtr.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash - -SCRIPT_LOCATION=$(dirname "$0") -MARIADB_SOURCE_PATH=$(realpath $SCRIPT_LOCATION/../../../../../) -COLUMNSTORE_MTR_SOURCE=$(realpath $SCRIPT_LOCATION/../../mysql-test/columnstore) -INSTALLED_MTR_PATH='/usr/share/mysql/mysql-test' -COLUMSNTORE_MTR_INSTALLED=${INSTALLED_MTR_PATH}/plugin/columnstore/ -PATCHNAME=$(realpath $SCRIPT_LOCATION)/mtr_warn.patch -CURRENT_DIR=$(pwd) -mysql -e "create database if not exists test;" -SOCKET=$(mysql -e "show variables like 'socket';" | grep socket | cut -f2) - -export ASAN_OPTIONS=abort_on_error=1:disable_coredump=0,print_stats=false,detect_odr_violation=0,check_initialization_order=1,detect_stack_use_after_return=1,atexit=false,log_path=/core/asan.hz - -# needed when run MTR tests locally, see mariadb-test-run.pl:417, mtr functions -# are added to the database mtr only when --extern is not specified - -add_mtr_warn_functions() { - echo "Adding mtr warnings functions..." - cd /tmp - mysql -e "drop database if exists mtr" - cp ${MARIADB_SOURCE_PATH}/mysql-test/include/mtr_warnings.sql mtr_warnings.sql - patch -p1 <${PATCHNAME} - mysql -e "create database if not exists mtr;" - mysql mtr $CURRENT_DIR/mtr.$1.cores-before - ./mtr --force --extern=socket=${SOCKET} --max-test-fail=0 --testcase-timeout=60 --suite=columnstore/$1 $2 | tee $CURRENT_DIR/mtr.$1.log 2>&1 - # dump analyses. 
- systemctl stop mariadb - systemctl start mariadb - ls /core >$CURRENT_DIR/mtr.$1.cores-after - echo "reports or coredumps:" - diff -u $CURRENT_DIR/mtr.$1.cores-before $CURRENT_DIR/mtr.$1.cores-after && echo "no new reports or coredumps" - rm $CURRENT_DIR/mtr.$1.cores-before $CURRENT_DIR/mtr.$1.cores-after -} - -add_mtr_warn_functions - -if (($# == 2)); then - run_suite $1 $2 - exit 1 -fi - -if (($# == 1)); then - run_suite $1 - exit 1 -fi - -run_suite basic -run_suite bugfixes -run_suite setup -run_suite devregression -run_suite autopilot -run_suite extended -run_suite multinode -run_suite oracle -run_suite 1pmonly - -cd - diff --git a/tests/scripts/run_mtr.sh b/tests/scripts/run_mtr.sh new file mode 100755 index 000000000..f48cc9d5c --- /dev/null +++ b/tests/scripts/run_mtr.sh @@ -0,0 +1,107 @@ +#!/bin/bash + + +SCRIPT_LOCATION=$(dirname "$0") +COLUMNSTORE_SOURCE_PATH=$(realpath $SCRIPT_LOCATION/../../) +MARIADB_SOURCE_PATH=$(realpath $SCRIPT_LOCATION/../../../../../) +COLUMNSTORE_MTR_SOURCE=$(realpath $COLUMNSTORE_SOURCE_PATH/mysql-test/columnstore) +INSTALLED_MTR_PATH='/usr/share/mysql/mysql-test/' +PATCHNAME=$(realpath $SCRIPT_LOCATION)/mtr_warn.patch +CURRENT_DIR=$(pwd) + +source $COLUMNSTORE_SOURCE_PATH/build/utils.sh + + +optparse.define short=s long=suite desc="whole suite to run" variable=SUITE_NAME +optparse.define short=t long=test_full_name desc="Testname with suite as like bugfixes.mcol-4899" variable=TEST_FULL_NAME default="" +optparse.define short=f long=full desc="Run full MTR" variable=RUN_FULL default=false value=true +optparse.define short=r long=record desc="Record the result" variable=RECORD default=false value=true +optparse.define short=e long=no-extern desc="Run without --extern" variable=EXTERN default=true value=false + +source $(optparse.build) + +mariadb -e "create database if not exists test;" +SOCKET=$(mariadb -e "show variables like 'socket';" | grep socket | cut -f2) + +export 
ASAN_OPTIONS=abort_on_error=1:disable_coredump=0,print_stats=false,detect_odr_violation=0,check_initialization_order=1,detect_stack_use_after_return=1,atexit=false,log_path=/core/asan.hz + + +# needed when run MTR tests locally, see mariadb-test-run.pl:417, mtr functions +# are added to the database mtr only when --extern is not specified + +add_mtr_warn_functions() { + message "Adding mtr warnings functions..." + cd /tmp + mariadb -e "drop database if exists mtr" + cp ${MARIADB_SOURCE_PATH}/mysql-test/include/mtr_warnings.sql mtr_warnings.sql + patch -p1 <${PATCHNAME} + mariadb -e "create database if not exists mtr;" + mariadb mtr $CURRENT_DIR/mtr.$1.cores-before + + if [[ $EXTERN == true ]]; then + EXTERN_FLAG="--extern=socket=${SOCKET}" + else + EXTERN_FLAG="" + fi + + if [[ $RECORD == true ]]; then + RECORD_FLAG="--record" + else + RECORD_FLAG="" + fi + + ./mtr --force $EXTERN_FLAG $RECORD_FLAG --max-test-fail=0 --testcase-timeout=60 --suite=columnstore/$1 $2 | tee $CURRENT_DIR/mtr.$1.log 2>&1 + # dump analyses. + systemctl stop mariadb + systemctl start mariadb + ls /core >$CURRENT_DIR/mtr.$1.cores-after + message "reports or coredumps:" + diff -u $CURRENT_DIR/mtr.$1.cores-before $CURRENT_DIR/mtr.$1.cores-after && echo "no new reports or coredumps" + rm $CURRENT_DIR/mtr.$1.cores-before $CURRENT_DIR/mtr.$1.cores-after +} + +add_mtr_warn_functions + + +if [[ $RUN_FULL == true ]]; then + message "Running FULL MTR" + run_suite basic + run_suite bugfixes + run_suite setup + run_suite devregression + run_suite autopilot + run_suite extended + run_suite multinode + run_suite oracle + run_suite 1pmonly +else + message "Running suite $SUITE_NAME with test $TEST_NAME" + run_suite $SUITE_NAME $TEST_NAME +fi + +cd -