diff --git a/CMakeLists.txt b/CMakeLists.txt index 482e8d19e..62ff05455 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.6) +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12) + # Avoid warnings in higher versions if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" GREATER 2.6) CMAKE_POLICY(VERSION 2.8) @@ -107,8 +108,8 @@ endif() INCLUDE(check_compiler_flag.cmake) -MY_CHECK_AND_SET_COMPILER_FLAG("-g -O3 -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H" RELEASE RELWITHDEBINFO MINSIZEREL) -MY_CHECK_AND_SET_COMPILER_FLAG("-ggdb3 -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H" DEBUG) +MY_CHECK_AND_SET_COMPILER_FLAG("-g -O3 -fno-omit-frame-pointer -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H" RELEASE RELWITHDEBINFO MINSIZEREL) +MY_CHECK_AND_SET_COMPILER_FLAG("-ggdb3 -fno-omit-frame-pointer -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H" DEBUG) # enable security hardening features, like most distributions do # in our benchmarks that costs about ~1% of performance, depending on the load diff --git a/README b/README index 3517d0580..69fff339d 100644 --- a/README +++ b/README @@ -9,5 +9,4 @@ series are included in this release. Additional features will be pushed in future releases. A few things to notice: -- Do not use Beta releases on production systems. - The building of the ColumnStore engine needs a special build environment. We're working on making it available for everyone to build. 
diff --git a/cpackEngineDEB.cmake b/cpackEngineDEB.cmake index 461714ced..4a5e24b1f 100644 --- a/cpackEngineDEB.cmake +++ b/cpackEngineDEB.cmake @@ -1,5 +1,7 @@ IF(DEB) +CMAKE_MINIMUM_REQUIRED(VERSION 3.4) + SET(CMAKE_INSTALL_PREFIX ${INSTALL_ENGINE}) SET(CPACK_GENERATOR "DEB") @@ -65,9 +67,11 @@ if (EXISTS "/etc/debian_version") set(DEBIAN_VERSION_NUMBER "${CMAKE_MATCH_1}") endif () if ("${DEBIAN_VERSION_NUMBER}" EQUAL "8") - SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, libboost-all-dev, mariadb-columnstore-libs, libsnappy1") -else () - SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, libboost-all-dev, mariadb-columnstore-libs, libsnappy1v5") + SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libreadline-dev, rsync, net-tools, libboost-all-dev, mariadb-columnstore-libs, mariadb-columnstore-server, libsnappy1") +elseif ("${DEBIAN_VERSION_NUMBER}" EQUAL "9") + SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libreadline-dev, rsync, net-tools, libboost-all-dev, mariadb-columnstore-libs, mariadb-columnstore-server, libsnappy1v5, libreadline5") +else() + SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libboost-all-dev, libreadline-dev, rsync, snappy, net-tools") endif () SET(CPACK_DEBIAN_STORAGE-ENGINE_PACKAGE_DEPENDS "mariadb-columnstore-libs") diff --git a/cpackEngineRPM.cmake b/cpackEngineRPM.cmake index dcc65e032..5c340adfc 100644 --- a/cpackEngineRPM.cmake +++ b/cpackEngineRPM.cmake @@ -5,7 +5,6 @@ SET(CMAKE_INSTALL_PREFIX ${INSTALL_ENGINE}) SET(CPACK_GENERATOR "RPM") SET(CPACK_RPM_PACKAGE_DEBUG 1) SET(CPACK_PACKAGING_INSTALL_PREFIX ${INSTALL_ENGINE}) -CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7) SET(CPACK_RPM_COMPONENT_INSTALL ON) diff --git a/dbcon/joblist/jobstep.cpp b/dbcon/joblist/jobstep.cpp index b24a0c0ea..94229871a 100644 --- a/dbcon/joblist/jobstep.cpp +++ b/dbcon/joblist/jobstep.cpp @@ -56,7 +56,7 @@ namespace joblist { 
boost::mutex JobStep::fLogMutex; //=PTHREAD_MUTEX_INITIALIZER; -ThreadPool JobStep::jobstepThreadPool(0, 0); +ThreadPool JobStep::jobstepThreadPool(defaultJLThreadPoolSize, 0); ostream& operator<<(ostream& os, const JobStep* rhs) { diff --git a/dbcon/joblist/resourcemanager.h b/dbcon/joblist/resourcemanager.h index ffc2de639..73de7ce75 100644 --- a/dbcon/joblist/resourcemanager.h +++ b/dbcon/joblist/resourcemanager.h @@ -93,6 +93,7 @@ namespace joblist const uint64_t defaultNumBuckets = 128; const uint64_t defaultMaxElementsPerBuckert = 16 * 1024 * 1024; + const int defaultEMServerThreads = 50; const int defaultEMSecondsBetweenMemChecks = 1; const int defaultEMMaxPct = 95; const int defaultEMPriority = 21; // @Bug 3385 @@ -147,10 +148,14 @@ namespace joblist typedef std::map MemMap; - int getEmSecondsBetweenMemChecks() const { return getUintVal(fExeMgrStr, "SecondsBetweenMemChecks", defaultEMSecondsBetweenMemChecks); } - int getEmMaxPct() const { return getUintVal(fExeMgrStr, "MaxPct", defaultEMMaxPct); } - EXPORT int getEmPriority() const; - int getEmExecQueueSize() const { return getIntVal(fExeMgrStr, "ExecQueueSize", defaultEMExecQueueSize); } + // @MCOL-513 - Added threadpool to ExeMgr + int getEmServerThreads() const { return getIntVal(fExeMgrStr, "ThreadPoolSize", defaultEMServerThreads); } + std::string getExeMgrThreadPoolDebug() const { return getStringVal(fExeMgrStr, "ThreadPoolDebug", "N"); } + + int getEmSecondsBetweenMemChecks() const { return getUintVal(fExeMgrStr, "SecondsBetweenMemChecks", defaultEMSecondsBetweenMemChecks); } + int getEmMaxPct() const { return getUintVal(fExeMgrStr, "MaxPct", defaultEMMaxPct); } + EXPORT int getEmPriority() const; + int getEmExecQueueSize() const { return getIntVal(fExeMgrStr, "ExecQueueSize", defaultEMExecQueueSize); } int getHjMaxBuckets() const { return getUintVal(fHashJoinStr, "MaxBuckets", defaultHJMaxBuckets); } unsigned getHjNumThreads() const { return fHjNumThreads; } //getUintVal(fHashJoinStr, "NumThreads", 
defaultNumThreads); } @@ -165,8 +170,9 @@ namespace joblist uint32_t getJlScanLbidReqThreshold() const { return getUintVal(fJobListStr,"ScanLbidReqThreshold", defaultScanLbidReqThreshold); } // @MCOL-513 - Added threadpool to JobSteps - uint32_t getJLThreadPoolSize() const { return getUintVal(fJobListStr, "ThreadPoolSize", defaultJLThreadPoolSize); } + int getJLThreadPoolSize() const { return getIntVal(fJobListStr, "ThreadPoolSize", defaultJLThreadPoolSize); } std::string getJlThreadPoolDebug() const { return getStringVal(fJobListStr, "ThreadPoolDebug", "N"); } + std::string getDMLJlThreadPoolDebug() const { return getStringVal(fJobListStr, "DMLThreadPoolDebug", "N"); } // @bug 1264 - Added LogicalBlocksPerScan configurable which determines the number of blocks contained in each BPS scan request. uint32_t getJlLogicalBlocksPerScan() const { return getUintVal(fJobListStr,"LogicalBlocksPerScan", defaultLogicalBlocksPerScan); } diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index 3b80a25bd..42910e98c 100755 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -1819,6 +1819,130 @@ void calsettrace_deinit(UDF_INIT* initid) { } +#ifdef _MSC_VER +__declspec(dllexport) +#endif +// Return 1 if system is ready for reads or 0 if not. +long long mcssystemready(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, char* error) +{ + long long rtn = 0; + Oam oam; + DBRM dbrm(true); + SystemStatus systemstatus; + + try + { + oam.getSystemStatus(systemstatus); + if (systemstatus.SystemOpState == ACTIVE + && dbrm.getSystemReady() + && dbrm.getSystemQueryReady()) + { + return 1; + } + } + catch (...) 
+ { + *error = 1; + } + return rtn; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool mcssystemready_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void mcssystemready_deinit(UDF_INIT* initid) +{ +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +// Return 1 if system is read only; 0 if writeable +long long mcssystemreadonly(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, char* error) +{ + long long rtn = 0; + DBRM dbrm(true); + + try + { + if (dbrm.isReadWrite()) // Returns 0 for writable, 5 for read only + { + rtn = 1; + } + } + catch (...) + { + *error = 1; + rtn = 1; + } + return rtn; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool mcssystemreadonly_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void mcssystemreadonly_deinit(UDF_INIT* initid) +{ +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +// Return 1 if writes are suspended (dbrm.getSystemSuspended()); 0 if not +long long mcswritessuspended(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, char* error) +{ + long long rtn = 0; + DBRM dbrm(true); + + try + { + if (dbrm.getSystemSuspended()) + { + rtn = 1; + } + } + catch (...) 
+ { + *error = 1; + rtn = 1; + } + return rtn; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool mcswritessuspended_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void mcswritessuspended_deinit(UDF_INIT* initid) +{ +} + #define MAXSTRINGLENGTH 50 const char* PmSmallSideMaxMemory = "pmmaxmemorysmallside"; diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index aaa17473a..17c6e1817 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -82,6 +82,9 @@ CREATE FUNCTION idbextentmin RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idbextentmax RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; +CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; +CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; +CREATE FUNCTION mcswritessuspended RETURNS INTEGER soname 'libcalmysql.so'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; diff --git a/dbcon/mysql/is_columnstore_files.cpp b/dbcon/mysql/is_columnstore_files.cpp index e9479e254..ce00b8aae 100644 --- a/dbcon/mysql/is_columnstore_files.cpp +++ b/dbcon/mysql/is_columnstore_files.cpp @@ -124,6 +124,17 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) iter++; continue; } + + try + { + oam_instance.getDbrootPmConfig(iter->dbRoot, pmId); + } + catch (const std::runtime_error&) + { + // MCOL-1116: If we are here a DBRoot is offline/missing + iter++; + continue; + } table->field[0]->store(oid); table->field[1]->store(iter->segmentNum); table->field[2]->store(iter->partitionNum); @@ -134,7 +145,7 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) std::string DbRootPath = 
config->getConfig("SystemConfig", DbRootName.str()); fileSize = compressedFileSize = 0; snprintf(fullFileName, WriteEngine::FILE_NAME_SIZE, "%s/%s", DbRootPath.c_str(), oidDirName); - oam_instance.getDbrootPmConfig(iter->dbRoot, pmId); + std::ostringstream oss; oss << "pm" << pmId << "_WriteEngineServer"; std::string client = oss.str(); diff --git a/dbcon/mysql/mysql-Columnstore b/dbcon/mysql/mysql-Columnstore index b49c646e2..02a0de162 100755 --- a/dbcon/mysql/mysql-Columnstore +++ b/dbcon/mysql/mysql-Columnstore @@ -61,7 +61,7 @@ datadir=$basedir/db # Value here is overriden by value in my.cnf. # 0 means don't wait at all # Negative numbers mean to wait indefinitely -service_startup_timeout=60 +service_startup_timeout=90 # Lock directory for RedHat / SuSE. lockdir='/var/lock/subsys' diff --git a/ddlproc/CMakeLists.txt b/ddlproc/CMakeLists.txt index cd089d170..222313a69 100644 --- a/ddlproc/CMakeLists.txt +++ b/ddlproc/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) ########### next target ############### -set(DDLProc_SRCS ddlproc.cpp ddlprocessor.cpp) +set(DDLProc_SRCS ddlproc.cpp ddlprocessor.cpp ../utils/common/crashtrace.cpp) add_executable(DDLProc ${DDLProc_SRCS}) diff --git a/ddlproc/ddlproc.cpp b/ddlproc/ddlproc.cpp index c14233cc6..45bc6a48d 100644 --- a/ddlproc/ddlproc.cpp +++ b/ddlproc/ddlproc.cpp @@ -61,6 +61,7 @@ using namespace execplan; #include "IDBPolicy.h" #include "utils_utf8.h" +#include "crashtrace.h" namespace fs = boost::filesystem; @@ -97,6 +98,9 @@ int main(int argc, char* argv[]) string systemLang = "C"; systemLang = funcexp::utf8::idb_setlocale(); + // This is unset due to the way we start it + program_invocation_short_name = const_cast<char*>("DDLProc"); + setupCwd(); WriteEngine::WriteEngineWrapper::init( WriteEngine::SUBSYSTEM_ID_DDLPROC ); @@ -116,6 +120,11 @@ int main(int argc, char* argv[]) sigaction(SIGHUP, &ign, 0); ign.sa_handler = SIG_IGN; sigaction(SIGPIPE, &ign, 0); + memset(&ign, 0, sizeof(ign)); + 
ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); #endif ddlprocessor::DDLProcessor ddlprocessor(1, 20); diff --git a/dmlproc/CMakeLists.txt b/dmlproc/CMakeLists.txt index 36f06657c..fece5b614 100644 --- a/dmlproc/CMakeLists.txt +++ b/dmlproc/CMakeLists.txt @@ -8,7 +8,8 @@ set(DMLProc_SRCS dmlproc.cpp dmlprocessor.cpp dmlresultbuffer.cpp - batchinsertprocessor.cpp) + batchinsertprocessor.cpp + ../utils/common/crashtrace.cpp) add_executable(DMLProc ${DMLProc_SRCS}) diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index c8ad57b62..df17fbed6 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -82,6 +82,8 @@ using namespace joblist; #include "utils_utf8.h" +#include "crashtrace.h" + namespace fs = boost::filesystem; namespace @@ -473,6 +475,9 @@ int main(int argc, char* argv[]) //BUG 5362 systemLang = funcexp::utf8::idb_setlocale(); + // This is unset due to the way we start it + program_invocation_short_name = const_cast<char*>("DMLProc"); + Config* cf = Config::makeConfig(); setupCwd(); @@ -565,14 +570,14 @@ int main(int argc, char* argv[]) // because rm has a "isExeMgr" flag that is set upon creation (rm is a singleton). // From the pools perspective, it has no idea if it is ExeMgr doing the // creation, so it has no idea which way to set the flag. So we set the max here. 
-// JobStep::jobstepThreadPool.setMaxThreads(rm->getJLThreadPoolSize()); + JobStep::jobstepThreadPool.setMaxThreads(rm->getJLThreadPoolSize()); JobStep::jobstepThreadPool.setName("DMLProcJobList"); -// if (rm->getJlThreadPoolDebug() == "Y" || rm->getJlThreadPoolDebug() == "y") -// { -// JobStep::jobstepThreadPool.setDebug(true); -// JobStep::jobstepThreadPool.invoke(ThreadPoolMonitor(&JobStep::jobstepThreadPool)); -// } + if (rm->getDMLJlThreadPoolDebug() == "Y" || rm->getDMLJlThreadPoolDebug() == "y") + { + JobStep::jobstepThreadPool.setDebug(true); + JobStep::jobstepThreadPool.invoke(ThreadPoolMonitor(&JobStep::jobstepThreadPool)); + } //set ACTIVE state try @@ -592,6 +597,12 @@ int main(int argc, char* argv[]) sigaction(SIGHUP, &ign, 0); ign.sa_handler = SIG_IGN; sigaction(SIGPIPE, &ign, 0); + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); #endif dmlserver.start(); diff --git a/exemgr/CMakeLists.txt b/exemgr/CMakeLists.txt index 929ec804c..cae1cf3ce 100644 --- a/exemgr/CMakeLists.txt +++ b/exemgr/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) ########### next target ############### -set(ExeMgr_SRCS main.cpp activestatementcounter.cpp femsghandler.cpp) +set(ExeMgr_SRCS main.cpp activestatementcounter.cpp femsghandler.cpp ../utils/common/crashtrace.cpp) add_executable(ExeMgr ${ExeMgr_SRCS}) diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 6967b2ca9..b8def9813 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -98,6 +98,7 @@ using namespace querytele; #include "boost/filesystem.hpp" #include "threadpool.h" +#include "crashtrace.h" namespace { @@ -515,7 +516,7 @@ public: SJLP jl; bool incSessionThreadCnt = true; - bool selfJoin = false; + bool selfJoin = false; bool tryTuples = false; bool usingTuples = false; bool stmtCounted = false; @@ -1436,18 +1437,20 @@ int main(int argc, char* argv[]) // because rm has a 
"isExeMgr" flag that is set upon creation (rm is a singleton). // From the pools perspective, it has no idea if it is ExeMgr doing the // creation, so it has no idea which way to set the flag. So we set the max here. -// JobStep::jobstepThreadPool.setMaxThreads(rm->getJLThreadPoolSize()); + JobStep::jobstepThreadPool.setMaxThreads(rm->getJLThreadPoolSize()); JobStep::jobstepThreadPool.setName("ExeMgrJobList"); -// if (rm->getJlThreadPoolDebug() == "Y" || rm->getJlThreadPoolDebug() == "y") -// { -// JobStep::jobstepThreadPool.setDebug(true); -// JobStep::jobstepThreadPool.invoke(ThreadPoolMonitor(&JobStep::jobstepThreadPool)); -// } + if (rm->getJlThreadPoolDebug() == "Y" || rm->getJlThreadPoolDebug() == "y") + { + JobStep::jobstepThreadPool.setDebug(true); + JobStep::jobstepThreadPool.invoke(ThreadPoolMonitor(&JobStep::jobstepThreadPool)); + } + int serverThreads = rm->getEmServerThreads(); int maxPct = rm->getEmMaxPct(); int pauseSeconds = rm->getEmSecondsBetweenMemChecks(); int priority = rm->getEmPriority(); + FEMsgHandler::threadPool.setMaxThreads(serverThreads); FEMsgHandler::threadPool.setName("FEMsgHandler"); if (maxPct > 0) @@ -1468,7 +1471,8 @@ int main(int argc, char* argv[]) } } - cout << "Starting ExeMgr: qs = " << rm->getEmExecQueueSize() << ", mx = " << maxPct << ", cf = " << + cout << "Starting ExeMgr: st = " << serverThreads << + ", qs = " << rm->getEmExecQueueSize() << ", mx = " << maxPct << ", cf = " << rm->getConfig()->configFile() << endl; //set ACTIVE state @@ -1483,10 +1487,16 @@ int main(int argc, char* argv[]) } } - threadpool::ThreadPool exeMgrThreadPool; + threadpool::ThreadPool exeMgrThreadPool(serverThreads, 0); exeMgrThreadPool.setName("ExeMgrServer"); - for (;;) + if (rm->getExeMgrThreadPoolDebug() == "Y" || rm->getExeMgrThreadPoolDebug() == "y") + { + exeMgrThreadPool.setDebug(true); + exeMgrThreadPool.invoke(ThreadPoolMonitor(&exeMgrThreadPool)); + } + + for (;;) { IOSocket ios; ios = mqs->accept(); diff --git 
a/oam/install_scripts/binary_installer.sh b/oam/install_scripts/binary_installer.sh index 9baa26e4a..59a35dd8b 100644 --- a/oam/install_scripts/binary_installer.sh +++ b/oam/install_scripts/binary_installer.sh @@ -215,7 +215,7 @@ send_user "\n" send_user "Start ColumnStore service " send_user " \n" send "ssh -v $USERNAME@$SERVER '$INSTALLDIR/bin/columnstore restart'\n" -set timeout 60 +set timeout 120 # check return expect { "word: " { send "$PASSWORD\n" diff --git a/oam/install_scripts/disable-rep-columnstore.sh b/oam/install_scripts/disable-rep-columnstore.sh index 0a8c9b17f..1c730da4c 100644 --- a/oam/install_scripts/disable-rep-columnstore.sh +++ b/oam/install_scripts/disable-rep-columnstore.sh @@ -56,7 +56,7 @@ checkForError # # Run reset slave command # -echo "Run start slave command" >>/tmp/disable-rep-status.log +echo "Run reset slave command" >>/tmp/disable-rep-status.log cat >/tmp/idb_disable-rep.sql < /tmp/error.check + if [ `cat /tmp/error.check | wc -c` -ne 0 ]; then + echo "ERROR - PASSWORD: check log file: /tmp/mariadb-command-line.log" + rm -f /tmp/error.check + exit 2 + fi + grep ERROR /tmp/mariadb-command-line.log > /tmp/error.check if [ `cat /tmp/error.check | wc -c` -ne 0 ]; then echo "ERROR: check log file: /tmp/mariadb-command-line.log" diff --git a/oam/install_scripts/post-install b/oam/install_scripts/post-install index bd3d3fb55..fc60e32e7 100755 --- a/oam/install_scripts/post-install +++ b/oam/install_scripts/post-install @@ -190,12 +190,13 @@ if [ $user = "root" ]; then else $SUDO chmod 777 $RCFILE printf '%s\n' '#!/bin/bash' "#" | $SUDO tee -a $RCFILEl > /dev/null 2>&1 + + if [ -n "$systemctl" ]; then + $SUDO systemctl start rc-local >/dev/null 2>&1 + $SUDO systemctl enable rc-local >/dev/null 2>&1 + fi fi -if [ -n "$systemctl" ]; then - $SUDO systemctl restart rc-local >/dev/null 2>&1 - $SUDO systemctl enable rc-local >/dev/null 2>&1 -fi #setup the columnstore service script rm -f /etc/init.d/columnstore >/dev/null 2>&1 diff --git 
a/oam/install_scripts/remote_command.sh b/oam/install_scripts/remote_command.sh index 9d0f7c2ed..75095632e 100755 --- a/oam/install_scripts/remote_command.sh +++ b/oam/install_scripts/remote_command.sh @@ -10,7 +10,7 @@ # Argument 5 - Remote user name (optional) # Argument 6 - Force a tty to be allocated (optional) set stty_init {cols 512 -opost}; -set timeout 10 +set timeout 30 set SERVER [lindex $argv 0] set PASSWORD [lindex $argv 1] set COMMAND [lindex $argv 2] diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 7d8a6c153..f7a6e2a38 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -1878,7 +1878,7 @@ int processCommand(string* arguments) } string DataRedundancyConfig; - string DataRedundancyCopies; + int DataRedundancyCopies; string DataRedundancyStorageType; try { oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); @@ -3565,7 +3565,7 @@ int processCommand(string* arguments) } string DataRedundancyConfig; - string DataRedundancyCopies; + int DataRedundancyCopies; string DataRedundancyStorageType; try { oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); @@ -5552,7 +5552,7 @@ int processCommand(string* arguments) } } string command = startup::StartUp::installDir() + "/bin/remote_command.sh " + (*hostConfigIter).IPAddr + " " + password + " 'mkdir -p " + startup::StartUp::installDir() + "/gluster/brick" + oam.itoa(brickID) + "'"; - int status = system(command.c_str()); + system(command.c_str()); brickID++; } } @@ -5853,7 +5853,7 @@ int processCommand(string* arguments) } } - if ( DataRedundancyConfig == "y" && devicenetworklist.size() != DataRedundancyCopies) { + if ( DataRedundancyConfig == "y" && devicenetworklist.size() != (size_t)DataRedundancyCopies) { cout << endl << "**** removeModule Failed : Data Redundancy requires you to remove modules in groups equal to number of copies" << endl; quit = true; } @@ -6828,7 +6828,6 @@ int processCommand(string* arguments) { string 
DataRedundancyConfig = "n"; - int DataRedundancyCopies; try { oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } diff --git a/oamapps/postConfigure/installer.cpp b/oamapps/postConfigure/installer.cpp index bc91217f0..e85fb0989 100644 --- a/oamapps/postConfigure/installer.cpp +++ b/oamapps/postConfigure/installer.cpp @@ -250,7 +250,7 @@ int main(int argc, char *argv[]) catch (...) {} //get memory stats - long long total = myinfo.totalram / 1024 / 1000; +// long long total = myinfo.totalram / 1024 / 1000; // adjust max memory, 25% of total memory string percent = "25%"; @@ -812,7 +812,7 @@ int main(int argc, char *argv[]) cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; - cout << "NOTE: The MariaDB ColumnStore Alias Commands are in /etc/profile.d/columnstoreAlias" << endl << endl; + cout << "NOTE: The MariaDB ColumnStore Alias Commands are in /etc/profile.d/columnstoreAlias.sh" << endl << endl; } else { diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 9f7505f6b..a12784039 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -166,7 +166,7 @@ bool thread_remote_installer = true; string singleServerInstall = "1"; string reuseConfig ="n"; -string oldFileName; +string oldFileName = oam::UnassignedName; string glusterCopies; string glusterInstalled = "n"; string hadoopInstalled = "n"; @@ -370,7 +370,8 @@ int main(int argc, char *argv[]) exit(1); } - oldFileName = installDir + "/etc/Columnstore.xml.rpmsave"; + if ( oldFileName == oam::UnassignedName ) + oldFileName = installDir + "/etc/Columnstore.xml.rpmsave"; cout << endl; cout << "This is the MariaDB ColumnStore System Configuration and Installation tool." 
<< endl; @@ -3336,7 +3337,7 @@ int main(int argc, char *argv[]) cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; - cout << "NOTE: The MariaDB ColumnStore Alias Commands are in /etc/profile.d/columnstoreAlias" << endl << endl; + cout << "NOTE: The MariaDB ColumnStore Alias Commands are in /etc/profile.d/columnstoreAlias.sh" << endl << endl; } else { diff --git a/oamapps/serverMonitor/CMakeLists.txt b/oamapps/serverMonitor/CMakeLists.txt index d318fd78d..91d13bbd0 100644 --- a/oamapps/serverMonitor/CMakeLists.txt +++ b/oamapps/serverMonitor/CMakeLists.txt @@ -13,7 +13,8 @@ set(ServerMonitor_SRCS procmonMonitor.cpp msgProcessor.cpp dbhealthMonitor.cpp - UMAutoSync.cpp) + UMAutoSync.cpp + ../../utils/common/crashtrace.cpp) add_executable(ServerMonitor ${ServerMonitor_SRCS}) diff --git a/oamapps/serverMonitor/diskMonitor.cpp b/oamapps/serverMonitor/diskMonitor.cpp index 2edcaa2f6..bef1df560 100644 --- a/oamapps/serverMonitor/diskMonitor.cpp +++ b/oamapps/serverMonitor/diskMonitor.cpp @@ -23,6 +23,7 @@ ***************************************************************************/ #include "serverMonitor.h" +#include "installdir.h" using namespace std; using namespace oam; @@ -223,7 +224,7 @@ void diskMonitor() string fileName; // check local if ( deviceName == "/") { - fileName = deviceName + "usr/local/mariadb/columnstore/"; + fileName = deviceName + startup::StartUp::installDir(); } else { diff --git a/oamapps/serverMonitor/main.cpp b/oamapps/serverMonitor/main.cpp index 10903d6a4..56b8e3825 100644 --- a/oamapps/serverMonitor/main.cpp +++ b/oamapps/serverMonitor/main.cpp @@ -18,6 +18,8 @@ #include "IDBPolicy.h" #include "serverMonitor.h" +#include "crashtrace.h" + using namespace std; using namespace servermonitor; using namespace oam; @@ -38,6 +40,14 @@ int main (int argc, char** argv) ServerMonitor serverMonitor; Oam oam; + struct sigaction ign; 
+ + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + //Launch Memory Monitor Thread and check if swap is in critical condition pthread_t memoryMonitorThread; pthread_create (&memoryMonitorThread, NULL, (void*(*)(void*)) &memoryMonitor, NULL); diff --git a/primitives/primproc/CMakeLists.txt b/primitives/primproc/CMakeLists.txt index e13071f12..477792b0d 100644 --- a/primitives/primproc/CMakeLists.txt +++ b/primitives/primproc/CMakeLists.txt @@ -18,7 +18,8 @@ set(PrimProc_SRCS primitiveserver.cpp pseudocc.cpp rtscommand.cpp - umsocketselector.cpp) + umsocketselector.cpp + ../../utils/common/crashtrace.cpp) #PrimProc_CXXFLAGS = $(march_flags) $(AM_CXXFLAGS) diff --git a/primitives/primproc/primproc.cpp b/primitives/primproc/primproc.cpp index 33a987a0c..b86e2f74d 100644 --- a/primitives/primproc/primproc.cpp +++ b/primitives/primproc/primproc.cpp @@ -71,6 +71,8 @@ using namespace idbdatafile; #include "cgroupconfigurator.h" +#include "crashtrace.h" + namespace primitiveprocessor { @@ -126,6 +128,12 @@ void setupSignalHandlers() ign.sa_handler = SIG_IGN; sigaction(SIGUSR2, &ign, 0); + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + sigset_t sigset; sigemptyset(&sigset); sigaddset(&sigset, SIGPIPE); @@ -288,6 +296,9 @@ int main(int argc, char* argv[]) systemLang.find("UTF") != string::npos ) utf8 = true; + // This is unset due to the way we start it + program_invocation_short_name = const_cast<char*>("PrimProc"); + Config* cf = Config::makeConfig(); setupSignalHandlers(); diff --git a/procmgr/CMakeLists.txt b/procmgr/CMakeLists.txt index 1e7d1f33e..642890e13 100644 --- a/procmgr/CMakeLists.txt +++ b/procmgr/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) ########### next target ############### -set(ProcMgr_SRCS main.cpp 
processmanager.cpp) +set(ProcMgr_SRCS main.cpp processmanager.cpp ../utils/common/crashtrace.cpp) add_executable(ProcMgr ${ProcMgr_SRCS}) diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 860775780..81e1593af 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -33,6 +33,8 @@ #include "utils_utf8.h" +#include "crashtrace.h" + using namespace std; using namespace logging; using namespace messageqcpp; @@ -99,6 +101,16 @@ int main(int argc, char **argv) setlocale(LC_ALL, systemLang.c_str()); + // This is unset due to the way we start it + program_invocation_short_name = const_cast<char*>("ProcMgr"); + + struct sigaction ign; + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + Oam oam; //check if root-user @@ -1645,7 +1657,7 @@ void pingDeviceThread() DeviceNetworkConfig devicenetworkconfig; devicenetworkconfig.DeviceName = moduleName; devicenetworklist.push_back(devicenetworkconfig); - processManager.setMySQLReplication(devicenetworklist); + processManager.setMySQLReplication(devicenetworklist, oam::UnassignedName, false, true); } } else @@ -1816,9 +1828,18 @@ void pingDeviceThread() break; // if disabled and not amazon, skip - if (opState == oam::AUTO_DISABLED && !amazon) + if ( (opState == oam::AUTO_DISABLED) && !amazon) break; + // if disabled, amazon,and NOT terminated skip + if ( (opState == oam::AUTO_DISABLED) && amazon) + { + // return values = 'ip address' for running or rebooting, stopped or terminated + string currentIPAddr = oam.getEC2InstanceIpAddress(hostName); + if ( currentIPAddr != "terminated") + break; + } + log.writeLog(__LINE__, "module failed to respond to pings: " + moduleName, LOG_TYPE_WARNING); //bump module ping failure counter @@ -1833,71 +1854,73 @@ void pingDeviceThread() if (LANOUTAGEACTIVE) break; - //Log failure, issue alarm, set moduleOpState - Configuration config; - log.writeLog(__LINE__, "module is down: " + moduleName, 
LOG_TYPE_CRITICAL); + // if not disabled and amazon, skip + if (opState != oam::AUTO_DISABLED ) + { + //Log failure, issue alarm, set moduleOpState + Configuration config; + log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL); + + //set query system state not ready + BRM::DBRM dbrm; + dbrm.setSystemQueryReady(false); + + processManager.setQuerySystemState(false); + + processManager.setSystemState(oam::BUSY_INIT); + + processManager.reinitProcessType("cpimport"); + + // halt the dbrm + oam.dbrmctl("halt"); + log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); + + processManager.setSystemState(oam::BUSY_INIT); + + //string cmd = "/etc/init.d/glusterd restart > /dev/null 2>&1"; + //system(cmd.c_str()); + + //send notification + oam.sendDeviceNotification(moduleName, MODULE_DOWN); + + //Issue an alarm + aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); - //set query system state not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); + //mark all processes running on module auto-offline + processManager.setProcessStates(moduleName, oam::AUTO_OFFLINE); + + //set module to disable state + processManager.disableModule(moduleName, false); - processManager.setQuerySystemState(false); + //call dbrm control + oam.dbrmctl("reload"); + log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - processManager.setSystemState(oam::BUSY_INIT); - - processManager.reinitProcessType("cpimport"); - - // halt the dbrm - oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); - - processManager.setSystemState(oam::BUSY_INIT); - - //string cmd = "/etc/init.d/glusterd restart > /dev/null 2>&1"; - //system(cmd.c_str()); - - //send notification - oam.sendDeviceNotification(moduleName, MODULE_DOWN); - - //Issue an alarm - aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); - - //mark all processes running on module auto-offline - processManager.setProcessStates(moduleName, 
oam::AUTO_OFFLINE); - - //set module to disable state - processManager.disableModule(moduleName, false); - - //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // if pm, move dbroots to other pms - if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || - ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || - ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) { - try { - log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); - oam.autoMovePmDbroot(moduleName); - log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG); - //distribute config file - processManager.distributeConfigFile("system"); - } - catch (exception& ex) - { - string error = ex.what(); - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG); - } - catch(...) - { - log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); + // if pm, move dbroots to other pms + if ( ( moduleName.find("pm") == 0 && !amazon && ( DBRootStorageType != "internal") ) || + ( moduleName.find("pm") == 0 && amazon && downActiveOAMModule ) || + ( moduleName.find("pm") == 0 && amazon && AmazonPMFailover == "y") ) { + try { + log.writeLog(__LINE__, "Call autoMovePmDbroot", LOG_TYPE_DEBUG); + oam.autoMovePmDbroot(moduleName); + log.writeLog(__LINE__, "autoMovePmDbroot success", LOG_TYPE_DEBUG); + //distribute config file + processManager.distributeConfigFile("system"); + } + catch (exception& ex) + { + string error = ex.what(); + log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: " + error, LOG_TYPE_DEBUG); + } + catch(...) 
+ { + log.writeLog(__LINE__, "EXCEPTION ERROR on autoMovePmDbroot: Caught unknown exception!", LOG_TYPE_ERROR); + } } } - + // if Cloud Instance - // state = running, then instance is rebooting, monitor for recovery - // state = stopped, then try starting, if fail, remove/addmodule to launch new instance - // state = terminate or nothing, remove/addmodule to launch new instance + // state = terminate, remove/addmodule to launch new instance if ( amazon ) { if ( moduleName.find("um") == 0 ) { @@ -2104,7 +2127,9 @@ void pingDeviceThread() } } - if ( moduleName.find("pm") == 0 ) + if ( ( moduleName.find("pm") == 0 ) && + ( opState != oam::AUTO_DISABLED ) ) + { // resume the dbrm oam.dbrmctl("resume"); @@ -2165,6 +2190,10 @@ void pingDeviceThread() } } } + + // if disabled and amazon, break out + if ( (opState == oam::AUTO_DISABLED ) && amazon ) + break; //start SIMPLEX runtype processes on a SIMPLEX runtype module string moduletype = moduleName.substr(0,MAX_MODULE_TYPE_SIZE); diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 118591c09..5a2739358 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -9455,7 +9455,7 @@ int ProcessManager::OAMParentModuleChange() //restart/reinit processes to force their release of the controller node port if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) && - ( moduleNameList.size() <= 1 && config.moduleType() == "pm") ) + ( moduleNameList.size() <= 0 && config.moduleType() == "pm") ) { status = 0; } @@ -9710,13 +9710,17 @@ std::string ProcessManager::getStandbyModule() //already have a hot-standby return ""; - if ( backupStandbyModule != "NONE" ) + if ( ( backupStandbyModule != "NONE" ) || + ( newStandbyModule != "NONE" ) ) continue; if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && systemprocessstatus.processstatus[i].ProcessOpState == oam::COLD_STANDBY ) + { // Found a ProcessManager in a COLD_STANDBY state newStandbyModule = 
systemprocessstatus.processstatus[i].Module; + continue; + } if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && systemprocessstatus.processstatus[i].ProcessOpState == oam::MAN_OFFLINE && @@ -10346,6 +10350,18 @@ int ProcessManager::setMySQLReplication(oam::DeviceNetworkList devicenetworklist if ( remoteModuleName == masterModule ) continue; + // skip disabled modules + int opState = oam::ACTIVE; + bool degraded; + try { + oam.getModuleStatus(remoteModuleName, opState, degraded); + } + catch(...) + {} + + if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) + continue; + // don't do PMs unless PMwithUM flag is set if ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) { string moduleType = remoteModuleName.substr(0,MAX_MODULE_TYPE_SIZE); @@ -10423,6 +10439,18 @@ int ProcessManager::setMySQLReplication(oam::DeviceNetworkList devicenetworklist if ( remoteModuleName == masterModule ) continue; + // skip disabled modules + int opState = oam::ACTIVE; + bool degraded; + try { + oam.getModuleStatus(remoteModuleName, opState, degraded); + } + catch(...) + {} + + if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) + continue; + // don't do PMs unless PMwithUM flag is set if ( config.ServerInstallType() != oam::INSTALL_COMBINE_DM_UM_PM ) { string moduleType = remoteModuleName.substr(0,MAX_MODULE_TYPE_SIZE); @@ -10471,7 +10499,19 @@ int ProcessManager::setMySQLReplication(oam::DeviceNetworkList devicenetworklist if ( remoteModuleName == masterModule ) continue; - ByteStream msg1; + // skip disabled modules + int opState = oam::ACTIVE; + bool degraded; + try { + oam.getModuleStatus(remoteModuleName, opState, degraded); + } + catch(...) 
+ {} + + if (opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED) + continue; + + ByteStream msg1; ByteStream::byte requestID = oam::SLAVEREP; if ( !enable ) { requestID = oam::DISABLEREP; diff --git a/procmon/CMakeLists.txt b/procmon/CMakeLists.txt index 151b9511d..8bfff3c5a 100644 --- a/procmon/CMakeLists.txt +++ b/procmon/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) ########### next target ############### -set(ProcMon_SRCS main.cpp processmonitor.cpp) +set(ProcMon_SRCS main.cpp processmonitor.cpp ../utils/common/crashtrace.cpp) add_executable(ProcMon ${ProcMon_SRCS}) diff --git a/procmon/main.cpp b/procmon/main.cpp index cc6835f3a..2f9cbce18 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -26,6 +26,8 @@ namespace bi=boost::interprocess; #include "IDBPolicy.h" +#include "crashtrace.h" + using namespace std; using namespace messageqcpp; using namespace processmonitor; @@ -55,10 +57,12 @@ void updateShareMemory(processStatusList* aPtr); bool runStandby = false; bool processInitComplete = false; bool rootUser = true; +bool mainResumeFlag; string USER = "root"; string PMwithUM = "n"; bool startProcMon = false; + //extern std::string gOAMParentModuleName; extern bool gOAMParentModuleFlag; @@ -76,6 +80,14 @@ int main(int argc, char **argv) setuid(0); // set effective ID to root; ignore return status #endif + struct sigaction ign; + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + if (argc > 1 && string(argv[1]) == "--daemon") { if (fork() != 0) return 0; @@ -165,7 +177,7 @@ int main(int argc, char **argv) //re-read local system info with updated Columnstore.xml sleep(1); - Config* sysConfig = Config::makeConfig(); +// Config* sysConfig = Config::makeConfig(); MonitorConfig config; //PMwithUM config @@ -499,13 +511,24 @@ int main(int argc, char **argv) unlink ("/var/log/mariadb/columnstore/activeAlarms"); } + 
//Clear mainResumeFlag + + mainResumeFlag = false; + //launch Status table control thread on 'pm' modules pthread_t statusThread; int ret = pthread_create (&statusThread, NULL, (void*(*)(void*)) &statusControlThread, NULL); if ( ret != 0 ) log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - sleep(6); // give the Status thread time to fully initialize + //wait for flag to be set + + while(!mainResumeFlag) + { + log.writeLog(__LINE__, "WATING FOR mainResumeFlag to be set", LOG_TYPE_DEBUG); + + sleep(1); + } } SystemStatus systemstatus; @@ -786,6 +809,8 @@ int main(int argc, char **argv) } } + log.writeLog(__LINE__, "SYSTEM STATUS = " + oam.itoa(systemstatus.SystemOpState), LOG_TYPE_DEBUG); + if ( systemstatus.SystemOpState != MAN_OFFLINE && !DISABLED) { // Loop through the process list to check the process current state @@ -2099,6 +2124,10 @@ static void statusControlThread() log.writeLog(__LINE__, "Dbroot Status shared Memory allociated and Initialized", LOG_TYPE_DEBUG); } + //Set mainResumeFlag, to start up main thread + + mainResumeFlag = true; + string portName = "ProcStatusControl"; if (runStandby) { portName = "ProcStatusControlStandby"; diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 148546a72..8f51cccf1 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -4950,6 +4950,14 @@ int ProcessMonitor::runMasterRep(std::string& masterLogFile, std::string& master { string moduleName = (*pt).DeviceName; + //skip if module is not ACTIVE + + int opState = oam::ACTIVE; + bool degraded; + oam.getModuleStatus(moduleName, opState, degraded); + if (opState != oam::ACTIVE) + continue; + bool passwordError = false; string moduleType = systemModuleTypeConfig.moduletypeconfig[i].ModuleType; diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh index 78fa3f889..b512eeb20 100755 --- 
a/utils/clusterTester/columnstoreClusterTester.sh +++ b/utils/clusterTester/columnstoreClusterTester.sh @@ -43,7 +43,7 @@ checkContinue() { helpPrint () { ################################################################################ echo "" - echo "This is the MariaDB ColumnStore Cluster System Test tool." + echo "This is the MariaDB ColumnStore Cluster System Test Tool." echo "" echo "It will run a set of test to validate the setup of the MariaDB Columnstore system." echo "This can be run prior to the install of MariaDB ColumnStore to make sure the" @@ -711,6 +711,49 @@ checkTime() fi } +checkMysqlPassword() +{ + # Locale check + # + echo "" + echo "** Run MariaDB Console Password check" + echo "" + + #get MariaDB password + pass=true + `$COLUMNSTORE_INSTALL_DIR/mysql/mysql-Columnstore start > /dev/null 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/mariadb-command-line.sh > /dev/null 2>&1` + if [ "$?" -eq 2 ]; then + echo "${bold}Failed${normal}, Local Node MariaDB login failed with missing password file, /root/.my.cnf" + fi + + if [ "$IPADDRESSES" != "" ]; then + `/bin/cp -f $COLUMNSTORE_INSTALL_DIR/bin/mariadb-command-line.sh /tmp/.` + + for ipadd in "${NODE_IPADDRESS[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD $COLUMNSTORE_INSTALL_DIR/mysql/mysql-Columnstore start > /dev/null 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_put.sh $ipadd $PASSWORD /tmp/mariadb-command-line.sh 1 > /tmp/remote_scp_put_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "Error running remote_scp_put.sh to $ipadd Node, check /tmp/remote_scp_put_check" + exit 1 + else + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD /tmp/mariadb-command-line.sh 1 > /tmp/remote_command_check` + `cat /tmp/remote_command_check | grep "ERROR - PASSWORD" > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node MariaDB login failed with missing password file, /root/.my.cnf" + pass=false + fi + fi + done + fi + + if ! 
$pass; then + checkContinue + else + echo "Passed, no problems detected with a MariaDB password being set without an associated /root/.my.cnf" + fi +} + checkPackages() { # @@ -722,6 +765,7 @@ checkPackages() echo "" declare -a CENTOS_PKG=("expect" "perl" "perl-DBI" "openssl" "zlib" "file" "sudo" "libaio" "rsync" "snappy" "net-tools" "perl-DBD-MySQL") + declare -a CENTOS_PKG_NOT=("mariadb-libs") if [ "$OS" == "centos6" ] || [ "$OS" == "centos7" ]; then if [ ! `which yum 2>/dev/null` ] ; then @@ -757,6 +801,24 @@ checkPackages() checkContinue fi + #check for package that shouldnt be installed + pass=true + for PKG in "${CENTOS_PKG_NOT[@]}"; do + `yum list installed "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep Installed > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if [ $pass == true ] ; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi + echo "" pass=true if [ "$IPADDRESSES" != "" ]; then @@ -792,11 +854,37 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${CENTOS_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "yum list installed '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + rc="$?" 
+ if [ $rc -eq 2 ] ; then + echo "${bold}Failed${normal}, $ipadd Node, 'yum' not installed" + pass=false + REPORTPASS=false + break + elif [ $rc -ne 1 ] ; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi declare -a SUSE_PKG=("boost-devel" "expect" "perl" "perl-DBI" "openssl" "file" "sudo" "libaio1" "rsync" "libsnappy1" "net-tools" "perl-DBD-mysql") + declare -a SUSE_PKG_NOT=("mariadb" , "libmariadb18") if [ "$OS" == "suse12" ]; then if [ ! `which rpm 2>/dev/null` ] ; then @@ -821,6 +909,24 @@ checkPackages() else checkContinue fi + + #check for package that shouldnt be installed + pass=true + for PKG in "${SUSE_PKG_NOT[@]}"; do + `rpm -qi "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep "not installed" > /dev/null 2>&1` + if [ "$?" -ne 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi fi echo "" @@ -844,11 +950,32 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${SUSE_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "rpm -qi '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + rc="$?" 
+ if [ $rc -eq 0 ] ; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi declare -a UBUNTU_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline-dev" "rsync" "libsnappy1V5" "net-tools" "libdbd-mysql-perl") + declare -a UBUNTU_PKG_NOT=("mariadb-server" "libmariadb18") if [ "$OS" == "ubuntu16" ] ; then if [ ! `which dpkg 2>/dev/null` ] ; then @@ -873,6 +1000,24 @@ checkPackages() else checkContinue fi + + #check for package that shouldnt be installed + pass=true + for PKG in "${UBUNTU_PKG_NOT[@]}"; do + `dpkg -s "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi fi echo "" @@ -909,11 +1054,45 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${UBUNTU_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /tmp/pkg_check > /tmp/remote_scp_get_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "Error running remote_scp_get.sh to $ipadd Node, check /tmp/remote_scp_get_check" + else + `cat /tmp/remote_command_check | grep 'command not found' > /dev/null 2>&1` + if [ "$?" 
-eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" + pass=false + break + else + `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + fi + + `rm -f pkg_check` + fi + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi declare -a DEBIAN_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline-dev" "rsync" "libsnappy1" "net-tools" "libdbd-mysql-perl") + declare -a DEBIAN_PKG_NOT=("libmariadb18" "mariadb-server") if [ "$OS" == "debian8" ]; then if [ ! `which dpkg 2>/dev/null` ] ; then @@ -938,6 +1117,24 @@ checkPackages() else checkContinue fi + + #check for package that shouldnt be installed + pass=true + for PKG in "${DEBIAN_PKG_NOT[@]}"; do + `dpkg -s "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi fi echo "" @@ -974,11 +1171,45 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${DEBIAN_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /tmp/pkg_check > /tmp/remote_scp_get_check 2>&1` + if [ "$?" 
-ne 0 ]; then + echo "Error running remote_scp_get.sh to $ipadd Node, check /tmp/remote_scp_get_check" + else + `cat /tmp/remote_command_check | grep 'command not found' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" + pass=false + break + else + `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + fi + + `rm -f pkg_check` + fi + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi declare -a DEBIAN9_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline5" "rsync" "libsnappy1V5" "net-tools" "libaio1") + declare -a DEBIAN9_PKG_NOT=("libmariadb18" "mariadb-server") if [ "$OS" == "debian9" ]; then if [ ! `which dpkg 2>/dev/null` ] ; then @@ -1003,6 +1234,25 @@ checkPackages() else checkContinue fi + + #check for package that shouldnt be installed + pass=true + for PKG in "${DEBIAN9_PKG_NOT[@]}"; do + `dpkg -s "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" 
-eq 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi + fi echo "" @@ -1039,6 +1289,39 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${DEBIAN9_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /tmp/pkg_check > /tmp/remote_scp_get_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "Error running remote_scp_get.sh to $ipadd Node, check /tmp/remote_scp_get_check" + else + `cat /tmp/remote_command_check | grep 'command not found' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" + pass=false + break + else + `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" 
-eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + fi + + `rm -f pkg_check` + fi + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi @@ -1047,7 +1330,7 @@ checkPackages() } echo "" -echo "*** This is the MariaDB Columnstore Cluster System test tool ***" +echo "*** This is the MariaDB Columnstore Cluster System Test Tool ***" echo "" checkLocalOS @@ -1063,11 +1346,13 @@ if [ "$IPADDRESSES" != "" ]; then checkPorts checkTime fi + +checkMysqlPassword checkPackages if [ $REPORTPASS == true ] ; then echo "" - echo "*** Finished Validation of the Cluster, all Test Passed ***" + echo "*** Finished Validation of the Cluster, all Tests Passed ***" echo "" exit 0 else diff --git a/utils/common/crashtrace.cpp b/utils/common/crashtrace.cpp new file mode 100644 index 000000000..41626361e --- /dev/null +++ b/utils/common/crashtrace.cpp @@ -0,0 +1,51 @@ +/* Copyright (C) 2018 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +void fatalHandler(int sig) +{ + char filename[128]; + void* addrs[128]; + snprintf(filename, 128, "/var/log/mariadb/columnstore/trace/%s.%d.log", program_invocation_short_name, getpid()); + FILE* logfile = fopen(filename, "w"); + char s[30]; + struct tm tim; + time_t now; + now = time(NULL); + tim = *(localtime(&now)); + strftime(s,30,"%F %T",&tim); + fprintf(logfile, "Date/time: %s\n", s); + fprintf(logfile, "Signal: %d\n\n", sig); + fflush(logfile); + int fd = fileno(logfile); + int count = backtrace(addrs, sizeof(addrs) / sizeof(addrs[0])); + backtrace_symbols_fd(addrs, count, fd); + fclose(logfile); + struct sigaction sigact; + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = SIG_DFL; + sigaction(sig, &sigact, NULL); + raise(sig); +} diff --git a/utils/common/crashtrace.h b/utils/common/crashtrace.h new file mode 100644 index 000000000..3b9cb4036 --- /dev/null +++ b/utils/common/crashtrace.h @@ -0,0 +1,18 @@ +/* Copyright (C) 2018 MariaDB Corporaton + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/ + +void fatalHandler(int sig); diff --git a/utils/threadpool/threadpool.cpp b/utils/threadpool/threadpool.cpp index 8b0c0a848..197bdedd9 100644 --- a/utils/threadpool/threadpool.cpp +++ b/utils/threadpool/threadpool.cpp @@ -470,7 +470,7 @@ void ThreadPoolMonitor::operator()() << setw(4) << tv.tv_usec/100 << " Name " << fPool->fName << " Active " << fPool->waitingFunctorsSize - << " Most " << fPool->fThreadCount + << " ThdCnt " << fPool->fThreadCount << " Max " << fPool->fMaxThreads << " Q " << fPool->fQueueSize << endl; diff --git a/utils/winport/win_setup_mysql_part2.sql b/utils/winport/win_setup_mysql_part2.sql index 894e1583d..0a2a972fb 100644 --- a/utils/winport/win_setup_mysql_part2.sql +++ b/utils/winport/win_setup_mysql_part2.sql @@ -9,6 +9,9 @@ CREATE FUNCTION calonlinealter RETURNS INTEGER SONAME 'libcalmysql.dll'; CREATE FUNCTION calviewtablelock RETURNS STRING SONAME 'libcalmysql.dll'; CREATE FUNCTION calcleartablelock RETURNS STRING SONAME 'libcalmysql.dll'; CREATE FUNCTION calgetsqlcount RETURNS STRING SONAME 'libcalmysql.dll'; +CREATE FUNCTION mcssystemready RETURNS INTEGER SONAME 'libcalmysql.dll'; +CREATE FUNCTION mcssystemreadonly RETURNS INTEGER SONAME 'libcalmysql.dll'; +CREATE FUNCTION mcswritessuspended RETURNS INTEGER SONAME 'libcalmysql.dll'; create database if not exists calpontsys; use calpontsys; diff --git a/versioning/BRM/CMakeLists.txt b/versioning/BRM/CMakeLists.txt index 2ff7e9bfc..8182ffc33 100644 --- a/versioning/BRM/CMakeLists.txt +++ b/versioning/BRM/CMakeLists.txt @@ -39,7 +39,7 @@ install(TARGETS brm DESTINATION ${ENGINE_LIBDIR} COMPONENT libs) ########### next target ############### -set(controllernode_SRCS masternode.cpp masterdbrmnode.cpp) +set(controllernode_SRCS masternode.cpp masterdbrmnode.cpp ../../utils/common/crashtrace.cpp) add_executable(controllernode ${controllernode_SRCS}) @@ -50,7 +50,7 @@ install(TARGETS controllernode DESTINATION ${ENGINE_BINDIR} COMPONENT platform) ########### next target ############### 
-set(workernode_SRCS slavenode.cpp) +set(workernode_SRCS slavenode.cpp ../../utils/common/crashtrace.cpp) add_executable(workernode ${workernode_SRCS}) diff --git a/versioning/BRM/masternode.cpp b/versioning/BRM/masternode.cpp index 3932f9a37..4bdf42553 100644 --- a/versioning/BRM/masternode.cpp +++ b/versioning/BRM/masternode.cpp @@ -35,6 +35,8 @@ #include "brmtypes.h" #include "utils_utf8.h" +#include "crashtrace.h" + #define MAX_RETRIES 10 BRM::MasterDBRMNode *m; @@ -128,6 +130,13 @@ int main(int argc, char **argv) signal(SIGUSR1, restart); signal(SIGPIPE, SIG_IGN); #endif + struct sigaction ign; + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); idbdatafile::IDBPolicy::configIDBPolicy(); diff --git a/versioning/BRM/slavenode.cpp b/versioning/BRM/slavenode.cpp index 2d36bc3a4..5ec20f69f 100644 --- a/versioning/BRM/slavenode.cpp +++ b/versioning/BRM/slavenode.cpp @@ -37,6 +37,8 @@ #include "utils_utf8.h" #include "IDBPolicy.h" +#include "crashtrace.h" + using namespace BRM; using namespace std; @@ -117,6 +119,13 @@ int main(int argc, char **argv) signal(SIGPIPE, SIG_IGN); #endif + struct sigaction ign; + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + if (!(argc >= 3 && (arg = argv[2]) == "fg")) err = fork(); diff --git a/writeengine/server/CMakeLists.txt b/writeengine/server/CMakeLists.txt index 20a3a4f16..cb11ff375 100644 --- a/writeengine/server/CMakeLists.txt +++ b/writeengine/server/CMakeLists.txt @@ -14,7 +14,8 @@ set(WriteEngineServer_SRCS we_dmlcommandproc.cpp we_cleartablelockcmd.cpp we_cpifeederthread.cpp - we_getfilesizes.cpp) + we_getfilesizes.cpp + ../../utils/common/crashtrace.cpp) add_executable(WriteEngineServer ${WriteEngineServer_SRCS}) diff --git a/writeengine/server/we_server.cpp b/writeengine/server/we_server.cpp index 
5b698fce9..9bdb6faa6 100644 --- a/writeengine/server/we_server.cpp +++ b/writeengine/server/we_server.cpp @@ -51,6 +51,8 @@ using namespace oam; #include "utils_utf8.h" #include "dbrm.h" +#include "crashtrace.h" + namespace { void added_a_pm(int) @@ -96,6 +98,9 @@ int main(int argc, char** argv) string systemLang = "C"; systemLang = funcexp::utf8::idb_setlocale(); + // This is unset due to the way we start it + program_invocation_short_name = const_cast("WriteEngineServ"); + printf ("Locale is : %s\n", systemLang.c_str() ); //set BUSY_INIT state @@ -119,6 +124,12 @@ int main(int argc, char** argv) sigaction(SIGHUP, &sa, 0); sa.sa_handler = SIG_IGN; sigaction(SIGPIPE, &sa, 0); + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = fatalHandler; + sigaction(SIGSEGV, &sa, 0); + sigaction(SIGABRT, &sa, 0); + sigaction(SIGFPE, &sa, 0); #endif // Init WriteEngine Wrapper (including Config Columnstore.xml cache) diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index e2d785021..50d846e7c 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1475,6 +1475,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, Column curCol; ColStruct curColStruct; ColStructList newColStructList; + std::vector colNewValueList; DctnryStructList newDctnryStructList; HWM hwm = 0; HWM oldHwm = 0; @@ -2058,6 +2059,19 @@ timer.stop("tokenize"); tableMetaData->setColExtsInfo(colStructList[i].dataOid, aColExtsInfo); } + //-------------------------------------------------------------------------- + //Prepare the valuelist for the new extent + //-------------------------------------------------------------------------- + + for (unsigned i=1; i <= totalColumns; i++) + { + // Copy values to second value list + for (uint64_t j=rowsLeft; j > 0; j--) + { + colNewValueList.push_back(colValueList[(totalRow*i)-j]); + } + } + // end of allocate row id #ifdef PROFILE @@ -2094,6 +2108,22 @@ timer.start("writeColumnRec"); } 
} } + // If we create a new extent for this batch + for (unsigned i = 0; i < newColStructList.size(); i++) + { + colOp = m_colOp[op(newColStructList[i].fCompressionType)]; + width = newColStructList[i].colWidth; + successFlag = colOp->calculateRowId(lastRidNew , BYTE_PER_BLOCK/width, width, curFbo, curBio); + if (successFlag) { + if (curFbo != lastFbo) { + RETURN_ON_ERROR(AddLBIDtoList(txnid, + lbids, + colDataTypes, + newColStructList[i], + curFbo)); + } + } + } } if (lbids.size() > 0) @@ -2104,7 +2134,7 @@ timer.start("writeColumnRec"); //---------------------------------------------------------------------- bool versioning = !(isAutoCommitOn && insertSelect); AddDictToList(txnid, dictLbids); - rc = writeColumnRecBinary(txnid, colStructList, colValueList, rowIdArray, newColStructList, tableOid, useTmpSuffix, versioning); // @bug 5572 HDFS tmp file + rc = writeColumnRecBinary(txnid, colStructList, colValueList, rowIdArray, newColStructList, colNewValueList, tableOid, useTmpSuffix, versioning); // @bug 5572 HDFS tmp file } return rc; } @@ -4568,6 +4598,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, std::vector& colValueList, RID* rowIdArray, const ColStructList& newColStructList, + std::vector& newColValueList, const int32_t tableOid, bool useTmpSuffix, bool versioning) @@ -4578,7 +4609,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, Column curCol; ColStructList::size_type totalColumn; ColStructList::size_type i; - size_t totalRow; + size_t totalRow1, totalRow2; setTransId(txnid); @@ -4586,11 +4617,21 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, #ifdef PROFILE StopWatch timer; #endif - totalRow = colValueList.size() / totalColumn; - valArray = malloc(sizeof(uint64_t) * totalRow); + totalRow1 = colValueList.size() / totalColumn; + if (newColValueList.size() > 0) + { + totalRow2 = newColValueList.size() / newColStructList.size(); + totalRow1 -= totalRow2; + } + else + { + totalRow2 = 0; + } - if 
(totalRow == 0) + // NOTE(review): this returns before the second-extent write further down, so rows are skipped if totalRow1 == 0 while totalRow2 > 0 (all rows in the new extent) - confirm callers never produce that split + if (totalRow1 == 0) return rc; + valArray = malloc(sizeof(uint64_t) * totalRow1); TableMetaData* aTbaleMetaData = TableMetaData::makeTableMetaData(tableOid); @@ -4638,7 +4679,7 @@ StopWatch timer; if (versioning) { rc = processVersionBuffer(curCol.dataFile.pFile, txnid, colStructList[i], - colStructList[i].colWidth, totalRow, firstPart, rangeList); + colStructList[i].colWidth, totalRow1, firstPart, rangeList); if (rc != NO_ERROR) { if (colStructList[i].fCompressionType == 0) { @@ -4656,9 +4697,9 @@ StopWatch timer; uint8_t tmp8; uint16_t tmp16; uint32_t tmp32; - for (size_t j = 0; j < totalRow; j++) + for (size_t j = 0; j < totalRow1; j++) { - uint64_t curValue = colValueList[(totalRow*i) + j]; + uint64_t curValue = colValueList[((totalRow1 + totalRow2)*i) + j]; switch (colStructList[i].colType) { case WriteEngine::WR_VARBINARY : // treat same as char for now @@ -4696,7 +4737,7 @@ StopWatch timer; #ifdef PROFILE timer.start("writeRow "); #endif - rc = colOp->writeRow(curCol, totalRow, firstPart, valArray); + rc = colOp->writeRow(curCol, totalRow1, firstPart, valArray); #ifdef PROFILE timer.stop("writeRow "); #endif @@ -4711,7 +4752,135 @@ timer.stop("writeRow "); } // end of for (i = 0 if (valArray != NULL) + { free(valArray); + valArray = NULL; + } + + // MCOL-1176 - Write second extent + if (totalRow2) + { + valArray = malloc(sizeof(uint64_t) * totalRow2); + for (i = 0; i < newColStructList.size(); i++) + { + //@Bug 2205 Check if all rows go to the new extent + //Write the second batch (row ids starting at rowIdArray + totalRow1) + RID * secondPart = rowIdArray + totalRow1; + ColumnOp* colOp = m_colOp[op(newColStructList[i].fCompressionType)]; + + // set params + colOp->initColumn(curCol); + // need to pass real dbRoot, partition, and segment to setColParam + colOp->setColParam(curCol, 0, newColStructList[i].colWidth, + newColStructList[i].colDataType, newColStructList[i].colType, newColStructList[i].dataOid, + newColStructList[i].fCompressionType, newColStructList[i].fColDbRoot, + 
newColStructList[i].fColPartition, newColStructList[i].fColSegment); + + ColExtsInfo aColExtsInfo = aTbaleMetaData->getColExtsInfo(newColStructList[i].dataOid); + ColExtsInfo::iterator it = aColExtsInfo.begin(); + while (it != aColExtsInfo.end()) + { + if ((it->dbRoot == newColStructList[i].fColDbRoot) && (it->partNum == newColStructList[i].fColPartition) && (it->segNum == newColStructList[i].fColSegment)) + break; + it++; + } + + if (it == aColExtsInfo.end()) //add this one to the list + { + ColExtInfo aExt; + aExt.dbRoot = newColStructList[i].fColDbRoot; + aExt.partNum = newColStructList[i].fColPartition; + aExt.segNum = newColStructList[i].fColSegment; + aExt.compType = newColStructList[i].fCompressionType; + aColExtsInfo.push_back(aExt); + aTbaleMetaData->setColExtsInfo(newColStructList[i].dataOid, aColExtsInfo); + } + + rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix, IO_BUFF_SIZE); // @bug 5572 HDFS tmp file + if (rc != NO_ERROR) + break; + + // handling versioning + vector rangeList; + if (versioning) + { + rc = processVersionBuffer(curCol.dataFile.pFile, txnid, newColStructList[i], + newColStructList[i].colWidth, totalRow2, secondPart, rangeList); + if (rc != NO_ERROR) { + if (newColStructList[i].fCompressionType == 0) + { + curCol.dataFile.pFile->flush(); + } + + BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); + break; + } + } + + // Pack each 64-bit input value into valArray at the element width + // selected by the column type below: 1, 2, 4 or 8 bytes per row, + // reading row j from this column's slice of newColValueList. + uint8_t tmp8; + uint16_t tmp16; + uint32_t tmp32; + for (size_t j = 0; j < totalRow2; j++) + { + uint64_t curValue = newColValueList[(totalRow2*i) + j]; + switch (newColStructList[i].colType) + { + case WriteEngine::WR_VARBINARY : // treat same as char for now + case WriteEngine::WR_CHAR: + case WriteEngine::WR_BLOB: + case WriteEngine::WR_TEXT: + ((uint64_t*)valArray)[j] = curValue; + break; + case WriteEngine::WR_INT: + case WriteEngine::WR_UINT: + case WriteEngine::WR_FLOAT: + tmp32 = curValue; + 
((uint32_t*)valArray)[j] = tmp32; + break; + case WriteEngine::WR_ULONGLONG: + case WriteEngine::WR_LONGLONG: + case WriteEngine::WR_DOUBLE: + case WriteEngine::WR_TOKEN: + ((uint64_t*)valArray)[j] = curValue; + break; + case WriteEngine::WR_BYTE: + case WriteEngine::WR_UBYTE: + tmp8 = curValue; + ((uint8_t*)valArray)[j] = tmp8; + break; + case WriteEngine::WR_SHORT: + case WriteEngine::WR_USHORT: + tmp16 = curValue; + ((uint16_t*)valArray)[j] = tmp16; + break; + } + } + + + #ifdef PROFILE + timer.start("writeRow "); + #endif + rc = colOp->writeRow(curCol, totalRow2, secondPart, valArray); + #ifdef PROFILE + timer.stop("writeRow "); + #endif + colOp->closeColumnFile(curCol); + + if (versioning) + BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); + + // check error + if (rc != NO_ERROR) + break; + + } // end of for (i = 0 + } + if (valArray != NULL) + free(valArray); + #ifdef PROFILE timer.finish(); diff --git a/writeengine/wrapper/writeengine.h b/writeengine/wrapper/writeengine.h index f1783ac8e..099854f77 100644 --- a/writeengine/wrapper/writeengine.h +++ b/writeengine/wrapper/writeengine.h @@ -668,6 +668,7 @@ private: int writeColumnRecBinary(const TxnID& txnid, const ColStructList& colStructList, std::vector& colValueList, RID* rowIdArray, const ColStructList& newColStructList, + std::vector& newColValueList, const int32_t tableOid, bool useTmpSuffix, bool versioning = true);