From 32ebc3e392fdba4a984288291531bcfb7acbd5fd Mon Sep 17 00:00:00 2001 From: david hill Date: Mon, 11 Dec 2017 10:22:20 -0600 Subject: [PATCH 01/29] MCOL-1047 - fixed debian 9 lib dep --- utils/clusterTester/columnstoreClusterTester.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh index acc39d23a..78fa3f889 100755 --- a/utils/clusterTester/columnstoreClusterTester.sh +++ b/utils/clusterTester/columnstoreClusterTester.sh @@ -978,7 +978,7 @@ checkPackages() fi fi - declare -a DEBIAN9_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline5" "rsync" "libsnappy1V5" "net-tools" "libioa1") + declare -a DEBIAN9_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline5" "rsync" "libsnappy1V5" "net-tools" "libaio1") if [ "$OS" == "debian9" ]; then if [ ! `which dpkg 2>/dev/null` ] ; then From 44c06f2cc3002d0d2ecf39bc00add6c4dff72a00 Mon Sep 17 00:00:00 2001 From: david hill Date: Tue, 12 Dec 2017 16:32:49 -0600 Subject: [PATCH 02/29] MCOL-1106 - increased mysqld startup timeouts --- dbcon/mysql/mysql-Columnstore | 7 +++- oam/install_scripts/binary_installer.sh | 54 +++---------------------- oam/install_scripts/user_installer.sh | 2 +- 3 files changed, 11 insertions(+), 52 deletions(-) diff --git a/dbcon/mysql/mysql-Columnstore b/dbcon/mysql/mysql-Columnstore index de090329f..02a0de162 100755 --- a/dbcon/mysql/mysql-Columnstore +++ b/dbcon/mysql/mysql-Columnstore @@ -61,7 +61,7 @@ datadir=$basedir/db # Value here is overriden by value in my.cnf. # 0 means don't wait at all # Negative numbers mean to wait indefinitely -service_startup_timeout=30 +service_startup_timeout=90 # Lock directory for RedHat / SuSE. 
lockdir='/var/lock/subsys' @@ -262,6 +262,8 @@ wait_for_gone () { done log_failure_msg + kill_by_pid + return 1 } @@ -310,7 +312,7 @@ fi kill_by_pid() { # let's see if we can kill the 2 mysql procs by hand # get the our mysql from ps - eval $(ps -ef | grep "$COLUMNSTORE_INSTALL_DIR/mysql//bin/mysqld " | grep -v grep | head -1 | awk '{printf "pid=%d\n", $2}') + eval $(ps -ef | grep "$COLUMNSTORE_INSTALL_DIR/mysql/bin/mysqld" | grep -v grep | head -1 | awk '{printf "pid=%d\n", $2}') if [ -n "$pid" ]; then ppid=$(ps -o ppid= -p $pid) @@ -369,6 +371,7 @@ case "$mode" in wait_for_gone $mysqld_pid "$mysqld_pid_file_path"; return_value=$? else log_failure_msg "MySQL server process #$mysqld_pid is not running!" + kill_by_pid rm "$mysqld_pid_file_path" fi diff --git a/oam/install_scripts/binary_installer.sh b/oam/install_scripts/binary_installer.sh index ead0dde37..280b10c22 100644 --- a/oam/install_scripts/binary_installer.sh +++ b/oam/install_scripts/binary_installer.sh @@ -238,11 +238,15 @@ send_user "\n" # # Start module installer to setup Customer OS files # +if { $SERVERTYPE == "2" || $SERVERTYPE == "pmwithum" } { + set MODULETYPE "um" +} + send_user "Run Module Installer " send " \n" send date\n send "ssh -v $USERNAME@$SERVER '$INSTALLDIR/bin/module_installer.sh --module=$MODULETYPE --port=$MYSQLPORT --installdir=$INSTALLDIR'\n" -set timeout 60 +set timeout 120 expect { "word: " { send "$PASSWORD\n" exp_continue @@ -256,54 +260,6 @@ expect { } send_user "\n" -if { $MODULETYPE == "um" || $SERVERTYPE == "2" || $SERVERTYPE == "pmwithum" } { - # - # run mysql setup scripts - # - send_user "Run MySQL Setup Scripts on Module " - send " \n" - send date\n - send "ssh -v $USERNAME@$SERVER '$INSTALLDIR/bin/post-mysqld-install --installdir=$INSTALLDIR'\n" - set timeout 60 - expect { - "word: " { send "$PASSWORD\n" - exp_continue - } - "passphrase" { send "$PASSWORD\n" - exp_continue - } - "Exit status 0" { send_user "DONE" } - "Exit status 1" { send_user "ERROR: scp failed" ; 
exit 1 } - "ERROR" { send_user "ERROR: Daemon failed to run"; - exit 1 } - "FAILED" { send_user "ERROR: Daemon failed to run"; - exit 1 } - timeout { send_user "ERROR: Timeout\n" ; exit 2 } - } - - send " \n" - send date\n - send "ssh -v $USERNAME@$SERVER '$INSTALLDIR/bin/post-mysql-install --installdir=$INSTALLDIR'\n" - set timeout 120 - expect { - "word: " { send "$PASSWORD\n" - exp_continue - } - "passphrase" { send "$PASSWORD\n" - exp_continue - } - "Exit status 0" { send_user "DONE" } - "Exit status 1" { send_user "ERROR: scp failed" ; exit 1 } - "ERROR" { send_user "ERROR: Daemon failed to run"; - exit 1 } - "FAILED" { send_user "ERROR: Daemon failed to run"; - exit 1 } - timeout { send_user "ERROR: Timeout\n" ; exit 2 } - } - send_user "\n" -} - - send_user "\nInstallation Successfully Completed on '$MODULE'\n" exit 0 diff --git a/oam/install_scripts/user_installer.sh b/oam/install_scripts/user_installer.sh index a31367aed..4d30492d8 100644 --- a/oam/install_scripts/user_installer.sh +++ b/oam/install_scripts/user_installer.sh @@ -255,7 +255,7 @@ send_user "\n" # send_user "Run Module Installer " send "ssh -v $USERNAME@$SERVER '$INSTALLDIR/bin/module_installer.sh --module=um --port=$MYSQLPORT'\n" -set timeout 60 +set timeout 120 expect { "word: " { send "$PASSWORD\n" exp_continue From de24edcf8f1b7499525be9d896133f04b6a80bf3 Mon Sep 17 00:00:00 2001 From: david hill Date: Thu, 14 Dec 2017 14:47:47 -0600 Subject: [PATCH 03/29] Update README --- README | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index 291da0ab7..8186d1ed7 100644 --- a/README +++ b/README @@ -1,9 +1,9 @@ -This is MariaDB ColumnStore 1.0.11 -MariaDB ColumnStore 1.0.11 is the development version of MariaDB ColumnStore. +This is MariaDB ColumnStore 1.0.12 +MariaDB ColumnStore 1.0.12 is the development version of MariaDB ColumnStore. It is built by porting InfiniDB 4.6.7 on MariaDB 10.1.26 and adding entirely new features not found anywhere else. 
-MariaDB ColumnStore 1.0.11 is an GA release. This is the first MariaDB +MariaDB ColumnStore 1.0.12 is a GA release. This is the first MariaDB ColumnStore release, not all features planned for the MariaDB ColumnStore 1.0 series are included in this release. From 9f5bd31e37dd330ae01b5da04123985c6450e3f3 Mon Sep 17 00:00:00 2001 From: david hill Date: Thu, 14 Dec 2017 14:49:58 -0600 Subject: [PATCH 04/29] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 397a1b1d8..40c1e99ec 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ -#MariaDB ColumnStore Storage/Execution engine 1.0.11 -MariaDB ColumnStore 1.0.11 is the development version of MariaDB ColumnStore. +#MariaDB ColumnStore Storage/Execution engine 1.0.12 +MariaDB ColumnStore 1.0.12 is the development version of MariaDB ColumnStore. It is built by porting InfiniDB 4.6.7 on MariaDB 10.1.26 and adding entirely new features not found anywhere else. -#MariaDB ColumnStore 1.0.11 is an GA release. +#MariaDB ColumnStore 1.0.12 is a GA release. #Building This repository is not meant to be built independently outside of the server. This repository is integrated into http://mariadb-corporation/mariadb-columnstore-server (ie, the *server*) as a git submodule. As such, you can find complete build instructions on *the server* page. From e9aff27e59f8a9dc7fec58c35e3968aa2018ec46 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 15 Dec 2017 20:33:07 +0000 Subject: [PATCH 05/29] MCOL-1116 Fix I_S.CS_FILES for missing dbroot If a dbroot is missing/offline mysqld would crash on information_schema.columnstore_files due to not catching an exception. This patch now catches the exception. 
--- dbcon/mysql/is_columnstore_files.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dbcon/mysql/is_columnstore_files.cpp b/dbcon/mysql/is_columnstore_files.cpp index e9479e254..ce00b8aae 100644 --- a/dbcon/mysql/is_columnstore_files.cpp +++ b/dbcon/mysql/is_columnstore_files.cpp @@ -124,6 +124,17 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) iter++; continue; } + + try + { + oam_instance.getDbrootPmConfig(iter->dbRoot, pmId); + } + catch (std::runtime_error) + { + // MCOL-1116: If we are here a DBRoot is offline/missing + iter++; + continue; + } table->field[0]->store(oid); table->field[1]->store(iter->segmentNum); table->field[2]->store(iter->partitionNum); @@ -134,7 +145,7 @@ static int is_columnstore_files_fill(THD *thd, TABLE_LIST *tables, COND *cond) std::string DbRootPath = config->getConfig("SystemConfig", DbRootName.str()); fileSize = compressedFileSize = 0; snprintf(fullFileName, WriteEngine::FILE_NAME_SIZE, "%s/%s", DbRootPath.c_str(), oidDirName); - oam_instance.getDbrootPmConfig(iter->dbRoot, pmId); + std::ostringstream oss; oss << "pm" << pmId << "_WriteEngineServer"; std::string client = oss.str(); From c31c836352ee921141e8bbdc5264700b59ae97a9 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Wed, 3 Jan 2018 09:20:09 +0000 Subject: [PATCH 06/29] MCOL-1085 Add crash dump to daemons This patch adds an automated crash dump which logs in /var/log/mariadb/columnstore/trace/ when one of the ColumnStore daemons crashes. 
--- CMakeLists.txt | 8 ++--- ddlproc/CMakeLists.txt | 2 +- ddlproc/ddlproc.cpp | 9 +++++ dmlproc/CMakeLists.txt | 3 +- dmlproc/dmlproc.cpp | 11 ++++++ exemgr/CMakeLists.txt | 2 +- exemgr/main.cpp | 14 +++++++- oamapps/serverMonitor/CMakeLists.txt | 3 +- oamapps/serverMonitor/main.cpp | 10 ++++++ primitives/primproc/CMakeLists.txt | 3 +- primitives/primproc/primproc.cpp | 11 ++++++ procmgr/CMakeLists.txt | 2 +- procmgr/main.cpp | 12 +++++++ procmon/CMakeLists.txt | 2 +- procmon/main.cpp | 10 ++++++ utils/common/crashtrace.cpp | 51 ++++++++++++++++++++++++++++ utils/common/crashtrace.h | 18 ++++++++++ versioning/BRM/CMakeLists.txt | 4 +-- versioning/BRM/masternode.cpp | 9 +++++ versioning/BRM/slavenode.cpp | 9 +++++ writeengine/server/CMakeLists.txt | 3 +- writeengine/server/we_server.cpp | 11 ++++++ 22 files changed, 192 insertions(+), 15 deletions(-) create mode 100644 utils/common/crashtrace.cpp create mode 100644 utils/common/crashtrace.h diff --git a/CMakeLists.txt b/CMakeLists.txt index df20c3e1b..b40c663a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,12 +98,12 @@ endif() FOREACH(BUILD_TYPE RELEASE RELWITHDEBINFO MINSIZEREL) - SET(CMAKE_CXX_FLAGS_${BUILD_TYPE} "-g -O3 -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H") - SET(CMAKE_C_FLAGS_${BUILD_TYPE} "-g -O3 -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H") + SET(CMAKE_CXX_FLAGS_${BUILD_TYPE} "-g -O3 -fno-omit-frame-pointer -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H") + SET(CMAKE_C_FLAGS_${BUILD_TYPE} "-g -O3 -fno-omit-frame-pointer -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H") ENDFOREACH() -SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb3 -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H") -SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -ggdb3 -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D 
_DEBUG -DHAVE_CONFIG_H") +SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb3 -fno-omit-frame-pointer -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H") +SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -ggdb3 -fno-omit-frame-pointer -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D _DEBUG -DHAVE_CONFIG_H") SET (ENGINE_LDFLAGS "-Wl,--no-as-needed -Wl,--add-needed") diff --git a/ddlproc/CMakeLists.txt b/ddlproc/CMakeLists.txt index a56fb4683..7c7fdf70c 100644 --- a/ddlproc/CMakeLists.txt +++ b/ddlproc/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) ########### next target ############### -set(DDLProc_SRCS ddlproc.cpp ddlprocessor.cpp) +set(DDLProc_SRCS ddlproc.cpp ddlprocessor.cpp ../utils/common/crashtrace.cpp) add_executable(DDLProc ${DDLProc_SRCS}) diff --git a/ddlproc/ddlproc.cpp b/ddlproc/ddlproc.cpp index 45cf09327..2589f9c29 100644 --- a/ddlproc/ddlproc.cpp +++ b/ddlproc/ddlproc.cpp @@ -61,6 +61,7 @@ using namespace execplan; #include "IDBPolicy.h" #include "utils_utf8.h" +#include "crashtrace.h" namespace fs = boost::filesystem; @@ -97,6 +98,9 @@ int main(int argc, char* argv[]) string systemLang = "C"; systemLang = funcexp::utf8::idb_setlocale(); + // This is unset due to the way we start it + program_invocation_short_name = const_cast("DDLProc"); + setupCwd(); WriteEngine::WriteEngineWrapper::init( WriteEngine::SUBSYSTEM_ID_DDLPROC ); @@ -116,6 +120,11 @@ int main(int argc, char* argv[]) sigaction(SIGHUP, &ign, 0); ign.sa_handler = SIG_IGN; sigaction(SIGPIPE, &ign, 0); + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); #endif ddlprocessor::DDLProcessor ddlprocessor(1, 20); diff --git a/dmlproc/CMakeLists.txt b/dmlproc/CMakeLists.txt index 8ed489aa8..957cfdff6 100644 --- a/dmlproc/CMakeLists.txt +++ b/dmlproc/CMakeLists.txt @@ -8,7 
+8,8 @@ set(DMLProc_SRCS dmlproc.cpp dmlprocessor.cpp dmlresultbuffer.cpp - batchinsertprocessor.cpp) + batchinsertprocessor.cpp + ../utils/common/crashtrace.cpp) add_executable(DMLProc ${DMLProc_SRCS}) diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index fe542e3a4..350cc22e3 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -82,6 +82,8 @@ using namespace joblist; #include "utils_utf8.h" +#include "crashtrace.h" + namespace fs = boost::filesystem; namespace @@ -473,6 +475,9 @@ int main(int argc, char* argv[]) //BUG 5362 systemLang = funcexp::utf8::idb_setlocale(); + // This is unset due to the way we start it + program_invocation_short_name = const_cast("DMLProc"); + Config* cf = Config::makeConfig(); setupCwd(); @@ -578,6 +583,12 @@ int main(int argc, char* argv[]) sigaction(SIGHUP, &ign, 0); ign.sa_handler = SIG_IGN; sigaction(SIGPIPE, &ign, 0); + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); #endif dmlserver.start(); diff --git a/exemgr/CMakeLists.txt b/exemgr/CMakeLists.txt index 5a9f197d7..c7276ad05 100644 --- a/exemgr/CMakeLists.txt +++ b/exemgr/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) ########### next target ############### -set(ExeMgr_SRCS main.cpp activestatementcounter.cpp femsghandler.cpp) +set(ExeMgr_SRCS main.cpp activestatementcounter.cpp femsghandler.cpp ../utils/common/crashtrace.cpp) add_executable(ExeMgr ${ExeMgr_SRCS}) diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 002ba6aab..ca837a33e 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -97,6 +97,8 @@ using namespace querytele; #include "utils_utf8.h" #include "boost/filesystem.hpp" +#include "crashtrace.h" + namespace { //If any flags other than the table mode flags are set, produce output to screeen @@ -1165,10 +1167,12 @@ public: } }; +#ifdef _MSC_VER void exit_(int) { exit(0); } +#endif void added_a_pm(int) { @@ 
-1213,7 +1217,6 @@ void printTotalUmMemory(int sig) void setupSignalHandlers() { #ifdef _MSC_VER - signal(SIGSEGV, exit_); signal(SIGINT, exit_); signal(SIGTERM, exit_); #else @@ -1229,6 +1232,12 @@ void setupSignalHandlers() sigaction(SIGHUP, &ign, 0); ign.sa_handler = printTotalUmMemory; sigaction(SIGUSR1, &ign, 0); + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); #endif } @@ -1302,6 +1311,9 @@ int main(int argc, char* argv[]) string systemLang = "C"; systemLang = funcexp::utf8::idb_setlocale(); + // This is unset due to the way we start it + program_invocation_short_name = const_cast("ExeMgr"); + gDebug = 0; bool eFlg = false; int c; diff --git a/oamapps/serverMonitor/CMakeLists.txt b/oamapps/serverMonitor/CMakeLists.txt index 04296114a..81d14c9ea 100644 --- a/oamapps/serverMonitor/CMakeLists.txt +++ b/oamapps/serverMonitor/CMakeLists.txt @@ -13,7 +13,8 @@ set(ServerMonitor_SRCS procmonMonitor.cpp msgProcessor.cpp dbhealthMonitor.cpp - UMAutoSync.cpp) + UMAutoSync.cpp + ../../utils/common/crashtrace.cpp) add_executable(ServerMonitor ${ServerMonitor_SRCS}) diff --git a/oamapps/serverMonitor/main.cpp b/oamapps/serverMonitor/main.cpp index 10903d6a4..56b8e3825 100644 --- a/oamapps/serverMonitor/main.cpp +++ b/oamapps/serverMonitor/main.cpp @@ -18,6 +18,8 @@ #include "IDBPolicy.h" #include "serverMonitor.h" +#include "crashtrace.h" + using namespace std; using namespace servermonitor; using namespace oam; @@ -38,6 +40,14 @@ int main (int argc, char** argv) ServerMonitor serverMonitor; Oam oam; + struct sigaction ign; + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + //Launch Memory Monitor Thread and check if swap is in critical condition pthread_t memoryMonitorThread; pthread_create (&memoryMonitorThread, NULL, (void*(*)(void*)) &memoryMonitor, 
NULL); diff --git a/primitives/primproc/CMakeLists.txt b/primitives/primproc/CMakeLists.txt index ddfa56abc..cb5a7c163 100644 --- a/primitives/primproc/CMakeLists.txt +++ b/primitives/primproc/CMakeLists.txt @@ -18,7 +18,8 @@ set(PrimProc_SRCS primitiveserver.cpp pseudocc.cpp rtscommand.cpp - umsocketselector.cpp) + umsocketselector.cpp + ../../utils/common/crashtrace.cpp) #PrimProc_CXXFLAGS = $(march_flags) $(AM_CXXFLAGS) diff --git a/primitives/primproc/primproc.cpp b/primitives/primproc/primproc.cpp index 7fc0c36f2..200786e46 100644 --- a/primitives/primproc/primproc.cpp +++ b/primitives/primproc/primproc.cpp @@ -71,6 +71,8 @@ using namespace idbdatafile; #include "cgroupconfigurator.h" +#include "crashtrace.h" + namespace primitiveprocessor { @@ -126,6 +128,12 @@ void setupSignalHandlers() ign.sa_handler = SIG_IGN; sigaction(SIGUSR2, &ign, 0); + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + sigset_t sigset; sigemptyset(&sigset); sigaddset(&sigset, SIGPIPE); @@ -288,6 +296,9 @@ int main(int argc, char* argv[]) systemLang.find("UTF") != string::npos ) utf8 = true; + // This is unset due to the way we start it + program_invocation_short_name = const_cast("PrimProc"); + Config* cf = Config::makeConfig(); setupSignalHandlers(); diff --git a/procmgr/CMakeLists.txt b/procmgr/CMakeLists.txt index 7ced90a35..aa2fa15fc 100644 --- a/procmgr/CMakeLists.txt +++ b/procmgr/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) ########### next target ############### -set(ProcMgr_SRCS main.cpp processmanager.cpp) +set(ProcMgr_SRCS main.cpp processmanager.cpp ../utils/common/crashtrace.cpp) add_executable(ProcMgr ${ProcMgr_SRCS}) diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 3c1a870fc..16c3ad5b0 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -32,6 +32,8 @@ #include "utils_utf8.h" +#include "crashtrace.h" + using namespace std; 
using namespace logging; using namespace messageqcpp; @@ -100,6 +102,16 @@ int main(int argc, char **argv) setlocale(LC_ALL, systemLang.c_str()); + // This is unset due to the way we start it + program_invocation_short_name = const_cast("ProcMgr"); + + struct sigaction ign; + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + Oam oam; //check if root-user diff --git a/procmon/CMakeLists.txt b/procmon/CMakeLists.txt index cb6ecbb49..cb61e8044 100644 --- a/procmon/CMakeLists.txt +++ b/procmon/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ) ########### next target ############### -set(ProcMon_SRCS main.cpp processmonitor.cpp) +set(ProcMon_SRCS main.cpp processmonitor.cpp ../utils/common/crashtrace.cpp) add_executable(ProcMon ${ProcMon_SRCS}) diff --git a/procmon/main.cpp b/procmon/main.cpp index 88deeeb77..25dc07d6a 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -26,6 +26,8 @@ namespace bi=boost::interprocess; #include "IDBPolicy.h" +#include "crashtrace.h" + using namespace std; using namespace messageqcpp; using namespace processmonitor; @@ -75,6 +77,14 @@ int main(int argc, char **argv) setuid(0); // set effective ID to root; ignore return status #endif + struct sigaction ign; + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + if (argc > 1 && string(argv[1]) == "--daemon") { if (fork() != 0) return 0; diff --git a/utils/common/crashtrace.cpp b/utils/common/crashtrace.cpp new file mode 100644 index 000000000..41626361e --- /dev/null +++ b/utils/common/crashtrace.cpp @@ -0,0 +1,51 @@ +/* Copyright (C) 2018 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + 
the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +#include +#include +#include +#include +#include +#include +#include +#include + +void fatalHandler(int sig) +{ + char filename[128]; + void* addrs[128]; + snprintf(filename, 128, "/var/log/mariadb/columnstore/trace/%s.%d.log", program_invocation_short_name, getpid()); + FILE* logfile = fopen(filename, "w"); + char s[30]; + struct tm tim; + time_t now; + now = time(NULL); + tim = *(localtime(&now)); + strftime(s,30,"%F %T",&tim); + fprintf(logfile, "Date/time: %s\n", s); + fprintf(logfile, "Signal: %d\n\n", sig); + fflush(logfile); + int fd = fileno(logfile); + int count = backtrace(addrs, sizeof(addrs) / sizeof(addrs[0])); + backtrace_symbols_fd(addrs, count, fd); + fclose(logfile); + struct sigaction sigact; + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = SIG_DFL; + sigaction(sig, &sigact, NULL); + raise(sig); +} diff --git a/utils/common/crashtrace.h b/utils/common/crashtrace.h new file mode 100644 index 000000000..3b9cb4036 --- /dev/null +++ b/utils/common/crashtrace.h @@ -0,0 +1,18 @@ +/* Copyright (C) 2018 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +void fatalHandler(int sig); diff --git a/versioning/BRM/CMakeLists.txt b/versioning/BRM/CMakeLists.txt index 2ce60254d..2c904e872 100644 --- a/versioning/BRM/CMakeLists.txt +++ b/versioning/BRM/CMakeLists.txt @@ -41,7 +41,7 @@ install(TARGETS brm DESTINATION ${ENGINE_LIBDIR} COMPONENT libs) ########### next target ############### -set(controllernode_SRCS masternode.cpp masterdbrmnode.cpp) +set(controllernode_SRCS masternode.cpp masterdbrmnode.cpp ../../utils/common/crashtrace.cpp) add_executable(controllernode ${controllernode_SRCS}) @@ -52,7 +52,7 @@ install(TARGETS controllernode DESTINATION ${ENGINE_BINDIR} COMPONENT platform) ########### next target ############### -set(workernode_SRCS slavenode.cpp) +set(workernode_SRCS slavenode.cpp ../../utils/common/crashtrace.cpp) add_executable(workernode ${workernode_SRCS}) diff --git a/versioning/BRM/masternode.cpp b/versioning/BRM/masternode.cpp index 3932f9a37..4bdf42553 100644 --- a/versioning/BRM/masternode.cpp +++ b/versioning/BRM/masternode.cpp @@ -35,6 +35,8 @@ #include "brmtypes.h" #include "utils_utf8.h" +#include "crashtrace.h" + #define MAX_RETRIES 10 BRM::MasterDBRMNode *m; @@ -128,6 +130,13 @@ int main(int argc, char **argv) signal(SIGUSR1, restart); signal(SIGPIPE, SIG_IGN); #endif + struct sigaction ign; + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); idbdatafile::IDBPolicy::configIDBPolicy(); diff --git a/versioning/BRM/slavenode.cpp b/versioning/BRM/slavenode.cpp index 2d36bc3a4..5ec20f69f 100644 --- a/versioning/BRM/slavenode.cpp +++ b/versioning/BRM/slavenode.cpp @@ -37,6 +37,8 @@ #include "utils_utf8.h" #include "IDBPolicy.h" +#include "crashtrace.h" + using namespace BRM; 
using namespace std; @@ -117,6 +119,13 @@ int main(int argc, char **argv) signal(SIGPIPE, SIG_IGN); #endif + struct sigaction ign; + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + if (!(argc >= 3 && (arg = argv[2]) == "fg")) err = fork(); diff --git a/writeengine/server/CMakeLists.txt b/writeengine/server/CMakeLists.txt index 365e98036..5029cdb15 100644 --- a/writeengine/server/CMakeLists.txt +++ b/writeengine/server/CMakeLists.txt @@ -14,7 +14,8 @@ set(WriteEngineServer_SRCS we_dmlcommandproc.cpp we_cleartablelockcmd.cpp we_cpifeederthread.cpp - we_getfilesizes.cpp) + we_getfilesizes.cpp + ../../utils/common/crashtrace.cpp) add_executable(WriteEngineServer ${WriteEngineServer_SRCS}) diff --git a/writeengine/server/we_server.cpp b/writeengine/server/we_server.cpp index b5365fe0f..1e4415eac 100644 --- a/writeengine/server/we_server.cpp +++ b/writeengine/server/we_server.cpp @@ -51,6 +51,8 @@ using namespace oam; #include "utils_utf8.h" #include "dbrm.h" +#include "crashtrace.h" + namespace { void added_a_pm(int) @@ -96,6 +98,9 @@ int main(int argc, char** argv) string systemLang = "C"; systemLang = funcexp::utf8::idb_setlocale(); + // This is unset due to the way we start it + program_invocation_short_name = const_cast("WriteEngineServ"); + printf ("Locale is : %s\n", systemLang.c_str() ); //set BUSY_INIT state @@ -119,6 +124,12 @@ int main(int argc, char** argv) sigaction(SIGHUP, &sa, 0); sa.sa_handler = SIG_IGN; sigaction(SIGPIPE, &sa, 0); + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = fatalHandler; + sigaction(SIGSEGV, &sa, 0); + sigaction(SIGABRT, &sa, 0); + sigaction(SIGFPE, &sa, 0); #endif // Init WriteEngine Wrapper (including Config Columnstore.xml cache) From 44989fbd74900ef045c07bd40b25d9a08a7a322f Mon Sep 17 00:00:00 2001 From: david hill Date: Wed, 10 Jan 2018 11:41:44 -0600 Subject: [PATCH 07/29] MCOL-1149 - add a main resume flag --- 
procmon/main.cpp | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/procmon/main.cpp b/procmon/main.cpp index 88deeeb77..f6a0d10dc 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -26,6 +26,8 @@ namespace bi=boost::interprocess; #include "IDBPolicy.h" +#include "crashtrace.h" + using namespace std; using namespace messageqcpp; using namespace processmonitor; @@ -55,9 +57,11 @@ void updateShareMemory(processStatusList* aPtr); bool runStandby = false; bool processInitComplete = false; bool rootUser = true; +bool mainResumeFlag; string USER = "root"; string PMwithUM = "n"; + //extern std::string gOAMParentModuleName; extern bool gOAMParentModuleFlag; @@ -75,6 +79,14 @@ int main(int argc, char **argv) setuid(0); // set effective ID to root; ignore return status #endif + struct sigaction ign; + + memset(&ign, 0, sizeof(ign)); + ign.sa_handler = fatalHandler; + sigaction(SIGSEGV, &ign, 0); + sigaction(SIGABRT, &ign, 0); + sigaction(SIGFPE, &ign, 0); + if (argc > 1 && string(argv[1]) == "--daemon") { if (fork() != 0) return 0; @@ -435,13 +447,24 @@ int main(int argc, char **argv) unlink ("/var/log/mariadb/columnstore/activeAlarms"); } + //Clear mainResumeFlag + + mainResumeFlag = false; + //launch Status table control thread on 'pm' modules pthread_t statusThread; int ret = pthread_create (&statusThread, NULL, (void*(*)(void*)) &statusControlThread, NULL); if ( ret != 0 ) log.writeLog(__LINE__, "pthread_create failed, return code = " + oam.itoa(ret), LOG_TYPE_ERROR); - sleep(6); // give the Status thread time to fully initialize + //wait for flag to be set + + while(!mainResumeFlag) + { + log.writeLog(__LINE__, "WATING FOR mainResumeFlag to be set", LOG_TYPE_DEBUG); + + sleep(1); + } } SystemStatus systemstatus; @@ -723,6 +746,8 @@ int main(int argc, char **argv) } } + log.writeLog(__LINE__, "SYSTEM STATUS = " + oam.itoa(systemstatus.SystemOpState), LOG_TYPE_DEBUG); + if ( systemstatus.SystemOpState != MAN_OFFLINE && 
!DISABLED) { // Loop through the process list to check the process current state @@ -2026,6 +2051,10 @@ static void statusControlThread() log.writeLog(__LINE__, "Dbroot Status shared Memory allociated and Initialized", LOG_TYPE_DEBUG); } + //Set mainResumeFlag, to start up main thread + + mainResumeFlag = true; + string portName = "ProcStatusControl"; if (runStandby) { portName = "ProcStatusControlStandby"; From 1ffeda44d673075936f19123dfc41a626f1e13d5 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 10 Jan 2018 12:17:57 -0600 Subject: [PATCH 08/29] MCOL-962 Add mcsSystemReady(), mcsSystemReadOnly() and mcsWritesSuspended() --- dbcon/mysql/ha_calpont_impl.cpp | 124 ++++++++++++++++++++++++ dbcon/mysql/install_calpont_mysql.sh | 3 + oamapps/mcsadmin/mcsadmin.cpp | 9 +- oamapps/postConfigure/installer.cpp | 2 +- procmon/main.cpp | 2 +- utils/winport/win_setup_mysql_part2.sql | 3 + 6 files changed, 136 insertions(+), 7 deletions(-) diff --git a/dbcon/mysql/ha_calpont_impl.cpp b/dbcon/mysql/ha_calpont_impl.cpp index d4669c8d5..3b3a5f00b 100755 --- a/dbcon/mysql/ha_calpont_impl.cpp +++ b/dbcon/mysql/ha_calpont_impl.cpp @@ -1819,6 +1819,130 @@ void calsettrace_deinit(UDF_INIT* initid) { } +#ifdef _MSC_VER +__declspec(dllexport) +#endif +// Return 1 if system is ready for reads or 0 if not. +long long mcssystemready(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, char* error) +{ + long long rtn = 0; + Oam oam; + DBRM dbrm(true); + SystemStatus systemstatus; + + try + { + oam.getSystemStatus(systemstatus); + if (systemstatus.SystemOpState == ACTIVE + && dbrm.getSystemReady() + && dbrm.getSystemQueryReady()) + { + return 1; + } + } + catch (...) 
+ { + *error = 1; + } + return rtn; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool mcssystemready_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void mcssystemready_deinit(UDF_INIT* initid) +{ +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +// Return 1 if system is read only; 0 if writeable +long long mcssystemreadonly(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, char* error) +{ + long long rtn = 0; + DBRM dbrm(true); + + try + { + if (dbrm.isReadWrite()) // Returns 0 for writable, 5 for read only + { + rtn = 1; + } + } + catch (...) + { + *error = 1; + rtn = 1; + } + return rtn; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool mcssystemreadonly_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void mcssystemreadonly_deinit(UDF_INIT* initid) +{ +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +// Return 1 if writes are suspended; 0 if not +long long mcswritessuspended(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, char* error) +{ + long long rtn = 0; + DBRM dbrm(true); + + try + { + if (dbrm.getSystemSuspended()) + { + rtn = 1; + } + } + catch (...) 
+ { + *error = 1; + rtn = 1; + } + return rtn; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +my_bool mcswritessuspended_init(UDF_INIT* initid, UDF_ARGS* args, char* message) +{ + return 0; +} + +#ifdef _MSC_VER +__declspec(dllexport) +#endif +void mcswritessuspended_deinit(UDF_INIT* initid) +{ +} + #define MAXSTRINGLENGTH 50 const char* PmSmallSideMaxMemory = "pmmaxmemorysmallside"; diff --git a/dbcon/mysql/install_calpont_mysql.sh b/dbcon/mysql/install_calpont_mysql.sh index aaa17473a..17c6e1817 100755 --- a/dbcon/mysql/install_calpont_mysql.sh +++ b/dbcon/mysql/install_calpont_mysql.sh @@ -82,6 +82,9 @@ CREATE FUNCTION idbextentmin RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idbextentmax RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so'; CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so'; +CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so'; +CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so'; +CREATE FUNCTION mcswritessuspended RETURNS INTEGER soname 'libcalmysql.so'; CREATE DATABASE IF NOT EXISTS infinidb_vtable; CREATE DATABASE IF NOT EXISTS infinidb_querystats; diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 7d8a6c153..795414eea 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -1878,7 +1878,7 @@ int processCommand(string* arguments) } string DataRedundancyConfig; - string DataRedundancyCopies; + int DataRedundancyCopies; string DataRedundancyStorageType; try { oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); @@ -3565,7 +3565,7 @@ int processCommand(string* arguments) } string DataRedundancyConfig; - string DataRedundancyCopies; + int DataRedundancyCopies; string DataRedundancyStorageType; try { oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); @@ -5552,7 +5552,7 @@ int processCommand(string* arguments) } } string command = 
startup::StartUp::installDir() + "/bin/remote_command.sh " + (*hostConfigIter).IPAddr + " " + password + " 'mkdir -p " + startup::StartUp::installDir() + "/gluster/brick" + oam.itoa(brickID) + "'"; - int status = system(command.c_str()); +// int status = system(command.c_str()); brickID++; } } @@ -5853,7 +5853,7 @@ int processCommand(string* arguments) } } - if ( DataRedundancyConfig == "y" && devicenetworklist.size() != DataRedundancyCopies) { + if ( DataRedundancyConfig == "y" && devicenetworklist.size() != (size_t)DataRedundancyCopies) { cout << endl << "**** removeModule Failed : Data Redundancy requires you to remove modules in groups equal to number of copies" << endl; quit = true; } @@ -6828,7 +6828,6 @@ int processCommand(string* arguments) { string DataRedundancyConfig = "n"; - int DataRedundancyCopies; try { oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } diff --git a/oamapps/postConfigure/installer.cpp b/oamapps/postConfigure/installer.cpp index bc91217f0..569b699fa 100644 --- a/oamapps/postConfigure/installer.cpp +++ b/oamapps/postConfigure/installer.cpp @@ -250,7 +250,7 @@ int main(int argc, char *argv[]) catch (...) 
{} //get memory stats - long long total = myinfo.totalram / 1024 / 1000; +// long long total = myinfo.totalram / 1024 / 1000; // adjust max memory, 25% of total memory string percent = "25%"; diff --git a/procmon/main.cpp b/procmon/main.cpp index cc6835f3a..10da06f69 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -165,7 +165,7 @@ int main(int argc, char **argv) //re-read local system info with updated Columnstore.xml sleep(1); - Config* sysConfig = Config::makeConfig(); +// Config* sysConfig = Config::makeConfig(); MonitorConfig config; //PMwithUM config diff --git a/utils/winport/win_setup_mysql_part2.sql b/utils/winport/win_setup_mysql_part2.sql index 894e1583d..0a2a972fb 100644 --- a/utils/winport/win_setup_mysql_part2.sql +++ b/utils/winport/win_setup_mysql_part2.sql @@ -9,6 +9,9 @@ CREATE FUNCTION calonlinealter RETURNS INTEGER SONAME 'libcalmysql.dll'; CREATE FUNCTION calviewtablelock RETURNS STRING SONAME 'libcalmysql.dll'; CREATE FUNCTION calcleartablelock RETURNS STRING SONAME 'libcalmysql.dll'; CREATE FUNCTION calgetsqlcount RETURNS STRING SONAME 'libcalmysql.dll'; +CREATE FUNCTION mcssystemready RETURNS INTEGER SONAME 'libcalmysql.dll'; +CREATE FUNCTION mcssystemreadonly RETURNS INTEGER SONAME 'libcalmysql.dll'; +CREATE FUNCTION mcswritessuspended RETURNS INTEGER SONAME 'libcalmysql.dll'; create database if not exists calpontsys; use calpontsys; From 2d0cea5542e178794cb4855757b80ffa8ab85bd0 Mon Sep 17 00:00:00 2001 From: david hill Date: Tue, 16 Jan 2018 10:18:29 -0600 Subject: [PATCH 09/29] MCOL-1167 - fixed -c option --- oamapps/postConfigure/postConfigure.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 9f7505f6b..4016688c2 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -166,7 +166,7 @@ bool thread_remote_installer = true; string singleServerInstall = "1"; string reuseConfig 
="n"; -string oldFileName; +string oldFileName = oam::UnassignedName; string glusterCopies; string glusterInstalled = "n"; string hadoopInstalled = "n"; @@ -370,7 +370,8 @@ int main(int argc, char *argv[]) exit(1); } - oldFileName = installDir + "/etc/Columnstore.xml.rpmsave"; + if ( oldFileName == oam::UnassignedName ) + oldFileName = installDir + "/etc/Columnstore.xml.rpmsave"; cout << endl; cout << "This is the MariaDB ColumnStore System Configuration and Installation tool." << endl; From ce1f9c2ddab0ff33d6a81112f12782ad6012f63d Mon Sep 17 00:00:00 2001 From: david hill Date: Tue, 16 Jan 2018 10:33:13 -0600 Subject: [PATCH 10/29] MCOL-1135 - change rc-local service start command --- oam/install_scripts/post-install | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/oam/install_scripts/post-install b/oam/install_scripts/post-install index bd3d3fb55..fc60e32e7 100755 --- a/oam/install_scripts/post-install +++ b/oam/install_scripts/post-install @@ -190,12 +190,13 @@ if [ $user = "root" ]; then else $SUDO chmod 777 $RCFILE printf '%s\n' '#!/bin/bash' "#" | $SUDO tee -a $RCFILEl > /dev/null 2>&1 + + if [ -n "$systemctl" ]; then + $SUDO systemctl start rc-local >/dev/null 2>&1 + $SUDO systemctl enable rc-local >/dev/null 2>&1 + fi fi -if [ -n "$systemctl" ]; then - $SUDO systemctl restart rc-local >/dev/null 2>&1 - $SUDO systemctl enable rc-local >/dev/null 2>&1 -fi #setup the columnstore service script rm -f /etc/init.d/columnstore >/dev/null 2>&1 From b3bbd89d008a6e8d62511c3ff81938b3d4e1780c Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 16 Jan 2018 12:29:46 -0600 Subject: [PATCH 11/29] MCOL-962 Can't comment out import code. Do something else to remove warning. 
--- oamapps/mcsadmin/mcsadmin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 795414eea..f7a6e2a38 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -5552,7 +5552,7 @@ int processCommand(string* arguments) } } string command = startup::StartUp::installDir() + "/bin/remote_command.sh " + (*hostConfigIter).IPAddr + " " + password + " 'mkdir -p " + startup::StartUp::installDir() + "/gluster/brick" + oam.itoa(brickID) + "'"; -// int status = system(command.c_str()); + system(command.c_str()); brickID++; } } From a9c8f4821b4a3734b3413e5f2834ddcda4c39fee Mon Sep 17 00:00:00 2001 From: david hill Date: Tue, 16 Jan 2018 12:41:41 -0600 Subject: [PATCH 12/29] MCOL-1061 - fixed alais name --- oamapps/postConfigure/installer.cpp | 2 +- oamapps/postConfigure/postConfigure.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/oamapps/postConfigure/installer.cpp b/oamapps/postConfigure/installer.cpp index bc91217f0..ffae8d34c 100644 --- a/oamapps/postConfigure/installer.cpp +++ b/oamapps/postConfigure/installer.cpp @@ -812,7 +812,7 @@ int main(int argc, char *argv[]) cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; - cout << "NOTE: The MariaDB ColumnStore Alias Commands are in /etc/profile.d/columnstoreAlias" << endl << endl; + cout << "NOTE: The MariaDB ColumnStore Alias Commands are in /etc/profile.d/columnstoreAlias.sh" << endl << endl; } else { diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 9f7505f6b..3af0dcd90 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -3336,7 +3336,7 @@ int main(int argc, char *argv[]) cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 
'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; - cout << "NOTE: The MariaDB ColumnStore Alias Commands are in /etc/profile.d/columnstoreAlias" << endl << endl; + cout << "NOTE: The MariaDB ColumnStore Alias Commands are in /etc/profile.d/columnstoreAlias.sh" << endl << endl; } else { From 9749d825b509750911b136f69f2c25b4b4597121 Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 16 Jan 2018 13:26:44 -0600 Subject: [PATCH 13/29] MCOL-1165 use the threadpool's idle down feature --- dbcon/joblist/jobstep.cpp | 2 +- dbcon/joblist/resourcemanager.h | 16 +++++++++++----- dmlproc/dmlproc.cpp | 12 ++++++------ exemgr/main.cpp | 27 ++++++++++++++++++--------- utils/threadpool/threadpool.cpp | 2 +- 5 files changed, 37 insertions(+), 22 deletions(-) diff --git a/dbcon/joblist/jobstep.cpp b/dbcon/joblist/jobstep.cpp index b24a0c0ea..94229871a 100644 --- a/dbcon/joblist/jobstep.cpp +++ b/dbcon/joblist/jobstep.cpp @@ -56,7 +56,7 @@ namespace joblist { boost::mutex JobStep::fLogMutex; //=PTHREAD_MUTEX_INITIALIZER; -ThreadPool JobStep::jobstepThreadPool(0, 0); +ThreadPool JobStep::jobstepThreadPool(defaultJLThreadPoolSize, 0); ostream& operator<<(ostream& os, const JobStep* rhs) { diff --git a/dbcon/joblist/resourcemanager.h b/dbcon/joblist/resourcemanager.h index ffc2de639..73de7ce75 100644 --- a/dbcon/joblist/resourcemanager.h +++ b/dbcon/joblist/resourcemanager.h @@ -93,6 +93,7 @@ namespace joblist const uint64_t defaultNumBuckets = 128; const uint64_t defaultMaxElementsPerBuckert = 16 * 1024 * 1024; + const int defaultEMServerThreads = 50; const int defaultEMSecondsBetweenMemChecks = 1; const int defaultEMMaxPct = 95; const int defaultEMPriority = 21; // @Bug 3385 @@ -147,10 +148,14 @@ namespace joblist typedef std::map MemMap; - int getEmSecondsBetweenMemChecks() const { return getUintVal(fExeMgrStr, "SecondsBetweenMemChecks", defaultEMSecondsBetweenMemChecks); } - int getEmMaxPct() const { return getUintVal(fExeMgrStr, "MaxPct", 
defaultEMMaxPct); } - EXPORT int getEmPriority() const; - int getEmExecQueueSize() const { return getIntVal(fExeMgrStr, "ExecQueueSize", defaultEMExecQueueSize); } + // @MCOL-513 - Added threadpool to ExeMgr + int getEmServerThreads() const { return getIntVal(fExeMgrStr, "ThreadPoolSize", defaultEMServerThreads); } + std::string getExeMgrThreadPoolDebug() const { return getStringVal(fExeMgrStr, "ThreadPoolDebug", "N"); } + + int getEmSecondsBetweenMemChecks() const { return getUintVal(fExeMgrStr, "SecondsBetweenMemChecks", defaultEMSecondsBetweenMemChecks); } + int getEmMaxPct() const { return getUintVal(fExeMgrStr, "MaxPct", defaultEMMaxPct); } + EXPORT int getEmPriority() const; + int getEmExecQueueSize() const { return getIntVal(fExeMgrStr, "ExecQueueSize", defaultEMExecQueueSize); } int getHjMaxBuckets() const { return getUintVal(fHashJoinStr, "MaxBuckets", defaultHJMaxBuckets); } unsigned getHjNumThreads() const { return fHjNumThreads; } //getUintVal(fHashJoinStr, "NumThreads", defaultNumThreads); } @@ -165,8 +170,9 @@ namespace joblist uint32_t getJlScanLbidReqThreshold() const { return getUintVal(fJobListStr,"ScanLbidReqThreshold", defaultScanLbidReqThreshold); } // @MCOL-513 - Added threadpool to JobSteps - uint32_t getJLThreadPoolSize() const { return getUintVal(fJobListStr, "ThreadPoolSize", defaultJLThreadPoolSize); } + int getJLThreadPoolSize() const { return getIntVal(fJobListStr, "ThreadPoolSize", defaultJLThreadPoolSize); } std::string getJlThreadPoolDebug() const { return getStringVal(fJobListStr, "ThreadPoolDebug", "N"); } + std::string getDMLJlThreadPoolDebug() const { return getStringVal(fJobListStr, "DMLThreadPoolDebug", "N"); } // @bug 1264 - Added LogicalBlocksPerScan configurable which determines the number of blocks contained in each BPS scan request. 
uint32_t getJlLogicalBlocksPerScan() const { return getUintVal(fJobListStr,"LogicalBlocksPerScan", defaultLogicalBlocksPerScan); } diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index c8ad57b62..0db77f23a 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -565,14 +565,14 @@ int main(int argc, char* argv[]) // because rm has a "isExeMgr" flag that is set upon creation (rm is a singleton). // From the pools perspective, it has no idea if it is ExeMgr doing the // creation, so it has no idea which way to set the flag. So we set the max here. -// JobStep::jobstepThreadPool.setMaxThreads(rm->getJLThreadPoolSize()); + JobStep::jobstepThreadPool.setMaxThreads(rm->getJLThreadPoolSize()); JobStep::jobstepThreadPool.setName("DMLProcJobList"); -// if (rm->getJlThreadPoolDebug() == "Y" || rm->getJlThreadPoolDebug() == "y") -// { -// JobStep::jobstepThreadPool.setDebug(true); -// JobStep::jobstepThreadPool.invoke(ThreadPoolMonitor(&JobStep::jobstepThreadPool)); -// } + if (rm->getDMLJlThreadPoolDebug() == "Y" || rm->getDMLJlThreadPoolDebug() == "y") + { + JobStep::jobstepThreadPool.setDebug(true); + JobStep::jobstepThreadPool.invoke(ThreadPoolMonitor(&JobStep::jobstepThreadPool)); + } //set ACTIVE state try diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 6967b2ca9..5abed0144 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -1436,18 +1436,20 @@ int main(int argc, char* argv[]) // because rm has a "isExeMgr" flag that is set upon creation (rm is a singleton). // From the pools perspective, it has no idea if it is ExeMgr doing the // creation, so it has no idea which way to set the flag. So we set the max here. 
-// JobStep::jobstepThreadPool.setMaxThreads(rm->getJLThreadPoolSize()); + JobStep::jobstepThreadPool.setMaxThreads(rm->getJLThreadPoolSize()); JobStep::jobstepThreadPool.setName("ExeMgrJobList"); -// if (rm->getJlThreadPoolDebug() == "Y" || rm->getJlThreadPoolDebug() == "y") -// { -// JobStep::jobstepThreadPool.setDebug(true); -// JobStep::jobstepThreadPool.invoke(ThreadPoolMonitor(&JobStep::jobstepThreadPool)); -// } + if (rm->getJlThreadPoolDebug() == "Y" || rm->getJlThreadPoolDebug() == "y") + { + JobStep::jobstepThreadPool.setDebug(true); + JobStep::jobstepThreadPool.invoke(ThreadPoolMonitor(&JobStep::jobstepThreadPool)); + } + int serverThreads = rm->getEmServerThreads(); int maxPct = rm->getEmMaxPct(); int pauseSeconds = rm->getEmSecondsBetweenMemChecks(); int priority = rm->getEmPriority(); + FEMsgHandler::threadPool.setMaxThreads(serverThreads); FEMsgHandler::threadPool.setName("FEMsgHandler"); if (maxPct > 0) @@ -1468,7 +1470,8 @@ int main(int argc, char* argv[]) } } - cout << "Starting ExeMgr: qs = " << rm->getEmExecQueueSize() << ", mx = " << maxPct << ", cf = " << + cout << "Starting ExeMgr: st = " << serverThreads << + ", qs = " << rm->getEmExecQueueSize() << ", mx = " << maxPct << ", cf = " << rm->getConfig()->configFile() << endl; //set ACTIVE state @@ -1483,10 +1486,16 @@ int main(int argc, char* argv[]) } } - threadpool::ThreadPool exeMgrThreadPool; + threadpool::ThreadPool exeMgrThreadPool(serverThreads, 0); exeMgrThreadPool.setName("ExeMgrServer"); - for (;;) + if (rm->getExeMgrThreadPoolDebug() == "Y" || rm->getExeMgrThreadPoolDebug() == "y") + { + exeMgrThreadPool.setDebug(true); + exeMgrThreadPool.invoke(ThreadPoolMonitor(&exeMgrThreadPool)); + } + + for (;;) { IOSocket ios; ios = mqs->accept(); diff --git a/utils/threadpool/threadpool.cpp b/utils/threadpool/threadpool.cpp index 8b0c0a848..197bdedd9 100644 --- a/utils/threadpool/threadpool.cpp +++ b/utils/threadpool/threadpool.cpp @@ -470,7 +470,7 @@ void ThreadPoolMonitor::operator()() << 
setw(4) << tv.tv_usec/100 << " Name " << fPool->fName << " Active " << fPool->waitingFunctorsSize - << " Most " << fPool->fThreadCount + << " ThdCnt " << fPool->fThreadCount << " Max " << fPool->fMaxThreads << " Q " << fPool->fQueueSize << endl; From 7bc2e2476954a8e7fe1979a08ebbd4ec4561d043 Mon Sep 17 00:00:00 2001 From: david hill Date: Tue, 16 Jan 2018 14:06:24 -0600 Subject: [PATCH 14/29] MCOL-1066 - changed direcory path for non-root install local disk --- oamapps/serverMonitor/diskMonitor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/oamapps/serverMonitor/diskMonitor.cpp b/oamapps/serverMonitor/diskMonitor.cpp index 2edcaa2f6..bef1df560 100644 --- a/oamapps/serverMonitor/diskMonitor.cpp +++ b/oamapps/serverMonitor/diskMonitor.cpp @@ -23,6 +23,7 @@ ***************************************************************************/ #include "serverMonitor.h" +#include "installdir.h" using namespace std; using namespace oam; @@ -223,7 +224,7 @@ void diskMonitor() string fileName; // check local if ( deviceName == "/") { - fileName = deviceName + "usr/local/mariadb/columnstore/"; + fileName = deviceName + startup::StartUp::installDir(); } else { From 3ed23512e1d7a7d8c631ff687ecf71b7b752b330 Mon Sep 17 00:00:00 2001 From: david hill Date: Wed, 17 Jan 2018 11:18:39 -0600 Subject: [PATCH 15/29] MCOL-1058 - added check for maradin password and mariadb-libs paclage --- oam/install_scripts/mariadb-command-line.sh | 7 + oam/install_scripts/remote_command.sh | 2 +- .../clusterTester/columnstoreClusterTester.sh | 285 ++++++++++++++++++ 3 files changed, 293 insertions(+), 1 deletion(-) mode change 100644 => 100755 oam/install_scripts/mariadb-command-line.sh diff --git a/oam/install_scripts/mariadb-command-line.sh b/oam/install_scripts/mariadb-command-line.sh old mode 100644 new mode 100755 index bb2750581..efdbbddd5 --- a/oam/install_scripts/mariadb-command-line.sh +++ b/oam/install_scripts/mariadb-command-line.sh @@ -8,6 +8,13 @@ # check log for error 
checkForError() { + grep "ERROR 1045" /tmp/mariadb-command-line.log > /tmp/error.check + if [ `cat /tmp/error.check | wc -c` -ne 0 ]; then + echo "ERROR - PASSWORD: check log file: /tmp/mariadb-command-line.log" + rm -f /tmp/error.check + exit 2 + fi + grep ERROR /tmp/mariadb-command-line.log > /tmp/error.check if [ `cat /tmp/error.check | wc -c` -ne 0 ]; then echo "ERROR: check log file: /tmp/mariadb-command-line.log" diff --git a/oam/install_scripts/remote_command.sh b/oam/install_scripts/remote_command.sh index 9d0f7c2ed..75095632e 100755 --- a/oam/install_scripts/remote_command.sh +++ b/oam/install_scripts/remote_command.sh @@ -10,7 +10,7 @@ # Argument 5 - Remote user name (optional) # Argument 6 - Force a tty to be allocated (optional) set stty_init {cols 512 -opost}; -set timeout 10 +set timeout 30 set SERVER [lindex $argv 0] set PASSWORD [lindex $argv 1] set COMMAND [lindex $argv 2] diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh index 78fa3f889..da0f83440 100755 --- a/utils/clusterTester/columnstoreClusterTester.sh +++ b/utils/clusterTester/columnstoreClusterTester.sh @@ -711,6 +711,49 @@ checkTime() fi } +checkMysqlPassword() +{ + # Locale check + # + echo "" + echo "** Run MariaDB Console Password check" + echo "" + + #get MariaDB password + pass=true + `$COLUMNSTORE_INSTALL_DIR/mysql/mysql-Columnstore start > /dev/null 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/mariadb-command-line.sh > /dev/null 2>&1` + if [ "$?" 
-eq 2 ]; then + echo "${bold}Failed${normal}, Local Node MariaDB login failed with missing password file, /root/.my.cnf" + fi + + if [ "$IPADDRESSES" != "" ]; then + `/bin/cp -f $COLUMNSTORE_INSTALL_DIR/bin/mariadb-command-line.sh /tmp/.` + + for ipadd in "${NODE_IPADDRESS[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD $COLUMNSTORE_INSTALL_DIR/mysql/mysql-Columnstore start > /dev/null 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_put.sh $ipadd $PASSWORD /tmp/mariadb-command-line.sh 1 > /tmp/remote_scp_put_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "Error running remote_scp_put.sh to $ipadd Node, check /tmp/remote_scp_put_check" + exit 1 + else + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD /tmp/mariadb-command-line.sh 1 > /tmp/remote_command_check` + `cat /tmp/remote_command_check | grep "ERROR - PASSWORD" > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node MariaDB login failed with missing password file, /root/.my.cnf" + pass=false + fi + fi + done + fi + + if ! $pass; then + checkContinue + else + echo "Passed, no problems detected with a MariaDB password being set without an associated /root/.my.cnf" + fi +} + checkPackages() { # @@ -722,6 +765,7 @@ checkPackages() echo "" declare -a CENTOS_PKG=("expect" "perl" "perl-DBI" "openssl" "zlib" "file" "sudo" "libaio" "rsync" "snappy" "net-tools" "perl-DBD-MySQL") + declare -a CENTOS_PKG_NOT=("mariadb-libs") if [ "$OS" == "centos6" ] || [ "$OS" == "centos7" ]; then if [ ! `which yum 2>/dev/null` ] ; then @@ -757,6 +801,24 @@ checkPackages() checkContinue fi + #check for package that shouldnt be installed + pass=true + for PKG in "${CENTOS_PKG_NOT[@]}"; do + `yum list installed "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep Installed > /dev/null 2>&1` + if [ "$?" 
-eq 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if [ $pass == true ] ; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi + echo "" pass=true if [ "$IPADDRESSES" != "" ]; then @@ -792,11 +854,37 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${CENTOS_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "yum list installed '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + rc="$?" + if [ $rc -eq 2 ] ; then + echo "${bold}Failed${normal}, $ipadd Node, 'yum' not installed" + pass=false + REPORTPASS=false + break + elif [ $rc -ne 1 ] ; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi declare -a SUSE_PKG=("boost-devel" "expect" "perl" "perl-DBI" "openssl" "file" "sudo" "libaio1" "rsync" "libsnappy1" "net-tools" "perl-DBD-mysql") + declare -a SUSE_PKG_NOT=("mariadb" , "libmariadb18") if [ "$OS" == "suse12" ]; then if [ ! `which rpm 2>/dev/null` ] ; then @@ -821,6 +909,24 @@ checkPackages() else checkContinue fi + + #check for package that shouldnt be installed + pass=true + for PKG in "${SUSE_PKG_NOT[@]}"; do + `rpm -qi "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep "not installed" > /dev/null 2>&1` + if [ "$?" 
-ne 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi fi echo "" @@ -844,11 +950,32 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${SUSE_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "rpm -qi '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + rc="$?" + if [ $rc -eq 0 ] ; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi declare -a UBUNTU_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline-dev" "rsync" "libsnappy1V5" "net-tools" "libdbd-mysql-perl") + declare -a UBUNTU_PKG_NOT=("mariadb-server" "libmariadb18") if [ "$OS" == "ubuntu16" ] ; then if [ ! `which dpkg 2>/dev/null` ] ; then @@ -873,6 +1000,24 @@ checkPackages() else checkContinue fi + + #check for package that shouldnt be installed + pass=true + for PKG in "${UBUNTU_PKG_NOT[@]}"; do + `dpkg -s "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" 
-eq 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi fi echo "" @@ -909,11 +1054,45 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${UBUNTU_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /tmp/pkg_check > /tmp/remote_scp_get_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "Error running remote_scp_get.sh to $ipadd Node, check /tmp/remote_scp_get_check" + else + `cat /tmp/remote_command_check | grep 'command not found' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" + pass=false + break + else + `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + fi + + `rm -f pkg_check` + fi + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi declare -a DEBIAN_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline-dev" "rsync" "libsnappy1" "net-tools" "libdbd-mysql-perl") + declare -a DEBIAN_PKG_NOT=("libmariadb18" "mariadb-server") if [ "$OS" == "debian8" ]; then if [ ! 
`which dpkg 2>/dev/null` ] ; then @@ -938,6 +1117,24 @@ checkPackages() else checkContinue fi + + #check for package that shouldnt be installed + pass=true + for PKG in "${DEBIAN_PKG_NOT[@]}"; do + `dpkg -s "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi fi echo "" @@ -974,11 +1171,45 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${DEBIAN_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /tmp/pkg_check > /tmp/remote_scp_get_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "Error running remote_scp_get.sh to $ipadd Node, check /tmp/remote_scp_get_check" + else + `cat /tmp/remote_command_check | grep 'command not found' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" + pass=false + break + else + `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" 
-eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + fi + + `rm -f pkg_check` + fi + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi declare -a DEBIAN9_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline5" "rsync" "libsnappy1V5" "net-tools" "libaio1") + declare -a DEBIAN9_PKG_NOT=("libmariadb18" "mariadb-server") if [ "$OS" == "debian9" ]; then if [ ! `which dpkg 2>/dev/null` ] ; then @@ -1003,6 +1234,25 @@ checkPackages() else checkContinue fi + + #check for package that shouldnt be installed + pass=true + for PKG in "${DEBIAN9_PKG_NOT[@]}"; do + `dpkg -s "$PKG" > /tmp/pkg_check 2>&1` + `cat /tmp/pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, Local Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + REPORTPASS=false + fi + done + + if $pass; then + echo "Local Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + fi + fi echo "" @@ -1039,6 +1289,39 @@ checkPackages() pass=true fi echo "" + + #check for package that shouldnt be installed + for PKG in "${DEBIAN9_PKG_NOT[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD "dpkg -s '$PKG' > /tmp/pkg_check 2>&1" 1 > /tmp/remote_command_check 2>&1` + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /tmp/pkg_check > /tmp/remote_scp_get_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "Error running remote_scp_get.sh to $ipadd Node, check /tmp/remote_scp_get_check" + else + `cat /tmp/remote_command_check | grep 'command not found' > /dev/null 2>&1` + if [ "$?" 
-eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node ${bold}dpkg${normal} package not installed" + pass=false + break + else + `cat pkg_check | grep 'install ok installed' > /dev/null 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Failed${normal}, $ipadd Node package ${bold}${PKG}${normal} is installed, please un-install" + pass=false + fi + + `rm -f pkg_check` + fi + fi + done + + if $pass; then + echo "$ipadd Node - Passed, all packages that should not be installed aren't installed" + else + checkContinue + pass=true + fi + echo "" + done fi fi @@ -1063,6 +1346,8 @@ if [ "$IPADDRESSES" != "" ]; then checkPorts checkTime fi + +checkMysqlPassword checkPackages if [ $REPORTPASS == true ] ; then From 8a78fbacd2e1c3c3124b5e401865401443d43d0d Mon Sep 17 00:00:00 2001 From: david hill Date: Wed, 17 Jan 2018 15:41:10 -0600 Subject: [PATCH 16/29] MCOL-1060 - fix spelling/grammer issues --- utils/clusterTester/columnstoreClusterTester.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh index da0f83440..8fce62b7b 100755 --- a/utils/clusterTester/columnstoreClusterTester.sh +++ b/utils/clusterTester/columnstoreClusterTester.sh @@ -43,7 +43,7 @@ checkContinue() { helpPrint () { ################################################################################ echo "" - echo "This is the MariaDB ColumnStore Cluster System Test tool." + echo "This is the MariaDB ColumnStore Cluster System Test Tool." echo "" echo "It will run a set of test to validate the setup of the MariaDB Columnstore system." 
echo "This can be run prior to the install of MariaDB ColumnStore to make sure the" @@ -1352,7 +1352,7 @@ checkPackages if [ $REPORTPASS == true ] ; then echo "" - echo "*** Finished Validation of the Cluster, all Test Passed ***" + echo "*** Finished Validation of the Cluster, all Tests Passed ***" echo "" exit 0 else From c56555e9fe88a2b8df2b20eb1ee7692e7437e8bd Mon Sep 17 00:00:00 2001 From: david hill Date: Thu, 18 Jan 2018 17:14:14 -0600 Subject: [PATCH 17/29] MCOL-1137 - fixed master/slave setup after failvover --- .../disable-rep-columnstore.sh | 2 +- procmgr/main.cpp | 130 ++++++++++-------- procmgr/processmanager.cpp | 41 +++++- procmon/processmonitor.cpp | 10 ++ 4 files changed, 120 insertions(+), 63 deletions(-) diff --git a/oam/install_scripts/disable-rep-columnstore.sh b/oam/install_scripts/disable-rep-columnstore.sh index 0a8c9b17f..1c730da4c 100644 --- a/oam/install_scripts/disable-rep-columnstore.sh +++ b/oam/install_scripts/disable-rep-columnstore.sh @@ -56,7 +56,7 @@ checkForError # # Run reset slave command # -echo "Run start slave command" >>/tmp/disable-rep-status.log +echo "Run reset slave command" >>/tmp/disable-rep-status.log cat >/tmp/idb_disable-rep.sql < Date: Fri, 19 Jan 2018 03:43:00 -0600 Subject: [PATCH 18/29] MCOL-1042: add full list of dependencies. 
--- cpackEngineDEB.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpackEngineDEB.cmake b/cpackEngineDEB.cmake index 461714ced..7631c08b1 100644 --- a/cpackEngineDEB.cmake +++ b/cpackEngineDEB.cmake @@ -65,9 +65,9 @@ if (EXISTS "/etc/debian_version") set(DEBIAN_VERSION_NUMBER "${CMAKE_MATCH_1}") endif () if ("${DEBIAN_VERSION_NUMBER}" EQUAL "8") - SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, libboost-all-dev, mariadb-columnstore-libs, libsnappy1") + SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libreadline-dev, rsync, net-tools, libboost-all-dev, mariadb-columnstore-libs, libsnappy1") else () - SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, libboost-all-dev, mariadb-columnstore-libs, libsnappy1v5") + SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libreadline-dev, rsync, net-tools, libboost-all-dev, mariadb-columnstore-libs, libsnappy1v5") endif () SET(CPACK_DEBIAN_STORAGE-ENGINE_PACKAGE_DEPENDS "mariadb-columnstore-libs") From 5e30895b9c16af157dd849e0cdd233c61d2e372d Mon Sep 17 00:00:00 2001 From: david hill Date: Fri, 19 Jan 2018 10:56:35 -0600 Subject: [PATCH 19/29] MCOL-1137 - fixed procmgr host_standby --- procmgr/processmanager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 10d52846c..9f359e46a 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -9718,7 +9718,7 @@ std::string ProcessManager::getStandbyModule() { // Found a ProcessManager in a COLD_STANDBY state newStandbyModule = systemprocessstatus.processstatus[i].Module; - break; + continue; } if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && From a002d33d5d451f3a8f78c5e782f995c239b66bb4 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Fri, 19 Jan 2018 13:38:15 -0600 Subject: [PATCH 20/29] Merge deleted change to include columnstoreversion.h --- 
CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 51d2e7a8a..482e8d19e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,8 +76,7 @@ ENDIF("${isSystemDir}" STREQUAL "-1") INCLUDE (configureEngine.cmake) -CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/build/releasenum.in ${CMAKE_CURRENT_BINARY_DIR}/build/releasenum @ONLY IMMEDIATE) -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/build/releasenum DESTINATION ${INSTALL_ENGINE} COMPONENT platform) +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/columnstoreversion.h.in ${CMAKE_CURRENT_SOURCE_DIR}/columnstoreversion.h) CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h) INCLUDE(bison.cmake) From c1035dae73c8cedbcfd521c0026e1c10c924825f Mon Sep 17 00:00:00 2001 From: david hill Date: Fri, 19 Jan 2018 15:21:28 -0600 Subject: [PATCH 21/29] MCOL-1060 - test tool change #2 --- utils/clusterTester/columnstoreClusterTester.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh index 8fce62b7b..b512eeb20 100755 --- a/utils/clusterTester/columnstoreClusterTester.sh +++ b/utils/clusterTester/columnstoreClusterTester.sh @@ -1330,7 +1330,7 @@ checkPackages() } echo "" -echo "*** This is the MariaDB Columnstore Cluster System test tool ***" +echo "*** This is the MariaDB Columnstore Cluster System Test Tool ***" echo "" checkLocalOS From 403e0ef1dca773c9e7b3ce9e1dc97007b93d0e73 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Fri, 19 Jan 2018 16:02:59 -0600 Subject: [PATCH 22/29] MCOL-1114: Change cmake minimum versions. 
--- CMakeLists.txt | 3 ++- cpackEngineDEB.cmake | 2 ++ cpackEngineRPM.cmake | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b40c663a3..0932307ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.6) +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12) + # Avoid warnings in higher versions if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" GREATER 2.6) CMAKE_POLICY(VERSION 2.8) diff --git a/cpackEngineDEB.cmake b/cpackEngineDEB.cmake index 3c80e3ccd..30d3d0ab9 100644 --- a/cpackEngineDEB.cmake +++ b/cpackEngineDEB.cmake @@ -1,5 +1,7 @@ IF(DEB) +CMAKE_MINIMUM_REQUIRED(VERSION 3.4) + SET(CMAKE_INSTALL_PREFIX ${INSTALL_ENGINE}) SET(CPACK_GENERATOR "DEB") diff --git a/cpackEngineRPM.cmake b/cpackEngineRPM.cmake index 7ad235dc8..98974325d 100644 --- a/cpackEngineRPM.cmake +++ b/cpackEngineRPM.cmake @@ -5,7 +5,6 @@ SET(CMAKE_INSTALL_PREFIX ${INSTALL_ENGINE}) SET(CPACK_GENERATOR "RPM") SET(CPACK_RPM_PACKAGE_DEBUG 1) SET(CPACK_PACKAGING_INSTALL_PREFIX ${INSTALL_ENGINE}) -CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7) SET(CPACK_RPM_COMPONENT_INSTALL ON) From 63adbd0f9921807bb480a570fcd8c9bc336fd5f4 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 22 Jan 2018 15:15:03 +0000 Subject: [PATCH 23/29] Fix missing compiler flag from 1.0 -> 1.1 merge --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 482e8d19e..c36bdcfaa 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,8 +107,8 @@ endif() INCLUDE(check_compiler_flag.cmake) -MY_CHECK_AND_SET_COMPILER_FLAG("-g -O3 -fno-strict-aliasing -Wall -fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H" RELEASE RELWITHDEBINFO MINSIZEREL) -MY_CHECK_AND_SET_COMPILER_FLAG("-ggdb3 -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H" DEBUG) +MY_CHECK_AND_SET_COMPILER_FLAG("-g -O3 -fno-omit-frame-pointer -fno-strict-aliasing -Wall 
-fno-tree-vectorize -DDBUG_OFF -DHAVE_CONFIG_H" RELEASE RELWITHDEBINFO MINSIZEREL) +MY_CHECK_AND_SET_COMPILER_FLAG("-ggdb3 -fno-omit-frame-pointer -fno-tree-vectorize -DSAFE_MUTEX -DSAFEMALLOC -DENABLED_DEBUG_SYNC -O0 -Wall -D_DEBUG -DHAVE_CONFIG_H" DEBUG) # enable security hardening features, like most distributions do # in our benchmarks that costs about ~1% of performance, depending on the load From def46ca31b3df0e8c916a133c8d60c3e9627ad36 Mon Sep 17 00:00:00 2001 From: david hill Date: Mon, 22 Jan 2018 14:13:42 -0600 Subject: [PATCH 24/29] update version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0a349c477..7053d9518 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ COLUMNSTORE_VERSION_MAJOR=1 COLUMNSTORE_VERSION_MINOR=0 -COLUMNSTORE_VERSION_PATCH=12 +COLUMNSTORE_VERSION_PATCH=13 COLUMNSTORE_VERSION_RELEASE=1 From 1a16847cd5041a67300dbb59303c4a0e41a37325 Mon Sep 17 00:00:00 2001 From: david hill Date: Mon, 22 Jan 2018 14:19:12 -0600 Subject: [PATCH 25/29] MCOL-1138 - fix issue where no HOT_STANDBY procmgr existed after pm1 outage --- procmgr/main.cpp | 11 ++++++++++- procmgr/processmanager.cpp | 5 +++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 5ad80014c..81e1593af 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1828,9 +1828,18 @@ void pingDeviceThread() break; // if disabled and not amazon, skip - if (opState == oam::AUTO_DISABLED && !amazon) + if ( (opState == oam::AUTO_DISABLED) && !amazon) break; + // if disabled, amazon,and NOT terminated skip + if ( (opState == oam::AUTO_DISABLED) && amazon) + { + // return values = 'ip address' for running or rebooting, stopped or terminated + string currentIPAddr = oam.getEC2InstanceIpAddress(hostName); + if ( currentIPAddr != "terminated") + break; + } + log.writeLog(__LINE__, "module failed to respond to pings: " + moduleName, LOG_TYPE_WARNING); //bump module ping failure counter diff 
--git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 9f359e46a..5a2739358 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -9455,7 +9455,7 @@ int ProcessManager::OAMParentModuleChange() //restart/reinit processes to force their release of the controller node port if ( ( config.ServerInstallType() == oam::INSTALL_COMBINE_DM_UM_PM) && - ( moduleNameList.size() <= 1 && config.moduleType() == "pm") ) + ( moduleNameList.size() <= 0 && config.moduleType() == "pm") ) { status = 0; } @@ -9710,7 +9710,8 @@ std::string ProcessManager::getStandbyModule() //already have a hot-standby return ""; - if ( backupStandbyModule != "NONE" ) + if ( ( backupStandbyModule != "NONE" ) || + ( newStandbyModule != "NONE" ) ) continue; if ( systemprocessstatus.processstatus[i].ProcessName == "ProcessManager" && From 8e0a5e65b6572fc139492f432740a15c09a3f6e6 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Tue, 23 Jan 2018 10:47:16 -0600 Subject: [PATCH 26/29] MCOL-1042: edit list of debian package depends --- cpackEngineDEB.cmake | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpackEngineDEB.cmake b/cpackEngineDEB.cmake index 7631c08b1..d850d04db 100644 --- a/cpackEngineDEB.cmake +++ b/cpackEngineDEB.cmake @@ -65,9 +65,11 @@ if (EXISTS "/etc/debian_version") set(DEBIAN_VERSION_NUMBER "${CMAKE_MATCH_1}") endif () if ("${DEBIAN_VERSION_NUMBER}" EQUAL "8") - SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libreadline-dev, rsync, net-tools, libboost-all-dev, mariadb-columnstore-libs, libsnappy1") -else () - SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libreadline-dev, rsync, net-tools, libboost-all-dev, mariadb-columnstore-libs, libsnappy1v5") + SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libreadline-dev, rsync, net-tools, libboost-all-dev, mariadb-columnstore-libs, mariadb-columnstore-server, 
libsnappy1") +elseif ("${DEBIAN_VERSION_NUMBER}" EQUAL "9") + SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libreadline-dev, rsync, net-tools, libboost-all-dev, mariadb-columnstore-libs, mariadb-columnstore-server, libsnappy1v5, libreadline5") +else() + SET(CPACK_DEBIAN_PLATFORM_PACKAGE_DEPENDS "expect, perl, openssl, file, sudo, libdbi-perl, libboost-all-dev, libreadline-dev, rsync, snappy, net-tools") endif () SET(CPACK_DEBIAN_STORAGE-ENGINE_PACKAGE_DEPENDS "mariadb-columnstore-libs") From dfb5aa41c65a7cec766fb9c090c67c1dbad47174 Mon Sep 17 00:00:00 2001 From: david hill Date: Wed, 24 Jan 2018 16:39:24 -0600 Subject: [PATCH 27/29] MCOL-1137-fixed issue with no slaves --- procmon/processmonitor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 51888329c..3a6d69f24 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -4951,8 +4951,8 @@ int ProcessMonitor::runMasterRep(std::string& masterLogFile, std::string& master string moduleName = (*pt).DeviceName; //skip if local module or module is not ACTIVE - if ( moduleName == config.moduleName() ) - continue; +// if ( moduleName == config.moduleName() ) +// continue; int opState = oam::ACTIVE; bool degraded; From bd5daf240b7ab5664bb81ba9d10cdce914e642ac Mon Sep 17 00:00:00 2001 From: david hill Date: Wed, 24 Jan 2018 16:43:23 -0600 Subject: [PATCH 28/29] MCOL-1137-fixed issue with no slaves --- procmon/processmonitor.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 3a6d69f24..8f51cccf1 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -4950,9 +4950,7 @@ int ProcessMonitor::runMasterRep(std::string& masterLogFile, std::string& master { string moduleName = (*pt).DeviceName; - //skip if local module or module is not ACTIVE -// if ( moduleName == 
config.moduleName() ) -// continue; + //skip if module is not ACTIVE int opState = oam::ACTIVE; bool degraded; From 63f8e1ce7147fef2804b44587817bcafcfd35164 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Tue, 30 Jan 2018 11:46:47 +0000 Subject: [PATCH 29/29] MCOL-1176 Fix API extent rollover When the API inserts data into ColumnStore which will roll over into a new extent that data wasn't being put into the new extent and corruption occured. This patch now tracks the additional data and inserts it into the new extent. It also makes sure the LBIDs are stored so that they are correctly committed. --- writeengine/wrapper/writeengine.cpp | 187 ++++++++++++++++++++++++++-- writeengine/wrapper/writeengine.h | 1 + 2 files changed, 179 insertions(+), 9 deletions(-) diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index 8da33caad..dc863b823 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -1475,6 +1475,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, Column curCol; ColStruct curColStruct; ColStructList newColStructList; + std::vector colNewValueList; DctnryStructList newDctnryStructList; HWM hwm = 0; HWM oldHwm = 0; @@ -2055,6 +2056,19 @@ timer.stop("tokenize"); tableMetaData->setColExtsInfo(colStructList[i].dataOid, aColExtsInfo); } + //-------------------------------------------------------------------------- + //Prepare the valuelist for the new extent + //-------------------------------------------------------------------------- + + for (unsigned i=1; i <= totalColumns; i++) + { + // Copy values to second value list + for (uint64_t j=rowsLeft; j > 0; j--) + { + colNewValueList.push_back(colValueList[(totalRow*i)-j]); + } + } + // end of allocate row id #ifdef PROFILE @@ -2091,6 +2105,22 @@ timer.start("writeColumnRec"); } } } + // If we create a new extent for this batch + for (unsigned i = 0; i < newColStructList.size(); i++) + { + colOp = 
m_colOp[op(newColStructList[i].fCompressionType)]; + width = newColStructList[i].colWidth; + successFlag = colOp->calculateRowId(lastRidNew , BYTE_PER_BLOCK/width, width, curFbo, curBio); + if (successFlag) { + if (curFbo != lastFbo) { + RETURN_ON_ERROR(AddLBIDtoList(txnid, + lbids, + colDataTypes, + newColStructList[i], + curFbo)); + } + } + } } if (lbids.size() > 0) @@ -2100,7 +2130,7 @@ timer.start("writeColumnRec"); // Write row(s) to database file(s) //---------------------------------------------------------------------- bool versioning = !(isAutoCommitOn && insertSelect); - rc = writeColumnRecBinary(txnid, colStructList, colValueList, rowIdArray, newColStructList, tableOid, useTmpSuffix, versioning); // @bug 5572 HDFS tmp file + rc = writeColumnRecBinary(txnid, colStructList, colValueList, rowIdArray, newColStructList, colNewValueList, tableOid, useTmpSuffix, versioning); // @bug 5572 HDFS tmp file } return rc; } @@ -4564,6 +4594,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, std::vector& colValueList, RID* rowIdArray, const ColStructList& newColStructList, + std::vector& newColValueList, const int32_t tableOid, bool useTmpSuffix, bool versioning) @@ -4574,7 +4605,7 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, Column curCol; ColStructList::size_type totalColumn; ColStructList::size_type i; - size_t totalRow; + size_t totalRow1, totalRow2; setTransId(txnid); @@ -4582,11 +4613,21 @@ int WriteEngineWrapper::writeColumnRecBinary(const TxnID& txnid, #ifdef PROFILE StopWatch timer; #endif - totalRow = colValueList.size() / totalColumn; - valArray = malloc(sizeof(uint64_t) * totalRow); + totalRow1 = colValueList.size() / totalColumn; + if (newColValueList.size() > 0) + { + totalRow2 = newColValueList.size() / newColStructList.size(); + totalRow1 -= totalRow2; + } + else + { + totalRow2 = 0; + } - if (totalRow == 0) + valArray = malloc(sizeof(uint64_t) * totalRow1); + + if (totalRow1 == 0) return rc; TableMetaData* 
aTbaleMetaData = TableMetaData::makeTableMetaData(tableOid); @@ -4634,7 +4675,7 @@ StopWatch timer; if (versioning) { rc = processVersionBuffer(curCol.dataFile.pFile, txnid, colStructList[i], - colStructList[i].colWidth, totalRow, firstPart, rangeList); + colStructList[i].colWidth, totalRow1, firstPart, rangeList); if (rc != NO_ERROR) { if (colStructList[i].fCompressionType == 0) { @@ -4652,9 +4693,9 @@ StopWatch timer; uint8_t tmp8; uint16_t tmp16; uint32_t tmp32; - for (size_t j = 0; j < totalRow; j++) + for (size_t j = 0; j < totalRow1; j++) { - uint64_t curValue = colValueList[(totalRow*i) + j]; + uint64_t curValue = colValueList[((totalRow1 + totalRow2)*i) + j]; switch (colStructList[i].colType) { case WriteEngine::WR_VARBINARY : // treat same as char for now @@ -4692,7 +4733,7 @@ StopWatch timer; #ifdef PROFILE timer.start("writeRow "); #endif - rc = colOp->writeRow(curCol, totalRow, firstPart, valArray); + rc = colOp->writeRow(curCol, totalRow1, firstPart, valArray); #ifdef PROFILE timer.stop("writeRow "); #endif @@ -4707,7 +4748,135 @@ timer.stop("writeRow "); } // end of for (i = 0 if (valArray != NULL) + { free(valArray); + valArray = NULL; + } + + // MCOL-1176 - Write second extent + if (totalRow2) + { + valArray = malloc(sizeof(uint64_t) * totalRow2); + for (i = 0; i < newColStructList.size(); i++) + { + //@Bug 2205 Check if all rows go to the new extent + //Write the first batch + RID * secondPart = rowIdArray + totalRow1; + ColumnOp* colOp = m_colOp[op(newColStructList[i].fCompressionType)]; + + // set params + colOp->initColumn(curCol); + // need to pass real dbRoot, partition, and segment to setColParam + colOp->setColParam(curCol, 0, newColStructList[i].colWidth, + newColStructList[i].colDataType, newColStructList[i].colType, newColStructList[i].dataOid, + newColStructList[i].fCompressionType, newColStructList[i].fColDbRoot, + newColStructList[i].fColPartition, newColStructList[i].fColSegment); + + ColExtsInfo aColExtsInfo = 
aTbaleMetaData->getColExtsInfo(newColStructList[i].dataOid); + ColExtsInfo::iterator it = aColExtsInfo.begin(); + while (it != aColExtsInfo.end()) + { + if ((it->dbRoot == newColStructList[i].fColDbRoot) && (it->partNum == newColStructList[i].fColPartition) && (it->segNum == colStructList[i].fColSegment)) + break; + it++; + } + + if (it == aColExtsInfo.end()) //add this one to the list + { + ColExtInfo aExt; + aExt.dbRoot = newColStructList[i].fColDbRoot; + aExt.partNum = newColStructList[i].fColPartition; + aExt.segNum = newColStructList[i].fColSegment; + aExt.compType = newColStructList[i].fCompressionType; + aColExtsInfo.push_back(aExt); + aTbaleMetaData->setColExtsInfo(newColStructList[i].dataOid, aColExtsInfo); + } + + rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix, IO_BUFF_SIZE); // @bug 5572 HDFS tmp file + if (rc != NO_ERROR) + break; + + // handling versioning + vector rangeList; + if (versioning) + { + rc = processVersionBuffer(curCol.dataFile.pFile, txnid, newColStructList[i], + newColStructList[i].colWidth, totalRow2, secondPart, rangeList); + if (rc != NO_ERROR) { + if (newColStructList[i].fCompressionType == 0) + { + curCol.dataFile.pFile->flush(); + } + + BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); + break; + } + } + + //totalRow1 -= totalRow2; + // have to init the size here + // nullArray = (bool*) malloc(sizeof(bool) * totalRow); + uint8_t tmp8; + uint16_t tmp16; + uint32_t tmp32; + for (size_t j = 0; j < totalRow2; j++) + { + uint64_t curValue = newColValueList[(totalRow2*i) + j]; + switch (newColStructList[i].colType) + { + case WriteEngine::WR_VARBINARY : // treat same as char for now + case WriteEngine::WR_CHAR: + case WriteEngine::WR_BLOB: + case WriteEngine::WR_TEXT: + ((uint64_t*)valArray)[j] = curValue; + break; + case WriteEngine::WR_INT: + case WriteEngine::WR_UINT: + case WriteEngine::WR_FLOAT: + tmp32 = curValue; + ((uint32_t*)valArray)[j] = tmp32; + break; + case WriteEngine::WR_ULONGLONG: + case 
WriteEngine::WR_LONGLONG: + case WriteEngine::WR_DOUBLE: + case WriteEngine::WR_TOKEN: + ((uint64_t*)valArray)[j] = curValue; + break; + case WriteEngine::WR_BYTE: + case WriteEngine::WR_UBYTE: + tmp8 = curValue; + ((uint8_t*)valArray)[j] = tmp8; + break; + case WriteEngine::WR_SHORT: + case WriteEngine::WR_USHORT: + tmp16 = curValue; + ((uint16_t*)valArray)[j] = tmp16; + break; + } + } + + + #ifdef PROFILE + timer.start("writeRow "); + #endif + rc = colOp->writeRow(curCol, totalRow2, secondPart, valArray); + #ifdef PROFILE + timer.stop("writeRow "); + #endif + colOp->closeColumnFile(curCol); + + if (versioning) + BRMWrapper::getInstance()->writeVBEnd(txnid, rangeList); + + // check error + if (rc != NO_ERROR) + break; + + } // end of for (i = 0 + } + if (valArray != NULL) + free(valArray); + #ifdef PROFILE timer.finish(); diff --git a/writeengine/wrapper/writeengine.h b/writeengine/wrapper/writeengine.h index 7c7862d5f..f254a28cd 100644 --- a/writeengine/wrapper/writeengine.h +++ b/writeengine/wrapper/writeengine.h @@ -663,6 +663,7 @@ private: int writeColumnRecBinary(const TxnID& txnid, const ColStructList& colStructList, std::vector& colValueList, RID* rowIdArray, const ColStructList& newColStructList, + std::vector& newColValueList, const int32_t tableOid, bool useTmpSuffix, bool versioning = true);