From 979d00a679266d95f3ca449d03310a5036493a1b Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Mon, 23 Jul 2018 10:40:18 +0100 Subject: [PATCH 01/17] MCOL-1579 Remove chmod of /dev/shm We appear to chmod /dev/shm as a workaround to a CentOS 7 bug that was fixed last year which accidentally set it to 755. If a user has /dev/shm locked down we should get them to fix it rather than modifying it ourselves. Before this fix, the code set /dev/shm to 755 on a root install, which instantly broke anything using mmap() with MAP_SHARED as an unprivileged user. --- oam/install_scripts/post-install | 1 - procmon/main.cpp | 7 ------- procmon/processmonitor.cpp | 7 ------- 3 files changed, 15 deletions(-) diff --git a/oam/install_scripts/post-install b/oam/install_scripts/post-install index 4eaa28dda..5a61a9074 100755 --- a/oam/install_scripts/post-install +++ b/oam/install_scripts/post-install @@ -247,7 +247,6 @@ else $SUDO chmod 777 /tmp $installdir/bin/syslogSetup.sh --installdir=$installdir install > /tmp/syslog_install.log 2>&1 $SUDO chown $user:$user $installdir/etc/Columnstore.xml - $SUDO chmod -R 777 /dev/shm $SUDO mkdir /var/lock/subsys > /dev/null 2>&1 $SUDO chmod 777 /var/lock/subsys > /dev/null 2>&1 $SUDO rm -f /var/lock/subsys/mysql-Columnstore diff --git a/procmon/main.cpp b/procmon/main.cpp index b4e23a6e1..096cffcee 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -131,13 +131,6 @@ int main(int argc, char **argv) if (p && *p) USER = p; - // change permissions on /dev/shm - if ( !rootUser) - { - string cmd = "sudo chmod 777 /dev/shm >/dev/null 2>&1"; - system(cmd.c_str()); - } - // get and set locale language string systemLang = "C"; diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 91f78e640..b0e0fc07f 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -988,13 +988,6 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO int requestStatus = oam::API_SUCCESS; 
log.writeLog(__LINE__, "MSG RECEIVED: Start All process request..."); - // change permissions on /dev/shm - string cmd = "chmod 755 /dev/shm >/dev/null 2>&1"; - if ( !rootUser) - cmd = "sudo chmod 777 /dev/shm >/dev/null 2>&1"; - - system(cmd.c_str()); - //start the mysqld daemon try { oam.actionMysqlCalpont(MYSQL_START); From 7ec1ccac5e660b1ffb026fd66dfaf02aecd96cec Mon Sep 17 00:00:00 2001 From: David Hall Date: Tue, 24 Jul 2018 10:16:26 -0500 Subject: [PATCH 02/17] MCOL-1472 Add switch to handle nested case --- dbcon/mysql/ha_calpont_execplan.cpp | 7 ++++++- dbcon/mysql/ha_calpont_impl_if.h | 6 +++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_calpont_execplan.cpp b/dbcon/mysql/ha_calpont_execplan.cpp index 6113e7ca4..42d26108c 100755 --- a/dbcon/mysql/ha_calpont_execplan.cpp +++ b/dbcon/mysql/ha_calpont_execplan.cpp @@ -1268,7 +1268,7 @@ bool buildPredicateItem(Item_func* ifp, gp_walk_info* gwip) ifp->functype() == Item_func::ISNOTNULL_FUNC) { ReturnedColumn* rhs = NULL; - if (!gwip->rcWorkStack.empty()) + if (!gwip->rcWorkStack.empty() && !gwip->inCaseStmt) { rhs = gwip->rcWorkStack.top(); gwip->rcWorkStack.pop(); @@ -3267,7 +3267,12 @@ FunctionColumn* buildCaseFunction(Item_func* item, gp_walk_info& gwi, bool& nonS if (funcName == "case_searched" && (i < arg_offset)) { + // MCOL-1472 Nested CASE with an ISNULL predicate. We don't want the predicate + // to pull off of rcWorkStack, so we set this inCaseStmt flag to tell it + // not to. 
+ gwi.inCaseStmt = true; sptp.reset(buildParseTree((Item_func*)(item->arguments()[i]), gwi, nonSupport)); + gwi.inCaseStmt = false; if (!gwi.ptWorkStack.empty() && *gwi.ptWorkStack.top()->data() == sptp->data()) { gwi.ptWorkStack.pop(); diff --git a/dbcon/mysql/ha_calpont_impl_if.h b/dbcon/mysql/ha_calpont_impl_if.h index 9a4fd8bd7..ab7b81034 100644 --- a/dbcon/mysql/ha_calpont_impl_if.h +++ b/dbcon/mysql/ha_calpont_impl_if.h @@ -147,6 +147,9 @@ struct gp_walk_info int32_t recursionHWM; std::stack rcBookMarkStack; + // Kludge for MCOL-1472 + bool inCaseStmt; + gp_walk_info() : sessionid(0), fatalParseError(false), condPush(false), @@ -162,7 +165,8 @@ struct gp_walk_info lastSub(0), derivedTbCnt(0), recursionLevel(-1), - recursionHWM(0) + recursionHWM(0), + inCaseStmt(false) {} ~gp_walk_info() {} From 0e856ce9b0e8925938d1651c87bd571287df37fb Mon Sep 17 00:00:00 2001 From: drrtuy Date: Tue, 24 Jul 2018 23:05:09 +0300 Subject: [PATCH 03/17] MCOL-1551 CS now supports hostnames in Columnstore.xml. 
--- utils/messageqcpp/messagequeue.cpp | 81 +++++++++++++++++++------- utils/messageqcpp/messagequeue.h | 3 +- utils/messageqcpp/messagequeuepool.cpp | 6 +- utils/messageqcpp/messagequeuepool.h | 2 +- 4 files changed, 65 insertions(+), 27 deletions(-) diff --git a/utils/messageqcpp/messagequeue.cpp b/utils/messageqcpp/messagequeue.cpp index 4800faf13..085426d9c 100644 --- a/utils/messageqcpp/messagequeue.cpp +++ b/utils/messageqcpp/messagequeue.cpp @@ -152,26 +152,44 @@ void MessageQueueClient::shutdown() void MessageQueueClient::setup(bool syncProto) { - string otherEndIPStr; - string otherEndPortStr; - uint16_t port; + string otherEndIPStr; + string otherEndPortStr; + struct addrinfo hints, *servinfo; + int rc = 0; - otherEndIPStr = fConfig->getConfig(fOtherEnd, "IPAddr"); - otherEndPortStr = fConfig->getConfig(fOtherEnd, "Port"); + otherEndIPStr = fConfig->getConfig(fOtherEnd, "IPAddr"); + otherEndPortStr = fConfig->getConfig(fOtherEnd, "Port"); - if (otherEndIPStr.length() == 0) otherEndIPStr = "127.0.0.1"; + if (otherEndIPStr.length() == 0) otherEndIPStr = "127.0.0.1"; - if (otherEndPortStr.length() == 0 || (port = static_cast(strtol(otherEndPortStr.c_str(), 0, 0))) == 0) - { - string msg = "MessageQueueClient::MessageQueueClient: config error: Invalid/Missing Port attribute"; - throw runtime_error(msg); - } + if (otherEndPortStr.length() == 0 || static_cast(strtol(otherEndPortStr.c_str(), 0, 0)) == 0) + { + string msg = "MessageQueueClient::setup(): config error: Invalid/Missing Port attribute"; + throw runtime_error(msg); + } - memset(&fServ_addr, 0, sizeof(fServ_addr)); - sockaddr_in* sinp = reinterpret_cast(&fServ_addr); - sinp->sin_family = AF_INET; - sinp->sin_port = htons(port); - sinp->sin_addr.s_addr = inet_addr(otherEndIPStr.c_str()); + memset(&hints, 0, sizeof hints); + // ATM We support IPv4 only. 
+ hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + + memset(&fServ_addr, 0, sizeof(fServ_addr)); // always zeroed so a failed lookup cannot leave a stale or indeterminate address + if( !(rc = getaddrinfo(otherEndIPStr.c_str(), otherEndPortStr.c_str(), &hints, &servinfo)) ) + { + sockaddr_in* sinp = reinterpret_cast(&fServ_addr); + *sinp = *reinterpret_cast(servinfo->ai_addr); + freeaddrinfo(servinfo); + } + else + { + string msg = "MessageQueueClient::setup(): "; + msg.append(gai_strerror(rc)); + logging::Message::Args args; + logging::LoggingID li(31); + args.add(msg); + fLogger.logMessage(logging::LOG_TYPE_ERROR, logging::M0000, args, li); + } #ifdef SKIP_IDB_COMPRESSION fClientSock.setSocketImpl(new InetStreamSocket()); @@ -197,15 +215,34 @@ MessageQueueClient::MessageQueueClient(const string& otherEnd, Config* config, b setup(syncProto); } -MessageQueueClient::MessageQueueClient(const string& ip, uint16_t port, bool syncProto) : +MessageQueueClient::MessageQueueClient(const string& dnOrIp, uint16_t port, bool syncProto) : fLogger(31), fIsAvailable(true) { - memset(&fServ_addr, 0, sizeof(fServ_addr)); - sockaddr_in* sinp = reinterpret_cast(&fServ_addr); - sinp->sin_family = AF_INET; - sinp->sin_port = htons(port); - sinp->sin_addr.s_addr = inet_addr(ip.c_str()); + struct addrinfo hints, *servinfo; + int rc = 0; + memset(&hints, 0, sizeof hints); + // ATM We support IPv4 only. 
+ hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_STREAM; + + memset(&fServ_addr, 0, sizeof(fServ_addr)); // always zeroed so a failed lookup cannot leave an indeterminate address + if( !(rc = getaddrinfo(dnOrIp.c_str(), NULL, &hints, &servinfo)) ) + { + sockaddr_in* sinp = reinterpret_cast(&fServ_addr); + *sinp = *reinterpret_cast(servinfo->ai_addr); + sinp->sin_port = htons(port); + freeaddrinfo(servinfo); + } + else + { + string msg = "MessageQueueClient::MessageQueueClient(): "; + msg.append(gai_strerror(rc)); + logging::Message::Args args; + logging::LoggingID li(31); + args.add(msg); + fLogger.logMessage(logging::LOG_TYPE_ERROR, logging::M0000, args, li); + } #ifdef SKIP_IDB_COMPRESSION fClientSock.setSocketImpl(new InetStreamSocket()); #else diff --git a/utils/messageqcpp/messagequeue.h b/utils/messageqcpp/messagequeue.h index 8de4df398..e33e5cd84 100644 --- a/utils/messageqcpp/messagequeue.h +++ b/utils/messageqcpp/messagequeue.h @@ -33,6 +33,7 @@ #include #else #include +#include #endif #include "serversocket.h" @@ -182,7 +183,7 @@ public: * * construct a queue from this process to otherEnd on the given IP and Port. 
*/ - EXPORT explicit MessageQueueClient(const std::string& ip, uint16_t port, bool syncProto=true); + EXPORT explicit MessageQueueClient(const std::string& dnOrIp, uint16_t port, bool syncProto=true); /** diff --git a/utils/messageqcpp/messagequeuepool.cpp b/utils/messageqcpp/messagequeuepool.cpp index 5b8c9862c..27459991f 100644 --- a/utils/messageqcpp/messagequeuepool.cpp +++ b/utils/messageqcpp/messagequeuepool.cpp @@ -36,12 +36,12 @@ static uint64_t TimeSpecToSeconds(struct timespec* ts) return (uint64_t)ts->tv_sec + (uint64_t)ts->tv_nsec / 1000000000; } -MessageQueueClient *MessageQueueClientPool::getInstance(const std::string &ip, uint64_t port) +MessageQueueClient *MessageQueueClientPool::getInstance(const std::string &dnOrIp, uint64_t port) { boost::mutex::scoped_lock lock(queueMutex); std::ostringstream oss; - oss << ip << "_" << port; + oss << dnOrIp << "_" << port; std::string searchString = oss.str(); MessageQueueClient *returnClient = MessageQueueClientPool::findInPool(searchString); @@ -58,7 +58,7 @@ MessageQueueClient *MessageQueueClientPool::getInstance(const std::string &ip, u clock_gettime(CLOCK_MONOTONIC, &now); uint64_t nowSeconds = TimeSpecToSeconds(&now); - newClientObject->client = new MessageQueueClient(ip, port); + newClientObject->client = new MessageQueueClient(dnOrIp, port); newClientObject->inUse = true; newClientObject->lastUsed = nowSeconds; clientMap.insert(std::pair(searchString, newClientObject)); diff --git a/utils/messageqcpp/messagequeuepool.h b/utils/messageqcpp/messagequeuepool.h index fc5576203..227b13b2c 100644 --- a/utils/messageqcpp/messagequeuepool.h +++ b/utils/messageqcpp/messagequeuepool.h @@ -41,7 +41,7 @@ class MessageQueueClientPool { public: static MessageQueueClient *getInstance(const std::string &module); - static MessageQueueClient *getInstance(const std::string &ip, uint64_t port); + static MessageQueueClient *getInstance(const std::string &dnOrIp, uint64_t port); static void releaseInstance(MessageQueueClient 
* client); static void deleteInstance(MessageQueueClient * client); static MessageQueueClient *findInPool(const std::string &search); From d86fabff653e7fbff234bd97f725be1da668f344 Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 1 Aug 2018 10:17:13 -0500 Subject: [PATCH 04/17] MCOL-1145/1146 - fix nonroot install lib issue --- oamapps/postConfigure/installer.cpp | 10 +++- oamapps/postConfigure/postConfigure.cpp | 57 +++++++++++-------- .../postConfigure/quick_installer_amazon.sh | 4 +- .../quick_installer_multi_server.sh | 4 +- .../quick_installer_single_server.sh | 2 +- 5 files changed, 45 insertions(+), 32 deletions(-) diff --git a/oamapps/postConfigure/installer.cpp b/oamapps/postConfigure/installer.cpp index 1cc84ae0f..17c312c27 100644 --- a/oamapps/postConfigure/installer.cpp +++ b/oamapps/postConfigure/installer.cpp @@ -813,7 +813,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; @@ -829,7 +832,10 @@ int main(int argc, char *argv[]) cout << endl << "ERROR: MariaDB ColumnStore Process failed to start, check log files in /var/log/mariadb/columnstore" << endl; cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". 
/etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 98227da9d..e0df9761a 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -1247,26 +1247,16 @@ int main(int argc, char *argv[]) //amazon install setup check bool amazonInstall = false; string cloud = oam::UnassignedName; - system("aws --version > /tmp/amazon.log 2>&1"); - - ifstream in("/tmp/amazon.log"); - - in.seekg(0, std::ios::end); - int size = in.tellg(); - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) + + if (!multi_server_quick_install) { - // not running on amazon with ec2-api-tools - if (amazon_quick_install) - { - cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; - exit(1); - } + system("aws --version > /tmp/amazon.log 2>&1"); - amazonInstall = false; - } - else - { - if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + ifstream in("/tmp/amazon.log"); + + in.seekg(0, std::ios::end); + int size = in.tellg(); + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not found")) { // not running on amazon with ec2-api-tools if (amazon_quick_install) @@ -1278,9 +1268,23 @@ int main(int argc, char *argv[]) amazonInstall = false; } else - amazonInstall = true; - } + { + if ( size == 0 || oam.checkLogStatus("/tmp/amazon.log", "not installed")) + { + // not running on amazon with ec2-api-tools + if (amazon_quick_install) + { + cout << "ERROR: Amazon Quick Installer was specified, bu the AMazon CLI API packages isnt installed, exiting" << endl; + exit(1); + } + amazonInstall = false; + } + else + amazonInstall = true; + } + } + try { cloud = sysConfig->getConfig(InstallSection, 
"Cloud"); } @@ -3641,9 +3645,6 @@ int main(int argc, char *argv[]) } //set mysql replication, if wasn't setup before on system -// if ( ( mysqlRep && pmwithum ) || -// ( mysqlRep && (umNumber > 1) ) || -// ( mysqlRep && (pmNumber > 1) && (IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM) ) ) if ( mysqlRep ) { cout << endl << "Run MariaDB ColumnStore Replication Setup.. "; @@ -3665,7 +3666,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; @@ -3682,7 +3686,10 @@ int main(int argc, char *argv[]) cout << "Enter the following command to define MariaDB ColumnStore Alias Commands" << endl << endl; - cout << ". " + installDir + "/bin/columnstoreAlias" << endl << endl; + if ( !rootUser ) + cout << ". /etc/profile.d/columnstoreEnv.sh" << endl; + + cout << ". /etc/profile.d/columnstoreAlias.sh" << endl << endl; cout << "Enter 'mcsmysql' to access the MariaDB ColumnStore SQL console" << endl; cout << "Enter 'mcsadmin' to access the MariaDB ColumnStore Admin console" << endl << endl; diff --git a/oamapps/postConfigure/quick_installer_amazon.sh b/oamapps/postConfigure/quick_installer_amazon.sh index c71dc8920..4a1df903c 100755 --- a/oamapps/postConfigure/quick_installer_amazon.sh +++ b/oamapps/postConfigure/quick_installer_amazon.sh @@ -74,8 +74,8 @@ else echo "${bold}Run postConfigure script${normal}" echo "" if [[ $umCount = "" ]]; then - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount $systemName + . 
/etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount $systemName else - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount -um-count $umCount $systemName + . /etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qa -pm-count $pmCount -um-count $umCount $systemName fi fi diff --git a/oamapps/postConfigure/quick_installer_multi_server.sh b/oamapps/postConfigure/quick_installer_multi_server.sh index 25d615d7e..dbb603220 100755 --- a/oamapps/postConfigure/quick_installer_multi_server.sh +++ b/oamapps/postConfigure/quick_installer_multi_server.sh @@ -78,8 +78,8 @@ else echo "${bold}Run postConfigure script${normal}" echo "" if [[ $umIpAddrs = "" ]]; then - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall $systemName + . /etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs $nonDistrubutedInstall $systemName else - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall $systemName + . 
/etc/profile.d/columnstoreEnv.sh;$HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qm -pm-ip-addrs $pmIpAddrs -um-ip-addrs $umIpAddrs $nonDistrubutedInstall $systemName fi fi diff --git a/oamapps/postConfigure/quick_installer_single_server.sh b/oamapps/postConfigure/quick_installer_single_server.sh index 2603bcec8..432b395c4 100755 --- a/oamapps/postConfigure/quick_installer_single_server.sh +++ b/oamapps/postConfigure/quick_installer_single_server.sh @@ -30,5 +30,5 @@ else $HOME/mariadb/columnstore/bin/post-install --installdir=$HOME/mariadb/columnstore echo "Run postConfigure script" echo "" - $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qs + . /etc/profile.d/columnstoreEnv.sh; $HOME/mariadb/columnstore/bin/postConfigure -i $HOME/mariadb/columnstore -qs fi From 8043674432a0e0c5f4ca4af4acc2cac0ecee561b Mon Sep 17 00:00:00 2001 From: David Hill Date: Wed, 1 Aug 2018 16:34:40 -0500 Subject: [PATCH 05/17] MCOL-1591 - add umask test to tester --- .../clusterTester/columnstoreClusterTester.sh | 156 +++++++++++++++--- utils/clusterTester/os_detect.sh | 2 +- 2 files changed, 132 insertions(+), 26 deletions(-) diff --git a/utils/clusterTester/columnstoreClusterTester.sh b/utils/clusterTester/columnstoreClusterTester.sh index 9f8f3e9a3..f68d16c55 100755 --- a/utils/clusterTester/columnstoreClusterTester.sh +++ b/utils/clusterTester/columnstoreClusterTester.sh @@ -10,7 +10,7 @@ CHECK=true REPORTPASS=true LOGFILE="" -OS_LIST=("centos6" "centos7" "debian8" "debian9" "suse12" "ubuntu16") +OS_LIST=("centos6" "centos7" "debian8" "debian9" "suse12" "ubuntu16" "ubuntu18") NODE_IPADDRESS="" @@ -37,7 +37,7 @@ checkContinue() { } ### -# Print Fucntions +# Print Functions ### helpPrint () { @@ -57,7 +57,7 @@ helpPrint () { echo "" echo "Additional information on Tool is documented at:" echo "" - echo "https://mariadb.com/kb/en/mariadb/*****/" + echo "https://mariadb.com/kb/en/library/mariadb-columnstore-cluster-test-tool/" echo "" 
echo "Items that are checked:" echo " Node Ping test" @@ -65,6 +65,7 @@ helpPrint () { echo " ColumnStore Port test" echo " OS version" echo " Locale settings" + echo " Umask settings" echo " Firewall settings" echo " Date/time settings" echo " Dependent packages installed" @@ -326,16 +327,18 @@ checkSSH() rc="$?" if [ $rc -eq 0 ] || ( [ $rc -eq 2 ] && [ $OS == "suse12" ] ) ; then if [ $PASSWORD == "ssh" ] ; then - echo $ipadd " Node Passed SSH login test using ssh-keys" + echo $ipadd " Node Passed SSH login test using ssh-keys" else - echo $ipadd " Node Passed SSH login test using user password" + echo $ipadd " Node Passed SSH login test using user password" fi else if [ $PASSWORD == "ssh" ] ; then - echo $ipadd " Node ${bold}Failed${normal} SSH login test using ssh-keys" + echo $ipadd " Node ${bold}Failed${normal} SSH login test using ssh-keys" else - echo $ipadd " Node ${bold}Failed${normal} SSH login test using user password" + echo $ipadd " Node ${bold}Failed${normal} SSH login test using user password" fi + + echo "Error - Fix the SSH login issue and rerun test" exit 1 fi done @@ -489,12 +492,47 @@ checkLocale() fi } -checkSELINUX() +checkLocalUMASK() +{ + # UMASK check + # + echo "" + echo "** Run Local UMASK check" + echo "" + + pass=true + filename=UMASKtest + + rm -f $filename + touch $filename + permission=$(stat -c "%A" "$filename") + result=${permission:4:1} + if [ ${result} == "r" ] ; then + result=${permission:7:1} + if [ ${result} == "r" ] ; then + echo "UMASK local setting test passed" + else + echo "${bold}Warning${normal}, UMASK test failed, check local UMASK setting. Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, UMASK test failed, check local UMASK setting. Requirement is set to 0022" + pass=false + fi + + if ! 
$pass; then + checkContinue + fi + + rm -f $filename +} + +checkLocalSELINUX() { # SELINUX check # echo "" - echo "** Run SELINUX check" + echo "** Run Local SELINUX check" echo "" pass=true @@ -511,21 +549,86 @@ checkSELINUX() echo "Local Node SELINUX setting is Not Enabled" fi - for ipadd in "${NODE_IPADDRESS[@]}"; do - `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /etc/selinux/config > /tmp/remote_scp_get_check 2>&1` - if [ "$?" -ne 0 ]; then - echo "$ipadd Node SELINUX setting is Not Enabled" - else - `cat config | grep SELINUX | grep enforcing > /tmp/selinux_check 2>&1` - if [ "$?" -eq 0 ]; then - echo "${bold}Warning${normal}, $ipadd SELINUX setting is Enabled, check port test results" - pass=false - else - echo "$ipadd Node SELINUX setting is Not Enabled" - fi - `rm -f config` - fi - done + if ! $pass; then + checkContinue + fi +} + +checkUMASK() +{ + # UMASK check + # + echo "" + echo "** Run UMASK check" + echo "" + + pass=true + + for ipadd in "${NODE_IPADDRESS[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_command.sh $ipadd $PASSWORD 'rm -f UMASKtest;touch UMASKtest;echo $(stat -c "%A" "UMASKtest") > test.log' > /tmp/remote_command_check 2>&1` + if [ "$?" -eq 0 ]; then + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD test.log >> /tmp/remote_scp_get 2>&1` # fetch with the caller-supplied password, same as the remote_command.sh call above + if [ "$?" -eq 0 ]; then + permission=`cat test.log` + result=${permission:4:1} + if [ ${result} == "r" ] ; then + result=${permission:7:1} + if [ ${result} == "r" ] ; then + echo "$ipadd Node UMASK setting test passed" + else + echo "${bold}Warning${normal}, $ipadd Node UMASK test failed, check UMASK setting. Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd Node UMASK test failed, check UMASK setting. 
Requirement is set to 0022" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd UMASK test failed, remote_scp_get.sh error, check /tmp/remote_scp_get" + pass=false + fi + else + echo "${bold}Warning${normal}, $ipadd UMASK test failed, remote_command.sh error, check /tmp/remote_command_check" + pass=false + fi + `rm -f test.log` + done + + if ! $pass; then + checkContinue + fi + + rm -f $filename +} + +checkSELINUX() +{ + # SELINUX check + # + echo "" + echo "** Run SELINUX check" + echo "" + + pass=true + for ipadd in "${NODE_IPADDRESS[@]}"; do + `$COLUMNSTORE_INSTALL_DIR/bin/remote_scp_get.sh $ipadd $PASSWORD /etc/selinux/config > /tmp/remote_scp_get_check 2>&1` + if [ "$?" -ne 0 ]; then + echo "$ipadd Node SELINUX setting is Not Enabled" + else + `cat config | grep SELINUX | grep enforcing > /tmp/selinux_check 2>&1` + if [ "$?" -eq 0 ]; then + echo "${bold}Warning${normal}, $ipadd SELINUX setting is Enabled, check port test results" + pass=false + else + echo "$ipadd Node SELINUX setting is Not Enabled" + fi + `rm -f config` + fi + done + + if ! $pass; then + checkContinue + fi } checkFirewalls() @@ -949,7 +1052,7 @@ checkPackages() declare -a UBUNTU_PKG=("libboost-all-dev" "expect" "libdbi-perl" "perl" "openssl" "file" "sudo" "libreadline-dev" "rsync" "libsnappy1V5" "net-tools" "libnuma1" ) declare -a UBUNTU_PKG_NOT=("mariadb-server" "libmariadb18") - if [ "$OS" == "ubuntu16" ] ; then + if [ "$OS" == "ubuntu16" ] || [ "$OS" == "ubuntu18" ]; then if [ ! 
`which dpkg 2>/dev/null` ] ; then echo "${bold}Failed${normal}, Local Node ${bold}rpm${normal} package not installed" pass=false @@ -1307,12 +1410,15 @@ echo "" checkLocalOS checkLocalDir +checkLocalUMASK +checkLocalSELINUX if [ "$IPADDRESSES" != "" ]; then checkPing checkSSH checkRemoteDir checkOS checkLocale + checkUMASK checkSELINUX checkFirewalls checkPorts diff --git a/utils/clusterTester/os_detect.sh b/utils/clusterTester/os_detect.sh index 7930c0daf..be69e870e 100755 --- a/utils/clusterTester/os_detect.sh +++ b/utils/clusterTester/os_detect.sh @@ -29,7 +29,7 @@ detectOS () { echo Operating System name: $osPrettyName echo Operating System tag: $osTag case "$osTag" in - centos6|centos7|ubuntu16|debian8|suse12|debian9) + centos6|centos7|ubuntu16|debian8|suse12|debian9|ubuntu18) ;; *) echo OS not supported From 5d245c8932fc8698ca26ab70d4f7b55efaa9567e Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 2 Aug 2018 10:59:38 -0500 Subject: [PATCH 06/17] MCOL-1498 - add prompt for password on non-distibute installs --- oamapps/postConfigure/postConfigure.cpp | 72 ++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 98227da9d..774a7a490 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -3090,7 +3090,9 @@ int main(int argc, char *argv[]) //check if dbrm data resides in older directory path and inform user if it does dbrmDirCheck(); - if ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM && pmNumber == 1) { + if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || + ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) + { //run the mysql / mysqld setup scripts cout << endl << "===== Running the MariaDB ColumnStore MariaDB Server setup scripts =====" << endl << endl; @@ -3098,7 +3100,61 @@ int main(int argc, char *argv[]) // call the mysql setup scripts mysqlSetup(); 
- sleep(5); + sleep(3); + } + + if ( IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM || + pmNumber > 1 ) { + + if ( password.empty() ) + { + cout << endl; + cout << "Next step is to enter the password to access the other Servers." << endl; + cout << "This is either your password or you can default to using a ssh key" << endl; + cout << "If using a password, the password needs to be the same on all Servers." << endl << endl; + } + + while(true) + { + char *pass1, *pass2; + + if ( noPrompting ) { + cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; + if ( password.empty() ) + password = "ssh"; + break; + } + + //check for command line option password + //if ( !password.empty() ) + // break; + + pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); + if ( strcmp(pass1, "") == 0 ) { + password = "ssh"; + break; + } + + string p1 = pass1; + if ( p1 == "exit") + exit(0); + + pass2=getpass("Confirm password > "); + string p2 = pass2; + if ( p1 == p2 ) { + password = p2; + break; + } + else + cout << "Password mismatch, please re-enter" << endl; + } + + //add single quote for special characters + if ( password != "ssh" ) + { + password = "'" + password + "'"; + } + } int thread_id = 0; @@ -3173,7 +3229,7 @@ int main(int argc, char *argv[]) if( !pkgCheck(columnstorePackage) ) exit(1); - if ( password.empty() ) +/* if ( password.empty() ) { cout << endl; cout << "Next step is to enter the password to access the other Servers." 
<< endl; @@ -3221,10 +3277,10 @@ int main(int argc, char *argv[]) { password = "'" + password + "'"; } - +*/ checkSystemMySQLPort(mysqlPort, sysConfig, USER, password, childmodulelist, IserverTypeInstall, pmwithum); - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || +/* if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) { cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; @@ -3233,7 +3289,7 @@ int main(int argc, char *argv[]) mysqlSetup(); sleep(5); } - +*/ string AmazonInstall = "0"; if ( amazonInstall ) AmazonInstall = "1"; @@ -3411,7 +3467,7 @@ int main(int argc, char *argv[]) cout << " DONE" << endl; } } - else +/* else { if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) @@ -3423,7 +3479,7 @@ int main(int argc, char *argv[]) sleep(5); } } - +*/ //configure data redundancy if (DataRedundancy) { From 0837f9a520eb4315f5b5d5e20d727d45bc35d9e4 Mon Sep 17 00:00:00 2001 From: David Hill Date: Thu, 2 Aug 2018 11:51:27 -0500 Subject: [PATCH 07/17] MCOL-1498 - add prompt for password on non-distibute installs --- oamapps/postConfigure/postConfigure.cpp | 141 ++++++------------------ 1 file changed, 33 insertions(+), 108 deletions(-) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index 774a7a490..c31f0c5de 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -3104,57 +3104,53 @@ int main(int argc, char *argv[]) } if ( IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM || - pmNumber > 1 ) { - + pmNumber > 1 ) + { if ( password.empty() ) { cout << endl; cout << "Next step is to enter the password to access the other Servers." 
<< endl; cout << "This is either your password or you can default to using a ssh key" << endl; cout << "If using a password, the password needs to be the same on all Servers." << endl << endl; - } - - while(true) - { - char *pass1, *pass2; if ( noPrompting ) { cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; - if ( password.empty() ) - password = "ssh"; - break; - } - - //check for command line option password - //if ( !password.empty() ) - // break; - - pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); - if ( strcmp(pass1, "") == 0 ) { password = "ssh"; - break; } + else + { + while(true) + { + char *pass1, *pass2; - string p1 = pass1; - if ( p1 == "exit") - exit(0); + pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); + if ( strcmp(pass1, "") == 0 ) { + password = "ssh"; + break; + } - pass2=getpass("Confirm password > "); - string p2 = pass2; - if ( p1 == p2 ) { - password = p2; - break; - } - else - cout << "Password mismatch, please re-enter" << endl; - } + string p1 = pass1; + if ( p1 == "exit") + exit(0); - //add single quote for special characters - if ( password != "ssh" ) - { - password = "'" + password + "'"; - } + pass2=getpass("Confirm password > "); + string p2 = pass2; + if ( p1 == p2 ) { + password = p2; + break; + } + else + cout << "Password mismatch, please re-enter" << endl; + } + //add single quote for special characters + if ( password != "ssh" ) + { + password = "'" + password + "'"; + } + + } + } } int thread_id = 0; @@ -3229,67 +3225,8 @@ int main(int argc, char *argv[]) if( !pkgCheck(columnstorePackage) ) exit(1); -/* if ( password.empty() ) - { - cout << endl; - cout << "Next step is to enter the password to access the other Servers." << endl; - cout << "This is either your password or you can default to using a ssh key" << endl; - cout << "If using a password, the password needs to be the same on all Servers." 
<< endl << endl; - } - - while(true) - { - char *pass1, *pass2; - - if ( noPrompting ) { - cout << "Enter password, hit 'enter' to default to using a ssh key, or 'exit' > " << endl; - if ( password.empty() ) - password = "ssh"; - break; - } - - //check for command line option password - if ( !password.empty() ) - break; - - pass1=getpass("Enter password, hit 'enter' to default to using a ssh key, or 'exit' > "); - if ( strcmp(pass1, "") == 0 ) { - password = "ssh"; - break; - } - - if ( pass1 == "exit") - exit(0); - - string p1 = pass1; - pass2=getpass("Confirm password > "); - string p2 = pass2; - if ( p1 == p2 ) { - password = p2; - break; - } - else - cout << "Password mismatch, please re-enter" << endl; - } - - //add single quote for special characters - if ( password != "ssh" ) - { - password = "'" + password + "'"; - } -*/ checkSystemMySQLPort(mysqlPort, sysConfig, USER, password, childmodulelist, IserverTypeInstall, pmwithum); -/* if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) - { - cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - } -*/ string AmazonInstall = "0"; if ( amazonInstall ) AmazonInstall = "1"; @@ -3467,19 +3404,7 @@ int main(int argc, char *argv[]) cout << " DONE" << endl; } } -/* else - { - if ( ( IserverTypeInstall == oam::INSTALL_COMBINE_DM_UM_PM ) || - ( (IserverTypeInstall != oam::INSTALL_COMBINE_DM_UM_PM) && pmwithum ) ) - { - cout << endl << "===== Running the MariaDB ColumnStore MariaDB ColumnStore setup scripts =====" << endl << endl; - - // call the mysql setup scripts - mysqlSetup(); - sleep(5); - } - } -*/ + //configure data redundancy if (DataRedundancy) { From 515cc31d4fe7924eba6d5141c52975c5c22e9245 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Mon, 6 Aug 2018 10:10:52 -0500 Subject: [PATCH 08/17] MCOL-1610: 
modify so if moving a dbroot fails with gluster it is reassigned to original owner. Add logging around failure for mounting gluster volumes. --- oam/oamcpp/liboamcpp.cpp | 101 +++++++++++++++++++++---------------- procmgr/main.cpp | 20 ++++---- procmgr/processmanager.cpp | 4 +- procmon/processmonitor.cpp | 7 +++ 4 files changed, 76 insertions(+), 56 deletions(-) diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 9a405e978..7483ca239 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5295,6 +5295,7 @@ namespace oam dbrootList dbroot1; dbroot1.push_back(*pt1); + bool returnDbRoot = false; //send msg to unmount dbroot if module is not offline int opState; @@ -5306,7 +5307,6 @@ namespace oam {} if (opState != oam::AUTO_OFFLINE || opState != oam::AUTO_DISABLED) { -// bool unmountPass = true; try { mountDBRoot(dbroot1, false); @@ -5316,13 +5316,8 @@ namespace oam writeLog("ERROR: dbroot failed to unmount", LOG_TYPE_ERROR ); cout << endl << "ERROR: umountDBRoot api failure" << endl; exceptionControl("manualMovePmDbroot", API_FAILURE); -// unmountPass = false; } -// if ( !unmountPass) { -// dbrootlist.erase(pt1); -// break; -// } } //check for amazon moving required @@ -5340,38 +5335,79 @@ namespace oam //if Gluster, do the assign command if ( DataRedundancyConfig == "y") { - try { + try + { string errmsg; int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, toPM, errmsg); - if ( ret != 0 ) + if ( ret == 0 ) + { + todbrootConfigList.push_back(*pt2); + residedbrootConfigList.erase(pt2); + } + else { cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID + ", error: " + errmsg, LOG_TYPE_ERROR ); + returnDbRoot = true; } } catch (exception& e) { cout << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Error assigning 
gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); + returnDbRoot = true; } catch (...) { cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + writeLog("FAILURE: Error assigning gluster dbroot# " + *pt1 + " to pm" + toPMID, LOG_TYPE_ERROR ); + returnDbRoot = true; } } - todbrootConfigList.push_back(*pt2); - - residedbrootConfigList.erase(pt2); - + if (returnDbRoot) + { + // something went wrong return it back to original owner + try + { + string errmsg; + writeLog("reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + int ret = glusterctl(oam::GLUSTER_ASSIGN, *pt1, residePM, errmsg); + if ( ret != 0 ) + { + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID + ", error: " + errmsg, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + mountDBRoot(dbroot1); + //get updated Columnstore.xml distributed + distributeConfigFile("system"); + return; + } + catch (exception& e) + { + cout << endl << "**** glusterctl API exception: " << e.what() << endl; + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + catch (...) 
+ { + cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; + cerr << "FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID << endl; + writeLog("FAILURE: Error reassigning gluster dbroot# " + *pt1 + " to pm" + residePMID, LOG_TYPE_ERROR ); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); + } + } break; } } } + + //set the 2 pms dbroot config try { @@ -5381,7 +5417,7 @@ namespace oam { writeLog("ERROR: setPmDbrootConfig api failure for pm" + residePMID , LOG_TYPE_ERROR ); cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + residePMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); } try @@ -5392,7 +5428,7 @@ namespace oam { writeLog("ERROR: setPmDbrootConfig api failure for pm" + toPMID , LOG_TYPE_ERROR ); cout << endl << "ERROR: setPmDbrootConfig api failure for pm" + toPMID << endl; - exceptionControl("manualMovePmDbroot", API_FAILURE); + exceptionControl("manualMovePmDbroot", API_INVALID_STATE); } //send msg to mount dbroot @@ -5980,7 +6016,7 @@ namespace oam } if (!found) { - writeLog("No dbroots found in ../Calpont/local/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); + writeLog("No dbroots found in " + InstallDir + "/moveDbrootTransactionLog", LOG_TYPE_DEBUG ); cout << "No dbroots found in " << fileName << endl; } @@ -6518,32 +6554,7 @@ namespace oam for( ; pt3 != dbrootlist.end() ; pt3++) { todbrootConfigList.push_back(*pt3); - -/* if ( DataRedundancyConfig == "y") - { - try { - string errmsg; - int ret = glusterctl(oam::GLUSTER_ASSIGN, itoa(*pt3), toPM, errmsg); - if ( ret != 0 ) - { - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID + ", error: " + errmsg << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - } - catch (exception& e) - { - cout << endl << "**** glusterctl API exception: " << e.what() << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) 
+ " to pm" + toPMID << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - catch (...) - { - cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; - cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(*pt3) + " to pm" + toPMID << endl; - exceptionControl("assignPmDbrootConfig", API_FAILURE); - } - } -*/ } + } try { @@ -6961,12 +6972,14 @@ namespace oam { cout << endl << "**** glusterctl API exception: " << e.what() << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; + writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); exceptionControl("removeDbroot", API_FAILURE); } catch (...) { cout << endl << "**** glusterctl API exception: UNKNOWN" << endl; cerr << "FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID) << endl; + writeLog("FAILURE: Error assigning gluster dbroot# " + itoa(dbrootID), LOG_TYPE_ERROR ); exceptionControl("removeDbroot", API_FAILURE); } } diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 2747fda16..995c851c2 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1574,7 +1574,7 @@ void pingDeviceThread() { // no dbroots, fail module log.writeLog(__LINE__, "autoUnMovePmDbroot left no dbroots mounted, failing module restart: " + moduleName, LOG_TYPE_WARNING); - + //Issue an alarm aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); @@ -1597,7 +1597,7 @@ void pingDeviceThread() //set query system state ready processManager.setQuerySystemState(true); - break; + goto break_case; } } catch(...) @@ -1619,25 +1619,24 @@ void pingDeviceThread() if ( retry == 5 ) { log.writeLog(__LINE__, "autoUnMovePmDbroot: Failed. 
Fail Module", LOG_TYPE_WARNING); - + log.writeLog(__LINE__, "System DBRM READ ONLY - Verify dbroot mounts.", LOG_TYPE_WARNING); //Issue an alarm aManager.sendAlarmReport(moduleName.c_str(), MODULE_DOWN_AUTO, SET); //set module to disable state processManager.disableModule(moduleName, true); + // Need to do something here to verify data mounts before resuming + // Best to assume if we reach this you need to put into readonly and verify all dbroots are mounted + //call dbrm control - oam.dbrmctl("reload"); - log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); + oam.dbrmctl("readonly"); + log.writeLog(__LINE__, "'dbrmctl readonly' done", LOG_TYPE_DEBUG); //clear count moduleInfoList[moduleName] = 0; - processManager.setSystemState(oam::ACTIVE); + processManager.setSystemState(oam::DEGRADED); //set query system state ready processManager.setQuerySystemState(true); @@ -2358,6 +2357,7 @@ void pingDeviceThread() } } //end of for loop } + break_case: // check and take action if LAN outage is flagged if (LANOUTAGESUPPORT && !LANOUTAGEACTIVE && LOCALNICDOWN) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 0a054f9c3..3cc094a0a 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -6244,7 +6244,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string IPAddr = sysConfig->getConfig(msgPort, "IPAddr"); if ( IPAddr == oam::UnassignedIpAddr ) { - log.writeLog(__LINE__, "sendMsgProcMon ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } @@ -6253,7 +6253,7 @@ int ProcessManager::sendMsgProcMon( std::string module, ByteStream msg, int requ string cmd = cmdLine + IPAddr + cmdOption; if ( system(cmd.c_str()) != 0) { //ping failure - log.writeLog(__LINE__, "sendMsgProcMon 
ping failure", LOG_TYPE_ERROR); + log.writeLog(__LINE__, "sendMsgProcMon ping failure " + module + " " + IPAddr, LOG_TYPE_ERROR); return oam::API_SUCCESS; } } diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index 754d6ccf2..8b7b13165 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -6107,10 +6107,13 @@ int ProcessMonitor::glusterAssign(std::string dbrootID) command = "sudo mount -tglusterfs -odirect-io-mode=enable " + moduleIPAddr + ":/dbroot" + dbrootID + " " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterAssign.txt 2>&1"; } + int ret = system(command.c_str()); if ( WEXITSTATUS(ret) != 0 ) { + log.writeLog(__LINE__, "glusterAssign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR); + ifstream in("/tmp/glusterAssign.txt"); in.seekg(0, std::ios::end); int size = in.tellg(); @@ -6151,9 +6154,13 @@ int ProcessMonitor::glusterUnassign(std::string dbrootID) { command = "sudo umount -f " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/glusterUnassign.txt 2>&1"; } + int ret = system(command.c_str()); + if ( WEXITSTATUS(ret) != 0 ) { + log.writeLog(__LINE__, "glusterUnassign mount failure: dbroot: " + dbrootID + " error: " + oam.itoa(WEXITSTATUS(ret)), LOG_TYPE_ERROR); + ifstream in("/tmp/glusterUnassign.txt"); in.seekg(0, std::ios::end); int size = in.tellg(); From 8a4294978f35f8728690641bdf0c5bbe195e34c8 Mon Sep 17 00:00:00 2001 From: David Hill Date: Tue, 7 Aug 2018 08:54:08 -0500 Subject: [PATCH 09/17] MCOL-1605 - changed error to debug, alarms trying to get issued before procmgr is up --- oamapps/alarmmanager/alarmmanager.cpp | 8 ++++---- procmon/main.cpp | 9 +++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/oamapps/alarmmanager/alarmmanager.cpp b/oamapps/alarmmanager/alarmmanager.cpp index b9ba4f702..b9e8c20df 100644 --- a/oamapps/alarmmanager/alarmmanager.cpp +++ b/oamapps/alarmmanager/alarmmanager.cpp @@ -422,7 
+422,7 @@ void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int st int pid = getpid(); int tid = gettid(); - // get reporting Pprocess Name + // get reporting Process Name string processName; if ( repProcessName.empty()) { // get current process name @@ -468,7 +468,7 @@ void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int st args.add("sendAlarmReport error:"); args.add(e.what()); msg.format(args); - ml.logErrorMessage(msg); + ml.logDebugMessage(msg); } catch (std::exception& e) { @@ -479,7 +479,7 @@ void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int st args.add("sendAlarmReport error:"); args.add(e.what()); msg.format(args); - ml.logErrorMessage(msg); + ml.logDebugMessage(msg); } catch (...) { @@ -490,7 +490,7 @@ void ALARMManager::sendAlarmReport (const char* componentID, int alarmID, int st args.add("sendAlarmReport error:"); args.add("general failure"); msg.format(args); - ml.logErrorMessage(msg); + ml.logDebugMessage(msg); } return; diff --git a/procmon/main.cpp b/procmon/main.cpp index 424944f6b..1c8ce19a9 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -308,8 +308,9 @@ int main(int argc, char **argv) if ( count >= 120 ) { log.writeLog(__LINE__, "Standby PM not responding, infinidb shutting down", LOG_TYPE_CRITICAL); //Set the alarm - aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - sleep (1); + // aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); + // sleep (1); + string cmd = startup::StartUp::installDir() + "/bin/infinidb stop > /dev/null 2>&1"; system(cmd.c_str()); } @@ -493,8 +494,8 @@ int main(int argc, char **argv) { log.writeLog(__LINE__, "Check DB mounts failed, shutting down", LOG_TYPE_CRITICAL); //Set the alarm - aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); - sleep (1); + // aMonitor.sendAlarm(config.moduleName().c_str(), STARTUP_DIAGNOTICS_FAILURE, SET); + // 
sleep (1); string cmd = startup::StartUp::installDir() + "/bin/columnstore stop > /dev/null 2>&1"; system(cmd.c_str()); } From 5927b1cf81d072300735266e7b9d16bfd6c57529 Mon Sep 17 00:00:00 2001 From: David Hill Date: Fri, 10 Aug 2018 16:01:12 -0500 Subject: [PATCH 10/17] MCOL-1523 - fix issue with query failing and enablemodule failing trying to get um1 back. --- oam/etc/ProcessConfig.xml | 4 ++-- procmgr/main.cpp | 24 ------------------------ procmgr/processmanager.cpp | 33 ++++++++++++++++----------------- 3 files changed, 18 insertions(+), 43 deletions(-) diff --git a/oam/etc/ProcessConfig.xml b/oam/etc/ProcessConfig.xml index 8a0c3618f..ca5d745f0 100644 --- a/oam/etc/ProcessConfig.xml +++ b/oam/etc/ProcessConfig.xml @@ -107,7 +107,7 @@ WriteEngineServer pm* DBRMWorkerNode - * + @ ExeMgr * SIMPLEX @@ -122,7 +122,7 @@ WriteEngineServer pm* DBRMWorkerNode - * + @ DDLProc @ SIMPLEX diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 995c851c2..55f675cec 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -1523,9 +1523,6 @@ void pingDeviceThread() break; //set query system state not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -1806,9 +1803,6 @@ void pingDeviceThread() } } - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -1865,9 +1859,6 @@ void pingDeviceThread() else processManager.setSystemState(oam::ACTIVE); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -1929,9 +1920,6 @@ void pingDeviceThread() log.writeLog(__LINE__, "module is down: " + moduleName, LOG_TYPE_CRITICAL); //set query system state not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -1993,9 +1981,6 @@ void 
pingDeviceThread() oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -2201,9 +2186,6 @@ void pingDeviceThread() //set recycle process processManager.recycleProcess(moduleName); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); @@ -2220,9 +2202,6 @@ void pingDeviceThread() oam.dbrmctl("resume"); log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); } @@ -2236,9 +2215,6 @@ void pingDeviceThread() //set recycle process processManager.recycleProcess(moduleName); - //enable query stats - dbrm.setSystemQueryReady(true); - //set query system state ready processManager.setQuerySystemState(true); } diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 3cc094a0a..5f15f3446 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -829,8 +829,10 @@ void processMSG(messageqcpp::IOSocket* cfIos) if (opState == oam::MAN_OFFLINE || opState == oam::MAN_DISABLED || opState == oam::AUTO_DISABLED || opState == oam::AUTO_OFFLINE) { - oam.dbrmctl("halt"); - log.writeLog(__LINE__, "'dbrmctl halt' done", LOG_TYPE_DEBUG); + processManager.setSystemState(oam::BUSY_INIT); + + //set query system state not ready + processManager.setQuerySystemState(false); status = processManager.disableModule(moduleName, true); log.writeLog(__LINE__, "Disable Module Completed on " + moduleName, LOG_TYPE_INFO); @@ -839,14 +841,12 @@ void processMSG(messageqcpp::IOSocket* cfIos) //check for SIMPLEX Processes on mate might need to be started processManager.checkSimplexModule(moduleName); + + processManager.setSystemState(oam::ACTIVE); + + //set query system state ready + 
processManager.setQuerySystemState(true); - //call dbrm control -// oam.dbrmctl("reload"); -// log.writeLog(__LINE__, "'dbrmctl reload' done", LOG_TYPE_DEBUG); - - // resume the dbrm - oam.dbrmctl("resume"); - log.writeLog(__LINE__, "'dbrmctl resume' done", LOG_TYPE_DEBUG); } else { @@ -910,7 +910,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) DeviceNetworkList::iterator listPT = devicenetworklist.begin(); - //stopModules being removed with the REMOVE option, which will stop process + // do stopmodule then enable for( ; listPT != devicenetworklist.end() ; listPT++) { string moduleName = (*listPT).DeviceName; @@ -933,6 +933,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) } if (opState == oam::MAN_DISABLED) { + processManager.stopModule(moduleName, graceful, manualFlag); + log.writeLog(__LINE__, "stop Module Completed on " + moduleName, LOG_TYPE_INFO); + status = processManager.enableModule(moduleName, oam::MAN_OFFLINE); log.writeLog(__LINE__, "Enable Module Completed on " + moduleName, LOG_TYPE_INFO); } @@ -2758,9 +2761,6 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "MSG RECEIVED: Process Restarted on " + moduleName + "/" + processName); //set query system states not ready - BRM::DBRM dbrm; - dbrm.setSystemQueryReady(false); - processManager.setQuerySystemState(false); processManager.setSystemState(oam::BUSY_INIT); @@ -2841,7 +2841,8 @@ void processMSG(messageqcpp::IOSocket* cfIos) break; sleep(1); } - dbrm.setSystemQueryReady(true); + processManager.setQuerySystemState(true); + } // if a DDLProc was restarted, reinit DMLProc @@ -2894,8 +2895,6 @@ void processMSG(messageqcpp::IOSocket* cfIos) } //enable query stats - dbrm.setSystemQueryReady(true); - processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); @@ -6489,7 +6488,7 @@ void ProcessManager::setQuerySystemState(bool set) log.writeLog(__LINE__, "setQuerySystemState = " + oam.itoa(set), LOG_TYPE_DEBUG); try { - dbrm.setSystemQueryReady(set); + 
dbrm.setSystemQueryReady(true); log.writeLog(__LINE__, "setQuerySystemState successful", LOG_TYPE_DEBUG); } catch(...) From b5a39ea78901dcd89078de5d8a674837c3b35dbc Mon Sep 17 00:00:00 2001 From: David Hill Date: Fri, 10 Aug 2018 16:15:58 -0500 Subject: [PATCH 11/17] MCOL-1523 - fix issue with query failing and enablemodule failing trying to get um1 back. --- procmgr/processmanager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index 5f15f3446..a29502ae8 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -6488,7 +6488,7 @@ void ProcessManager::setQuerySystemState(bool set) log.writeLog(__LINE__, "setQuerySystemState = " + oam.itoa(set), LOG_TYPE_DEBUG); try { - dbrm.setSystemQueryReady(true); + dbrm.setSystemQueryReady(set); log.writeLog(__LINE__, "setQuerySystemState successful", LOG_TYPE_DEBUG); } catch(...) From e903e47201337bd49129bb68c9161e3caa510063 Mon Sep 17 00:00:00 2001 From: David Hill Date: Fri, 10 Aug 2018 18:37:40 -0500 Subject: [PATCH 12/17] MCOL-1523 - fix issue with query failing and enablemodule failing trying to get um1 back. 
--- procmgr/processmanager.cpp | 15 +++++++++++++-- procmon/main.cpp | 9 ++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index a29502ae8..a8258f19f 100755 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -1249,6 +1249,9 @@ void processMSG(messageqcpp::IOSocket* cfIos) log.writeLog(__LINE__, "STOPSYSTEM: ACK back to sender"); } + //set query system state ready + processManager.setQuerySystemState(true); + startsystemthreadStop = false; break; @@ -2848,6 +2851,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) // if a DDLProc was restarted, reinit DMLProc if( processName == "DDLProc") { processManager.reinitProcessType("DMLProc"); + processManager.setQuerySystemState(true); } //only run on auto process restart @@ -2894,7 +2898,7 @@ void processMSG(messageqcpp::IOSocket* cfIos) } } - //enable query stats + //set query system states ready processManager.setQuerySystemState(true); processManager.setSystemState(oam::ACTIVE); @@ -3773,6 +3777,7 @@ void ProcessManager::setSystemState(uint16_t state) Oam oam; ALARMManager aManager; Configuration config; + ProcessManager processManager(config, log); log.writeLog(__LINE__, "Set System State = " + oamState[state], LOG_TYPE_DEBUG); @@ -3793,6 +3798,9 @@ void ProcessManager::setSystemState(uint16_t state) // Process Alarms string system = "System"; if( state == oam::ACTIVE ) { + //set query system states ready + processManager.setQuerySystemState(true); + //clear alarms if set aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_AUTO, CLEAR); aManager.sendAlarmReport(system.c_str(), SYSTEM_DOWN_MANUAL, CLEAR); @@ -6992,7 +7000,7 @@ void startSystemThread(oam::DeviceNetworkList Devicenetworklist) } //set query system state not ready - processManager.setQuerySystemState(true); + processManager.setQuerySystemState(false); // Bug 4554: Wait until DMLProc is finished with rollback if (status == oam::API_SUCCESS) @@ -7061,6 +7069,9 @@ 
void startSystemThread(oam::DeviceNetworkList Devicenetworklist) processManager.setSystemState(rtn); } + //set query system state ready + processManager.setQuerySystemState(true); + // exit thread log.writeLog(__LINE__, "startSystemThread Exit", LOG_TYPE_DEBUG); startsystemthreadStatus = status; diff --git a/procmon/main.cpp b/procmon/main.cpp index 1c8ce19a9..ad05a4f95 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -336,7 +336,7 @@ int main(int argc, char **argv) sysConfig->setConfig("ProcMgr_Alarm", "IPAddr", IPaddr); log.writeLog(__LINE__, "set ProcMgr IPaddr to Old Standby Module: " + IPaddr, LOG_TYPE_DEBUG); - //update Calpont Config table + //update MariaDB ColumnStore Config table try { sysConfig->write(); sleep(1); @@ -1333,7 +1333,7 @@ static void chldHandleThread(MonitorConfig config) (*listPtr).processID != 0 ) || ( (*listPtr).state == oam::ACTIVE && (*listPtr).processID == 0 ) ) { - log.writeLog(__LINE__, "*****Calpont Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL); + log.writeLog(__LINE__, "*****MariaDB ColumnStore Process Restarting: " + (*listPtr).ProcessName + ", old PID = " + oam.itoa((*listPtr).processID), LOG_TYPE_CRITICAL); if ( (*listPtr).dieCounter >= processRestartCount || processRestartCount == 0) { @@ -1530,7 +1530,7 @@ static void chldHandleThread(MonitorConfig config) } //Log this event - log.writeLog(__LINE__, "Calpont Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO); + log.writeLog(__LINE__, "MariaDB ColumnStore Process " + (*listPtr).ProcessName + restartStatus, LOG_TYPE_INFO); } } } @@ -2455,6 +2455,9 @@ void processStatusMSG(messageqcpp::IOSocket* cfIos) memcpy(fShmSystemStatus[0].StateChangeDate, oam.getCurrentTime().c_str(), DATESIZE); log.writeLog(__LINE__, "statusControl: REQUEST RECEIVED: Set System State = " + oamState[state], LOG_TYPE_DEBUG); } + + BRM::DBRM dbrm; + dbrm.setSystemQueryReady(true); } } break; From 
5df447b6ec612cab24f8d62892d2d5aca0c762d0 Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Tue, 14 Aug 2018 11:59:09 -0700 Subject: [PATCH 13/17] Fix MCOL-1635 where an "insert into table select query" crashes the server. This happens for a MEDIUMBLOB column type. --- dbcon/mysql/ha_calpont_dml.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dbcon/mysql/ha_calpont_dml.cpp b/dbcon/mysql/ha_calpont_dml.cpp index 9583d9f9e..c2dc36e47 100755 --- a/dbcon/mysql/ha_calpont_dml.cpp +++ b/dbcon/mysql/ha_calpont_dml.cpp @@ -1615,8 +1615,11 @@ int ha_calpont_impl_write_batch_row_(uchar *buf, TABLE* table, cal_impl_if::cal_ } else if (ci.columnTypes[colpos].colWidth < 16777216) { - dataLength = *(uint32_t*) buf; - buf = buf + 3 ; + dataLength = *(uint16_t*) buf; + buf = buf + 2 ; + if (*(uint8_t*)buf) + dataLength += 256*256*(*(uint8_t*)buf) ; + buf++; } else { From a98aec07fad9339b74f641c1a4644c1d4738c7cb Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Fri, 17 Aug 2018 01:10:00 +0300 Subject: [PATCH 14/17] MCOL-1655 removed hardcoded %debug from ddl.y. --- dbcon/ddlpackage/CMakeLists.txt | 4 ++-- dbcon/ddlpackage/ddl.l | 3 +++ dbcon/ddlpackage/ddl.y | 1 - 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dbcon/ddlpackage/CMakeLists.txt b/dbcon/ddlpackage/CMakeLists.txt index ae2f82fa9..82b7ba756 100644 --- a/dbcon/ddlpackage/CMakeLists.txt +++ b/dbcon/ddlpackage/CMakeLists.txt @@ -1,4 +1,3 @@ - include_directories( ${ENGINE_COMMON_INCLUDES} ) ADD_CUSTOM_COMMAND( @@ -9,9 +8,10 @@ ADD_CUSTOM_COMMAND( DEPENDS ddl.y ddl.l ) + # Parser puts extra info to stderr. 
INCLUDE(../../check_compiler_flag.cmake) -MY_CHECK_AND_SET_COMPILER_FLAG("-DYYDEBUG" DEBUG) +MY_CHECK_AND_SET_COMPILER_FLAG("-DYYDEBUG=1" DEBUG) ########### next target ############### diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index 6eeaafb0b..7f9362cee 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -33,6 +33,9 @@ using namespace ddlpackage; typedef enum { NOOP, STRIP_QUOTES } copy_action_t; +#if YYDEBUG == 0 +int ddldebug = 0; +#endif int lineno = 1; void ddlerror(struct pass_to_bison* x, char const *s); diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 96867cfb8..2556b8340 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -63,7 +63,6 @@ char* copy_string(const char *str); %pure-parser %lex-param {void * scanner} %parse-param {struct ddlpackage::pass_to_bison * x} -%debug /* Bison uses this to generate a C union definition. This is used to store the application created values associated with syntactic From 07bd4130530b228b8ef8d3fa575f1d0c3e375ec4 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Fri, 17 Aug 2018 22:27:02 +0300 Subject: [PATCH 15/17] MCOL-1660/1659 Table/column identifiers support spaces in DDL. MCOL-1660/1659 Table/column identifiers support spaces in DDL. 
--- dbcon/ddlpackage/ddl.l | 6 +++--- dbcon/ddlpackage/ddl.y | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbcon/ddlpackage/ddl.l b/dbcon/ddlpackage/ddl.l index 7f9362cee..926a836e2 100644 --- a/dbcon/ddlpackage/ddl.l +++ b/dbcon/ddlpackage/ddl.l @@ -72,9 +72,9 @@ ident_start [A-Za-z\200-\377_] ident_cont [A-Za-z\200-\377_0-9\$] identifier {ident_start}{ident_cont}* /* fully qualified names regexes */ -fq_identifier {identifier}\.{identifier} -identifier_quoted {grave_accent}{identifier}{grave_accent} -identifier_double_quoted {double_quote}{identifier}{double_quote} +ident_w_spaces {identifier}\x20* +identifier_quoted {grave_accent}{ident_w_spaces}+{grave_accent} +identifier_double_quoted {double_quote}{ident_w_spaces}+{double_quote} integer [-+]?{digit}+ decimal ([-+]?({digit}*\.{digit}+)|({digit}+\.{digit}*)) diff --git a/dbcon/ddlpackage/ddl.y b/dbcon/ddlpackage/ddl.y index 2556b8340..c9fc805ed 100644 --- a/dbcon/ddlpackage/ddl.y +++ b/dbcon/ddlpackage/ddl.y @@ -604,7 +604,7 @@ table_name: ; qualified_name: - | ident { + ident { if (x->fDBSchema.size()) $$ = new QualifiedName((char*)x->fDBSchema.c_str(), $1); else From 4572c25534f6fbbbd877e1dcd460cfc6ef75d996 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Tue, 28 Aug 2018 11:29:38 +0300 Subject: [PATCH 16/17] MCOL-1675 When insert record calculate HWM using a column with the smallest width instead of the first column in the same way as in MCOL-984. 
--- writeengine/wrapper/writeengine.cpp | 111 +++++++++++++++++++--------- writeengine/wrapper/writeengine.h | 5 ++ 2 files changed, 82 insertions(+), 34 deletions(-) diff --git a/writeengine/wrapper/writeengine.cpp b/writeengine/wrapper/writeengine.cpp index afea06fee..d163d1f42 100644 --- a/writeengine/wrapper/writeengine.cpp +++ b/writeengine/wrapper/writeengine.cpp @@ -182,6 +182,37 @@ int WriteEngineWrapper::checkValid(const TxnID& txnid, const ColStructList& colS return NO_ERROR; } +/*@brief findSmallestColumn --Find the smallest column for this table + */ +/*********************************************************** + * DESCRIPTION: + * Find the smallest column for this table + * PARAMETERS: + * lowColLen - returns smallest column width + * colId - returns smallest column id + * colStructList - column struct list + * RETURN: + * void + ***********************************************************/ +void WriteEngineWrapper::findSmallestColumn(uint32_t& colId, ColStructList colStructList) +// MCOL-1675: find the smallest column width to calculate the RowID from so +// that all HWMs will be incremented by this operation +{ + int32_t lowColLen = 8192; + for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) + { + if (colStructList[colIt].colWidth < lowColLen) + { + colId = colIt; + lowColLen = colStructList[colId].colWidth; + if ( lowColLen == 1 ) + { + break; + } + } + } +} + /*@convertValArray - Convert interface values to internal values */ /*********************************************************** @@ -847,6 +878,11 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + findSmallestColumn(colId, colStructList); + // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != 
NO_ERROR) // return rc; @@ -873,8 +909,8 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, //-------------------------------------------------------------------------- if (isFirstBatchPm) { - currentDBrootIdx = dbRootExtentTrackers[0]->getCurrentDBRootIdx(); - extentInfo = dbRootExtentTrackers[0]->getDBRootExtentList(); + currentDBrootIdx = dbRootExtentTrackers[colId]->getCurrentDBRootIdx(); + extentInfo = dbRootExtentTrackers[colId]->getDBRootExtentList(); dbRoot = extentInfo[currentDBrootIdx].fDbRoot; partitionNum = extentInfo[currentDBrootIdx].fPartition; @@ -914,7 +950,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, { colOp = m_colOp[op(colStructList[i].fCompressionType)]; colOp->initColumn(curCol); - colOp->setColParam(curCol, 0, colStructList[i].colWidth, colStructList[i].colDataType, + colOp->setColParam(curCol, colId, colStructList[i].colWidth, colStructList[i].colDataType, colStructList[i].colType, colStructList[i].dataOid, colStructList[i].fCompressionType, dbRoot, partitionNum, segmentNum); rc = colOp->extendColumn(curCol, false, extents[i].startBlkOffset, extents[i].startLbid, extents[i].allocSize, dbRoot, @@ -1040,7 +1076,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, } // if (isFirstBatchPm) else //get the extent info from tableMetaData { - ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[0].dataOid); + ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[colId].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); while (it != aColExtsInfo.end()) { @@ -1073,7 +1109,7 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, //-------------------------------------------------------------------------- // allocate row id(s) //-------------------------------------------------------------------------- - curColStruct = colStructList[0]; + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; colOp->initColumn(curCol); @@ 
-1084,23 +1120,27 @@ int WriteEngineWrapper::insertColumnRecs(const TxnID& txnid, vector fileInfo; dbRoot = curColStruct.fColDbRoot; //use the first column to calculate row id - ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[0].dataOid); + ColExtsInfo aColExtsInfo = tableMetaData->getColExtsInfo(colStructList[colId].dataOid); ColExtsInfo::iterator it = aColExtsInfo.begin(); while (it != aColExtsInfo.end()) { - if ((it->dbRoot == colStructList[0].fColDbRoot) && (it->partNum == colStructList[0].fColPartition) && (it->segNum == colStructList[0].fColSegment) && it->current ) + if ((it->dbRoot == colStructList[colId].fColDbRoot) && + (it->partNum == colStructList[colId].fColPartition) && + (it->segNum == colStructList[colId].fColSegment) && it->current ) + { break; + } it++; } if (it != aColExtsInfo.end()) { hwm = it->hwm; - //cout << "Got from colextinfo hwm for oid " << colStructList[0].dataOid << " is " << hwm << " and seg is " << colStructList[0].fColSegment << endl; + //cout << "Got from colextinfo hwm for oid " << colStructList[colId].dataOid << " is " << hwm << " and seg is " << colStructList[0].fColSegment << endl; } oldHwm = hwm; //Save this info for rollback //need to pass real dbRoot, partition, and segment to setColParam - colOp->setColParam(curCol, 0, curColStruct.colWidth, curColStruct.colDataType, + colOp->setColParam(curCol, colId, curColStruct.colWidth, curColStruct.colDataType, curColStruct.colType, curColStruct.dataOid, curColStruct.fCompressionType, curColStruct.fColDbRoot, curColStruct.fColPartition, curColStruct.fColSegment); rc = colOp->openColumnFile(curCol, segFile, useTmpSuffix); // @bug 5572 HDFS tmp file @@ -1123,13 +1163,13 @@ timer.start("allocRowId"); if (idbdatafile::IDBPolicy::useHdfs()) insertSelect = true; - rc = colOp->allocRowId(txnid, bUseStartExtent, + rc = colOp->allocRowId(txnid, bUseStartExtent, curCol, (uint64_t)totalRow, rowIdArray, hwm, newExtent, rowsLeft, newHwm, newFile, newColStructList, 
newDctnryStructList, dbRootExtentTrackers, insertSelect, true, tableOid, isFirstBatchPm); - //cout << "after allocrowid, total row = " < 256K. // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. //-------------------------------------------------------------------------- -// DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? + // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? if ((curCol.dataFile.fPartition == 0) && (curCol.dataFile.fSegment == 0) && ((totalRow-rowsLeft) > 0) && (rowIdArray[totalRow-rowsLeft-1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k=1; ksetColParam(expandCol, 0, @@ -1505,18 +1548,10 @@ int WriteEngineWrapper::insertColumnRecsBinary(const TxnID& txnid, for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); - // MCOL-984: find the smallest column width to calculate the RowID from so - // that all HWMs will be incremented by this operation - int32_t lowColLen = 8192; - int32_t colId = 0; - for (uint32_t colIt = 0; colIt < colStructList.size(); colIt++) - { - if (colStructList[colIt].colWidth < lowColLen) - { - colId = colIt; - lowColLen = colStructList[colId].colWidth; - } - } + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + findSmallestColumn(colId, colStructList); // rc = checkValid(txnid, colStructList, colValueList, ridList); // if (rc != NO_ERROR) @@ -1809,7 +1844,7 @@ timer.stop("allocRowId"); // Expand initial abbreviated extent if any RID in 1st extent is > 256K. // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. //-------------------------------------------------------------------------- -// DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? + // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? 
if ((curCol.dataFile.fPartition == 0) && (curCol.dataFile.fSegment == 0) && ((totalRow-rowsLeft) > 0) && @@ -1821,7 +1856,8 @@ timer.stop("allocRowId"); if (k == colId) continue; Column expandCol; - colOp = m_colOp[op(colStructList[k].fCompressionType)]; + colOp = m_colOp[op(colStructList[k].fCompressionType)]; + // Shouldn't we change 0 to colId here? colOp->setColParam(expandCol, 0, colStructList[k].colWidth, colStructList[k].colDataType, @@ -2782,6 +2818,11 @@ StopWatch timer; for (i = 0; i < colStructList.size(); i++) Convertor::convertColType(&colStructList[i]); + uint32_t colId = 0; + // MCOL-1675: find the smallest column width to calculate the RowID from so + // that all HWMs will be incremented by this operation + findSmallestColumn(colId, colStructList); + rc = checkValid(txnid, colStructList, colValueList, ridList); if (rc != NO_ERROR) return rc; @@ -2799,7 +2840,7 @@ StopWatch timer; //-------------------------------------------------------------------------- // allocate row id(s) //-------------------------------------------------------------------------- - curColStruct = colStructList[0]; + curColStruct = colStructList[colId]; colOp = m_colOp[op(curColStruct.fCompressionType)]; colOp->initColumn(curCol); @@ -2834,7 +2875,7 @@ StopWatch timer; oldHwm = hwm; //Save this info for rollback //need to pass real dbRoot, partition, and segment to setColParam - colOp->setColParam(curCol, 0, curColStruct.colWidth, curColStruct.colDataType, + colOp->setColParam(curCol, colId, curColStruct.colWidth, curColStruct.colDataType, curColStruct.colType, curColStruct.dataOid, curColStruct.fCompressionType, dbRoot, partitionNum, segmentNum); @@ -2944,13 +2985,15 @@ timer.stop("allocRowId"); // if totalRow == rowsLeft, then not adding rows to 1st extent, so skip it. //-------------------------------------------------------------------------- // DMC-SHARED_NOTHING_NOTE: Is it safe to assume only part0 seg0 is abbreviated? 
- if ((colStructList[0].fColPartition == 0) && - (colStructList[0].fColSegment == 0) && + if ((colStructList[colId].fColPartition == 0) && + (colStructList[colId].fColSegment == 0) && ((totalRow-rowsLeft) > 0) && (rowIdArray[totalRow-rowsLeft-1] >= (RID)INITIAL_EXTENT_ROWS_TO_DISK)) { - for (unsigned k=1; ksetColParam(expandCol, 0, diff --git a/writeengine/wrapper/writeengine.h b/writeengine/wrapper/writeengine.h index 099854f77..93729ed75 100644 --- a/writeengine/wrapper/writeengine.h +++ b/writeengine/wrapper/writeengine.h @@ -607,6 +607,11 @@ private: */ int checkValid(const TxnID& txnid, const ColStructList& colStructList, const ColValueList& colValueList, const RIDList& ridList) const; + /** + * @brief Find the smallest column for this table + */ + void findSmallestColumn(uint32_t &colId, ColStructList colStructList); + /** * @brief Convert interface column type to a internal column type */ From 14d3a34c2893d4d66d62802382f33f0d39f87195 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 7 Sep 2018 11:43:54 +0100 Subject: [PATCH 17/17] MCOL-1694 & MCOL-1505 Improved exception handling This patch catches exceptions in DDLProc, DMLProc and ExeMgr which could potentially happen during startup, logging them instead of silently ignoring them (or crashing in ExeMgr).
--- ddlproc/ddlproc.cpp | 33 ++++++++++++++++++++++++++++-- dmlproc/dmlproc.cpp | 44 ++++++++++++++++++++++++++++++++++++++++ dmlproc/dmlprocessor.cpp | 20 ++++++++++++++++++ exemgr/main.cpp | 30 +++++++++++++++++++++++++-- 4 files changed, 123 insertions(+), 4 deletions(-) diff --git a/ddlproc/ddlproc.cpp b/ddlproc/ddlproc.cpp index 45bc6a48d..d74295cfa 100644 --- a/ddlproc/ddlproc.cpp +++ b/ddlproc/ddlproc.cpp @@ -135,8 +135,30 @@ int main(int argc, char* argv[]) { oam.processInitComplete("DDLProc", ACTIVE); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(23, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DDLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" << endl; + LoggingID logid(23, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DDLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } } @@ -147,21 +169,28 @@ int main(int argc, char* argv[]) catch (std::exception& ex) { cerr << ex.what() << endl; + LoggingID logid(23, 0, 0); Message::Args args; Message message(8); args.add("DDLProc failed on: "); args.add(ex.what()); message.format( args ); - + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, message, logid); + return 1; } catch (...) { cerr << "Caught unknown exception!" 
<< endl; + LoggingID logid(23, 0, 0); Message::Args args; Message message(8); args.add("DDLProc failed on: "); - args.add("receiving DDLPackage"); + args.add("receiving DDLPackage (unknown exception)"); message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, message, logid); + return 1; } return 0; } diff --git a/dmlproc/dmlproc.cpp b/dmlproc/dmlproc.cpp index df17fbed6..106977824 100644 --- a/dmlproc/dmlproc.cpp +++ b/dmlproc/dmlproc.cpp @@ -494,8 +494,30 @@ int main(int argc, char* argv[]) // At first we set to BUSY_INIT oam.processInitComplete("DMLProc", oam::BUSY_INIT); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } //@Bug 1627 @@ -584,8 +606,30 @@ int main(int argc, char* argv[]) { oam.processInitComplete("DMLProc", ACTIVE); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught exception: "); + args1.add(ex.what()); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; + } catch (...) { + cerr << "Caught unknown exception in init!" 
<< endl; + LoggingID logid(21, 0, 0); + logging::Message::Args args1; + logging::Message msg(1); + args1.add("DMLProc init caught unknown exception"); + msg.format( args1 ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_CRITICAL, msg, logid); + return 1; } Dec = DistributedEngineComm::instance(rm); diff --git a/dmlproc/dmlprocessor.cpp b/dmlproc/dmlprocessor.cpp index 2205d1712..3b3a5cffc 100644 --- a/dmlproc/dmlprocessor.cpp +++ b/dmlproc/dmlprocessor.cpp @@ -1155,8 +1155,28 @@ void DMLServer::start() } cancelThread.join(); } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + logging::LoggingID lid(21); + Message::Args args; + Message message(8); + args.add("DMLProc init caught exception: "); + args.add(ex.what()); + message.format(args); + logging::Logger logger(lid.fSubsysID); + logger.logMessage(logging::LOG_TYPE_CRITICAL, message, lid); + } catch (...) { + cerr << "Caught unknown exception!" << endl; + logging::LoggingID lid(21); + Message::Args args; + Message message(8); + args.add("DMLProc init caught unknown exception"); + message.format(args); + logging::Logger logger(lid.fSubsysID); + logger.logMessage(logging::LOG_TYPE_CRITICAL, message, lid); } } diff --git a/exemgr/main.cpp b/exemgr/main.cpp index 92f949f57..716c2bf54 100644 --- a/exemgr/main.cpp +++ b/exemgr/main.cpp @@ -1300,8 +1300,34 @@ void cleanTempDir() assert(tmpPrefix != "/"); /* This is quite scary as ExeMgr usually runs as root */ - boost::filesystem::remove_all(tmpPrefix); - boost::filesystem::create_directories(tmpPrefix); + try + { + boost::filesystem::remove_all(tmpPrefix); + boost::filesystem::create_directories(tmpPrefix); + } + catch (std::exception& ex) + { + cerr << ex.what() << endl; + LoggingID logid(16, 0, 0); + Message::Args args; + Message message(8); + args.add("Execption whilst cleaning tmpdir: "); + args.add(ex.what()); + message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_WARNING, message, 
logid); + } + catch (...) + { + cerr << "Caught unknown exception during tmpdir cleanup" << endl; + LoggingID logid(16, 0, 0); + Message::Args args; + Message message(8); + args.add("Unknown execption whilst cleaning tmpdir"); + message.format( args ); + logging::Logger logger(logid.fSubsysID); + logger.logMessage(LOG_TYPE_WARNING, message, logid); + } }