From 70b61444e2fbd73cbc04178398baa2b1a1b63a45 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Fri, 11 Aug 2017 01:05:50 -0500 Subject: [PATCH 1/6] change GlusterConfig to DataRedundancyConfig in configuration file --- oam/etc/Columnstore.xml | 6 +-- oam/oamcpp/liboamcpp.cpp | 56 +++++++++++++-------------- oam/oamcpp/liboamcpp.h | 4 +- oamapps/mcsadmin/mcsadmin.cpp | 54 +++++++++++++------------- oamapps/serverMonitor/diskMonitor.cpp | 10 ++--- procmgr/main.cpp | 6 +-- procmgr/processmanager.cpp | 10 ++--- procmon/main.cpp | 12 +++--- procmon/processmonitor.cpp | 20 +++++----- tools/configMgt/autoConfigure.cpp | 20 +++++----- 10 files changed, 99 insertions(+), 99 deletions(-) diff --git a/oam/etc/Columnstore.xml b/oam/etc/Columnstore.xml index 262edc802..165951ddb 100644 --- a/oam/etc/Columnstore.xml +++ b/oam/etc/Columnstore.xml @@ -458,9 +458,9 @@ 0 unassigned unassigned - n - 0 - unassigned + n + 0 + unassigned n 3306 unassigned diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp index 47a235527..59b965da7 100644 --- a/oam/oamcpp/liboamcpp.cpp +++ b/oam/oamcpp/liboamcpp.cpp @@ -5180,13 +5180,13 @@ namespace oam writeLog("manualMovePmDbroot: " + dbrootIDs + " from " + residePM + " to " + toPM, LOG_TYPE_DEBUG ); - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - getSystemConfig( "GlusterConfig", GlusterConfig); + getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) { - GlusterConfig = "n"; + DataRedundancyConfig = "n"; } boost::char_separator sep(", "); @@ -5196,7 +5196,7 @@ namespace oam ++it) { //if gluster, check if there are copies on the to-pm - if ( GlusterConfig == "y") + if ( DataRedundancyConfig == "y") { string pmList = ""; try { @@ -5358,7 +5358,7 @@ namespace oam } //if Gluster, do the assign command - if ( GlusterConfig == "y") + if ( DataRedundancyConfig == "y") { try { string errmsg; @@ -5454,16 +5454,16 @@ namespace oam } catch(...) 
{} - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - getSystemConfig( "GlusterConfig", GlusterConfig); + getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) { - GlusterConfig = "n"; + DataRedundancyConfig = "n"; } - if (DBRootStorageType == "internal" && GlusterConfig == "n") + if (DBRootStorageType == "internal" && DataRedundancyConfig == "n") return 1; // get current Module name @@ -5598,7 +5598,7 @@ namespace oam exceptionControl("autoMovePmDbroot", API_FAILURE); } - if ( GlusterConfig == "y") + if ( DataRedundancyConfig == "y") { try { string errmsg; @@ -5644,7 +5644,7 @@ namespace oam { //if Gluster, get it's list for DBroot and move to one of those string toPmID; - if ( GlusterConfig == "y") + if ( DataRedundancyConfig == "y") { string pmList = ""; try { @@ -5869,16 +5869,16 @@ namespace oam } catch(...) {} - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - getSystemConfig( "GlusterConfig", GlusterConfig); + getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) { - GlusterConfig = "n"; + DataRedundancyConfig = "n"; } - if (DBRootStorageType == "internal" && GlusterConfig == "n") + if (DBRootStorageType == "internal" && DataRedundancyConfig == "n") return 1; //store in move dbroot transaction file @@ -6457,13 +6457,13 @@ namespace oam cout << endl << "Changes being applied" << endl << endl; //added entered dbroot IDs to to-PM list and do Gluster assign if needed - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - getSystemConfig( "GlusterConfig", GlusterConfig); + getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) 
{ - GlusterConfig = "n"; + DataRedundancyConfig = "n"; } DBRootConfigList::iterator pt3 = dbrootlist.begin(); @@ -6471,7 +6471,7 @@ namespace oam { todbrootConfigList.push_back(*pt3); -/* if ( GlusterConfig == "y") +/* if ( DataRedundancyConfig == "y") { try { string errmsg; @@ -6805,12 +6805,12 @@ namespace oam int SystemDBRootCount = 0; string cloud; string DBRootStorageType; - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { getSystemConfig("DBRootCount", SystemDBRootCount); getSystemConfig("Cloud", cloud); getSystemConfig("DBRootStorageType", DBRootStorageType); - getSystemConfig("GlusterConfig", GlusterConfig); + getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); } catch(...) {} @@ -6896,7 +6896,7 @@ namespace oam } // if gluster, request volume delete - if ( GlusterConfig == "y") + if ( DataRedundancyConfig == "y") { try { string errmsg1; @@ -8261,7 +8261,7 @@ namespace oam int numberDBRootsPerPM = numberNewDBRoots/numberNewPMs; std::vector dbrootPms[dbrootCount]; - DataRedundancyConfig DataRedundancyConfigs[numberPMs]; + DataRedundancySetup DataRedundancyConfigs[numberPMs]; int startDBRootID = dbrootID; for (int pm=(pmID-1); pm < numberPMs; pm++,startDBRootID++) @@ -9582,18 +9582,18 @@ namespace oam } catch(...) {} - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - getSystemConfig( "GlusterConfig", GlusterConfig); + getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) 
{ - GlusterConfig = "n"; + DataRedundancyConfig = "n"; } - if ( (DBRootStorageType == "external" && GlusterConfig == "n") + if ( (DBRootStorageType == "external" && DataRedundancyConfig == "n") || - (GlusterConfig == "y" && !mount) ) + (DataRedundancyConfig == "y" && !mount) ) { dbrootList::iterator pt3 = dbrootConfigList.begin(); for( ; pt3 != dbrootConfigList.end() ; pt3++) diff --git a/oam/oamcpp/liboamcpp.h b/oam/oamcpp/liboamcpp.h index 7e708e45e..48bb9160a 100644 --- a/oam/oamcpp/liboamcpp.h +++ b/oam/oamcpp/liboamcpp.h @@ -1303,7 +1303,7 @@ namespace oam }; typedef std::vector DataRedundancyStorage; - struct DataRedundancyConfig_s + struct DataRedundancySetup_s { int pmID; std::string pmHostname; @@ -1311,7 +1311,7 @@ namespace oam std::vector dbrootCopies; DataRedundancyStorage storageLocations; }; - typedef struct DataRedundancyConfig_s DataRedundancyConfig; + typedef struct DataRedundancySetup_s DataRedundancySetup; // username / password for smbclient use const std::string USERNAME = "oamuser"; diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 745970c77..55d734afb 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -1876,17 +1876,17 @@ int processCommand(string* arguments) } } - string GlusterConfig; + string DataRedundancyConfig; string DataRedundancyCopies; string DataRedundancyStorageType; try { - oam.getSystemConfig("GlusterConfig", GlusterConfig); + oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); oam.getSystemConfig("DataRedundancyCopies", DataRedundancyCopies); oam.getSystemConfig("DataRedundancyStorageType", DataRedundancyStorageType); } catch(...) 
{} - if ( GlusterConfig == "y" ) + if ( DataRedundancyConfig == "y" ) { cout << endl << "Data Redundant Configuration" << endl << endl; cout << "Copies Per DBroot = " << DataRedundancyCopies << endl; @@ -1952,14 +1952,14 @@ int processCommand(string* arguments) case 14: // addDbroot parameters: dbroot-number { - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - oam.getSystemConfig( "GlusterConfig", GlusterConfig); + oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) {} - if (GlusterConfig == "y") { + if (DataRedundancyConfig == "y") { cout << endl << "**** addDbroot Not Supported on Data Redundancy Configured System, use addModule command to expand your capacity" << endl; break; } @@ -2042,9 +2042,9 @@ int processCommand(string* arguments) case 15: // removeDbroot parameters: dbroot-list { - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - oam.getSystemConfig( "GlusterConfig", GlusterConfig); + oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) {} @@ -3262,15 +3262,15 @@ int processCommand(string* arguments) } catch(...) {} - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - oam.getSystemConfig( "GlusterConfig", GlusterConfig); + oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) 
{} - if (DBRootStorageType == "internal" && GlusterConfig == "n") + if (DBRootStorageType == "internal" && DataRedundancyConfig == "n") { cout << endl << "**** switchParentOAMModule Failed : DBRoot Storage type = internal/non-data-replication" << endl; break; @@ -3307,7 +3307,7 @@ int processCommand(string* arguments) } //check for gluster system is do-able - if (GlusterConfig == "y") + if (DataRedundancyConfig == "y") { // get to-module assigned DBRoots and see if current active PM // has a copy @@ -3574,17 +3574,17 @@ int processCommand(string* arguments) cout << endl << "**** getSystemStatus Failed = " << e.what() << endl; } - string GlusterConfig; + string DataRedundancyConfig; string DataRedundancyCopies; string DataRedundancyStorageType; try { - oam.getSystemConfig("GlusterConfig", GlusterConfig); + oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); oam.getSystemConfig("DataRedundancyCopies", DataRedundancyCopies); oam.getSystemConfig("DataRedundancyStorageType", DataRedundancyStorageType); } catch(...) {} - if ( GlusterConfig == "y" ) + if ( DataRedundancyConfig == "y" ) { string arg1 = ""; string arg2 = ""; @@ -3754,13 +3754,13 @@ int processCommand(string* arguments) } //if gluster, check if toPM is has a copy - string GlusterConfig; + string DataRedundancyConfig; try { - oam.getSystemConfig("GlusterConfig", GlusterConfig); + oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); } catch(...) {} - if ( GlusterConfig == "y" ) + if ( DataRedundancyConfig == "y" ) { string pmList = ""; try { @@ -4224,16 +4224,16 @@ int processCommand(string* arguments) oam.getSystemConfig("DBRootStorageType", DBRootStorageType); if (DBRootStorageType == "external" ){ - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; string cloud = oam::UnassignedName; try { oam.getSystemConfig("Cloud", cloud); - oam.getSystemConfig( "GlusterConfig", GlusterConfig); + oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) 
{} - if ( GlusterConfig == "n" && cloud == oam::UnassignedName) + if ( DataRedundancyConfig == "n" && cloud == oam::UnassignedName) cout << " REMINDER: Update the /etc/fstab on " << toPM << " to include these dbroot mounts" << endl << endl; break; @@ -5103,7 +5103,7 @@ int processCommand(string* arguments) } } - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; int DataRedundancyCopies; string cloud = oam::UnassignedName; int DataRedundancyNetworkType; @@ -5113,7 +5113,7 @@ int processCommand(string* arguments) try { oam.getSystemConfig("Cloud", cloud); oam.getSystemConfig("AmazonVPCNextPrivateIP", AmazonVPCNextPrivateIP); - oam.getSystemConfig("GlusterConfig", GlusterConfig); + oam.getSystemConfig("DataRedundancyConfig", DataRedundancyConfig); oam.getSystemConfig("DataRedundancyCopies", DataRedundancyCopies); oam.getSystemConfig("DataRedundancyNetworkType", DataRedundancyNetworkType); oam.getSystemConfig("DataRedundancyStorageType", DataRedundancyStorageType); @@ -5206,7 +5206,7 @@ int processCommand(string* arguments) break; } - if ( GlusterConfig == "y" && moduleType == "pm" ) { + if ( DataRedundancyConfig == "y" && moduleType == "pm" ) { if ( localModule != parentOAMModule ) { // exit out since not on active module cout << endl << "**** addModule Failed : Can only run command on Active OAM Parent Module (" << parentOAMModule << ")." 
<< endl; @@ -5435,7 +5435,7 @@ int processCommand(string* arguments) devicenetworkconfig.hostConfigList.clear(); moduleName.clear(); - if ( GlusterConfig == "y" && DataRedundancyNetworkType == 2 && moduleType == "pm") + if ( DataRedundancyConfig == "y" && DataRedundancyNetworkType == 2 && moduleType == "pm") { string DataRedundancyIPAddress = sysConfig->getConfig("DataRedundancyConfig",dataDupIPaddr); string DataRedundancyHostname = sysConfig->getConfig("DataRedundancyConfig",dataDupHostName); @@ -5463,7 +5463,7 @@ int processCommand(string* arguments) storageDeviceList storagedevicelist; string deviceType; - if ( GlusterConfig == "y" && moduleType == "pm") + if ( DataRedundancyConfig == "y" && moduleType == "pm") { cout << endl << "System is configured with Data Redundancy, DBRoot Storage will" << endl; cout << "will be created with the Modules during this command." << endl; @@ -5528,7 +5528,7 @@ int processCommand(string* arguments) cout << "Add Module(s) successfully completed" << endl; - if ( GlusterConfig == "y" && moduleType == "pm" ) { + if ( DataRedundancyConfig == "y" && moduleType == "pm" ) { { //send messages to update fstab to new modules, if needed diff --git a/oamapps/serverMonitor/diskMonitor.cpp b/oamapps/serverMonitor/diskMonitor.cpp index 4a043db0c..c1ca0c834 100644 --- a/oamapps/serverMonitor/diskMonitor.cpp +++ b/oamapps/serverMonitor/diskMonitor.cpp @@ -93,13 +93,13 @@ void diskMonitor() } //get Gluster Config setting - string GlusterConfig = "n"; + string DataRedundancyConfig = "n"; try { - oam.getSystemConfig( "GlusterConfig", GlusterConfig); + oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) 
{ - GlusterConfig = "n"; + DataRedundancyConfig = "n"; } int diskSpaceCheck = 0; @@ -323,7 +323,7 @@ void diskMonitor() //check for external file systems/devices if (Externalflag || - (!Externalflag && GlusterConfig == "y" && moduleType == "pm") ){ + (!Externalflag && DataRedundancyConfig == "y" && moduleType == "pm") ){ try { DBRootConfigList dbrootConfigList; @@ -566,7 +566,7 @@ void diskMonitor() } //do Gluster status check, if configured - if ( GlusterConfig == "y") + if ( DataRedundancyConfig == "y") { bool pass = true; string errmsg = "unknown"; diff --git a/procmgr/main.cpp b/procmgr/main.cpp index 228980f43..11edf64f3 100644 --- a/procmgr/main.cpp +++ b/procmgr/main.cpp @@ -51,7 +51,7 @@ bool amazon = false; string PMInstanceType; string UMInstanceType; string AmazonPMFailover = "y"; -string GlusterConfig = "n"; +string DataRedundancyConfig = "n"; bool rootUser = true; string USER = "root"; bool HDFS = false; @@ -153,11 +153,11 @@ int main(int argc, char **argv) //get gluster config try { - oam.getSystemConfig( "GlusterConfig", GlusterConfig); + oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) 
{ - GlusterConfig = "n"; + DataRedundancyConfig = "n"; } //hdfs / hadoop config diff --git a/procmgr/processmanager.cpp b/procmgr/processmanager.cpp index dddc7adb8..7599692c2 100644 --- a/procmgr/processmanager.cpp +++ b/procmgr/processmanager.cpp @@ -48,7 +48,7 @@ extern bool runStandby; extern string iface_name; extern string PMInstanceType; extern string UMInstanceType; -extern string GlusterConfig; +extern string DataRedundancyConfig; extern bool rootUser; extern string USER; extern bool HDFS; @@ -8539,7 +8539,7 @@ int ProcessManager::switchParentOAMModule(std::string newActiveModuleName) log.writeLog(__LINE__, "switchParentOAMModule Function Started", LOG_TYPE_DEBUG); - if ( DBRootStorageType == "internal" && GlusterConfig == "n") { + if ( DBRootStorageType == "internal" && DataRedundancyConfig == "n") { log.writeLog(__LINE__, "ERROR: DBRootStorageType = internal", LOG_TYPE_ERROR); pthread_mutex_unlock(&THREAD_LOCK); return API_INVALID_PARAMETER; @@ -9138,7 +9138,7 @@ int ProcessManager::OAMParentModuleChange() } - if ( DBRootStorageType == "internal" && failover && GlusterConfig == "n") + if ( DBRootStorageType == "internal" && failover && DataRedundancyConfig == "n") { log.writeLog(__LINE__, "DBRoot Storage configured for internal, don't do standby-active failover", LOG_TYPE_DEBUG); @@ -9537,7 +9537,7 @@ std::string ProcessManager::getStandbyModule() //check if gluster, if so then find PMs that have copies of DBROOT #1 string pmList = ""; - if (GlusterConfig == "y") { + if (DataRedundancyConfig == "y") { try { string errmsg; @@ -10024,7 +10024,7 @@ int ProcessManager::mountDBRoot(std::string dbrootID) ProcessManager processManager(config, log); Oam oam; - if (GlusterConfig == "y") + if (DataRedundancyConfig == "y") return oam::API_SUCCESS; //get pm assigned to that dbroot diff --git a/procmon/main.cpp b/procmon/main.cpp index e9c0bd50f..435d6fc8f 100644 --- a/procmon/main.cpp +++ b/procmon/main.cpp @@ -47,7 +47,7 @@ static void 
mysqlMonitorThread(MonitorConfig config); string systemOAM; string dm_server; string cloud; -string GlusterConfig = "n"; +string DataRedundancyConfig = "n"; bool HDFS = false; void updateShareMemory(processStatusList* aPtr); @@ -207,14 +207,14 @@ int main(int argc, char **argv) //get gluster config try { - oam.getSystemConfig( "GlusterConfig", GlusterConfig); + oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig); } catch(...) { - GlusterConfig = "n"; + DataRedundancyConfig = "n"; } - if ( GlusterConfig == "y" ) { + if ( DataRedundancyConfig == "y" ) { system("mount -a > /dev/null 2>&1"); } @@ -524,7 +524,7 @@ int main(int argc, char **argv) //check if gluster, if so then find PMs that have copies of DBROOT #1 string pmList = ""; - if (GlusterConfig == "y") { + if (DataRedundancyConfig == "y") { try { string errmsg; @@ -2021,7 +2021,7 @@ static void statusControlThread() std::vectordbrootList; if ( DBRootStorageType == "external" || - GlusterConfig == "y") { + DataRedundancyConfig == "y") { //get system dbroots DBRootConfigList dbrootConfigList; try diff --git a/procmon/processmonitor.cpp b/procmon/processmonitor.cpp index f735923ef..e5bfd7ddc 100644 --- a/procmon/processmonitor.cpp +++ b/procmon/processmonitor.cpp @@ -46,7 +46,7 @@ extern bool runStandby; extern bool processInitComplete; extern int fmoduleNumber; extern string cloud; -extern string GlusterConfig; +extern string DataRedundancyConfig; extern bool rootUser; extern string USER; extern bool HDFS; @@ -1615,7 +1615,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO flushInodeCache(); int return_status = API_SUCCESS; - if (GlusterConfig == "n") + if (DataRedundancyConfig == "n") { int retry = 1; for ( ; retry < 5 ; retry++) @@ -1689,7 +1689,7 @@ void ProcessMonitor::processMessage(messageqcpp::ByteStream msg, messageqcpp::IO log.writeLog(__LINE__, "MSG RECEIVED: Mount DBRoot: " + dbrootID);; int return_status = API_SUCCESS; - if (GlusterConfig == "n") + if 
(DataRedundancyConfig == "n") { string cmd = "export LC_ALL=C;mount " + startup::StartUp::installDir() + "/data" + dbrootID + " > /tmp/mount.txt 2>&1"; system(cmd.c_str()); @@ -2461,7 +2461,7 @@ pid_t ProcessMonitor::startProcess(string processModuleType, string processName, } // now delete the dbrm data from local disk - if ( !gOAMParentModuleFlag && !HDFS && GlusterConfig == "n") { + if ( !gOAMParentModuleFlag && !HDFS && DataRedundancyConfig == "n") { string cmd = "rm -f " + DBRMDir + "/*"; system(cmd.c_str()); log.writeLog(__LINE__, "removed DBRM file with command: " + cmd, LOG_TYPE_DEBUG); @@ -5719,12 +5719,12 @@ void ProcessMonitor::unmountExtraDBroots() oam.getSystemConfig("DBRootStorageType", DBRootStorageType); if ( DBRootStorageType == "hdfs" || - ( DBRootStorageType == "internal" && GlusterConfig == "n") ) + ( DBRootStorageType == "internal" && DataRedundancyConfig == "n") ) return; } catch(...) {} -// if (GlusterConfig == "y") +// if (DataRedundancyConfig == "y") // return; try @@ -5755,7 +5755,7 @@ void ProcessMonitor::unmountExtraDBroots() if (config.moduleID() != moduleID) { - if ( GlusterConfig == "n" ) + if ( DataRedundancyConfig == "n" ) { string cmd = "umount " + startup::StartUp::installDir() + "/data" + oam.itoa(id) + " > /dev/null 2>&1"; system(cmd.c_str()); @@ -5863,7 +5863,7 @@ int ProcessMonitor::checkDataMount() catch(...) 
{} //asign DBRoot is gluster - if (GlusterConfig == "y") + if (DataRedundancyConfig == "y") { vector::iterator p = dbrootList.begin(); while ( p != dbrootList.end() ) @@ -5886,7 +5886,7 @@ int ProcessMonitor::checkDataMount() } if ( DBRootStorageType == "hdfs" || - (DBRootStorageType == "internal" && GlusterConfig == "n") ) { + (DBRootStorageType == "internal" && DataRedundancyConfig == "n") ) { //create OAM-Test-Flag vector::iterator p = dbrootList.begin(); while ( p != dbrootList.end() ) @@ -5921,7 +5921,7 @@ int ProcessMonitor::checkDataMount() string dbroot = installDir + "/data" + *p; string fileName = dbroot + "/OAMdbrootCheck"; - if ( GlusterConfig == "n" ) { + if ( DataRedundancyConfig == "n" ) { //remove any local check flag for starters string cmd = "umount " + dbroot + " > /tmp/umount.txt 2>&1"; system(cmd.c_str()); diff --git a/tools/configMgt/autoConfigure.cpp b/tools/configMgt/autoConfigure.cpp index 031f04544..c39939fdf 100644 --- a/tools/configMgt/autoConfigure.cpp +++ b/tools/configMgt/autoConfigure.cpp @@ -201,21 +201,21 @@ int main(int argc, char *argv[]) } //set gluster flag if it exists - string GlusterConfig; - string GlusterCopies; - string GlusterStorageType; + string DataRedundancyConfig; + string DataRedundancyCopies; + string DataRedundancyStorageType; try { - GlusterConfig = sysConfigOld->getConfig(InstallSection, "GlusterConfig"); - GlusterCopies = sysConfigOld->getConfig(InstallSection, "GlusterCopies"); - GlusterStorageType = sysConfigOld->getConfig(InstallSection, "GlusterStorageType"); + DataRedundancyConfig = sysConfigOld->getConfig(InstallSection, "DataRedundancyConfig"); + DataRedundancyCopies = sysConfigOld->getConfig(InstallSection, "DataRedundancyCopies"); + DataRedundancyStorageType = sysConfigOld->getConfig(InstallSection, "DataRedundancyStorageType"); } catch(...) 
{} - if ( !GlusterConfig.empty() ) { + if ( !DataRedundancyConfig.empty() ) { try { - sysConfigNew->setConfig(InstallSection, "GlusterConfig", GlusterConfig); - sysConfigNew->setConfig(InstallSection, "GlusterCopies", GlusterCopies); - sysConfigNew->setConfig(InstallSection, "GlusterStorageType", GlusterStorageType); + sysConfigNew->setConfig(InstallSection, "DataRedundancyConfig", DataRedundancyConfig); + sysConfigNew->setConfig(InstallSection, "DataRedundancyCopies", DataRedundancyCopies); + sysConfigNew->setConfig(InstallSection, "DataRedundancyStorageType", DataRedundancyStorageType); } catch(...) {} From 3504b8df617383a911ad10120b4651ec9323a5c2 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Fri, 11 Aug 2017 01:16:31 -0500 Subject: [PATCH 2/6] Remove storage type output for data redundancy configuration in mcsadmin --- oamapps/mcsadmin/mcsadmin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp index 55d734afb..cbc5400dc 100644 --- a/oamapps/mcsadmin/mcsadmin.cpp +++ b/oamapps/mcsadmin/mcsadmin.cpp @@ -1890,7 +1890,7 @@ int processCommand(string* arguments) { cout << endl << "Data Redundant Configuration" << endl << endl; cout << "Copies Per DBroot = " << DataRedundancyCopies << endl; - cout << "Storage Type = " << DataRedundancyStorageType << endl; + //cout << "Storage Type = " << DataRedundancyStorageType << endl; oamModuleInfo_t st; string moduleType; From fa17a98d60bc472ea6a4fed9254e4b1c4b217cb2 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 11 Aug 2017 07:17:07 +0100 Subject: [PATCH 3/6] MCOL-744 Fix BPP mutex crash Whilst very rare we can hit a case where we attempt to unlock objLock when it is already unlocked. With the Boost version in Ubuntu 16.04 this triggers an abort() effectively crashing PrimProc. In this patch we switch to a pthread mutex instead which does not have this limitation. 
At a later date we can look into refactoring how BPP and this mutex works. --- .../primproc/batchprimitiveprocessor.cpp | 26 +++++++++---------- primitives/primproc/batchprimitiveprocessor.h | 9 +++++-- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/primitives/primproc/batchprimitiveprocessor.cpp b/primitives/primproc/batchprimitiveprocessor.cpp index 8678e1381..6f15cb7dd 100755 --- a/primitives/primproc/batchprimitiveprocessor.cpp +++ b/primitives/primproc/batchprimitiveprocessor.cpp @@ -112,6 +112,7 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor() : { pp.setLogicalBlockMode(true); pp.setBlockPtr((int *) blockData); + pthread_mutex_init(&objLock,NULL); } BatchPrimitiveProcessor::BatchPrimitiveProcessor(ByteStream &b, double prefetch, @@ -153,6 +154,7 @@ BatchPrimitiveProcessor::BatchPrimitiveProcessor(ByteStream &b, double prefetch, pp.setLogicalBlockMode(true); pp.setBlockPtr((int *) blockData); sendThread = bppst; + pthread_mutex_init(&objLock, NULL); initBPP(b); // cerr << "made a BPP\n"; } @@ -175,6 +177,7 @@ BatchPrimitiveProcessor::~BatchPrimitiveProcessor() counterLock.lock(); } counterLock.unlock(); + pthread_mutex_destroy(&objLock); } /** @@ -233,7 +236,7 @@ void BatchPrimitiveProcessor::initBPP(ByteStream &bs) } if (doJoin) { - objLock.lock(); + pthread_mutex_lock(&objLock); if (ot == ROW_GROUP) { bs >> joinerCount; // cout << "joinerCount = " << joinerCount << endl; @@ -325,7 +328,7 @@ void BatchPrimitiveProcessor::initBPP(ByteStream &bs) joiner.reset(new Joiner((bool) tmp8)); } #ifdef __FreeBSD__ - objLock.unlock(); + pthread_mutex_unlock(&objLock); #endif } @@ -401,7 +404,7 @@ void BatchPrimitiveProcessor::resetBPP(ByteStream &bs, const SP_UM_MUTEX& w, uint32_t i; vector preloads; - objLock.lock(); + pthread_mutex_lock(&objLock); writelock = w; sock = s; @@ -452,7 +455,7 @@ void BatchPrimitiveProcessor::resetBPP(ByteStream &bs, const SP_UM_MUTEX& w, buildVSSCache(count); #ifdef __FreeBSD__ - objLock.unlock(); + 
pthread_mutex_unlock(&objLock); #endif } @@ -599,7 +602,7 @@ int BatchPrimitiveProcessor::endOfJoiner() #endif #ifndef __FreeBSD__ - objLock.unlock(); + pthread_mutex_unlock(&objLock); #endif return 0; } @@ -1486,7 +1489,7 @@ void BatchPrimitiveProcessor::execute() #endif #ifndef __FreeBSD__ - objLock.unlock(); + pthread_mutex_unlock(&objLock); #endif throw n; // need to pass this through to BPPSeeder } @@ -1777,12 +1780,7 @@ int BatchPrimitiveProcessor::operator()() vssCache.clear(); #ifndef __FreeBSD__ - // If we've been aborted the lock *may* have been released already - // By doing try_lock, we ensure the unlock will work whether it was - // locked or not. - if (sendThread->aborted()) - objLock.try_lock(); - objLock.unlock(); + pthread_mutex_unlock(&objLock); #endif freeLargeBuffers(); #ifdef PRIMPROC_STOPWATCH @@ -1881,7 +1879,7 @@ SBPP BatchPrimitiveProcessor::duplicate() } bpp->doJoin = doJoin; if (doJoin) { - bpp->objLock.lock(); + pthread_mutex_lock(&bpp->objLock); bpp->joinerSize = joinerSize; if (ot == ROW_GROUP) { /* There are add'l join vars, but only these are necessary for processing @@ -1921,7 +1919,7 @@ SBPP BatchPrimitiveProcessor::duplicate() else bpp->joiner = joiner; #ifdef __FreeBSD__ - bpp->objLock.unlock(); + pthread_mutex_unlock(&bpp->objLock); #endif } diff --git a/primitives/primproc/batchprimitiveprocessor.h b/primitives/primproc/batchprimitiveprocessor.h index c57479ef9..ca782b3c7 100755 --- a/primitives/primproc/batchprimitiveprocessor.h +++ b/primitives/primproc/batchprimitiveprocessor.h @@ -121,7 +121,7 @@ class BatchPrimitiveProcessor // these two functions are used by BPPV to create BPP instances // on demand. TRY not to use unlock() for anything else. 
- void unlock() { objLock.try_lock(); objLock.unlock(); } + void unlock() { pthread_mutex_unlock(&objLock); } bool hasJoin() { return doJoin; } private: BatchPrimitiveProcessor(); @@ -200,7 +200,12 @@ class BatchPrimitiveProcessor messageqcpp::SBS serialized; SP_UM_MUTEX writelock; - boost::mutex objLock; + // MCOL-744 using pthread mutex instead of Boost mutex because + // it is possible that this lock could be unlocked when it is + // already unlocked. In Ubuntu 16.04's Boost this triggers a + // crash. Whilst it is very hard to hit this it is still bad. + // Longer term TODO: fix/remove objLock and/or refactor BPP + pthread_mutex_t objLock; bool LBIDTrace; bool fBusy; From ace37e882f574762370da1c799acec2eed0449d9 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Fri, 11 Aug 2017 01:45:44 -0500 Subject: [PATCH 4/6] Fixes for user inputs and brick setup --- oamapps/postConfigure/postConfigure.cpp | 179 ++++++++++++++++-------- 1 file changed, 122 insertions(+), 57 deletions(-) diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp index ee1abd2ae..23919f283 100644 --- a/oamapps/postConfigure/postConfigure.cpp +++ b/oamapps/postConfigure/postConfigure.cpp @@ -177,7 +177,7 @@ bool noPrompting = false; bool rootUser = true; string USER = "root"; bool hdfs = false; -bool gluster = false; +bool DataRedundancy = false; bool pmwithum = false; bool mysqlRep = false; string MySQLRep = "y"; @@ -1461,7 +1461,12 @@ int main(int argc, char *argv[]) exit(1); continue; } - + if ( moduleType == "pm" && DataRedundancy && moduleCount == 1) { cout << endl << "ERROR: DataRedundancy requires " + moduleType + " module type to be 2 or greater, please re-enter or select a different data storage type." 
<< endl << endl; + if ( noPrompting ) + exit(1); + continue; + } //update count try { string ModuleCountParm = "ModuleCount" + oam.itoa(i+1); @@ -3173,13 +3178,13 @@ int main(int argc, char *argv[]) } //configure data redundancy - if (gluster ) + if (DataRedundancy ) { cout << endl; string start = "y"; if ( reuseConfig == "y" ) start = "n"; - +/* while(true) { pcommand = callReadline("Would you like to configure MariaDB ColumnStore Data Redundancy? [y,n] (" + start + ") > "); @@ -3196,7 +3201,7 @@ int main(int argc, char *argv[]) if ( noPrompting ) exit(1); } - +*/ if ( start == "y" ) { cout << endl << "===== Configuring MariaDB ColumnStore Data Redundancy Functionality =====" << endl << endl; if (!glusterSetup(password)) @@ -3429,7 +3434,7 @@ int main(int argc, char *argv[]) cout << " DONE" << endl; // IF gluster is enabled we need to modify fstab on remote systems. - if (gluster ) + if (DataRedundancy ) { int numberDBRootsPerPM = DBRootCount/pmNumber; for (int pm=0; pm < pmNumber; pm++) @@ -4119,8 +4124,8 @@ bool storageSetup(bool amazonInstall) if ( DBRootStorageType == "hdfs") hdfs = true; - if ( DBRootStorageType == "gluster") - gluster = true; + if ( DBRootStorageType == "DataRedundancy") + DataRedundancy = true; if ( reuseConfig == "y" ) { cout << "===== Storage Configuration = " + DBRootStorageType + " =====" << endl << endl; @@ -4463,7 +4468,7 @@ bool storageSetup(bool amazonInstall) storageType = "1"; if ( DBRootStorageType == "external" ) storageType = "2"; - if ( DBRootStorageType == "gluster" ) + if ( DBRootStorageType == "DataRedundancy" ) storageType = "3"; if ( DBRootStorageType == "hdfs" ) storageType = "4"; @@ -4476,7 +4481,7 @@ bool storageSetup(bool amazonInstall) prompt = "Select the type of Data Storage [1=internal, 2=external] (" + storageType + ") > "; } - if ( glusterInstalled == "y" && hadoopInstalled == "n" ) + if ( (glusterInstalled == "y" && singleServerInstall != "1") && hadoopInstalled == "n" ) { cout << "There are 3 options when 
configuring the storage: internal, external, or DataRedundancy" << endl << endl; prompt = "Select the type of Data Storage [1=internal, 2=external, 3=DataRedundancy] (" + storageType + ") > "; @@ -4488,7 +4493,7 @@ bool storageSetup(bool amazonInstall) prompt = "Select the type of Data Storage [1=internal, 2=external, 4=hdfs] (" + storageType + ") > "; } - if ( glusterInstalled == "y" && hadoopInstalled == "y" ) + if ( (glusterInstalled == "y" && singleServerInstall != "1") && hadoopInstalled == "y" ) { cout << "There are 5 options when configuring the storage: internal, external, DataRedundancy, or hdfs" << endl << endl; prompt = "Select the type of Data Storage [1=internal, 2=external, 3=DataRedundancy, 4=hdfs] (" + storageType + ") > "; @@ -4499,7 +4504,7 @@ bool storageSetup(bool amazonInstall) cout << " 'external' - This is specified when the DBRoot directories are mounted." << endl; cout << " High Availability Server Failover is Supported in this mode." << endl << endl; - if ( glusterInstalled == "y" ) + if ( glusterInstalled == "y" && singleServerInstall != "1") { cout << " 'DataRedundancy' - This is specified when gluster is installed and you want" << endl; cout << " the DBRoot directories to be controlled by ColumnStore Data Redundancy." 
<< endl; @@ -4531,7 +4536,7 @@ bool storageSetup(bool amazonInstall) exit(1); } - if ( glusterInstalled == "y" && hadoopInstalled == "n" ) + if ( (glusterInstalled == "y" && singleServerInstall != "1") && hadoopInstalled == "n" ) { if ( storageType == "1" || storageType == "2" || storageType == "3") break; @@ -4550,7 +4555,7 @@ bool storageSetup(bool amazonInstall) exit(1); } - if ( glusterInstalled == "y" && hadoopInstalled == "y" ) + if ( (glusterInstalled == "y" && singleServerInstall != "1") && hadoopInstalled == "y" ) { if ( storageType == "1" || storageType == "2" || storageType == "3" || storageType == "4") break; @@ -4560,6 +4565,34 @@ bool storageSetup(bool amazonInstall) } } + if (storageType != "3" && DataRedundancy) + { + cout << "WARNING: This system was configured with ColumnStore DataRedundancy" << endl; + cout << " The selection to change from DataRedundancy to a different" << endl; + cout << " storage type will require to cleanup. Exit and refer to" << endl; + cout << " ColumnStore documentation for procedures or continue." << endl; + + cout << endl; + string continueInstall = "y"; + while(true) + { + pcommand = callReadline("Would you like to continue with this storage setting? 
[y,n] (" + continueInstall + ") > "); + if (pcommand) + { + if (strlen(pcommand) > 0) continueInstall = pcommand; + callFree(pcommand); + } + if ( continueInstall == "y" || continueInstall == "n" ) + break; + else + cout << "Invalid Entry, please enter 'y' for yes or 'n' for no" << endl; + continueInstall = "y"; + if ( noPrompting ) + exit(1); + } + if ( continueInstall == "n") + exit(1); + } switch ( atoi(storageType.c_str()) ) { case (1): { @@ -4573,7 +4606,7 @@ bool storageSetup(bool amazonInstall) } case (3): { - DBRootStorageType = "gluster"; + DBRootStorageType = "DataRedundancy"; break; } case (4): @@ -4762,14 +4795,14 @@ bool storageSetup(bool amazonInstall) // if gluster if ( storageType == "3" ) { - gluster = true; - sysConfig->setConfig(InstallSection, "GlusterConfig", "y"); + DataRedundancy = true; + sysConfig->setConfig(InstallSection, "DataRedundancyConfig", "y"); sysConfig->setConfig("PrimitiveServers", "DirectIO", "n"); } else { - gluster = false; - sysConfig->setConfig(InstallSection, "GlusterConfig", "n"); + DataRedundancy = false; + sysConfig->setConfig(InstallSection, "DataRedundancyConfig", "n"); sysConfig->setConfig("PrimitiveServers", "DirectIO", "y"); } @@ -5403,42 +5436,56 @@ bool glusterSetup(string password) { int numberDBRootsPerPM = DBRootCount/pmNumber; int numberBricksPM = 0; std::vector dbrootPms[DBRootCount]; - DataRedundancyConfig DataRedundancyConfigs[pmNumber]; + DataRedundancySetup DataRedundancyConfigs[pmNumber]; string command = ""; string remoteCommand = installDir + "/bin/remote_command.sh "; // how many copies? - cout << endl; - cout << "Setup the Number of Copies: This is the total number of copies of the data" << endl; - cout << "in the system and a non-redundant system has 1 copy, so choose 2 or more," << endl; - cout << "but not more than the number of PMs which is " + oam.itoa(pmNumber) + "." 
<< endl; - - while(dataRedundancyCopies < 2 || dataRedundancyCopies > pmNumber) + if (pmNumber > 2) { - dataRedundancyCopies = 2; - prompt = "Enter Number of Copies [2-" + oam.itoa(pmNumber) + "] ("+ oam.itoa(dataRedundancyCopies) +") > "; - pcommand = callReadline(prompt.c_str()); - if (pcommand) { - if (strlen(pcommand) > 0) dataRedundancyCopies = atoi(pcommand); - callFree(pcommand); - } - - if ( dataRedundancyCopies < 2 || dataRedundancyCopies > pmNumber ) { - cout << endl << "ERROR: Invalid Copy Count '" + oam.itoa(dataRedundancyCopies) + "', please re-enter" << endl << endl; - if ( noPrompting ) - exit(1); - continue; - } - - //update count - try { - sysConfig->setConfig(InstallSection, "DataRedundancyCopies", oam.itoa(dataRedundancyCopies)); - } - catch(...) + cout << endl; + cout << "Setup the Number of Copies: This is the total number of copies of the data" << endl; + cout << "in the system. At least 2, but not more than the number of PMs(" + oam.itoa(pmNumber) + "), are required." << endl; + while(dataRedundancyCopies < 2 || dataRedundancyCopies > pmNumber) { - cout << "ERROR: Problem setting DataRedundancyCopies in the MariaDB ColumnStore System Configuration file" << endl; - exit(1); + dataRedundancyCopies = 2; //minimum 2 copies + prompt = "Enter Number of Copies [2-" + oam.itoa(pmNumber) + "] ("+ oam.itoa(dataRedundancyCopies) +") > "; + pcommand = callReadline(prompt.c_str()); + if (pcommand) { + if (strlen(pcommand) > 0) dataRedundancyCopies = atoi(pcommand); + callFree(pcommand); + } + + if ( dataRedundancyCopies < 2 || dataRedundancyCopies > pmNumber ) { + cout << endl << "ERROR: Invalid Copy Count '" + oam.itoa(dataRedundancyCopies) + "', please re-enter" << endl << endl; + if ( noPrompting ) + exit(1); + continue; + } } } + else if (pmNumber == 2) + { + dataRedundancyCopies = 2; //minimum 2 copies + cout << endl; + cout << "Only 2 PMs configured. Setting number of copies at 2." 
<< endl; + } + else + { + // This should never happen + cout << endl; + cout << "ERROR: Invalid value for pm count Data Redundancy could not be configured." << endl; + exit(1); + } + + //update count + try { + sysConfig->setConfig(InstallSection, "DataRedundancyCopies", oam.itoa(dataRedundancyCopies)); + } + catch(...) + { + cout << "ERROR: Problem setting DataRedundancyCopies in the MariaDB ColumnStore System Configuration file" << endl; + exit(1); + } numberBricksPM = numberDBRootsPerPM * dataRedundancyCopies; @@ -5612,6 +5659,7 @@ bool glusterSetup(string password) { DataRedundancyConfigs[pm].pmHostname = sysConfig->getConfig("SystemModuleConfig",pmHostName); } } +/* cout << endl; cout << "OK. You have " + oam.itoa(pmNumber) + " PMs, " + oam.itoa(DBRootCount) + " DBRoots, and you have chosen to keep " + oam.itoa(dataRedundancyCopies) << endl; cout << "copies of the data. You can choose to place the copies in " << endl; @@ -5620,7 +5668,7 @@ bool glusterSetup(string password) { while( dataRedundancyStorage != 1 && dataRedundancyStorage != 2 ) { dataRedundancyStorage = 1; - prompt = "Select the data redundancy network [1=directory, 2=storage] (" + oam.itoa(dataRedundancyStorage) + ") > "; + prompt = "Select the data redundancy storage device [1=directory, 2=storage] (" + oam.itoa(dataRedundancyStorage) + ") > "; pcommand = callReadline(prompt.c_str()); if (pcommand) { @@ -5657,6 +5705,12 @@ bool glusterSetup(string password) { //loop through pms and get storage locations for each for (int pm=0; pm < pmNumber; pm++) { + vector::iterator dbrootID = DataRedundancyConfigs[pm].dbrootCopies.begin(); + for (; dbrootID < DataRedundancyConfigs[pm].dbrootCopies.end(); dbrootID++ ) + { + int brick = (*dbrootID); + cout << "PM#" + oam.itoa(DataRedundancyConfigs[pm].pmID) + " brick#" + oam.itoa(brick) + " : " << endl; + } for (int brick=0; brick < numberBricksPM; brick++) { prompt = "Enter a storage locations for PM#" + oam.itoa(DataRedundancyConfigs[pm].pmID) + " brick#" + 
oam.itoa(brick) + " : "; @@ -5686,7 +5740,7 @@ bool glusterSetup(string password) { } } } - +*/ // User config complete setup the gluster bricks // This will distribute DBRootCopies evenly across PMs for (int pm=0; pm < pmNumber; pm++) @@ -5754,6 +5808,7 @@ bool glusterSetup(string password) { cout << "ERROR: command failed: " << command << endl; exit(1); } +/* if (dataRedundancyStorage == 2) { //walk data storage locations and modify fstab to reflect the storage locations entered by user @@ -5811,15 +5866,17 @@ bool glusterSetup(string password) { } } +*/ } if (rootUser) { - command = "gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr; + command = "gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr + " >> /tmp/glusterCommands.txt 2>&1"; } else { - command = "sudo gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr; + command = "sudo gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr + " >> /tmp/glusterCommands.txt 2>&1"; } + cout << "gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr << endl; status = system(command.c_str()); if (WEXITSTATUS(status) != 0 ) { @@ -5828,7 +5885,7 @@ bool glusterSetup(string password) { } } sleep(5); - command = "gluster peer status "; + command = "gluster peer status >> /tmp/glusterCommands.txt 2>&1"; status = system(command.c_str()); if (WEXITSTATUS(status) != 0 ) { @@ -5839,6 +5896,11 @@ bool glusterSetup(string password) { //TODO: figureout a cleaner way to do this. 
sleep(10); // Build the gluster volumes and start them for each dbroot + int pmnextbrick[pmNumber]; + for (int pm=0; pm < pmNumber; pm++) + { + pmnextbrick[pm]=1; + } for (int db=0; db < DBRootCount; db++) { int dbrootID = db + 1; @@ -5854,9 +5916,11 @@ bool glusterSetup(string password) { for (; dbrootPmIter < dbrootPms[db].end(); dbrootPmIter++ ) { int pm = (*dbrootPmIter) - 1; - command += DataRedundancyConfigs[pm].pmIpAddr + ":" + installDir +"/gluster/brick" + oam.itoa(dbrootID) + " "; + command += DataRedundancyConfigs[pm].pmIpAddr + ":" + installDir +"/gluster/brick" + oam.itoa(pmnextbrick[pm]) + " "; + pmnextbrick[pm]++; } - command += "force"; + command += "force >> /tmp/glusterCommands.txt 2>&1"; + cout << "Gluster create and start volume dbroot" << oam.itoa(dbrootID) << "..."; status = system(command.c_str()); if (WEXITSTATUS(status) != 0 ) { @@ -5865,11 +5929,11 @@ bool glusterSetup(string password) { } if (rootUser) { - command = "gluster volume start dbroot" + oam.itoa(dbrootID); + command = "gluster volume start dbroot" + oam.itoa(dbrootID) + " >> /tmp/glusterCommands.txt 2>&1"; } else { - command = "sudo gluster volume start dbroot" + oam.itoa(dbrootID); + command = "sudo gluster volume start dbroot" + oam.itoa(dbrootID) + " >> /tmp/glusterCommands.txt 2>&1"; } status = system(command.c_str()); if (WEXITSTATUS(status) != 0 ) @@ -5877,6 +5941,7 @@ bool glusterSetup(string password) { cout << "ERROR: command failed: " << command << endl; exit(1); } + cout << "DONE" << endl; } return true; From bd43c259dfcc4162ae30a87f5ef0d5b370da89c6 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 11 Aug 2017 10:06:13 +0100 Subject: [PATCH 5/6] MCOL-835 Fix use-after-free crash in ExeMgr It is possible that DistributedEngineComm can get the Stats object from an MQE object and the MQE object freed before its stats object is passed to InetStreamSocket. 
This patch makes sure that DistributedEngineComm gets a reference to MQE instead of the pointer to the Stats object in another reference. Therefore making sure that the Stats object still exists in InetStreamSocket. --- dbcon/joblist/distributedenginecomm.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dbcon/joblist/distributedenginecomm.cpp b/dbcon/joblist/distributedenginecomm.cpp index 19efe7e5c..40dddbc29 100644 --- a/dbcon/joblist/distributedenginecomm.cpp +++ b/dbcon/joblist/distributedenginecomm.cpp @@ -765,12 +765,15 @@ void DistributedEngineComm::write(messageqcpp::ByteStream &msg, uint32_t connect mutex::scoped_lock lk(fMlock, defer_lock_t()); MessageQueueMap::iterator it; + // This keeps mqe's stats from being freed until end of function + boost::shared_ptr mqe; Stats *senderStats = NULL; lk.lock(); it = fSessionMessages.find(senderID); if (it != fSessionMessages.end()) - senderStats = &(it->second->stats); + mqe = it->second; + senderStats = &(mqe->stats); lk.unlock(); newClients[connection]->write(msg, NULL, senderStats); @@ -829,6 +832,8 @@ int DistributedEngineComm::writeToClient(size_t index, const ByteStream& bs, uin { mutex::scoped_lock lk(fMlock, defer_lock_t()); MessageQueueMap::iterator it; + // Keep mqe's stats from being freed early + boost::shared_ptr mqe; Stats *senderStats = NULL; uint32_t interleaver = 0; @@ -839,7 +844,8 @@ int DistributedEngineComm::writeToClient(size_t index, const ByteStream& bs, uin lk.lock(); it = fSessionMessages.find(sender); if (it != fSessionMessages.end()) { - senderStats = &(it->second->stats); + mqe = it->second; + senderStats = &(mqe->stats); if (doInterleaving) interleaver = it->second->interleaver[index % it->second->pmCount]++; } From 980ee891fd3a18baf32def00ea76145ac59ea39e Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Fri, 11 Aug 2017 10:12:56 +0100 Subject: [PATCH 6/6] MCOL-835 Fix non-braced 'if' --- dbcon/joblist/distributedenginecomm.cpp | 2 ++ 1 file 
changed, 2 insertions(+) diff --git a/dbcon/joblist/distributedenginecomm.cpp b/dbcon/joblist/distributedenginecomm.cpp index 40dddbc29..fc631df7c 100644 --- a/dbcon/joblist/distributedenginecomm.cpp +++ b/dbcon/joblist/distributedenginecomm.cpp @@ -772,8 +772,10 @@ void DistributedEngineComm::write(messageqcpp::ByteStream &msg, uint32_t connect lk.lock(); it = fSessionMessages.find(senderID); if (it != fSessionMessages.end()) + { mqe = it->second; senderStats = &(mqe->stats); + } lk.unlock(); newClients[connection]->write(msg, NULL, senderStats);