diff --git a/oam/oamcpp/liboamcpp.cpp b/oam/oamcpp/liboamcpp.cpp
index 7a65f0b76..286b9d348 100644
--- a/oam/oamcpp/liboamcpp.cpp
+++ b/oam/oamcpp/liboamcpp.cpp
@@ -8267,7 +8267,9 @@ namespace oam
         {
             int pmID = atoi(argument1.c_str());
             int dbrootID = atoi(argument2.c_str());
+            // GLUSTER_ADD callers pass the ssh password in through the errmsg in/out argument
+            string password = errmsg;
             string command = "";
+            int status;
 
             writeLog("glusterctl: GLUSTER_ADD: dbroot = " + argument2 + " pm = " + argument1, LOG_TYPE_DEBUG );
 
             Config* sysConfig = Config::makeConfig();
@@ -8354,17 +8356,24 @@ namespace oam
             for (int pm=(pmID-1); pm < numberPMs; pm++)
             {
-                command = glustercmd + "peer probe " + DataRedundancyConfigs[pm].pmIpAddr;
-                int status = system(command.c_str());
-                if (WEXITSTATUS(status) != 0 )
-                {
-                    writeLog("ERROR: command failed: ",LOG_TYPE_DEBUG);
-                    exceptionControl("GLUSTER_ADD", API_FAILURE);
-                }
+                command = glustercmd + "peer probe " + DataRedundancyConfigs[pm].pmIpAddr + " >> /tmp/glusterCommands.txt 2>&1";
+                cout << "gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr << endl;
+                status = system(command.c_str());
+                if (WEXITSTATUS(status) != 0 )
+                {
+                    cout << "ERROR: peer probe command failed." << endl;
+                    // fall back to checking whether glusterd is even running on the peer
+                    command = InstallDir + "/bin/remote_command.sh " + DataRedundancyConfigs[pm].pmIpAddr + " " + password + " 'stat /var/run/glusterd.pid > /dev/null 2>&1'";
+                    status = system(command.c_str());
+                    if (WEXITSTATUS(status) != 0 )
+                    {
+                        cout << "ERROR: No glusterd process detected at " << DataRedundancyConfigs[pm].pmIpAddr << "." << endl;
+                        cout << "       Start and enable glusterd and run postConfigure again." << endl;
+                    }
+                    exceptionControl("GLUSTER_ADD", API_FAILURE);
+                }
             }
             sleep(5);
-            command = glustercmd + "peer status ";
-            int status = system(command.c_str());
+            command = glustercmd + "peer status >> /tmp/glusterCommands.txt 2>&1";
+            status = system(command.c_str());
             if (WEXITSTATUS(status) != 0 )
             {
                 cout << "ERROR: command failed: " << command << endl;
@@ -8372,6 +8381,11 @@ namespace oam
             }
             //Need to wait since peer probe success does not always mean it is ready for volume create command
             sleep(10);
+            int pmnextbrick[numberPMs];
+            for (int pm=(pmID-1); pm < numberPMs; pm++)
+            {
+                pmnextbrick[pm]=1;
+            }
             for (int db=(dbrootID-1); db < dbrootCount; db++)
             {
                 int newDbrootID = db + 1;
@@ -8382,22 +8396,25 @@ namespace oam
                 for (; dbrootPmIter < dbrootPms[db].end(); dbrootPmIter++ )
                 {
                     int pm = (*dbrootPmIter) - 1;
-                    command += DataRedundancyConfigs[pm].pmIpAddr + ":" + InstallDir +"/gluster/brick" + itoa(newDbrootID) + " ";
+                    command += DataRedundancyConfigs[pm].pmIpAddr + ":" + InstallDir +"/gluster/brick" + itoa(pmnextbrick[pm]) + " ";
+                    pmnextbrick[pm]++;
                 }
-                command += "force";
-                int status = system(command.c_str());
+                command += "force >> /tmp/glusterCommands.txt 2>&1";
+                cout << "Gluster create and start volume dbroot" << itoa(newDbrootID) << "...";
+                status = system(command.c_str());
                 if (WEXITSTATUS(status) != 0 )
                 {
                     writeLog("ERROR: command failed: " + command,LOG_TYPE_DEBUG);
                     exceptionControl("GLUSTER_ADD", API_FAILURE);
                 }
-                command = glustercmd + "volume start dbroot" + itoa(newDbrootID);
+                command = glustercmd + "volume start dbroot" + itoa(newDbrootID) + " >> /tmp/glusterCommands.txt 2>&1";
                 status = system(command.c_str());
                 if (WEXITSTATUS(status) != 0 )
                 {
                     writeLog("ERROR: command failed: ",LOG_TYPE_DEBUG);
                     exceptionControl("GLUSTER_ADD", API_FAILURE);
                 }
+                cout << "DONE" << endl;
             }
 
             try
@@ -8435,7 +8452,7 @@ namespace oam
             int status;
 
             writeLog("glusterctl: GLUSTER_DELETE: dbroot = " + dbrootID, LOG_TYPE_DEBUG );
 
-            command = glustercmd + "volume stop dbroot" + dbrootID;
+            command = glustercmd + "--mode=script volume stop dbroot" + dbrootID + " >> /tmp/glusterCommands.txt 2>&1";
             status = system(command.c_str());
             if (WEXITSTATUS(status) != 0 )
@@ -8443,8 +8460,10 @@ namespace oam
                 writeLog("ERROR: command failed: ",LOG_TYPE_DEBUG);
                 exceptionControl("GLUSTER_DELETE", API_FAILURE);
             }
+            // give the transaction time to finish after stopping
+            sleep(10);
 
-            command = glustercmd + "volume delete dbroot" + dbrootID;
+            command = glustercmd + "--mode=script volume delete dbroot" + dbrootID + " >> /tmp/glusterCommands.txt 2>&1";
             status = system(command.c_str());
             if (WEXITSTATUS(status) != 0 )
@@ -8455,6 +8474,32 @@ namespace oam
             break;
         }
 
+        case (oam::GLUSTER_PEERPROBE):
+        {
+            string ipAddress = argument1;
+            string password = argument2;
+            string command = "";
+            int status;
+
+            command = glustercmd + "peer probe " + ipAddress + " >> /tmp/glusterCommands.txt 2>&1";
+
+            cout << "gluster peer probe " + ipAddress << endl;
+            status = system(command.c_str());
+            if (WEXITSTATUS(status) != 0 )
+            {
+                cout << "ERROR: peer probe command failed." << endl;
+                // fall back to checking whether glusterd is even running on the peer
+                command = InstallDir + "/bin/remote_command.sh " + ipAddress + " " + password + " 'stat /var/run/glusterd.pid > /dev/null 2>&1'";
+                status = system(command.c_str());
+                if (WEXITSTATUS(status) != 0 )
+                {
+                    cout << "ERROR: No glusterd process detected at " << ipAddress << "." << endl;
+                    cout << "       Start and enable glusterd and run postConfigure again." << endl;
+                }
+                return 1;
+            }
+            break;
+        }
+
         default:
             break;
     }
diff --git a/oam/oamcpp/liboamcpp.h b/oam/oamcpp/liboamcpp.h
index 8c0e3ed18..db670ff70 100644
--- a/oam/oamcpp/liboamcpp.h
+++ b/oam/oamcpp/liboamcpp.h
@@ -138,7 +138,8 @@ namespace oam
         GLUSTER_WHOHAS,
         GLUSTER_UNASSIGN,
         GLUSTER_ADD,
-        GLUSTER_DELETE
+        GLUSTER_DELETE,
+        GLUSTER_PEERPROBE
     };
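[Reviewer note - not part of the patch] The library changes above add a GLUSTER_PEERPROBE op to the existing glusterctl(ACTION, argument1, argument2, errmsg) entry point, and GLUSTER_ADD now receives the ssh password through the errmsg in/out argument instead of returning an error string. A minimal caller sketch with illustrative values (the signature is inferred from the calls in this patch):

    oam::Oam oam;
    std::string ipAddress = "192.168.1.101";   // PM to probe (example value)
    std::string password  = "pw";              // forwarded to remote_command.sh (example value)
    std::string errmsg;

    // Probe the peer before any volume work; a nonzero return means the probe
    // failed and /tmp/glusterCommands.txt holds the gluster output.
    int ret = oam.glusterctl(oam::GLUSTER_PEERPROBE, ipAddress, password, errmsg);
    if (ret != 0)
        cerr << "peer probe failed for " << ipAddress << endl;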
diff --git a/oamapps/mcsadmin/mcsadmin.cpp b/oamapps/mcsadmin/mcsadmin.cpp
index cbc5400dc..8d9cd55c3 100644
--- a/oamapps/mcsadmin/mcsadmin.cpp
+++ b/oamapps/mcsadmin/mcsadmin.cpp
@@ -2049,6 +2049,11 @@ int processCommand(string* arguments)
             catch(...)
             {}
 
+            if (DataRedundancyConfig == "y") {
+                cout << endl << "**** removeDbroot Not Supported on a Data Redundancy configured system, use the removeModule command to remove modules and their dbroots" << endl;
+                break;
+            }
+
             if ( localModule != parentOAMModule )
             {
                 // exit out since not on active module
                 cout << endl << "**** removeDbroot Failed : Can only run command on Active OAM Parent Module (" << parentOAMModule << ")." << endl;
@@ -5424,6 +5429,16 @@ int processCommand(string* arguments)
                 }
             }
 
+            if ( DataRedundancyConfig == "y")
+            {
+                string errmsg1;
+                string errmsg2;
+                // make sure the new node is a gluster peer before adding it
+                int ret = oam.glusterctl(oam::GLUSTER_PEERPROBE, IPAddress, password, errmsg2);
+                if ( ret != 0 )
+                {
+                    return 1;
+                }
+            }
             hostconfig.IPAddr = IPAddress;
             hostconfig.HostName = hostName;
             hostconfig.NicID = j+1;
@@ -5465,14 +5480,7 @@ int processCommand(string* arguments)
 
             if ( DataRedundancyConfig == "y" && moduleType == "pm")
             {
-                cout << endl << "System is configured with Data Redundancy, DBRoot Storage will" << endl;
-                cout << "will be created with the Modules during this command." << endl;
-                cout << "The Data Redundancy Packages should already be installed on the" << endl;
-                cout << "Servers being installed." << endl;
-
-                // confirm request
-                if (confirmPrompt(" "))
-                    break;
+                cout << endl << "Data Redundancy storage will be expanded when module(s) are added." << endl;
 
                 if ( dbrootPerPM == 0) {
                     cout << endl;
@@ -5634,11 +5642,10 @@ int processCommand(string* arguments)
                 cout << endl << "Run Data Redundancy Setup for DBRoots" << endl;
 
                 try {
-                    string errmsg;
-                    int ret = oam.glusterctl(oam::GLUSTER_ADD, firstPM, firstDBroot, errmsg);
+                    // the errmsg argument now carries the ssh password into GLUSTER_ADD
+                    int ret = oam.glusterctl(oam::GLUSTER_ADD, firstPM, firstDBroot, password);
                     if ( ret != 0 )
                     {
-                        cout << endl << "**** Failed Data Redundancy Add of DBRoots: " << errmsg << endl;
+                        cout << endl << "**** Failed Data Redundancy Add of DBRoots" << endl;
                         break;
                     }
 
@@ -5677,6 +5684,22 @@ int processCommand(string* arguments)
 
         case 49: // removeModule - parameters: Module name/type, number-of-modules
         {
+            string DataRedundancyConfig = "n";
+            int DataRedundancyCopies = 0;
+            try {
+                oam.getSystemConfig( "DataRedundancyConfig", DataRedundancyConfig);
+            }
+            catch(...)
+            {}
+            if (DataRedundancyConfig == "y")
+            {
+                try {
+                    oam.getSystemConfig( "DataRedundancyCopies", DataRedundancyCopies);
+                }
+                catch(...)
+                {}
+            }
+
             if ( SingleServerInstall == "y" ) {
                 // exit out since not on single-server install
                 cout << endl << "**** removeModule Failed : not support on a Single-Server type installs " << endl;
                 break;
             }
@@ -5738,6 +5761,10 @@ int processCommand(string* arguments)
                 break;
             }
 
+            if ( DataRedundancyConfig == "y" ) {
+                cout << endl << "**** removeModule Failed : Data Redundancy requires you to specify the modules to remove in groups." << endl;
+                break;
+            }
             cout << endl;
 
             moduleType = arguments[1];
@@ -5810,7 +5837,7 @@ int processCommand(string* arguments)
                 devicenetworklist.push_back(devicenetworkconfig);
 
                 moduleType = (*it).substr(0,MAX_MODULE_TYPE_SIZE);
-
+
                 try{
                     oam.getSystemConfig(moduleType, moduletypeconfig);
                 }
@@ -5820,9 +5847,9 @@ int processCommand(string* arguments)
                     quit = true;
                     break;
                 }
-
+
                 int currentModuleCount = moduletypeconfig.ModuleCount;
-
+
                 if ( moduleType == "pm" && currentModuleCount == 1) {
                     cout << endl << "**** removeModule Failed : Failed to Remove Module, you can't remove last Performance Module" << endl;
                     quit = true;
@@ -5837,6 +5864,10 @@ int processCommand(string* arguments)
                 }
             }
 
+            if ( DataRedundancyConfig == "y" && (int)devicenetworklist.size() != DataRedundancyCopies) {
+                cout << endl << "**** removeModule Failed : Data Redundancy requires you to remove modules in groups equal to the number of copies" << endl;
+                quit = true;
+            }
             if (quit)
                 break;
@@ -5855,13 +5886,76 @@ int processCommand(string* arguments)
                 }
                 catch(...)
                 {}
-
-                if ( !dbrootConfigList.empty() ) {
+
+                if ( !dbrootConfigList.empty() && DataRedundancyConfig == "n") {
                     cout << "**** removeModule Failed : " << (*pt).DeviceName << " has dbroots still assigned. Please run movePmDbrootConfig or unassignDbrootPmConfig.";
                     quit = true;
                     cout << endl;
                     break;
                 }
+                else if (DataRedundancyConfig == "y")
+                {
+                    bool PMlistError = true;
+                    DBRootConfigList::iterator dbrootListPt = dbrootConfigList.begin();
+                    for( ; dbrootListPt != dbrootConfigList.end() ; dbrootListPt++)
+                    {
+                        // find which PMs hold a copy of this dbroot
+                        string pmList = "";
+                        try {
+                            string errmsg;
+                            int ret = oam.glusterctl(oam::GLUSTER_WHOHAS, oam.itoa(*dbrootListPt), pmList, errmsg);
+                            if ( ret != 0 )
+                            {
+                                cout << endl << "**** removeModule Failed : " << (*pt).DeviceName << " glusterctl error" << endl;
+                                break;
+                            }
+                        }
+                        catch (...)
+                        {
+                            cout << endl << "**** removeModule Failed : " << (*pt).DeviceName << " glusterctl error" << endl;
+                            break;
+                        }
+
+                        boost::char_separator<char> sep(" ");
+                        boost::tokenizer< boost::char_separator<char> > tokens(pmList, sep);
+                        for ( boost::tokenizer< boost::char_separator<char> >::iterator it1 = tokens.begin();
+                                it1 != tokens.end();
+                                ++it1)
+                        {
+                            PMlistError = true;
+                            DeviceNetworkList::iterator deviceNetListStartPt = devicenetworklist.begin();
+                            string pmWithThisdbrootCopy = (*it1);
+                            // walk the list of PMs that have copies of this dbroot
+                            // and be sure they are in the list of nodes to be removed
+                            for( ; deviceNetListStartPt != endpt ; deviceNetListStartPt++)
+                            {
+                                string thisModuleID = (*deviceNetListStartPt).DeviceName.substr(MAX_MODULE_TYPE_SIZE,MAX_MODULE_ID_SIZE);
+                                //cout << "pmWithThisDBRoot: " << pmWithThisdbrootCopy << " thisModuleID: " << thisModuleID << endl;
+                                if (pmWithThisdbrootCopy == thisModuleID)
+                                {
+                                    PMlistError = false;
+                                }
+                            }
+                            if (PMlistError)
+                            {
+                                cout << "**** removeModule Failed : Attempting to remove PMs: " << arguments[1] << " -- DBRoot" << oam.itoa(*dbrootListPt) << " has copies on PMs " << pmList << endl;
+                                quit = true;
+                            }
+                        }
+                    }
+                    if (!quit)
+                    {
+                        try
+                        {
+                            oam.removeDbroot(dbrootConfigList);
+
+                            cout << endl << "   Successful Removal of DBRoots " << endl << endl;
+                        }
+                        catch (exception& e)
+                        {
+                            cout << endl << "**** removeModule : Removal of DBRoots Failed: " << e.what() << endl;
+                            quit = true;
+                        }
+                    }
+                }
             }
 
             // check module status
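[Reviewer note - not part of the patch] The removeModule guard above assumes GLUSTER_WHOHAS returns pmList as a space-separated list of PM IDs. The rule it enforces, distilled into a self-contained sketch (names are illustrative):

    #include <set>
    #include <sstream>
    #include <string>

    // A dbroot may be dropped only if every PM holding a copy of it
    // is itself part of the group of modules being removed.
    static bool safeToRemove(const std::string& pmList,               // e.g. "1 2"
                             const std::set<std::string>& removing)   // module IDs being removed
    {
        std::istringstream iss(pmList);
        std::string pmID;
        while (iss >> pmID)
            if (removing.count(pmID) == 0)
                return false;   // a copy lives on a PM that is being kept
        return true;
    }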
diff --git a/oamapps/postConfigure/postConfigure.cpp b/oamapps/postConfigure/postConfigure.cpp
index 51fa2bbf6..c14d1e490 100644
--- a/oamapps/postConfigure/postConfigure.cpp
+++ b/oamapps/postConfigure/postConfigure.cpp
@@ -1467,7 +1467,7 @@ int main(int argc, char *argv[])
                 continue;
             }
             if ( moduleType == "pm" && DataRedundancy && moduleCount == 1) {
-                cout << endl << "ERROR: DataRedundancy requires " + moduleType + " module type to be 2 or greater, please re-enter or select a different data storage type." << endl << endl;
+                cout << endl << "ERROR: DataRedundancy requires 2 or more " + moduleType + " modules, please re-enter or restart to select a different data storage type." << endl << endl;
                 if ( noPrompting )
                     exit(1);
                 continue;
@@ -4572,10 +4572,10 @@ bool storageSetup(bool amazonInstall)
 
             if (storageType != "3" && DataRedundancy)
             {
-                cout << "WARNING: This system was configured with ColumnStore DataRedundancy" << endl;
-                cout << "         The selection to change from DataRedundancy to a different" << endl;
-                cout << "         storage type will require to cleanup. Exit and refer to" << endl;
-                cout << "         ColumnStore documentation for procedures or continue." << endl;
+                cout << "WARNING: This system was configured with ColumnStore DataRedundancy storage." << endl;
+                cout << "         Before changing from DataRedundancy to another storage type," << endl;
+                cout << "         existing data should be migrated to the targeted storage." << endl;
+                cout << "         Please refer to the ColumnStore documentation for more information." << endl;
                 cout << endl;
 
                 string continueInstall = "y";
@@ -5445,8 +5445,9 @@ bool glusterSetup(string password) {
     if (pmNumber > 2)
    {
         cout << endl;
+        cout << endl << "----- Setup Data Redundancy Copy Count Configuration -----" << endl << endl;
         cout << "Setup the Number of Copies: This is the total number of copies of the data" << endl;
-        cout << "in the system. At least 2, but not more than the number of PMs(" + oam.itoa(pmNumber) + "), are required." << endl;
+        cout << "in the system. At least 2, but not more than the number of PMs(" + oam.itoa(pmNumber) + "), are required." << endl << endl;
         while(dataRedundancyCopies < 2 || dataRedundancyCopies > pmNumber)
         {
             dataRedundancyCopies = 2; //minimum 2 copies
@@ -5491,11 +5492,16 @@ bool glusterSetup(string password) {
 
     numberBricksPM = numberDBRootsPerPM * dataRedundancyCopies;
 
+    cout << endl << "----- Setup Data Redundancy Network Configuration -----" << endl << endl;
+
+    cout << "     'existing'  - This is specified when using previously configured network devices (NIC Interface #1)." << endl;
+    cout << "                   No additional network configuration is required with this option." << endl << endl;
+    cout << "     'dedicated' - This is specified when Data Redundancy traffic should use a separate" << endl;
+    cout << "                   network from the one previously configured for ColumnStore." << endl;
+    cout << "                   You will be prompted to provide hostname and IP information for each PM." << endl << endl;
     cout << endl;
-    cout << "You can choose to run redundancy over the existing network that ColumnStore " << endl;
-    cout << "is currently using or you can configure a dedicated redundancy network. " << endl;
-    cout << "If you choose a dedicated redundancy network, you will need to provide " << endl;
-    cout << "hostname and IP address information." << endl;
 
     while( dataRedundancyNetwork != 1 && dataRedundancyNetwork != 2 )
     {
         dataRedundancyNetwork = 1;
@@ -5744,6 +5750,8 @@ bool glusterSetup(string password) {
     }
 */
     // User config complete setup the gluster bricks
+    cout << endl << "----- Performing Data Redundancy Configuration -----" << endl << endl;
+
     // This will distribute DBRootCopies evenly across PMs
     for (int pm=0; pm < pmNumber; pm++)
     {
@@ -5803,11 +5811,18 @@ bool glusterSetup(string password) {
         for ( int brick=1; brick<=numberBricksPM; brick++)
         {
             // create the gluster brick directories now
-            command = remoteCommand + DataRedundancyConfigs[pm].pmIpAddr + " " + password + " 'mkdir -p " + installDir + "/gluster/brick" + oam.itoa(brick) + "'";
+            if (rootUser)
+            {
+                command = remoteCommand + DataRedundancyConfigs[pm].pmIpAddr + " " + password + " 'mkdir -p " + installDir + "/gluster/brick" + oam.itoa(brick) + "'";
+            }
+            else
+            {
+                command = remoteCommand + DataRedundancyConfigs[pm].pmIpAddr + " " + password + " 'sudo mkdir -p " + installDir + "/gluster/brick" + oam.itoa(brick) + "'";
+            }
             status = system(command.c_str());
             if (WEXITSTATUS(status) != 0 )
             {
-                cout << "ERROR: command failed: " << command << endl;
+                cout << "ERROR: failed to create directory " << installDir << "/gluster/brick" << oam.itoa(brick) << " on " << DataRedundancyConfigs[pm].pmIpAddr << endl;
                 exit(1);
             }
 /*
@@ -5870,19 +5885,11 @@ bool glusterSetup(string password) {
         }
 */
         }
-        if (rootUser)
+        string errmsg1;
+        string errmsg2;
+        // GLUSTER_PEERPROBE handles the root/non-root and glusterd-liveness checks itself
+        int ret = oam.glusterctl(oam::GLUSTER_PEERPROBE, DataRedundancyConfigs[pm].pmIpAddr, password, errmsg2);
+        if ( ret != 0 )
         {
-            command = "gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr + " >> /tmp/glusterCommands.txt 2>&1";
-        }
-        else
-        {
-            command = "sudo gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr + " >> /tmp/glusterCommands.txt 2>&1";
-        }
-        cout << "gluster peer probe " + DataRedundancyConfigs[pm].pmIpAddr << endl;
-        status = system(command.c_str());
-        if (WEXITSTATUS(status) != 0 )
-        {
-            cout << "ERROR: command failed: " << command << endl;
             exit(1);
         }
     }
@@ -5891,7 +5898,7 @@ bool glusterSetup(string password) {
     status = system(command.c_str());
     if (WEXITSTATUS(status) != 0 )
     {
-        cout << "ERROR: command failed: " << command << endl;
+        cout << "ERROR: peer status command failed." << endl;
         exit(1);
     }
     //Need to wait since peer probe success does not always mean it is ready for volume create command
@@ -5926,8 +5933,28 @@ bool glusterSetup(string password) {
         status = system(command.c_str());
         if (WEXITSTATUS(status) != 0 )
         {
-            cout << "ERROR: command failed: " << command << endl;
-            exit(1);
+            // if the volume is left over from a previous run, delete it and retry once
+            if (oam.checkLogStatus("/tmp/glusterCommands.txt", "dbroot" + oam.itoa(dbrootID) + " already exists" ))
+            {
+                string errmsg1;
+                string errmsg2;
+                int ret = oam.glusterctl(oam::GLUSTER_DELETE, oam.itoa(dbrootID), errmsg1, errmsg2);
+                if ( ret != 0 )
+                {
+                    cerr << "FAILURE: Error replacing existing gluster dbroot# " + oam.itoa(dbrootID) + ", error: " + errmsg1 << endl;
+                    exit(1);
+                }
+                status = system(command.c_str());
+                if (WEXITSTATUS(status) != 0 )
+                {
+                    cout << "ERROR: Failed to create volume dbroot" << oam.itoa(dbrootID) << endl;
+                    exit(1);
+                }
+            }
+            else
+            {
+                cout << "ERROR: Failed to create volume dbroot" << oam.itoa(dbrootID) << endl;
+                exit(1);
+            }
         }
         if (rootUser)
         {
@@ -5940,12 +5967,14 @@ bool glusterSetup(string password) {
         status = system(command.c_str());
         if (WEXITSTATUS(status) != 0 )
         {
-            cout << "ERROR: command failed: " << command << endl;
+            cout << "ERROR: Failed to start dbroot" << oam.itoa(dbrootID) << endl;
             exit(1);
         }
         cout << "DONE" << endl;
     }
 
+    cout << endl << "----- Data Redundancy Configuration Complete -----" << endl << endl;
+
     return true;
 }
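[Reviewer note - not part of the patch] The volume-create path in glusterSetup() is now re-runnable: when gluster reports the volume already exists, the code deletes it through GLUSTER_DELETE and retries the create once. The retry shape, distilled into a sketch (the two helpers are stand-ins for oam.checkLogStatus() and the GLUSTER_DELETE call, not real APIs):

    #include <cstdlib>
    #include <string>
    #include <sys/wait.h>

    static bool volumeExistsInLog(const std::string&) { return false; } // stand-in
    static int  deleteVolume(const std::string&)      { return 0; }     // stand-in

    static int createVolume(const std::string& cmd, const std::string& name)
    {
        int status = std::system(cmd.c_str());
        if (WEXITSTATUS(status) == 0)
            return 0;                        // created on the first try
        if (!volumeExistsInLog(name))
            return 1;                        // hard failure: nothing to replace
        if (deleteVolume(name) != 0)
            return 1;                        // could not clear the stale volume
        status = std::system(cmd.c_str());   // retry once after the delete
        return WEXITSTATUS(status) == 0 ? 0 : 1;
    }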