You've already forked mariadb-columnstore-engine
							
							
				mirror of
				https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
				synced 2025-10-31 18:30:33 +03:00 
			
		
		
		
	Tracing requests Custom log factory adds all trace values as one log record parameter (it will be empty if trace values are empty, like in MainThread where there are no incoming requests)
		
			
				
	
	
		
			1151 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1151 lines
		
	
	
		
			41 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| '''
 | |
| This file contains functions to manipulate the CS config file to add a node,
 | |
| remove a node, etc.  Should be synchronized externally.
 | |
| '''
 | |
| 
 | |
| import datetime
 | |
| import logging
 | |
| import os
 | |
| import shutil
 | |
| import socket
 | |
| import subprocess
 | |
| import time
 | |
| from typing import Optional
 | |
| 
 | |
| import requests
 | |
| from lxml import etree
 | |
| 
 | |
| from cmapi_server import helpers
 | |
| from cmapi_server.constants import (
 | |
|     CMAPI_CONF_PATH, CMAPI_SINGLE_NODE_XML, DEFAULT_MCS_CONF_PATH, LOCALHOSTS,
 | |
|     MCS_DATA_PATH,
 | |
| )
 | |
| from cmapi_server.traced_session import get_traced_session
 | |
| from cmapi_server.managers.network import NetworkManager
 | |
| from cmapi_server.tracer import get_tracer
 | |
| from mcs_node_control.models.node_config import NodeConfig
 | |
| 
 | |
| 
 | |
| PMS_NODE_PORT = '8620'
 | |
| EXEMGR_NODE_PORT = '8601'
 | |
| 
 | |
| 
 | |
| # TODO: add some description of the public interfaces...
 | |
| #       Split this file or change the structure and move functions out of here
 | |
| 
 | |
| 
 | |
| def switch_node_maintenance(
 | |
|     maintenance_state: bool,
 | |
|     input_config_filename: str = DEFAULT_MCS_CONF_PATH,
 | |
|     output_config_filename: str = DEFAULT_MCS_CONF_PATH,
 | |
| ):
 | |
|     """Change maintenance flag value in Columnstore xml config file.
 | |
| 
 | |
|     :param maintenance_state: state of maintenance flag
 | |
|     :type maintenance_state: bool
 | |
|     :param input_config_filename: mcs input config path,
 | |
|                                   defaults to DEFAULT_MCS_CONF_PATH
 | |
|     :type input_config_filename: str, optional
 | |
|     :param output_config_filename: mcs output config path,
 | |
|                                    defaults to DEFAULT_MCS_CONF_PATH
 | |
|     :type output_config_filename: str, optional
 | |
|     """
 | |
|     node_config = NodeConfig()
 | |
|     config_root = node_config.get_current_config_root(input_config_filename)
 | |
|     maintenance_element = config_root.find('Maintenance')
 | |
|     # should be done in upgrade_from_v0, but looks like better to doublecheck
 | |
|     if maintenance_element is None:
 | |
|         maintenance_element = etree.SubElement(config_root, 'Maintenance')
 | |
|     maintenance_element.text = str(maintenance_state).lower()
 | |
|     node_config.write_config(config_root, filename=output_config_filename)
 | |
|     # TODO: probably move publishing to cherrypy.engine failover channel here?
 | |
| 
 | |
| 
 | |
| def add_node(
 | |
|     node: str, input_config_filename: str = DEFAULT_MCS_CONF_PATH,
 | |
|     output_config_filename: Optional[str] = None,
 | |
|     use_rebalance_dbroots: bool = True
 | |
| ):
 | |
|     """Add node to a cluster.
 | |
| 
 | |
|     Check whether or not '127.0.0.1' or 'localhost' are in the config file, and
 | |
|     if so, replace those instances with this node's external hostname
 | |
|     Do we need to detect IP addresses given as node, and use the hostname?
 | |
|         - if we're always using hostnames or always using addrs everywhere
 | |
|           it won't matter
 | |
|     Add to the PMS section
 | |
|     Add an ExeMgr section
 | |
|     Add the DBRM workers
 | |
|     Add the writeengineservers
 | |
|     Add "Module*" keys
 | |
|     Move DBRoots (moved to a separate function)
 | |
|     Update CEJ to point to ExeMgr1 (for now)
 | |
|     Update the list of active nodes
 | |
| 
 | |
|     :param node: node address or hostname
 | |
|     :type node: str
 | |
|     :param input_config_filename: mcs input config path,
 | |
|                                   defaults to DEFAULT_MCS_CONF_PATH
 | |
|     :type input_config_filename: str, optional
 | |
|     :param output_config_filename: mcs output config path, defaults to None
 | |
|     :type output_config_filename: Optional[str], optional
 | |
|     :param use_rebalance_dbroots: rebalance dbroots or not, defaults to True
 | |
|     :type use_rebalance_dbroots: bool, optional
 | |
|     """
 | |
|     node_config = NodeConfig()
 | |
|     c_root = node_config.get_current_config_root(input_config_filename)
 | |
| 
 | |
|     try:
 | |
|         if not _replace_localhost(c_root, node):
 | |
|             pm_num = _add_node_to_PMS(c_root, node)
 | |
|             _add_WES(c_root, pm_num, node)
 | |
|             _add_DBRM_Worker(c_root, node)
 | |
|             _add_Module_entries(c_root, node)
 | |
|             _add_active_node(c_root, node)
 | |
|             _add_node_to_ExeMgrs(c_root, node)
 | |
|             if use_rebalance_dbroots:
 | |
|                 _rebalance_dbroots(c_root)
 | |
|                 _move_primary_node(c_root)
 | |
|     except Exception:
 | |
|         logging.error(
 | |
|             'Caught exception while adding node, config file is unchanged',
 | |
|             exc_info=True
 | |
|         )
 | |
|         raise
 | |
|     else:
 | |
|         if output_config_filename is None:
 | |
|             node_config.write_config(c_root)
 | |
|         else:
 | |
|             node_config.write_config(c_root, filename=output_config_filename)
 | |
| 
 | |
| 
 | |
| def remove_node(
 | |
|     node: str, input_config_filename: str = DEFAULT_MCS_CONF_PATH,
 | |
|     output_config_filename: Optional[str] = None,
 | |
|     deactivate_only: bool = True,
 | |
|     use_rebalance_dbroots: bool = True, **kwargs
 | |
| ):
 | |
|     """Remove node from a cluster.
 | |
| 
 | |
|     - Rebuild the PMS section w/o node
 | |
|     - Remove the DBRM_Worker entry
 | |
|     - Remove the WES entry
 | |
|     - Rebuild the "Module*" entries w/o node
 | |
|     - Update the list of active / inactive / desired nodes
 | |
| 
 | |
|     :param node: node address or hostname
 | |
|     :type node: str
 | |
|     :param input_config_filename: mcs input config path,
 | |
|                                   defaults to DEFAULT_MCS_CONF_PATH
 | |
|     :type input_config_filename: str, optional
 | |
|     :param output_config_filename: mcs output config path, defaults to None
 | |
|     :type output_config_filename: Optional[str], optional
 | |
|     :param deactivate_only: indicates whether the node is being removed
 | |
|                             completely from the cluster, or whether it has gone
 | |
|                             offline and should still be monitored in case it
 | |
|                             comes back.
 | |
|                             Note!  this does not pick a new primary node,
 | |
|                             use the move_primary_node() fcn to change that.,
 | |
|                             defaults to True
 | |
|     :type deactivate_only: bool, optional
 | |
|     :param use_rebalance_dbroots: rebalance dbroots or not, defaults to True
 | |
|     :type use_rebalance_dbroots: bool, optional
 | |
|     """
 | |
|     node_config = NodeConfig()
 | |
|     c_root = node_config.get_current_config_root(input_config_filename)
 | |
| 
 | |
|     try:
 | |
|         active_nodes = helpers.get_active_nodes(input_config_filename)
 | |
| 
 | |
|         if len(active_nodes) > 1:
 | |
|             pm_num = _remove_node_from_PMS(c_root, node)
 | |
|             _remove_WES(c_root, pm_num)
 | |
|             _remove_DBRM_Worker(c_root, node)
 | |
|             _remove_Module_entries(c_root, node)
 | |
|             _remove_from_ExeMgrs(c_root, node)
 | |
| 
 | |
|             if deactivate_only:
 | |
|                 _deactivate_node(c_root, node)
 | |
|             else:
 | |
|                 # TODO: unspecific name, need to think of a better one
 | |
|                 _remove_node(c_root, node)
 | |
| 
 | |
|             if use_rebalance_dbroots:
 | |
|                 _rebalance_dbroots(c_root)
 | |
|                 _move_primary_node(c_root)
 | |
|         else:
 | |
|             # TODO:
 | |
|             #   - IMO undefined behaviour here. Removing one single node
 | |
|             #     in some cases can produces Single node cluster.
 | |
|             #   - No MCS services stopped after removing node.
 | |
|             #
 | |
|             # reproduce:
 | |
|             #   add node by ip then remove using localhost, got one active node
 | |
|             #   127.0.0.1. If use same name got no active nodes but all working
 | |
|             #   mcs processes.
 | |
|             shutil.copyfile(
 | |
|                 CMAPI_SINGLE_NODE_XML,
 | |
|                 output_config_filename
 | |
|                 if output_config_filename
 | |
|                 else input_config_filename
 | |
|             )
 | |
|             return
 | |
| 
 | |
|     except Exception:
 | |
|         logging.error(
 | |
|             'remove_node(): Caught exception, did not modify the config file',
 | |
|             exc_info=True
 | |
|         )
 | |
|         raise
 | |
|     else:
 | |
|         if output_config_filename is None:
 | |
|             node_config.write_config(c_root)
 | |
|         else:
 | |
|             node_config.write_config(c_root, filename=output_config_filename)
 | |
| 
 | |
| 
 | |
| def rebalance_dbroots(
 | |
|     input_config_filename: Optional[str] = None,
 | |
|     output_config_filename: Optional[str] = None
 | |
| ) -> None:
 | |
|     """Rebalance dbroots between nodes.
 | |
| 
 | |
|     :param input_config_filename: input mcs config path, defaults to None
 | |
|     :type input_config_filename: Optional[str], optional
 | |
|     :param output_config_filename: oputput mcs config path, defaults to None
 | |
|     :type output_config_filename: Optional[str], optional
 | |
|     :rases: Exception if error happens while rebalancing.
 | |
|     """
 | |
|     node_config = NodeConfig()
 | |
|     if input_config_filename is None:
 | |
|         c_root = node_config.get_current_config_root()
 | |
|     else:
 | |
|         c_root = node_config.get_current_config_root(
 | |
|             config_filename=input_config_filename
 | |
|         )
 | |
| 
 | |
|     try:
 | |
|         _rebalance_dbroots(c_root)
 | |
|     except Exception:
 | |
|         logging.error(
 | |
|             'Caught exception while rebalancing dbroots, did not modify '
 | |
|             'the config file.',
 | |
|             exc_info=True
 | |
|         )
 | |
|         raise
 | |
|     else:
 | |
|         if output_config_filename is None:
 | |
|             node_config.write_config(c_root)
 | |
|         else:
 | |
|             node_config.write_config(c_root, filename=output_config_filename)
 | |
| 
 | |
| # all params are optional.  If node_id is unset, it will add a dbroot but not attach it to a node.
 | |
| # if node_id is set, it will attach the new dbroot to that node.  Node_id should be either
 | |
| # 'pm1' 'PM1' or '1'.  Those three all refer to node 1 as identified by the Module* entries in the
 | |
| # config file.  TBD whether we need a different identifier for the node.  Maybe the hostname instead.
 | |
| #
 | |
| # returns the id of the new dbroot on success
 | |
| # raises an exception on error
 | |
| def add_dbroot(input_config_filename = None, output_config_filename = None, host = None):
 | |
|     node_config = NodeConfig()
 | |
|     if input_config_filename is None:
 | |
|         c_root = node_config.get_current_config_root()
 | |
|     else:
 | |
|         c_root = node_config.get_current_config_root(config_filename = input_config_filename)
 | |
| 
 | |
|     try:
 | |
|         ret = _add_dbroot(c_root, host)
 | |
|     except Exception as e:
 | |
|         logging.error(f"add_dbroot(): Caught exception: '{str(e)}', did not modify the config file")
 | |
|         raise
 | |
| 
 | |
|     if output_config_filename is None:
 | |
|         node_config.write_config(c_root)
 | |
|     else:
 | |
|         node_config.write_config(c_root, filename = output_config_filename)
 | |
|     return ret
 | |
| 
 | |
| def move_primary_node(
 | |
|     input_config_filename=None, output_config_filename=None, **kwargs
 | |
| ):
 | |
|     node_config = NodeConfig()
 | |
|     if input_config_filename is None:
 | |
|         c_root = node_config.get_current_config_root()
 | |
|     else:
 | |
|         c_root = node_config.get_current_config_root(
 | |
|             config_filename=input_config_filename
 | |
|         )
 | |
| 
 | |
|     try:
 | |
|         _move_primary_node(c_root)
 | |
|     except Exception:
 | |
|         logging.error(
 | |
|             'move_primary_node(): did not modify the config file',
 | |
|             exc_info=True
 | |
|         )
 | |
|         raise
 | |
|     else:
 | |
|         if output_config_filename is None:
 | |
|             node_config.write_config(c_root)
 | |
|         else:
 | |
|             node_config.write_config(c_root, filename=output_config_filename)
 | |
| 
 | |
| 
 | |
| def find_dbroot1(root):
 | |
|     smc_node = root.find("./SystemModuleConfig")
 | |
|     pm_count = int(smc_node.find("./ModuleCount3").text)
 | |
|     for pm_num in range(1, pm_count + 1):
 | |
|         dbroot_count = int(smc_node.find(f"./ModuleDBRootCount{pm_num}-3").text)
 | |
|         for dbroot_num in range(1, dbroot_count + 1):
 | |
|             dbroot = smc_node.find(f"./ModuleDBRootID{pm_num}-{dbroot_num}-3").text
 | |
|             if dbroot == "1":
 | |
|                 name = smc_node.find(f"ModuleHostName{pm_num}-1-3").text
 | |
|                 addr = smc_node.find(f"ModuleIPAddr{pm_num}-1-3").text
 | |
|                 return (name, addr)
 | |
|     raise NodeNotFoundException("Could not find dbroot 1 in the list of dbroot assignments!")
 | |
| 
 | |
| 
 | |
| def _move_primary_node(root):
 | |
|     '''
 | |
|     Verify new_primary is in the list of active nodes
 | |
| 
 | |
|     Change ExeMgr1
 | |
|     Change CEJ
 | |
|     Change DMLProc
 | |
|     Change DDLProc
 | |
|     Change Contollernode
 | |
|     Change PrimaryNode
 | |
|     '''
 | |
| 
 | |
|     new_primary = find_dbroot1(root)
 | |
|     logging.info(f"_move_primary_node(): dbroot 1 is assigned to {new_primary}")
 | |
|     active_nodes = root.findall("./ActiveNodes/Node")
 | |
|     found = False
 | |
|     for node in active_nodes:
 | |
|         if node.text in new_primary:
 | |
|             found = True
 | |
|             break
 | |
|     if not found:
 | |
|         raise NodeNotFoundException(f"{new_primary} is not in the list of active nodes")
 | |
| 
 | |
|     root.find("./ExeMgr1/IPAddr").text = new_primary[0]
 | |
|     root.find("./DMLProc/IPAddr").text = new_primary[0]
 | |
|     root.find("./DDLProc/IPAddr").text = new_primary[0]
 | |
|     # keep controllernode as hostname
 | |
|     # b/c if IP used got troubles on a SKYSQL side
 | |
|     # on the other hand if hostname/fqdn used customers with wrong address
 | |
|     # resolving can got troubles on their side
 | |
|     # related issues: MCOL-4804, MCOL-4440, MCOL-5017, DBAAS-7442
 | |
|     root.find("./DBRM_Controller/IPAddr").text = new_primary[0]
 | |
|     root.find("./PrimaryNode").text = new_primary[0]
 | |
| 
 | |
| 
 | |
| def _add_active_node(root, node):
 | |
|     '''
 | |
|     if in inactiveNodes, delete it there
 | |
|     if not in desiredNodes, add it there
 | |
|     if not in activeNodes, add it there
 | |
|     '''
 | |
| 
 | |
|     nodes = root.findall("./DesiredNodes/Node")
 | |
|     found = False
 | |
|     for n in nodes:
 | |
|         if n.text == node:
 | |
|             found = True
 | |
|     if not found:
 | |
|         desired_nodes = root.find("./DesiredNodes")
 | |
|         etree.SubElement(desired_nodes, "Node").text = node
 | |
| 
 | |
|     __remove_helper(root.find("./InactiveNodes"), node)
 | |
| 
 | |
|     active_nodes = root.find("./ActiveNodes")
 | |
|     nodes = active_nodes.findall("./Node")
 | |
|     found = False
 | |
|     for n in nodes:
 | |
|         if n.text == node:
 | |
|             found = True
 | |
|             break
 | |
|     if not found:
 | |
|         etree.SubElement(active_nodes, "Node").text = node
 | |
| 
 | |
| 
 | |
| def __remove_helper(parent_node, node):
 | |
|     nodes = list(parent_node.findall("./Node"))
 | |
|     for n in nodes:
 | |
|         if n.text == node:
 | |
|             parent_node.remove(n)
 | |
| 
 | |
| 
 | |
| def _remove_node(root, node):
 | |
|     '''
 | |
|     remove node from DesiredNodes, InactiveNodes, and ActiveNodes
 | |
|     '''
 | |
| 
 | |
|     for n in (root.find("./DesiredNodes"), root.find("./InactiveNodes"), root.find("./ActiveNodes")):
 | |
|         __remove_helper(n, node)
 | |
| 
 | |
| 
 | |
| # This moves a node from ActiveNodes to InactiveNodes
 | |
| def _deactivate_node(root, node):
 | |
|     __remove_helper(root.find("./ActiveNodes"), node)
 | |
|     inactive_nodes = root.find("./InactiveNodes")
 | |
|     etree.SubElement(inactive_nodes, "Node").text = node
 | |
| 
 | |
| 
 | |
| def _add_dbroot(root, host) -> int:
 | |
|     """Add a dbroot to the system.
 | |
| 
 | |
|     Attach it to node_id if it's specified.
 | |
|     Increment the nextdbrootid.
 | |
| 
 | |
|     :param root: xml config root
 | |
|     :type root: xml.Tree.ElementTree.Element
 | |
|     :param host: host
 | |
|     :type host: any?
 | |
|     :raises NodeNotFoundException: if node not in a cluster
 | |
|     :return: Added dbroot number
 | |
|     :rtype: int
 | |
|     """
 | |
|     sysconf_node = root.find('./SystemConfig')
 | |
|     dbroot_count_node = sysconf_node.find('./DBRootCount')
 | |
|     dbroot_count = int(dbroot_count_node.text)
 | |
|     dbroot_count += 1
 | |
|     dbroot_count_node.text = str(dbroot_count)
 | |
| 
 | |
|     next_dbroot_node = root.find('./NextDBRootId')
 | |
|     next_dbroot_id = int(next_dbroot_node.text)
 | |
| 
 | |
|     # Use DBRoot path from Columnstore.xml
 | |
|     dbroot1_path = sysconf_node.find('./DBRoot1')
 | |
|     if dbroot1_path is not None:
 | |
|         dbroots_path = os.path.dirname(dbroot1_path.text)
 | |
|     else:
 | |
|         dbroots_path = MCS_DATA_PATH
 | |
| 
 | |
|     etree.SubElement(
 | |
|         sysconf_node, f'DBRoot{next_dbroot_id}'
 | |
|     ).text = os.path.join(dbroots_path, f'data{next_dbroot_id}')
 | |
| 
 | |
|     current_dbroot_id = next_dbroot_id
 | |
| 
 | |
|     # find an unused dbroot id from 1-99
 | |
|     for i in range(1, 100):
 | |
|         if sysconf_node.find(f'./DBRoot{i}') is None:
 | |
|             next_dbroot_id = i
 | |
|             break
 | |
|     next_dbroot_node.text = str(next_dbroot_id)
 | |
| 
 | |
|     if host is None:
 | |
|         return current_dbroot_id
 | |
| 
 | |
|     # Attach it to the specified node
 | |
| 
 | |
|     # get the existing dbroot info for pm X
 | |
|     smc_node = root.find('./SystemModuleConfig')
 | |
| 
 | |
|     # find the node id we're trying to add to
 | |
|     mod_count = int(smc_node.find('./ModuleCount3').text)
 | |
|     node_id = 0
 | |
|     for i in range(1, mod_count+1):
 | |
|         ip_addr = smc_node.find(f'./ModuleIPAddr{i}-1-3').text
 | |
|         hostname = smc_node.find(f'./ModuleHostName{i}-1-3').text
 | |
|         if host == ip_addr or host == hostname:
 | |
|             node_id = i
 | |
|             break
 | |
|     if node_id == 0:
 | |
|         raise NodeNotFoundException(
 | |
|             f'Host {host} is not currently part of the cluster'
 | |
|         )
 | |
| 
 | |
|     dbroot_count_node = smc_node.find(f'./ModuleDBRootCount{node_id}-3')
 | |
|     dbroot_count = int(dbroot_count_node.text)
 | |
|     dbroot_count += 1
 | |
|     etree.SubElement(
 | |
|         smc_node, f'ModuleDBRootID{node_id}-{dbroot_count}-3'
 | |
|     ).text = str(current_dbroot_id)
 | |
|     dbroot_count_node.text = str(dbroot_count)
 | |
|     return current_dbroot_id
 | |
| 
 | |
| 
 | |
| # check if the node is the master, maxscale might have chose new master
 | |
| # so use CEJ User to check via mariadb if the node has slave connections
 | |
| def is_master():
 | |
|     node_config = NodeConfig()
 | |
|     root = node_config.get_current_config_root()
 | |
|     host, port, username, password = helpers.get_cej_info(root)
 | |
| 
 | |
|     if username is None:
 | |
|         return False
 | |
| 
 | |
|     cmd = (
 | |
|         f"/usr/bin/mariadb -h '{host}' "
 | |
|                          f"-P '{port}' "
 | |
|                          f"-u '{username}' "
 | |
|                          f"--password='{password}' "
 | |
|                           "-sN -e "
 | |
|                           "\"SELECT COUNT(1) AS slave_threads "
 | |
|                           "FROM information_schema.PROCESSLIST "
 | |
|                           "WHERE USER = 'system user' "
 | |
|                           "AND COMMAND LIKE 'Slave%';\""
 | |
|     )
 | |
| 
 | |
|     ret = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True)
 | |
|     if ret.returncode == 0:
 | |
|         response = ret.stdout.decode("utf-8").strip()
 | |
|         # Primary will have no slave_threads
 | |
|         if response == '0':
 | |
|             return True
 | |
|         else:
 | |
|             return False
 | |
|     return None
 | |
| 
 | |
| 
 | |
| def unassign_dbroot1(root):
 | |
|     smc_node = root.find("./SystemModuleConfig")
 | |
|     pm_count = int(smc_node.find("./ModuleCount3").text)
 | |
|     owner_id = 0
 | |
|     for i in range(1, pm_count + 1):
 | |
|         dbroot_count_node = smc_node.find(f"./ModuleDBRootCount{i}-3")
 | |
|         dbroot_count = int(dbroot_count_node.text)
 | |
|         dbroot_list = []
 | |
|         for j in range(1, dbroot_count + 1):
 | |
|             dbroot = smc_node.find(f"./ModuleDBRootID{i}-{j}-3").text
 | |
|             if dbroot == "1":
 | |
|                 owner_id = i                  # this node has dbroot 1
 | |
|             else:
 | |
|                 dbroot_list.append(dbroot)    # the dbroot assignments to keep
 | |
|         if owner_id != 0:
 | |
|             break
 | |
|     if owner_id == 0:
 | |
|         return     # dbroot 1 is already unassigned (primary node must have gone down)
 | |
| 
 | |
|     # remove the dbroot entries for node owner_id
 | |
|     for i in range(1, dbroot_count + 1):
 | |
|         doomed_node = smc_node.find(f"./ModuleDBRootID{owner_id}-{i}-3")
 | |
|         smc_node.remove(doomed_node)
 | |
|     # create the new dbroot entries
 | |
|     dbroot_count_node.text = str(len(dbroot_list))
 | |
|     i = 1
 | |
|     for dbroot in dbroot_list:
 | |
|         etree.SubElement(smc_node, f"ModuleDBRootID{owner_id}-{i}-3").text = dbroot
 | |
|         i += 1
 | |
| 
 | |
| 
 | |
| def _rebalance_dbroots(root, test_mode=False):
 | |
|     # TODO: add code to detect whether we are using shared storage or not.  If not, exit
 | |
|     # without doing anything.
 | |
| 
 | |
|     '''
 | |
|     this will be a pita
 | |
|     identify unassigned dbroots
 | |
|     assign those to the node with the fewest dbroots
 | |
| 
 | |
|     then,
 | |
|     id the nodes with the most dbroots and the least dbroots
 | |
|     when most - least <= 1, we're done
 | |
|     else, move a dbroot from the node with the most to the one with the least
 | |
| 
 | |
|     Not going to try to be clever about the alg.  We're dealing with small lists.
 | |
|     Aiming for simplicity and comprehensibility.
 | |
|     '''
 | |
| 
 | |
|     '''
 | |
|     Borderline hack here.  We are going to remove dbroot1 from its current host so that
 | |
|     it will always look for the current replication master and always resolve the discrepancy
 | |
|     between what maxscale and what cmapi choose for the primary/master node.
 | |
| 
 | |
|     We know of 2 constraints around primary node selection.
 | |
|         1) dbroot 1 has to be assigned to the primary node b/c controllernode and possibly
 | |
|            other processes try to access data1 directly
 | |
|         2) The primary node has to be the same as the master replication node chosen by
 | |
|            Maxscale b/c there is a schema sync issue
 | |
| 
 | |
|     Right now the code is doing this because we discovered these restrictions late in the dev
 | |
|     process:
 | |
|         1) unassign dbroot 1 to force new primary node selection
 | |
|         2) look for the master repl node
 | |
|         3) put dbroot 1 on it
 | |
|         4) look for dbroot 1
 | |
|         5) make it the primary node
 | |
| 
 | |
|     Once we are done with the constraint discovery process, we should refactor this.
 | |
|     '''
 | |
|     unassign_dbroot1(root)
 | |
| 
 | |
|     current_mapping = get_current_dbroot_mapping(root)
 | |
|     sysconf_node = root.find("./SystemConfig")
 | |
| 
 | |
|     # There can be holes in the dbroot numbering, so can't just scan from [1-dbroot_count]
 | |
|     # Going to scan from 1-99 instead.
 | |
|     existing_dbroots = []
 | |
|     for num in range(1, 100):
 | |
|         node = sysconf_node.find(f"./DBRoot{num}")
 | |
|         if node is not None:
 | |
|             existing_dbroots.append(num)
 | |
| 
 | |
|     # assign the unassigned dbroots
 | |
|     unassigned_dbroots = set(existing_dbroots) - set(current_mapping[0])
 | |
| 
 | |
|     '''
 | |
|     If dbroot 1 is in the unassigned list, then we need to put it on the node that will be the next
 | |
|     primary node.  Need to choose the same node as maxscale here.  For now, we will wait until
 | |
|     maxscale does the replication reconfig, then choose the new master.  Later,
 | |
|     we will choose the node using the same method that maxscale does to avoid
 | |
|     the need to go through the mariadb client.
 | |
| 
 | |
|     If this process goes on longer than 1 min, then we will assume there is no maxscale,
 | |
|     so this should choose where dbroot 1 should go itself.
 | |
|     '''
 | |
|     if 1 in unassigned_dbroots:
 | |
|         logging.info("Waiting for Maxscale to choose the new repl master...")
 | |
|         smc_node = root.find("./SystemModuleConfig")
 | |
|         # Maybe iterate over the list of ModuleHostName tags instead
 | |
|         pm_count = int(smc_node.find("./ModuleCount3").text)
 | |
|         found_master = False
 | |
|         final_time = datetime.datetime.now() + datetime.timedelta(seconds = 30)
 | |
| 
 | |
|         # skip this if in test mode.
 | |
|         retry = True
 | |
|         while not found_master and datetime.datetime.now() < final_time and not test_mode:
 | |
|             for node_num in range(1, pm_count + 1):
 | |
|                 node_ip = smc_node.find(f"./ModuleIPAddr{node_num}-1-3").text
 | |
|                 node_name = smc_node.find(f"./ModuleHostName{node_num}-1-3").text
 | |
|                 if pm_count == 1:
 | |
|                     found_master = True
 | |
|                 else:
 | |
|                     cfg_parser = helpers.get_config_parser(CMAPI_CONF_PATH)
 | |
|                     key = helpers.get_current_key(cfg_parser)
 | |
|                     version = helpers.get_version()
 | |
|                     headers = {'x-api-key': key}
 | |
|                     url = f"https://{node_ip}:8640/cmapi/{version}/node/new_primary"
 | |
|                     try:
 | |
|                         r = get_traced_session().request(
 | |
|                             'GET', url, verify=False, headers=headers, timeout=10
 | |
|                         )
 | |
|                         r.raise_for_status()
 | |
|                         r = r.json()
 | |
|                         is_primary = r['is_primary']
 | |
|                         if is_primary is None:
 | |
|                             # neither True nor False
 | |
|                             # possible node is not ready, leave retry as-is
 | |
|                             pass
 | |
|                         elif is_primary:
 | |
|                             found_master = True
 | |
|                     except requests.exceptions.Timeout:
 | |
|                         # timed out
 | |
|                         # possible node is not ready, leave retry as-is
 | |
|                         pass
 | |
|                     except Exception as e:
 | |
|                         retry = False
 | |
| 
 | |
|                 if not found_master:
 | |
|                     if not retry:
 | |
|                         logging.info("There was an error retrieving replication master")
 | |
|                         break
 | |
|                     else:
 | |
|                         continue
 | |
| 
 | |
|                 # assign dbroot 1 to this node, put at the front of the list
 | |
|                 current_mapping[node_num].insert(0, 1)
 | |
|                 unassigned_dbroots.remove(1)
 | |
|                 logging.info(f"The new replication master is {node_name}")
 | |
|                 break
 | |
|             if not found_master:
 | |
|                 logging.info("New repl master has not been chosen yet")
 | |
|                 time.sleep(1)
 | |
|         if not found_master:
 | |
|             logging.info("Maxscale has not reconfigured repl master, continuing...")
 | |
| 
 | |
|     for dbroot in unassigned_dbroots:
 | |
|         (_min, min_index) = _find_min_max_length(current_mapping)[0]
 | |
|         if dbroot != 1:
 | |
|             current_mapping[min_index].append(dbroot)
 | |
|         else:
 | |
|             # make dbroot 1 move only if the new node goes down by putting it at the front of the list
 | |
|             current_mapping[min_index].insert(0, dbroot)
 | |
| 
 | |
|     # balance the distribution
 | |
|     ((_min, min_index), (_max, max_index)) = _find_min_max_length(current_mapping)
 | |
|     while _max - _min > 1:
 | |
|         current_mapping[min_index].append(current_mapping[max_index].pop(-1))
 | |
|         ((_min, min_index), (_max, max_index)) = _find_min_max_length(current_mapping)
 | |
| 
 | |
|     # write the new mapping
 | |
|     sysconf_node = root.find("./SystemModuleConfig")
 | |
|     for i in range(1, len(current_mapping)):
 | |
|         dbroot_count_node = sysconf_node.find(f"./ModuleDBRootCount{i}-3")
 | |
|         # delete the original assignments for node i
 | |
|         for dbroot_num in range(1, int(dbroot_count_node.text) + 1):
 | |
|             old_node = sysconf_node.find(f"./ModuleDBRootID{i}-{dbroot_num}-3")
 | |
|             sysconf_node.remove(old_node)
 | |
| 
 | |
|         # write the new assignments for node i
 | |
|         dbroot_count_node.text = str(len(current_mapping[i]))
 | |
|         for dbroot_num in range(len(current_mapping[i])):
 | |
|             etree.SubElement(sysconf_node, f"ModuleDBRootID{i}-{dbroot_num+1}-3").text = str(current_mapping[i][dbroot_num])
 | |
| 
 | |
| 
 | |
| # returns ((min, index-of-min), (max, index-of-max))
 | |
| def _find_min_max_length(mappings):
 | |
|     _min = 100
 | |
|     min_index = -1
 | |
|     _max = -1
 | |
|     max_index = -1
 | |
|     for i in range(1, len(mappings)):
 | |
|         this_len = len(mappings[i])
 | |
|         if this_len < _min:
 | |
|             _min = this_len
 | |
|             min_index = i
 | |
|         if this_len > _max:
 | |
|             _max = this_len
 | |
|             max_index = i
 | |
|     return ((_min, min_index), (_max, max_index))
 | |
| 
 | |
| 
 | |
| # returns a list indexed by node_num, where the value is a list of dbroot ids (ints)
 | |
| # so, list[1] == [1, 2, 3] would mean that node 1 has dbroots 1, 2, & 3.
 | |
| # To align the list with node IDs, element 0 is a list with all of the assigned dbroots
 | |
| def get_current_dbroot_mapping(root):
 | |
|     '''
 | |
|     get the current node count
 | |
|     iterate over the ModuleDBRootIDX-Y-3 entries to build the mapping
 | |
|     '''
 | |
| 
 | |
|     smc_node = root.find("./SystemModuleConfig")
 | |
|     node_count = int(smc_node.find("./ModuleCount3").text)
 | |
|     current_mapping = [[]]
 | |
| 
 | |
|     for i in range(1, node_count + 1):
 | |
|         dbroot_count = int(smc_node.find(f"./ModuleDBRootCount{i}-3").text)
 | |
|         dbroots_on_this_node = []
 | |
|         for dbroot_num in range(1, dbroot_count + 1):
 | |
|             dbroot_id = int(smc_node.find(f"./ModuleDBRootID{i}-{dbroot_num}-3").text)
 | |
|             dbroots_on_this_node.append(dbroot_id)
 | |
|             current_mapping[0].append(dbroot_id)
 | |
|         current_mapping.append(dbroots_on_this_node)
 | |
| 
 | |
|     return current_mapping
 | |
| 
 | |
| 
 | |
| def _remove_Module_entries(root, node):
 | |
|     '''
 | |
|         figure out which module_id node is
 | |
|         store info from the other modules
 | |
|             ModuleIPAddr
 | |
|             ModuleHostName
 | |
|             ModuleDBRootCount
 | |
|             ModuleDBRootIDs
 | |
|         delete all of those tags
 | |
|         write new versions
 | |
|         write new ModuleCount3 value
 | |
|         write new NextNodeID
 | |
|     '''
 | |
|     smc_node = root.find("./SystemModuleConfig")
 | |
|     mod_count_node = smc_node.find("./ModuleCount3")
 | |
|     current_module_count = int(mod_count_node.text)
 | |
|     node_module_id = 0
 | |
| 
 | |
|     for num in range(1, current_module_count + 1):
 | |
|         m_ip_node = smc_node.find(f"./ModuleIPAddr{num}-1-3")
 | |
|         m_name_node = smc_node.find(f"./ModuleHostName{num}-1-3")
 | |
|         if node == m_ip_node.text or node == m_name_node.text:
 | |
|             node_module_id = num
 | |
|             break
 | |
|     if node_module_id == 0:
 | |
|         logging.warning(f"remove_module_entries(): did not find node {node} in the Module* entries of the config file")
 | |
|         return
 | |
| 
 | |
|     # Get the existing info except for node, remove the existing nodes
 | |
|     new_module_info = []
 | |
|     for num in range(1, current_module_count + 1):
 | |
|         m_ip_node = smc_node.find(f"./ModuleIPAddr{num}-1-3")
 | |
|         m_name_node = smc_node.find(f"./ModuleHostName{num}-1-3")
 | |
|         dbrc_node = smc_node.find(f"./ModuleDBRootCount{num}-3")
 | |
|         dbr_count = int(dbrc_node.text)
 | |
|         smc_node.remove(dbrc_node)
 | |
|         dbroots = []
 | |
|         for i in range(1, dbr_count + 1):
 | |
|             dbr_node = smc_node.find(f"./ModuleDBRootID{num}-{i}-3")
 | |
|             dbroots.append(dbr_node.text)
 | |
|             smc_node.remove(dbr_node)
 | |
| 
 | |
|         if node != m_ip_node.text and node != m_name_node.text:
 | |
|             new_module_info.append((m_ip_node.text, m_name_node.text, dbroots))
 | |
| 
 | |
|         smc_node.remove(m_ip_node)
 | |
|         smc_node.remove(m_name_node)
 | |
| 
 | |
|     # Regenerate these entries
 | |
|     current_module_count = len(new_module_info)
 | |
|     for num in range(1, current_module_count + 1):
 | |
|         (ip, name, dbroots) = new_module_info[num - 1]
 | |
|         etree.SubElement(smc_node, f"ModuleIPAddr{num}-1-3").text = ip
 | |
|         etree.SubElement(smc_node, f"ModuleHostName{num}-1-3").text = name
 | |
|         etree.SubElement(smc_node, f"ModuleDBRootCount{num}-3").text = str(len(dbroots))
 | |
|         for i in range(1, len(dbroots) + 1):
 | |
|             etree.SubElement(smc_node, f"ModuleDBRootID{num}-{i}-3").text = dbroots[i - 1]
 | |
| 
 | |
|     # update NextNodeId and ModuleCount3
 | |
|     nni_node = root.find("./NextNodeId")
 | |
|     nni_node.text = str(current_module_count + 1)
 | |
|     mod_count_node.text = str(current_module_count)
 | |
| 
 | |
| 
 | |
| def _remove_WES(root, pm_num):
 | |
|     '''
 | |
|     Avoid gaps in pm numbering where possible.
 | |
|     Read the existing pmX_WriteEngineServer entries except where X = pm_num,
 | |
|     Delete them,
 | |
|     Write new entries
 | |
| 
 | |
|     Not sure yet, but I believe for the dbroot -> PM mapping to work, the node # in the Module
 | |
|     entries has to match the pm # in other fields.  They should be written consistently and intact
 | |
|     already, but this is a guess at this point.  Short-term, a couple options. 1) Construct an argument
 | |
|     that they are maintained consistently right now.  2) Add consistency checking logic, and on a mismatch,
 | |
|     remove all affected sections and reconstruct them with add_node() and add_dbroot().
 | |
| 
 | |
|     Longer term, make the config file less stupid.  Ex:
 | |
|     <PrimProcPort>
 | |
|     <ExeMgrPort>
 | |
|     ...
 | |
|     <PrimaryNode>hostname<PrimaryNode>
 | |
|     <PM1>
 | |
|         <IPAddr>hostname-or-ipv4</IPAddr>
 | |
|         <DBRoots>1,2,3</DBRoots>
 | |
|     </PM1>
 | |
|     ...
 | |
| 
 | |
| 
 | |
|     ^^ The above is all we need to figure out where everything is and what each node should run
 | |
|     '''
 | |
| 
 | |
|     pm_count = int(root.find("./PrimitiveServers/Count").text)
 | |
|     pms = []
 | |
|     # This is a bit of a hack.  We already decremented the pm count; need to add 2 to this loop instead of 1
 | |
|     # to scan the full range of these entries [1, pm_count + 2)
 | |
|     for i in range(1, pm_count + 2):
 | |
|         node = root.find(f"./pm{i}_WriteEngineServer")
 | |
|         if node is not None:
 | |
|             if i != pm_num:
 | |
|                 pms.append(node.find("./IPAddr").text)
 | |
|             root.remove(node)
 | |
| 
 | |
|     # Write the new entries
 | |
|     for i in range(1, len(pms) + 1):
 | |
|         wes = etree.SubElement(root, f"pm{i}_WriteEngineServer")
 | |
|         etree.SubElement(wes, "IPAddr").text = pms[i - 1]
 | |
|         etree.SubElement(wes, "Port").text = "8630"
 | |
| 
 | |
| 
 | |
| def _remove_DBRM_Worker(root, node):
 | |
|     '''
 | |
|     regenerate the DBRM_Worker list without node
 | |
|     update NumWorkers
 | |
|     '''
 | |
| 
 | |
|     num = 1
 | |
|     workers = []
 | |
|     while True:
 | |
|         w_node = root.find(f"./DBRM_Worker{num}")
 | |
|         if w_node is not None:
 | |
|             addr = w_node.find("./IPAddr").text
 | |
|             if addr != "0.0.0.0" and addr != node:
 | |
|                 workers.append(addr)
 | |
|             root.remove(w_node)
 | |
|         else:
 | |
|             break
 | |
|         num += 1
 | |
| 
 | |
|     for num in range(len(workers)):
 | |
|         w_node = etree.SubElement(root, f"DBRM_Worker{num+1}")
 | |
|         etree.SubElement(w_node, "IPAddr").text = workers[num]
 | |
|         etree.SubElement(w_node, "Port").text = "8700"
 | |
|     root.find("./DBRM_Controller/NumWorkers").text = str(len(workers))
 | |
| 
 | |
| 
 | |
| def _remove_from_ExeMgrs(root, node):
 | |
|     """Remove the corresponding ExeMgrX section from the config."""
 | |
|     num = 1
 | |
|     ems = []
 | |
|     # TODO: use loop by nodes count instead of "while True"
 | |
|     while True:
 | |
|         em_node = root.find(f"./ExeMgr{num}")
 | |
|         if em_node is not None:
 | |
|             addr = em_node.find("./IPAddr").text
 | |
|             if addr != "0.0.0.0" and addr != node:
 | |
|                 ems.append(addr)
 | |
|             root.remove(em_node)
 | |
|         else:
 | |
|             break
 | |
|         num += 1
 | |
| 
 | |
|     for num in range(len(ems)):
 | |
|         em_node = etree.SubElement(root, f"ExeMgr{num+1}")
 | |
|         etree.SubElement(em_node, "IPAddr").text = ems[num]
 | |
|         etree.SubElement(em_node, "Port").text = "8601"
 | |
|         etree.SubElement(em_node, "Module").text = "unassigned"
 | |
| 
 | |
| 
 | |
| def _remove_node_from_PMS(root, node):
 | |
|     '''
 | |
|     find the PM number we're removing
 | |
|     replace existing PMS entries
 | |
|     '''
 | |
|     connections_per_pm = int(
 | |
|         root.find("./PrimitiveServers/ConnectionsPerPrimProc").text
 | |
|     )
 | |
|     count_node = root.find("./PrimitiveServers/Count")
 | |
|     pm_count = int(count_node.text)
 | |
| 
 | |
|     # get current list of PMs to avoid changing existing assignments
 | |
|     pm_list = []
 | |
|     pm_num = 0
 | |
|     for num in range(1, pm_count+1):
 | |
|         addr = root.find(f"./PMS{num}/IPAddr")
 | |
|         if addr.text != node:
 | |
|             pm_list.append(addr.text)
 | |
|         else:
 | |
|             pm_num = num
 | |
| 
 | |
|     if pm_num == 0:
 | |
|         return 0
 | |
| 
 | |
|     # remove the existing PMS entries
 | |
|     num = 1
 | |
|     while True:
 | |
|         pmsnode = root.find(f"./PMS{num}")
 | |
|         if pmsnode is not None:
 | |
|             root.remove(pmsnode)
 | |
|         else:
 | |
|             break
 | |
|         num += 1
 | |
| 
 | |
|     # generate new list
 | |
|     pm_count = len(pm_list)
 | |
|     count_node.text = str(pm_count)
 | |
|     pm_list.append(node)
 | |
|     for num in range(pm_count*connections_per_pm):
 | |
|         pmsnode = etree.SubElement(root, f"PMS{num+1}")
 | |
|         addrnode = etree.SubElement(pmsnode, "IPAddr")
 | |
|         addrnode.text = pm_list[num % pm_count]
 | |
|         portnode = etree.SubElement(pmsnode, "Port")
 | |
|         portnode.text = PMS_NODE_PORT
 | |
| 
 | |
|     return pm_num
 | |
| 
 | |
| def _add_Module_entries(root, node: str) -> None:
 | |
|     '''
 | |
|     get new node id
 | |
|     add ModuleIPAddr, ModuleHostName, ModuleDBRootCount (don't set ModuleDBRootID* here)
 | |
|     set ModuleCount3 and NextNodeId
 | |
|     no need to rewrite existing entries for this fcn
 | |
|     '''
 | |
| 
 | |
|     # XXXPAT: No guarantee these are the values used in the rest of the system.
 | |
|     # TODO: what should we do with complicated network configs where node has
 | |
|     #       several ips and\or several hostnames
 | |
|     ip4, hostname = NetworkManager.resolve_ip_and_hostname(node)
 | |
|     logging.info(f'Using ip address {ip4} and hostname {hostname}')
 | |
| 
 | |
|     smc_node = root.find('./SystemModuleConfig')
 | |
|     mod_count_node = smc_node.find('./ModuleCount3')
 | |
|     nnid_node = root.find('./NextNodeId')
 | |
|     nnid = int(nnid_node.text)
 | |
|     current_module_count = int(mod_count_node.text)
 | |
| 
 | |
|     # look for existing entries and fix if they exist
 | |
|     for i in range(1, nnid):
 | |
|         curr_ip_node = smc_node.find(f'./ModuleIPAddr{i}-1-3')
 | |
|         curr_name_node = smc_node.find(f'./ModuleHostName{i}-1-3')
 | |
|         # TODO: NETWORK: seems it's useless even in very rare cases.
 | |
|         #       Even simplier to rewrite resolved IP an Hostname
 | |
|         # if we find a matching IP address, but it has a different hostname,
 | |
|         # update the addr
 | |
|         if curr_ip_node is not None and curr_ip_node.text == ip4:
 | |
|             logging.info(f'Found ip address already at ModuleIPAddr{i}-1-3')
 | |
|             if curr_name_node != hostname:
 | |
|                 new_ip_addr = NetworkManager.resolve_hostname_to_ip(
 | |
|                     curr_name_node
 | |
|                 )
 | |
|                 logging.info(
 | |
|                     'Hostname doesn\'t match, updating address to '
 | |
|                     f'{new_ip_addr!r}'
 | |
|                 )
 | |
|                 smc_node.find(f'ModuleHostName{i}-1-3').text = new_ip_addr
 | |
|             else:
 | |
|                 logging.info('No update for ModuleIPAddr{i}-1-3 is necessary')
 | |
|                 return
 | |
| 
 | |
|         # if we find a matching hostname, update the ip addr
 | |
|         if curr_name_node is not None and curr_name_node.text == hostname:
 | |
|             logging.info(
 | |
|                 f'Found existing entry for {hostname!r}, updating its '
 | |
|                 f'address to {ip4!r}'
 | |
|             )
 | |
|             curr_ip_node.text = ip4
 | |
|             return
 | |
| 
 | |
|     etree.SubElement(smc_node, f'ModuleIPAddr{nnid}-1-3').text = ip4
 | |
|     etree.SubElement(smc_node, f'ModuleHostName{nnid}-1-3').text = hostname
 | |
|     etree.SubElement(smc_node, f'ModuleDBRootCount{nnid}-3').text = '0'
 | |
|     mod_count_node.text = str(current_module_count + 1)
 | |
|     nnid_node.text = str(nnid + 1)
 | |
| 
 | |
| 
 | |
| def _add_WES(root, pm_num, node):
 | |
|     wes_node = etree.SubElement(root, f"pm{pm_num}_WriteEngineServer")
 | |
|     etree.SubElement(wes_node, "IPAddr").text = node
 | |
|     etree.SubElement(wes_node, "Port").text = "8630"
 | |
| 
 | |
| 
 | |
| def _add_DBRM_Worker(root, node):
 | |
|     '''
 | |
|     find the highest numbered DBRM_Worker entry, or one that isn't used atm
 | |
|     prune unused entries
 | |
|     add this node at the end
 | |
|     '''
 | |
| 
 | |
|     num = 1
 | |
|     already_exists = False
 | |
|     while True:
 | |
|         e_node = root.find(f"./DBRM_Worker{num}")
 | |
|         if e_node is None:
 | |
|             break
 | |
|         addr = e_node.find("./IPAddr").text
 | |
|         if addr == "0.0.0.0":
 | |
|             root.remove(e_node)
 | |
|         elif addr == node:
 | |
|             logging.info(f"_add_DBRM_Worker(): node {node} is already a worker node")
 | |
|             already_exists = True
 | |
|         num += 1
 | |
| 
 | |
|     if already_exists:
 | |
|         return
 | |
| 
 | |
|     num_workers_node = root.find("./DBRM_Controller/NumWorkers")
 | |
|     num_workers = int(num_workers_node.text) + 1
 | |
|     brm_node = etree.SubElement(root, f"DBRM_Worker{num_workers}")
 | |
|     etree.SubElement(brm_node, "Port").text = "8700"
 | |
|     etree.SubElement(brm_node, "IPAddr").text = node
 | |
|     num_workers_node.text = str(num_workers)
 | |
| 
 | |
| 
 | |
| def _add_node_to_ExeMgrs(root, node):
 | |
|     """Find the highest numbered ExeMgr entry, add this node at the end."""
 | |
|     num = 1
 | |
|     while True:
 | |
|         e_node = root.find(f"./ExeMgr{num}")
 | |
|         if e_node is None:
 | |
|             break
 | |
|         addr = e_node.find("./IPAddr")
 | |
|         if addr.text == node:
 | |
|             logging.info(f"_add_node_to_ExeMgrs(): node {node} already exists")
 | |
|             return
 | |
|         num += 1
 | |
|     e_node = etree.SubElement(root, f"ExeMgr{num}")
 | |
|     addr_node = etree.SubElement(e_node, "IPAddr")
 | |
|     addr_node.text = node
 | |
|     port_node = etree.SubElement(e_node, "Port")
 | |
|     port_node.text = EXEMGR_NODE_PORT
 | |
| 
 | |
| 
 | |
| def _add_node_to_PMS(root, node):
 | |
|     '''
 | |
|     the PMS section is a sequential list of connections descriptions
 | |
| 
 | |
|     For example, if ConnectionsPerPrimProc is 2, and the Count is 2, then
 | |
|     the PMS entries look like this:
 | |
| 
 | |
|     PMS1 = connection 1 of PM 1
 | |
|     PMS2 = connection 1 of PM 2
 | |
| 
 | |
|     The easiest way to add a node is probably to generate a whole new list.
 | |
|     '''
 | |
|     count_node = root.find('./PrimitiveServers/Count')
 | |
|     pm_count = int(count_node.text)
 | |
| 
 | |
|     # get current list of PMs to avoid changing existing assignments
 | |
|     pm_list = {}
 | |
|     new_pm_num = 0
 | |
|     for num in range(1, pm_count+1):
 | |
|         addr = root.find(f'./PMS{num}/IPAddr')
 | |
|         if addr.text == node and new_pm_num == 0:
 | |
|             logging.info(f'_add_node_to_PMS(): node {node} already exists')
 | |
|             new_pm_num = num
 | |
|         else:
 | |
|             pm_list[num] = addr.text
 | |
| 
 | |
|     # remove the existing PMS entries
 | |
|     num = 1
 | |
|     while True:
 | |
|         pmsnode = root.find(f'./PMS{num}')
 | |
|         if pmsnode is not None:
 | |
|             root.remove(pmsnode)
 | |
|         else:
 | |
|             break
 | |
|         num += 1
 | |
| 
 | |
|     # generate new list
 | |
|     if new_pm_num == 0:
 | |
|         pm_count += 1
 | |
|         count_node.text = str(pm_count)
 | |
|         pm_list[pm_count] = node
 | |
|         new_pm_num = pm_count
 | |
|     for num in range(pm_count):
 | |
|         pmsnode = etree.SubElement(root, f'PMS{num+1}')
 | |
|         addrnode = etree.SubElement(pmsnode, 'IPAddr')
 | |
|         addrnode.text = pm_list[(num % pm_count) + 1]
 | |
|         portnode = etree.SubElement(pmsnode, 'Port')
 | |
|         portnode.text = PMS_NODE_PORT
 | |
| 
 | |
|     return new_pm_num
 | |
| 
 | |
| def _replace_localhost(root, node):
 | |
|     # if DBRM_Controller/IPAddr is 127.0.0.1 or localhost,
 | |
|     # then replace all instances, else do nothing.
 | |
|     controller_host = root.find('./DBRM_Controller/IPAddr')
 | |
|     if controller_host.text not in LOCALHOSTS:
 | |
|         logging.debug(
 | |
|             'Nothing to replace, DBRM_Controller/IPAddr isn\'t localhost.'
 | |
|         )
 | |
|         return False
 | |
| 
 | |
|     # getaddrinfo returns list of 5-tuples (..., sockaddr)
 | |
|     # use sockaddr to retrieve ip, sockaddr = (address, port) for AF_INET
 | |
|     ipaddr = socket.getaddrinfo(node, 8640, family=socket.AF_INET)[0][-1][0]
 | |
|     # signifies that node is an IP addr already
 | |
|     if ipaddr == node:
 | |
|         # use the primary hostname if given an ip addr
 | |
|         hostname = socket.gethostbyaddr(ipaddr)[0]
 | |
|     else:
 | |
|         hostname = node   # use whatever name they gave us
 | |
|     logging.info(
 | |
|         f'add_node(): replacing 127.0.0.1/localhost with {ipaddr}/{hostname} '
 | |
|         f'as this node\'s name. Be sure {hostname} resolves to {ipaddr} on '
 | |
|          'all other nodes in the cluster.'
 | |
|     )
 | |
| 
 | |
|     nodes_to_reassign = [
 | |
|         n for n in root.findall('.//') if n.text in LOCALHOSTS
 | |
|     ]
 | |
| 
 | |
|     # Host field is contained within CrossEngineSupport User and QueryTele.
 | |
|     # Leave these values as default (will be local IP)
 | |
|     exclude = ['Host']
 | |
|     for n in nodes_to_reassign:
 | |
|         if 'ModuleIPAddr' in n.tag:
 | |
|             n.text = ipaddr
 | |
|         elif 'ModuleHostName' in n.tag:
 | |
|             n.text = hostname
 | |
|         elif n.tag not in exclude:
 | |
|             # if tag is neither ip nor hostname, then save as node
 | |
|             n.text = node
 | |
| 
 | |
|     controller_host.text = hostname # keep controllernode as fqdn
 | |
| 
 | |
|     return True
 | |
| 
 | |
| # New Exception types
 | |
| class NodeNotFoundException(Exception):
 | |
|     pass
 |