diff --git a/oam/install_scripts/mcs-savebrm.py.in b/oam/install_scripts/mcs-savebrm.py.in
index a3a15b6df..bb301bc26 100755
--- a/oam/install_scripts/mcs-savebrm.py.in
+++ b/oam/install_scripts/mcs-savebrm.py.in
@@ -11,6 +11,7 @@ import struct
 import subprocess
 import sys
 import time
+from typing import Optional
 import xml.etree.ElementTree as ET
 from urllib.request import Request, urlopen
 from urllib.error import HTTPError, URLError
@@ -23,6 +24,8 @@ MCS_CONFIG_PATH = os.path.join(MCS_ETC_PATH, 'Columnstore.xml')
 SM_CONFIG_PATH = os.path.join(MCS_ETC_PATH, 'storagemanager.cnf')
 MCS_BIN_DIR = '@ENGINE_BINDIR@'
 SAVEBRM = os.path.join(MCS_BIN_DIR, 'save_brm')
+CLEAR_ALL_SHMEM_LOCKS = os.path.join(MCS_BIN_DIR, 'mcs-shmem-locks')
+CLEAR_ALL_SHMEM_LOCKS_ARGS = '-a'
 EM_FILE_SUFFIX = '_em'
 EM_FILE_SIZE_THRESHOLD = 1000
 FIVE_SECS = 5
@@ -68,7 +71,7 @@ def cmapi_available():
     :return: is CMAPI running or not
     :rtype: bool
     """
-    logging.error('Detecting CMAPI is up and running.')
+    logging.info('Detecting CMAPI is up and running.')
     url = 'https://{}:{}/notfound'.format(LOCALHOST, API_PORT)
     request = Request(method='POST', url=url)
     ctx = get_unverified_context()
@@ -109,7 +112,7 @@ def get_ip_address_by_nic(ifname):
                 )[20:24]
             )
     except Exception as exc:
-        logging.error(
+        logging.debug(
             'Exception while getting IP address of an "{}" interface'.format(
                 ifname
             ),
@@ -128,7 +131,7 @@ def is_primary_fallback(current_hostname):
     :return: is node primary
     :rtype: bool
     """
-    logging.error(
+    logging.info(
         'Current DBRM_Controller/IPAddr is {}'.format(current_hostname)
     )
     hostnames = set()
@@ -139,7 +142,7 @@ def is_primary_fallback(current_hostname):
             hostnames.update([hostnames_3tuple[0], *hostnames_3tuple[1]])
         except:
             pass
-    logging.error('Found hostnames {}.'.format(hostnames))
+    logging.info('Found hostnames {}.'.format(hostnames))
     return current_hostname in LOCALHOSTS or current_hostname in hostnames
 
 
@@ -233,7 +236,7 @@ def clean_up_backup_brm_files(save_brm_dir_path):
     files_to_remove = filenames[NUMBER_OF_FILES_TO_KEEP:]
     for filename in files_to_remove:
         file_path = os.path.join(save_brm_dir_path, filename)
-        logging.error('Clean up {}.'.format(file_path))
+        logging.info('Clean up {}.'.format(file_path))
         try:
             os.remove(file_path)
         except OSError as e:
@@ -318,72 +321,69 @@ def get_save_brm_path_prefix(a_mcs_config_root):
     return get_save_brm_dir_path(a_mcs_config_root) + '/' + BRM_BACKUP_PATH_PART.format(epoch_prefix)
 
 
-def call_save_brm(path):
-    """Calls save_brm first and then tries to call it with local path.
+def call_executable_with_params(executable: str, args: str) -> bool:
+    """Calls executable with args via shell and reports whether it succeeded.
 
-    :param file_path: xml config XML root
-    :rtype: None
+    :param executable: executable to call
+    :param args: command line arguments appended to the executable
+    :return: True if the call exits with 0, False on non-zero exit or OSError
+    :rtype: bool
     """
-    savebrm_cmd = SAVEBRM + ' ' + path
+    executable_w_args: str = executable + ' ' + args
     try:
-        subprocess.check_call(savebrm_cmd, shell=True)
+        subprocess.check_call(executable_w_args, shell=True)
     except subprocess.CalledProcessError as exc:
-        logging.error('The call to {} exits with {}.'.format(savebrm_cmd, exc.returncode))
-        return None
+        logging.error('The call to {} exits with {}.'.format(executable, exc.returncode))
+        return False
     except OSError:
-        logging.error('Os error while calling savebrm', exc_info=True)
-        return None
-    return path
+        logging.error('Os error while calling {}.'.format(executable), exc_info=True)
+        return False
+    return True
 
 
-def call_save_brm_locally(a_mcs_config_root):
-    """Calls save_brm first and then tries to call it with local path.
+def clear_shmem_locks() -> bool:
+    """Clears shmem locks before save_brm call
 
-    :param file_path: xml config XML root
-    :rtype: None
+    :return: True if the mcs-shmem-locks call succeeded, False otherwise
+    :rtype: bool
     """
-    local_path = get_save_brm_path_prefix(a_mcs_config_root)
-    return call_save_brm(local_path)
+    logging.info('Clear shmem read locks.')
+    return call_executable_with_params(CLEAR_ALL_SHMEM_LOCKS, CLEAR_ALL_SHMEM_LOCKS_ARGS)
 
 
-def call_save_brm_with_local_fallback(a_mcs_config_root):
-    """Calls save_brm first and then tries to call it with local path.
+def call_save_brm(path) -> bool:
+    """Calls save_brm with a path.
 
-    :param file_path: xml config XML root
-    :rtype: None
+    :param path: path prefix passed to save_brm
+    :return: True if the save_brm call succeeded, False otherwise
+    :rtype: bool
     """
-    try:
-        subprocess.check_call(SAVEBRM, shell=True)
-    except subprocess.CalledProcessError as exc:
-        logging.error('The primary call to {} exits with {}.'.format(exc.cmd, exc.returncode))
-        backup_path = get_save_brm_path_prefix(a_mcs_config_root)
-        logging.error('Back up BRM files locally to {}.'.format(backup_path))
-        backup_cmd = SAVEBRM + ' ' + backup_path
-        try:
-            subprocess.check_call(backup_cmd, shell=True)
-        except subprocess.CalledProcessError:
-            logging.error('The backup call to {} exits with {}.'.format(exc.cmd, exc.returncode))
-        except OSError:
-            logging.error('Os error while calling savebrm during the backup', exc_info=True)
-
-        sys.exit(1)
-    except OSError:
-        logging.error('Os error while calling savebrm', exc_info=True)
-        sys.exit(1)
+    return call_executable_with_params(SAVEBRM, path)
 
 
 if __name__ == '__main__':
+    # Configure logging to show INFO level messages
+    logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(name)s:%(message)s')
+
     mcs_config_root = get_config_root_from_file(MCS_CONFIG_PATH)
+    if not clear_shmem_locks():
+        logging.error('Exiting with error cleaning locks.')
+        sys.exit(1)
+    em_local_path_prefix = get_save_brm_path_prefix(mcs_config_root)
     # config_root can be None
     if is_node_primary(mcs_config_root):
-        em_local_path_prefix = call_save_brm_locally(mcs_config_root)
-        if not em_local_path_prefix or em_is_empty(em_local_path_prefix):
-            # remove_files_by_prefix_if_exist(em_local_path_prefix)
-            logging.error('Exiting with error.')
+        if not call_save_brm(em_local_path_prefix) or em_is_empty(em_local_path_prefix):
+            logging.error('Exiting with error trying to save BRM locally on primary node.')
             sys.exit(1)
 
         clean_up_backup_brm_files(get_save_brm_dir_path(mcs_config_root))
 
         call_save_brm(DEFAULT_EM_LOCAL_PATH_PREFIX)
+    else:
+        # Node is not primary. Call save_brm locally to save a copy of BRM locally
+        logging.info('Node is not primary. Call save_brm locally')
+        if not call_save_brm(em_local_path_prefix) or em_is_empty(em_local_path_prefix):
+            logging.error('Exiting with error trying to save BRM locally on non-primary node.')
+            sys.exit(1)
 
     sys.exit(0)
diff --git a/versioning/BRM/shmem_locks.cpp b/versioning/BRM/shmem_locks.cpp
index f8a455597..bb0c4f2a0 100644
--- a/versioning/BRM/shmem_locks.cpp
+++ b/versioning/BRM/shmem_locks.cpp
@@ -39,6 +39,16 @@ std::string getShmemLocksList()
   return oss.str();
 }
 
+int resetAllLocks()
+{
+  for (size_t i = 0; i < RWLockNames.size(); ++i)
+  {
+    auto rwlock = RWLock(0x10000 * i);
+    rwlock.reset();
+  }
+  return 0;
+}
+
 int viewLock(uint8_t lockId)
 {
   size_t minLockId = (lockId > 0) ? lockId : 1;
@@ -112,6 +122,7 @@ int main(int argc, char** argv)
   bool write = false;
   bool lock = false;
   bool unlock = false;
+  bool resetAll = false;
 
   po::options_description desc(
       "A tool to operate or view shmem locks. If neither read nor write operation is specified, the tool "
@@ -122,12 +133,14 @@ int main(int argc, char** argv)
 
   // clang-format off
   desc.add_options()("help", "produce help message")
-    ("lock-id,i", po::value(&lockId)->required(), lockid_description.c_str())
+    ("lock-id,i", po::value(&lockId)->default_value(RWLockNames.size()), lockid_description.c_str())
     ("read-lock,r", po::bool_switch(&read)->default_value(false), "Use read lock.")
     ("write-lock,w", po::bool_switch(&write)->default_value(false), "Use write lock.")
     ("lock,l", po::bool_switch(&lock)->default_value(false), "Lock the corresponding shmem lock.")
     ("unlock,u", po::bool_switch(&unlock)->default_value(false), "Unlock the corresponding shmem write lock.")
-    ("debug,d", po::bool_switch(&debug)->default_value(false), "Print extra output.");
+    ("debug,d", po::bool_switch(&debug)->default_value(false), "Print extra output.")
+    ("reset-all,a", po::bool_switch(&resetAll)->default_value(false), "Reset all shmem locks.");
+
   // clang-format on
 
   po::variables_map vm;
@@ -139,12 +152,29 @@ int main(int argc, char** argv)
     return 1;
   }
 
+  conflicting_options(vm, "reset-all", "lock-id");
   conflicting_options(vm, "lock", "unlock");
   conflicting_options(vm, "read-lock", "write-lock");
-  check_value(vm, "lock-id", 0, RWLockNames.size());
+
+  // Validate an explicit lock-id only; resetAll is not written until po::notify(), so query vm instead
+  if (!vm["reset-all"].as<bool>() && !vm["lock-id"].defaulted())
+  {
+    check_value(vm, "lock-id", 0, RWLockNames.size());
+  }
+
+  // Require an explicit lock-id for operations other than reset-all (count() is always 1 with a default)
+  if (!vm["reset-all"].as<bool>() && vm["lock-id"].defaulted())
+  {
+    throw std::logic_error("lock-id is required when not using reset-all");
+  }
 
   po::notify(vm);
 
+  if (resetAll)
+  {
+    return resetAllLocks();
+  }
+
   if (!read && !write)
   {
     return viewLock(lockId);