diff --git a/DEVELOPING.md b/DEVELOPING.md index 81f9beb5d..7a9b18d9f 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -1,3 +1,11 @@ +## DBRM unresponsive timeout + +The master DBRM node previously used a fixed 300s (5 minutes) timeout before forcing read-only mode when a worker didn't respond. This is now configurable via Columnstore.xml only: + +- SystemConfig/DBRMUnresponsiveTimeout (seconds, default 300) + +The value controls how long the master waits for workers to reconfigure/respond after a network error is detected before switching to read-only. See `versioning/BRM/masterdbrmnode.cpp` for details. + This file documents helpful knowledge, commands and setup instructions for better developing flow. ## Logging @@ -18,7 +26,7 @@ std::cout << row.toString() << std::endl; ## Restarting services after a crash -Sometimes, e.g. during debugging, single processes can crash. +Sometimes, e.g. during debugging, single processes can crash. To restart a MCS specific unit process run: @@ -48,7 +56,7 @@ For interaction with storage engines, MariaDB has a template that is basically a Especially during debugging you might end up killing a process, which leads to error messages like: -`ERROR 1815 (HY000): Internal error: MCS-2033: Error occurred when calling system catalog.` +`ERROR 1815 (HY000): Internal error: MCS-2033: Error occurred when calling system catalog.` This error message occurs when the `PrimProc` process is killed, but all other processes continue running and cannot access the system catalog which is served by `PrimProc`. @@ -58,7 +66,7 @@ You can verified that this happened by having a look at all running processes fo ps -axwffu | grep mysql ``` -And restart any service via +And restart any service via ```bash systemctl restart mcs- @@ -85,6 +93,6 @@ Using the provided Vagrantfile the setup of develop VM is as easy as: 1. `MARIA_DB_SERVER_REPOSITORY` and `MCS_REPOSITORY` . These options expect the HTTPS GitHub URL of the referenced repositories. If a build with a fork of the official repos is wanted, this is where the fork URLs should be provided. (For any questions regarding a general build, please refer to the `BUILD.md`). 2. `PROVIDER` . Vagrant allows to configure the underlying VM software used (the so called provider). The current version of the Vagrantfile uses VMWare as a VM provider. VMware provides free licenses for personal use, students and open-source development, otherwise it is a paid service. If you don’t have a license or want to use another provider either way, you can either use the out of the box provided VirtualBox provider or install another provider. Read more about Vagrant VM providers [here](https://developer.hashicorp.com/vagrant/docs/providers). Read more about how to install VMWare as a provider [here](https://developer.hashicorp.com/vagrant/docs/providers/vmware/installation). 3. `BOX` . Vagrant uses boxes to package Vagrant environments. The box needs to match your system and architecture. The easiest way to obtain a a box is to select one from the publicly available, pre-defined boxes at [VagrantCloud](https://app.vagrantup.com/boxes/search). - 4. `MEMSIZE/NUMVCPUS`: Adapt the number of cores and the amount of RAM you want to give your VM. -2. Run `vagrant up` to create and/or start the virtual machine as specified in the `Vagrantfile`. + 4. `MEMSIZE/NUMVCPUS`: Adapt the number of cores and the amount of RAM you want to give your VM. +2. Run `vagrant up` to create and/or start the virtual machine as specified in the `Vagrantfile`. 3. Run `vagrant ssh` to obtain a terminal directly in your VM - or to develop on the virtual machine in your preferred IDE, obtain the ssh config data of the machine with `vagrant ssh-config` and use it to connect. (For even easier connection add the ssh connection data to your `~/.ssh/config` .) \ No newline at end of file diff --git a/cmapi/.gitignore b/cmapi/.gitignore index 4b87c115f..6258c213b 100644 --- a/cmapi/.gitignore +++ b/cmapi/.gitignore @@ -90,4 +90,11 @@ buildinfo.txt # Self-signed certificates cmapi_server/self-signed.crt -cmapi_server/self-signed.key \ No newline at end of file +cmapi_server/self-signed.key +# Comes in handy if you build packages locally +pp +mcs +mcs_aws +mcs_gsutil +_CPack_Packages/ +venv/ \ No newline at end of file diff --git a/cmapi/cmapi_server/SingleNode.xml b/cmapi/cmapi_server/SingleNode.xml index 67c8637bd..cd8c2ce24 100644 --- a/cmapi/cmapi_server/SingleNode.xml +++ b/cmapi/cmapi_server/SingleNode.xml @@ -58,6 +58,7 @@ /var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves /var/lib/columnstore/data1/systemFiles/dbrm/tablelocks 15 + 300 100000 10 95 diff --git a/cmapi/cmapi_server/__main__.py b/cmapi/cmapi_server/__main__.py index e037f4f7f..040cad153 100644 --- a/cmapi/cmapi_server/__main__.py +++ b/cmapi/cmapi_server/__main__.py @@ -24,7 +24,7 @@ from tracing.trace_tool import register_tracing_tools from cmapi_server import helpers from cmapi_server.constants import DEFAULT_MCS_CONF_PATH, CMAPI_CONF_PATH -from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error +from cmapi_server.controllers.dispatcher import dispatcher, jsonify_error, jsonify_404 from cmapi_server.failover_agent import FailoverAgent from cmapi_server.managers.application import AppManager from cmapi_server.managers.process import MCSProcessManager @@ -155,6 +155,7 @@ if __name__ == '__main__': root_config = { "request.dispatch": dispatcher, "error_page.default": jsonify_error, + "error_page.404": jsonify_404, # Enable tracing tools 'tools.trace.on': True, 'tools.trace_end.on': True, diff --git a/cmapi/cmapi_server/cmapi_logger.conf b/cmapi/cmapi_server/cmapi_logger.conf index e0edb52f6..8ef9f510b 100644 --- a/cmapi/cmapi_server/cmapi_logger.conf +++ b/cmapi/cmapi_server/cmapi_logger.conf @@ -3,6 +3,9 @@ "filters": { "add_ip_filter": { "()": "cmapi_server.logging_management.AddIpFilter" + }, + "trace_params_filter": { + "()": "cmapi_server.logging_management.TraceParamsFilter" } }, "formatters": { @@ -17,25 +20,30 @@ "container_sh": { "format" : "`%(asctime)s`: %(message)s", "datefmt": "%a %d %b %Y %I:%M:%S %p %Z" + }, + "json_formatter": { + "()": "cmapi_server.logging_management.JsonFormatter" } }, "handlers": { "cmapi_server": { "level": "DEBUG", "class": "logging.StreamHandler", - "filters": ["add_ip_filter"], + "filters": ["add_ip_filter", "trace_params_filter"], "formatter": "cmapi_server", "stream": "ext://sys.stdout" }, "console": { "level": "DEBUG", "class": "logging.StreamHandler", + "filters": ["trace_params_filter"], "formatter": "default", "stream": "ext://sys.stdout" }, "file": { "level": "DEBUG", "class": "logging.handlers.RotatingFileHandler", + "filters": ["trace_params_filter"], "formatter": "default", "filename": "/var/log/mariadb/columnstore/cmapi_server.log", "mode": "a", @@ -52,6 +60,16 @@ "maxBytes": 1024, "backupCount": 3, "encoding": "utf8" + }, + "json_trace": { + "level": "DEBUG", + "class": "logging.handlers.RotatingFileHandler", + "formatter": "json_formatter", + "filename": "/var/log/mariadb/columnstore/cmapi_json_trace.log", + "mode": "a", + "maxBytes": 1048576, + "backupCount": 10, + "encoding": "utf8" } }, "loggers": { @@ -78,6 +96,11 @@ "root": { "handlers": ["console", "file"], "level": "DEBUG" + }, + "json_trace": { + "handlers": ["json_trace"], + "level": "DEBUG", + "propagate": false } }, "disable_existing_loggers": false diff --git a/cmapi/cmapi_server/controllers/dispatcher.py b/cmapi/cmapi_server/controllers/dispatcher.py index ba7c90565..e8ec8830a 100644 --- a/cmapi/cmapi_server/controllers/dispatcher.py +++ b/cmapi/cmapi_server/controllers/dispatcher.py @@ -1,4 +1,5 @@ import json +import logging import cherrypy @@ -13,6 +14,7 @@ from cmapi_server.controllers.s3dataload import S3DataLoadController _version = '0.4.0' dispatcher = cherrypy.dispatch.RoutesDispatcher() +logger = logging.getLogger(__name__) # /_version/status (GET) @@ -280,3 +282,18 @@ def jsonify_error(status, message, traceback, version): \ cherrypy.response.status = status return response_body + + +def jsonify_404(status, message, traceback, version): + # pylint: disable=unused-argument + """Specialized renderer for 404 Not Found that logs context, then renders JSON. + """ + try: + req = cherrypy.request + method = getattr(req, 'method', '') + path = getattr(req, 'path_info', '') or '/' + remote_ip = getattr(getattr(req, 'remote', None), 'ip', '') or '?' + logger.error("404 Not Found: %s %s from %s", method, path, remote_ip) + except Exception: + pass + return jsonify_error(status, message, traceback, version) diff --git a/cmapi/cmapi_server/logging_management.py b/cmapi/cmapi_server/logging_management.py index 8406bfb1f..0d78b57b9 100644 --- a/cmapi/cmapi_server/logging_management.py +++ b/cmapi/cmapi_server/logging_management.py @@ -17,27 +17,23 @@ class AddIpFilter(logging.Filter): return True -def install_trace_record_factory() -> None: - """Install a LogRecord factory that adds 'trace_params' field. - 'trace_params' will be an empty string if there is no active trace/span - (like in MainThread, where there is no incoming requests). - Otherwise it will contain trace parameters. - """ - current_factory = logging.getLogRecordFactory() +class TraceParamsFilter(logging.Filter): + """Filter that adds trace_params to log records, except for the 'tracer' logger.""" + def filter(self, record: logging.LogRecord) -> bool: + # Don't print trace params for tracer logger family; it already prints trace data + if record.name == 'tracer' or record.name.startswith('tracer.'): + record.trace_params = "" + return True - def factory(*args, **kwargs): # type: ignore[no-untyped-def] - record = current_factory(*args, **kwargs) trace_id, span_id, parent_span_id = get_tracer().current_trace_ids() if trace_id and span_id: - record.trace_params = f'rid={trace_id} sid={span_id}' + trace_params = f"rid={trace_id} sid={span_id}" if parent_span_id: - record.trace_params += f' psid={parent_span_id}' + trace_params += f" psid={parent_span_id}" + record.trace_params = trace_params else: record.trace_params = "" - return record - - logging.setLogRecordFactory(factory) - + return True def custom_cherrypy_error( self, msg='', context='', severity=logging.INFO, traceback=False @@ -142,8 +138,7 @@ def config_cmapi_server_logging(): cherrypy._cplogging.LogManager.access_log_format = ( '{h} ACCESS "{r}" code {s}, bytes {b}, user-agent "{a}"' ) - # Ensure trace_params is available on every record - install_trace_record_factory() + # trace_params are populated via TraceParamsFilter configured in logging config dict_config(CMAPI_LOG_CONF_PATH) disable_unwanted_loggers() @@ -164,3 +159,22 @@ def change_loggers_level(level: str): def disable_unwanted_loggers(): logging.getLogger("urllib3").setLevel(logging.WARNING) + + +class JsonFormatter(logging.Formatter): + # Standard LogRecord fields + skip_fields = set(logging.LogRecord('',0,'',0,'',(),None).__dict__.keys()) + + def format(self, record): + data = { + "ts": self.formatTime(record, self.datefmt), + "level": record.levelname, + "logger": record.name, + "msg": record.getMessage(), + } + # Extract extras from the record (all attributes except standard LogRecord fields) + for k, v in record.__dict__.items(): + if k not in self.skip_fields: + data[k] = v + # Allow non-serializable extras (e.g., bytes, datetime) to be stringified + return json.dumps(data, ensure_ascii=False, sort_keys=True, default=str) \ No newline at end of file diff --git a/cmapi/cmapi_server/node_manipulation.py b/cmapi/cmapi_server/node_manipulation.py index 4d2d9e5dd..f2146a874 100644 --- a/cmapi/cmapi_server/node_manipulation.py +++ b/cmapi/cmapi_server/node_manipulation.py @@ -123,7 +123,7 @@ def add_node( def remove_node( node: str, input_config_filename: str = DEFAULT_MCS_CONF_PATH, output_config_filename: Optional[str] = None, - deactivate_only: bool = True, + deactivate_only: bool = False, use_rebalance_dbroots: bool = True, **kwargs ): """Remove node from a cluster. diff --git a/cmapi/cmapi_server/test/CS-config-test.xml b/cmapi/cmapi_server/test/CS-config-test.xml index ac4995629..5f851bb18 100644 --- a/cmapi/cmapi_server/test/CS-config-test.xml +++ b/cmapi/cmapi_server/test/CS-config-test.xml @@ -237,6 +237,7 @@ /var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves /var/lib/columnstore/data1/systemFiles/dbrm/tablelocks 15 + 300 100000 10 95 diff --git a/cmapi/cmapi_server/test/Columnstore_apply_config.xml b/cmapi/cmapi_server/test/Columnstore_apply_config.xml index 580a829ae..4eb618d24 100644 --- a/cmapi/cmapi_server/test/Columnstore_apply_config.xml +++ b/cmapi/cmapi_server/test/Columnstore_apply_config.xml @@ -239,6 +239,7 @@ /var/lib/columnstore/data1/systemFiles/dbrm/BRM_saves /var/lib/columnstore/data1/systemFiles/dbrm/tablelocks 20 + 300 100000 10 95 diff --git a/cmapi/mcs_node_control/models/node_config.py b/cmapi/mcs_node_control/models/node_config.py index e83f1e541..0068c35cf 100644 --- a/cmapi/mcs_node_control/models/node_config.py +++ b/cmapi/mcs_node_control/models/node_config.py @@ -7,6 +7,7 @@ import socket from os import mkdir, replace, chown from pathlib import Path from shutil import copyfile +from typing import Iterator from xml.dom import minidom # to pick up pretty printing functionality from lxml import etree @@ -366,12 +367,10 @@ class NodeConfig: return False - def get_network_addresses(self): + def get_network_addresses(self) -> Iterator[str]: """Retrievs the list of the network addresses Generator that yields network interface addresses. - - :rtype: str """ for ni in get_network_interfaces(): for fam in [socket.AF_INET, socket.AF_INET6]: diff --git a/cmapi/scripts/cs_package_manager.sh b/cmapi/scripts/cs_package_manager.sh index 2ffba1522..88071cc0b 100644 --- a/cmapi/scripts/cs_package_manager.sh +++ b/cmapi/scripts/cs_package_manager.sh @@ -3,10 +3,11 @@ # Variables enterprise_token="" -dev_drone_key="" -ci_user="" -ci_pwd="" -cs_pkg_manager_version="3.10" +dev_drone_key="" +ci_user="" +ci_pwd="" +cs_pkg_manager_version="3.11" +USE_DEV_PACKAGES=false if [ ! -f /var/lib/columnstore/local/module ]; then pm="pm1"; else pm=$(cat /var/lib/columnstore/local/module); fi; pm_number=$(echo "$pm" | tr -dc '0-9') action=$1 @@ -15,11 +16,11 @@ print_help_text() { echo " MariaDB Columnstore Package Manager -Version: $cs_pkg_manager_version +Version: $cs_pkg_manager_version Actions: - check Print the mariadb server version to columnstore version mapping for this os/cpu/machine + check Print the mariadb server version to columnstore version mapping for this os/cpu/machine remove Uninstall MariaDB Columnstore install Install a specific version of MariaDB Columnstore upgrade Upgrade your columnstore deployment running on this machine @@ -91,7 +92,7 @@ The remove action will uninstall MariaDB & Columnstore but keep configurations a Example: bash $0 remove bash $0 remove --all --force - + " } @@ -121,7 +122,7 @@ Usage: bash $0 upgrade [enterprise|community] [version] --token [token] Flags: -h | --help Help text & documentation -Example: +Example: bash $0 upgrade enterprise 10.6.18-14 --token [token] bash $0 upgrade community 11.1.4 " @@ -133,20 +134,20 @@ wait_cs_down() { if ! command -v pgrep &> /dev/null; then printf "\n[!] pgrep not found. Please install pgrep\n\n" - exit 1; - fi + exit 1; + fi # Loop until the maximum number of retries is reached # printf " - Checking Columnstore Offline ..."; while [ $retries -lt $max_number_of_retries ]; do # If columnstore is offline, return cs_processlist=$(pgrep -f "PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode|save_brm|mcs-loadbrm.py") - if [ -z "$cs_processlist" ]; then + if [ -z "$cs_processlist" ]; then # printf " Done \n"; mcs_offine=true return 0 - else - printf "\n[!] Columnstore is ONLINE - waiting 5s to retry, attempt: $retries...\n"; + else + printf "\n[!] Columnstore is ONLINE - waiting 5s to retry, attempt: $retries...\n"; if (( retries % 5 == 0 )); then echo "PID List: $cs_processlist" echo "$(ps aux | grep -E "PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode|save_brm|mcs-loadbrm.py" | grep -v grep) " @@ -177,11 +178,11 @@ print_and_delete() { init_cs_down() { mcs_offine=false if [ "$pm_number" == "1" ]; then - if [ -z $(pidof "PrimProc") ]; then + if [ -z $(pidof "PrimProc") ]; then # printf "\n[+] Columnstore offline already"; mcs_offine=true else - + if is_cmapi_installed ; then confirm_cmapi_online_and_configured @@ -192,13 +193,13 @@ init_cs_down() { echo "[!] Failed stopping via mcs ... trying cmapi curl" stop_cs_cmapi_via_curl fi - printf "Done - $(date)\n" + printf "Done - $(date)\n" # Handle Errors with exit 0 code if [ ! -z "$(echo $mcs_output | grep "Internal Server Error")" ];then stop_cs_via_systemctl_override fi - else + else stop_cs_cmapi_via_curl fi else @@ -208,17 +209,17 @@ init_cs_down() { fi } -init_cs_up(){ +init_cs_up(){ if [ "$pm_number" == "1" ]; then - if [ -n "$(pidof PrimProc)" ]; then + if [ -n "$(pidof PrimProc)" ]; then num_cs_processlist=$(pgrep -f "PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode|save_brm|mcs-loadbrm.py" | wc -l) - if [ $num_cs_processlist -gt "0" ]; then + if [ $num_cs_processlist -gt "0" ]; then printf "%-35s ... $num_cs_processlist processes \n" " - Columnstore Engine Online" fi - else + else # Check cmapi installed if is_cmapi_installed ; then @@ -231,9 +232,9 @@ init_cs_up(){ echo "[!] Failed starting via mcs ... trying cmapi curl" start_cs_cmapi_via_curl fi - printf " Done - $(date)\n" + printf " Done - $(date)\n" - else + else start_cs_cmapi_via_curl fi else @@ -258,7 +259,7 @@ is_cmapi_installed() { cmapi_installed_command="" case $package_manager in - yum ) + yum ) cmapi_installed_command="yum list installed MariaDB-columnstore-cmapi &> /dev/null;"; ;; apt ) @@ -271,13 +272,13 @@ is_cmapi_installed() { if eval $cmapi_installed_command ; then return 0 - else + else return 1 fi } start_cmapi() { - + if ! command -v systemctl &> /dev/null ; then printf "[!!] shutdown_mariadb_and_cmapi: Cant access systemctl\n\n" exit 1; @@ -295,7 +296,7 @@ start_cmapi() { } stop_cmapi() { - + if ! command -v systemctl &> /dev/null ; then printf "[!!] shutdown_mariadb_and_cmapi: Cant access systemctl\n\n" exit 1; @@ -317,7 +318,7 @@ stop_cmapi() { # $2 = version desired to install # Returns 0 if $2 is greater, else exit compare_versions() { - local version1="$1" + local version1="$1" local version2="$2" local exit_message="\n[!] The desired upgrade version: $2 \nis NOT greater than the current installed version of $1\n\n" @@ -327,10 +328,10 @@ compare_versions() { # Compare each segment of the version numbers for (( i = 0; i < ${#v1_nums[@]}; i++ )); do - - v1=${v1_nums[i]} + + v1=${v1_nums[i]} v2=${v2_nums[i]} - + if (( v1 > v2 )); then # The existing version is newer than the desired version: Exit echo -e $exit_message @@ -356,7 +357,7 @@ is_mariadb_installed() { mariadb_installed_command="" case $package_manager in - yum ) + yum ) mariadb_installed_command="yum list installed MariaDB-server &>/dev/null" ;; apt ) @@ -366,8 +367,8 @@ is_mariadb_installed() { echo "\nshutdown_mariadb_and_cmapi - package manager not implemented: $package_manager\n" exit 2; esac - - + + if eval $mariadb_installed_command ; then # Check if the systemd service file exists # if systemctl status mariadb &> /dev/null; then @@ -378,18 +379,18 @@ is_mariadb_installed() { # return 1 # fi return 0 - else + else return 1 fi } confirm_mariadb_online() { - + printf "%-35s ..." " - Checking local MariaDB online" counter=0 sleep_timer=1 while true; do - + if mariadb -e "SELECT 1;" &> /dev/null; then printf " Success\n" break; @@ -411,7 +412,7 @@ start_mariadb() { printf "[!!] start_mariadb: Cant access systemctl\n\n" exit 1; fi - + # Start MariaDB if is_mariadb_installed ; then printf "%-35s ..." " - Starting MariaDB Server" @@ -574,7 +575,7 @@ do_apt_remove() { # remove all current MDB packages if [ "$(apt list --installed mariadb-* 2>/dev/null | wc -l)" -gt 1 ]; then if [ "$REMOVE_ALL" == true ]; then - DEBIAN_FRONTEND=noninteractive apt remove --purge -y mariadb-plugin-columnstore mariadb-columnstore-cmapi + DEBIAN_FRONTEND=noninteractive apt remove --purge -y mariadb-plugin-columnstore mariadb-columnstore-cmapi DEBIAN_FRONTEND=noninteractive apt remove --purge -y mariadb-* else if ! apt remove mariadb-columnstore-cmapi --purge -y; then @@ -616,7 +617,7 @@ do_remove() { check_operating_system check_package_managers - + case $distro_info in centos | rhel | rocky | almalinux ) do_yum_remove "$@" @@ -645,7 +646,7 @@ Flags: -rp | --replication-pwd Replication password Default: Mariadb123% -mu | --maxscale-user Maxscale user Default: mxs -mp | --maxscale-pwd Maxscale password Default: Mariadb123% - -cu | --cross-engine-user Cross-engine user Default: cross_engine + -cu | --cross-engine-user Cross-engine user Default: cross_engine -cp | --cross-engine-pwd Cross-engine password Default: Mariadb123% -t | --token Enterprise token Required -h | --help Help Text @@ -670,7 +671,7 @@ Flags: -rp | --replication-pwd Replication password Default: Mariadb123% -mu | --maxscale-user Maxscale user Default: mxs -mp | --maxscale-pwd Maxscale password Default: Mariadb123% - -cu | --cross-engine-user Cross-engine user Default: cross_engine + -cu | --cross-engine-user Cross-engine user Default: cross_engine -cp | --cross-engine-pwd Cross-engine password Default: Mariadb123% -h | --help Help Text @@ -693,8 +694,9 @@ Flags: -rp | --replication-pwd Replication password Default: Mariadb123% -mu | --maxscale-user Maxscale user Default: mxs -mp | --maxscale-pwd Maxscale password Default: Mariadb123% - -cu | --cross-engine-user Cross-engine user Default: cross_engine + -cu | --cross-engine-user Cross-engine user Default: cross_engine -cp | --cross-engine-pwd Cross-engine password Default: Mariadb123% + --with-dev Include development client packages (MariaDB-devel on RPM, libmariadb-dev on DEB) -h | --help Help Text Example: @@ -719,7 +721,7 @@ Flags: -rp | --replication-pwd Replication password Default: Mariadb123% -mu | --maxscale-user Maxscale user Default: mxs -mp | --maxscale-pwd Maxscale password Default: Mariadb123% - -cu | --cross-engine-user Cross-engine user Default: cross_engine + -cu | --cross-engine-user Cross-engine user Default: cross_engine -cp | --cross-engine-pwd Cross-engine password Default: Mariadb123% -h | --help Help Text @@ -743,7 +745,7 @@ Flags: -rp | --replication-pwd Replication password Default: Mariadb123% -mu | --maxscale-user Maxscale user Default: mxs -mp | --maxscale-pwd Maxscale password Default: Mariadb123% - -cu | --cross-engine-user Cross-engine user Default: cross_engine + -cu | --cross-engine-user Cross-engine user Default: cross_engine -cp | --cross-engine-pwd Cross-engine password Default: Mariadb123% -h | --help Help Text @@ -775,7 +777,7 @@ Examples: } print_install_help_text() { - + case $repo in enterprise | enterprise_staging ) print_enterprise_install_help_text @@ -826,7 +828,7 @@ parse_install_cluster_additional_args() { if [ -z "$error_on_unknown_option" ]; then error_on_unknown_option=true fi - + if [ $repo == "dev" ]; then # $4 = branch/build number shift 4 @@ -842,6 +844,10 @@ parse_install_cluster_additional_args() { install|enterprise|community|dev|local) shift # past argument ;; + --with-dev) + USE_DEV_PACKAGES=true + shift # past argument + ;; -t | --token) enterprise_token="$2" shift # past argument @@ -900,7 +906,7 @@ parse_install_cluster_additional_args() { print_install_help_text echo "parse_install_cluster_additional_args: unknown flag: $1" exit 1 - fi + fi shift # past argument ;; esac @@ -909,7 +915,7 @@ parse_install_cluster_additional_args() { # Enterprise checks if [ $repo == "enterprise" ] || [ $repo == "enterprise_staging" ] ; then - if [ -z "$enterprise_token" ]; then + if [ -z "$enterprise_token" ]; then printf "\n[!] Enterprise token empty: $enterprise_token\n" printf "1) edit $0 enterprise_token='xxxxxx' \n" printf "2) add flag --token xxxxxxxxx \n" @@ -935,7 +941,7 @@ check_package_managers() { if [ -n "$mac" ] && [ "$mac" = true ]; then return fi - + package_manager=''; if command -v apt &> /dev/null ; then if ! command -v dpkg-query &> /dev/null ; then @@ -947,9 +953,9 @@ check_package_managers() { if command -v yum &> /dev/null ; then package_manager="yum"; - fi + fi - if [ $package_manager == '' ]; then + if [ $package_manager == '' ]; then echo "[!!] No package manager found: yum or apt must be installed" exit 1; fi; @@ -957,15 +963,15 @@ check_package_managers() { # Confirms mac have critical binaries to run this script -# As of 3/2024 supports cs_package_manager.sh check +# As of 3/2024 supports cs_package_manager.sh check check_mac_dependencies() { - + # Install ggrep if not exists if ! which ggrep >/dev/null 2>&1; then echo "Attempting Auto install of ggrep" if ! which brew >/dev/null 2>&1; then - echo "Attempting Auto install of brew" + echo "Attempting Auto install of brew" bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" fi brew install grep @@ -999,7 +1005,7 @@ set_distro_based_on_distro_info() { *) # unknown option printf "\ncheck_operating_system: unknown os & version: $distro_info\n" exit 2; - esac + esac distro_short="${distro_info:0:3}${version_id}" } @@ -1040,14 +1046,14 @@ check_cpu_architecture() { ;; *) # unknown option echo "Error: Unsupported architecture ($architecture)" - esac + esac } check_mdb_installed() { packages="" current_mariadb_version="" case $package_manager in - yum ) + yum ) packages=$(yum list installed | grep -i mariadb) current_mariadb_version=$(rpm -q --queryformat '%{VERSION}\n' MariaDB-server 2>/dev/null) ;; @@ -1070,7 +1076,7 @@ check_mdb_installed() { exit 2; fi; - + } check_no_mdb_installed() { @@ -1115,7 +1121,7 @@ check_aws_cli_installed() { ;; *) # unknown option echo "Error: Unsupported architecture ($architecture)" - esac + esac case $distro_info in centos | rhel | rocky | almalinux ) @@ -1125,7 +1131,7 @@ check_aws_cli_installed() { unzip -q awscliv2.zip; sudo ./aws/install; mv /usr/local/bin/aws /usr/bin/aws; - aws configure set default.s3.max_concurrent_requests 70 + aws configure set default.s3.max_concurrent_requests 70 ;; ubuntu | debian ) rm -rf aws awscliv2.zip @@ -1138,14 +1144,14 @@ check_aws_cli_installed() { unzip -q awscliv2.zip; sudo ./aws/install; mv /usr/local/bin/aws /usr/bin/aws; - aws configure set default.s3.max_concurrent_requests 70 + aws configure set default.s3.max_concurrent_requests 70 ;; *) # unknown option printf "\nos & version not implemented: $distro_info\n" exit 2; esac - - + + fi } @@ -1155,7 +1161,7 @@ check_cluster_dependancies() { printf "\n[!] telnet not found. Attempting Auto-install\n\n" case $package_manager in - yum ) + yum ) if ! yum install telnet -y; then printf "\n[!!] Failed to install telnet. Please manually install \n\n" exit 1; @@ -1172,12 +1178,12 @@ check_cluster_dependancies() { printf "\ncheck_cluster_dependancies: package manager not implemented - $package_manager\n" exit 2; esac - fi + fi } # main goal is to enable binlog with unique server_id for replication configure_default_mariadb_server_config() { - + case $distro_info in centos | rhel | rocky | almalinux ) server_cnf_dir="/etc/my.cnf.d" @@ -1191,7 +1197,7 @@ configure_default_mariadb_server_config() { printf "\nconfigure_default_mariadb_server_config: os & version not implemented: $distro_info\n" exit 2; esac - + if [ ! -d $server_cnf_dir ]; then echo " - Creating $server_cnf_dir" mkdir -p $server_cnf_dir @@ -1215,7 +1221,7 @@ lower_case_table_names=1 # This must be unique on each MariaDB Enterprise Node server_id = $dbroot" > $server_cnf_location - + chown mysql:mysql $server_cnf_location stop_mariadb start_mariadb @@ -1296,7 +1302,7 @@ create_mariadb_users() { } configure_columnstore_cross_engine_user() { - + if [ -n "$cross_engine_user" ] && [ -n "$cross_engine_pwd" ]; then if command -v mcsSetConfig &> /dev/null ; then mcsSetConfig CrossEngineSupport User "$cross_engine_user" @@ -1341,17 +1347,17 @@ poll_for_cmapi_online() { } configure_cluster_via_cmapi() { - + if [ -z $api_key ]; then get_set_cmapi_key; fi; local dbroot=1 for node in $(echo $nodes | tr "," "\n"); do - + if command -v mcs &> /dev/null ; then printf "%-35s ..." " - Adding Node $dbroot: $node " - if mcs_output=$( timeout 120s mcs cluster node add --node $node ); then + if mcs_output=$( timeout 120s mcs cluster node add --node $node ); then printf " Done - $( echo $cmapi_output | jq -r tostring ) \n" - else + else echo "[!] Failed ... trying cmapi curl" echo "$mcs_output" add_node_cmapi_via_curl $node @@ -1365,7 +1371,7 @@ configure_cluster_via_cmapi() { } poll_for_primary_mariadb_connectivity() { - + if [ -z "$primary_ip" ]; then echo "Primary IP not defined" exit 1; @@ -1375,7 +1381,7 @@ poll_for_primary_mariadb_connectivity() { local sleep_timer=3 local counter=0 - # Use telnet to check port 3306 on primary node + # Use telnet to check port 3306 on primary node printf "%-35s ..." " - Checking mariadb port $port on $primary_ip..." while true; do if telnet $primary_ip $port < /dev/null 2>&1 | grep -q 'Connected'; then @@ -1393,11 +1399,11 @@ poll_for_primary_mariadb_connectivity() { sleep $sleep_timer ((counter++)) done - + local counter=0 printf "%-35s ..." " - Checking replication credentials" while true; do - + if mariadb -h $primary_ip -u $replication_user -p"$replication_pwd" -e "SELECT 1;" &> /dev/null; then printf " Success\n" break; @@ -1430,7 +1436,7 @@ configure_mariadb_replication() { check_dev_build_exists() { - if ! aws s3 ls $s3_path --no-sign-request &> /dev/null; then + if ! aws s3 ls $s3_path --no-sign-request &> /dev/null; then printf "[!] Defined dev build doesnt exist in aws\n\n" exit 2; fi; @@ -1452,14 +1458,14 @@ check_ci_build_exists() { } optionally_install_cmapi_dependencies() { - + case $package_manager in - yum ) + yum ) packages=("jq" "libxcrypt-compat") for package in "${packages[@]}"; do - yum install $package -y; + yum install $package -y; done ;; @@ -1473,21 +1479,21 @@ optionally_install_cmapi_dependencies() { printf "\noptionally_install_cmapi_dependencies: package manager not implemented - $package_manager\n" exit 2; esac - + } post_cmapi_install_configuration() { - + systemctl daemon-reload systemctl enable mariadb-columnstore-cmapi systemctl start mariadb-columnstore-cmapi mariadb -e "show status like '%Columnstore%';" sleep 1; - - + + if [ -n "$nodes" ] && [ $dbroot == 1 ] ; then - + # Handle Cluster Configuration - Primary Node configure_default_mariadb_server_config create_mariadb_users @@ -1495,14 +1501,14 @@ post_cmapi_install_configuration() { configure_columnstore_cross_engine_user poll_for_cmapi_online configure_cluster_via_cmapi - + elif [ -n "$nodes" ] && [ $dbroot -gt 1 ]; then - + # Handle Cluster Configuration - Replica Nodes configure_default_mariadb_server_config poll_for_primary_mariadb_connectivity configure_mariadb_replication - + else # Handle Single node confirm_cmapi_online_and_configured @@ -1526,14 +1532,14 @@ do_enterprise_apt_install() { systemctl start mariadb # Install Columnstore - if ! apt install mariadb-plugin-columnstore -y --quiet; then + if ! apt install mariadb-plugin-columnstore -y --quiet; then printf "\n[!] Failed to install columnstore \n\n" exit 1; fi # Install CMAPI if $CONFIGURE_CMAPI ; then - if ! apt install mariadb-columnstore-cmapi jq -y --quiet; then + if ! apt install mariadb-columnstore-cmapi jq -y --quiet; then printf "\n[!] Failed to install cmapi\n\n" mariadb -e "show status like '%Columnstore%';" else @@ -1566,7 +1572,7 @@ do_enterprise_yum_install() { if ! yum install MariaDB-columnstore-cmapi jq -y; then printf "\n[!] Failed to install cmapi\n\n" mariadb -e "show status like '%Columnstore%';" - else + else post_cmapi_install_configuration fi else @@ -1622,7 +1628,7 @@ process_cluster_variables() { } quick_version_check() { - if [ -z $version ]; then + if [ -z $version ]; then printf "\n[!] Version empty: $version\n\n" exit 1; fi; @@ -1636,7 +1642,7 @@ quick_version_check() { else print_install_help_text fi - + exit 0; ;; -* ) @@ -1669,7 +1675,7 @@ check_columnstore_install_dependencies() { exit 1; fi done - + ;; ubuntu | debian ) @@ -1682,7 +1688,7 @@ check_columnstore_install_dependencies() { exit 1; fi done - + ;; *) # unknown option printf "\ncheck_columnstore_install_dependencies: os & version not implemented: $distro_info\n" @@ -1705,12 +1711,12 @@ print_install_variables() { echo "MariaDB Community Version: $version" fi - + } enterprise_install() { - - version=$3 + + version=$3 quick_version_check parse_install_cluster_additional_args "$@" @@ -1720,7 +1726,7 @@ enterprise_install() { echo "-----------------------------------------------" url="https://dlm.mariadb.com/enterprise-release-helpers/mariadb_es_repo_setup" - if $enterprise_staging; then + if $enterprise_staging; then url="https://dlm.mariadb.com/$enterprise_token/enterprise-release-helpers-staging/mariadb_es_repo_setup" fi @@ -1730,7 +1736,7 @@ enterprise_install() { # Download Repo setup script rm -rf mariadb_es_repo_setup curl -LO "$url" -o mariadb_es_repo_setup; - chmod +x mariadb_es_repo_setup; + chmod +x mariadb_es_repo_setup; if ! bash mariadb_es_repo_setup --token="$enterprise_token" --apply --mariadb-server-version="$version"; then printf "\n[!] Failed to apply mariadb_es_repo_setup...\n\n" exit 2; @@ -1741,25 +1747,25 @@ enterprise_install() { if [ ! -f "/etc/yum.repos.d/mariadb.repo" ]; then printf "\n[!] Expected to find mariadb.repo in /etc/yum.repos.d \n\n"; exit 1; fi; - if $enterprise_staging; then + if $enterprise_staging; then sed -i 's/mariadb-es-main/mariadb-es-staging/g' /etc/yum.repos.d/mariadb.repo sed -i 's/mariadb-enterprise-server/mariadb-enterprise-staging/g' /etc/yum.repos.d/mariadb.repo printf "\n\n[+] Adjusted mariadb.repo to: mariadb-enterprise-staging\n\n" fi; - do_enterprise_yum_install "$@" + do_enterprise_yum_install "$@" ;; ubuntu | debian ) if [ ! -f "/etc/apt/sources.list.d/mariadb.list" ]; then printf "\n[!] Expected to find mariadb.list in /etc/apt/sources.list.d \n\n"; exit 1; fi; - if $enterprise_staging; then + if $enterprise_staging; then sed -i 's/mariadb-enterprise-server/mariadb-enterprise-staging/g' /etc/apt/sources.list.d/mariadb.list apt update printf "\n\n[+] Adjusted mariadb.list to: mariadb-enterprise-staging\n\n" fi; - do_enterprise_apt_install "$@" + do_enterprise_apt_install "$@" ;; *) # unknown option printf "\nenterprise_install: os & version not implemented: $distro_info\n" @@ -1768,10 +1774,10 @@ enterprise_install() { } community_install() { - - version=$3 + + version=$3 quick_version_check - + parse_install_cluster_additional_args "$@" print_install_variables check_no_mdb_installed @@ -1780,7 +1786,7 @@ community_install() { # Download Repo setup rm -rf mariadb_repo_setup - + community_setup_script="https://downloads.mariadb.com/MariaDB/mariadb_repo_setup" if ! curl -sSL $community_setup_script | bash -s -- --mariadb-server-version=mariadb-$version ; then echo "version bad or mariadb_repo_setup unavailable. exiting ..." @@ -1789,10 +1795,10 @@ community_install() { case $distro_info in centos | rhel | rocky | almalinux ) - do_community_yum_install "$@" + do_community_yum_install "$@" ;; ubuntu | debian ) - do_community_apt_install "$@" + do_community_apt_install "$@" ;; *) # unknown option printf "\ncommunity_install: os & version not implemented: $distro_info\n" @@ -1819,10 +1825,17 @@ do_community_yum_install() { exit 1; fi + # Optionally install dev client packages + if $USE_DEV_PACKAGES; then + if ! yum install MariaDB-devel -y; then + printf "\n[!] Failed to install MariaDB-devel (dev packages) \n\n" + fi + fi + # Install CMAPI if $CONFIGURE_CMAPI ; then - cmapi_installable=$(yum list | grep MariaDB-columnstore-cmapi) - if [ -n "$cmapi_installable" ]; then + cmapi_installable=$(yum list | grep MariaDB-columnstore-cmapi) + if [ -n "$cmapi_installable" ]; then if ! yum install MariaDB-columnstore-cmapi jq -y; then printf "\n[!] Failed to install cmapi\n\n" exit 1; @@ -1855,12 +1868,19 @@ do_community_apt_install() { exit 1; fi; + # Optionally install dev client packages + if $USE_DEV_PACKAGES; then + if ! apt install libmariadb-dev -y --quiet; then + printf "\n[!] Failed to install libmariadb-dev (dev packages) \n\n" + fi + fi + # Install CMAPI if $CONFIGURE_CMAPI ; then - if ! apt install mariadb-columnstore-cmapi jq -y --quiet ; then + if ! apt install mariadb-columnstore-cmapi jq -y --quiet ; then printf "\n[!] Failed to install cmapi \n\n" mariadb -e "show status like '%Columnstore%';" - else + else post_cmapi_install_configuration fi else @@ -1873,20 +1893,20 @@ get_set_cmapi_key() { CMAPI_CNF="/etc/columnstore/cmapi_server.conf" - if [ ! -f $CMAPI_CNF ]; then + if [ ! -f $CMAPI_CNF ]; then echo "[!!] No cmapi config file found" exit 1; fi; # Add API Key if missing if [ -z "$(grep ^x-api-key $CMAPI_CNF)" ]; then - + if ! command -v openssl &> /dev/null ; then api_key="19bb89d77cb8edfe0864e05228318e3dfa58e8f45435fbd9bd12c462a522a1e9" - else + else api_key=$(openssl rand -hex 32) fi - + printf "%-35s ..." " - Setting API Key:" if cmapi_output=$( curl -s https://127.0.0.1:8640/cmapi/0.4.0/cluster/status \ --header 'Content-Type:application/json' \ @@ -1897,7 +1917,7 @@ get_set_cmapi_key() { printf " Failed to set API key\n\n" exit 1; fi - else + else api_key=$(grep ^x-api-key $CMAPI_CNF | cut -d "=" -f 2 | tr -d " ") fi } @@ -1939,7 +1959,7 @@ start_cs_cmapi_via_curl() { --header "x-api-key:$api_key" \ --data '{"timeout":20}'; then echo " - Started Columnstore" - else + else echo " - [!] Failed to start columnstore via cmapi curl" echo " - Trying via systemctl ..." start_cs_via_systemctl @@ -1955,7 +1975,7 @@ stop_service_if_exists() { fi fi } - + stop_cs_via_systemctl_override() { stop_service_if_exists "mcs-ddlproc" @@ -1996,7 +2016,7 @@ stop_cs_cmapi_via_curl() { --header "x-api-key:$api_key" \ --data '{"timeout":20}'; then echo " - Stopped Columnstore via curl" - else + else printf "\n[!] Failed to stop columnstore via cmapi\n" stop_cs_via_systemctl fi; @@ -2006,21 +2026,21 @@ add_primary_node_cmapi() { primary_ip="127.0.0.1" if [ -z $api_key ]; then get_set_cmapi_key; fi; - + if command -v mcs &> /dev/null ; then # Only add 127.0.0.1 if no nodes are configured in cmapi if [ "$(mcs cluster status | jq -r '.num_nodes')" == "0" ]; then printf "%-35s ..." " - Adding primary node" - if mcs_output=$( timeout 30s mcs cluster node add --node $primary_ip ); then + if mcs_output=$( timeout 30s mcs cluster node add --node $primary_ip ); then echo " Done - $( echo $mcs_output | jq -r tostring )" - else + else echo "[!] Failed ... trying cmapi curl" echo "$mcs_output" add_node_cmapi_via_curl $primary_ip fi; fi; - - else + + else echo "mcs - binary could not be found" add_node_cmapi_via_curl $primary_ip printf "%-35s ..." " - Starting Columnstore Engine" @@ -2030,7 +2050,7 @@ add_primary_node_cmapi() { dev_install() { - + check_aws_cli_installed parse_install_cluster_additional_args "$@" print_install_variables @@ -2044,7 +2064,7 @@ dev_install() { if [ -z $dev_drone_key ]; then printf "Missing dev_drone_key: \n"; exit; fi; if [ -z "$branch" ]; then printf "Missing branch: $branch\n"; exit 2; fi; if [ -z "$build" ]; then printf "Missing build: $branch\n"; exit 2; fi; - + # Construct URLs s3_path="$dronePath/$branch/$build/$product/$arch" @@ -2054,17 +2074,17 @@ dev_install() { echo "Drone: $drone_http" process_cluster_variables echo "###################################" - + check_dev_build_exists case $distro_info in centos | rhel | rocky | almalinux ) s3_path="${s3_path}/$distro" drone_http="${drone_http}/$distro" - do_dev_yum_install "$@" + do_dev_yum_install "$@" ;; ubuntu | debian ) - do_dev_apt_install "$@" + do_dev_apt_install "$@" ;; *) # unknown option printf "\ndev_install: os & version not implemented: $distro_info\n" @@ -2137,7 +2157,7 @@ EOF # Install MariaDB Server apt-get clean - apt-get update + apt-get update if ! apt install mariadb-server -y --quiet; then printf "\n[!] Failed to install mariadb-server \n\n" exit 1; @@ -2155,10 +2175,10 @@ EOF # Install CMAPI if $CONFIGURE_CMAPI ; then - if ! apt install mariadb-columnstore-cmapi jq -y --quiet ; then + if ! apt install mariadb-columnstore-cmapi jq -y --quiet ; then printf "\n[!] Failed to install cmapi \n\n" mariadb -e "show status like '%Columnstore%';" - else + else post_cmapi_install_configuration fi else @@ -2171,9 +2191,13 @@ EOF do_ci_yum_install() { # list packages + grep_list="MariaDB-backup-*|MariaDB-client-*|MariaDB-columnstore-engine-*|MariaDB-common-*|MariaDB-server-*|MariaDB-shared-*|galera-enterprise-*|cmapi" + if $USE_DEV_PACKAGES; then + grep_list="${grep_list}|MariaDB-devel-*" + fi rpm_list=$(curl -s -u $ci_user:$ci_pwd $ci_url/$os_package/ | grep -oP '(?<=href=").+?\.rpm' | sed 's/.*\///' | grep -v debug | grep -E "$grep_list") - + if [ -z "$rpm_list" ]; then echo "No RPMs found" echo "command: curl -s -u $ci_user:$ci_pwd $ci_url/$os_package/" @@ -2195,7 +2219,11 @@ do_ci_yum_install() { fi # Install MariaDB Server - if ! yum install MariaDB-server-* galera-enterprise-* MariaDB-client-* MariaDB-common-* MariaDB-shared-* -y; then + install_list="MariaDB-server-* galera-enterprise-* MariaDB-client-* MariaDB-common-* MariaDB-shared-*" + if $USE_DEV_PACKAGES; then + install_list+=" MariaDB-devel-*" + fi + if ! yum install $install_list -y; then printf "\n[!] Failed to install MariaDB-server \n\n" exit 1; fi @@ -2211,7 +2239,7 @@ do_ci_yum_install() { fi if ! ls MariaDB-columnstore-cmapi* 1> /dev/null 2>&1; then - + # Construct URLs dronePath="s3://$dev_drone_key" branch="stable-23.10" @@ -2220,7 +2248,7 @@ do_ci_yum_install() { s3_path="$dronePath/$branch/$build/$product/$arch/rockylinux${version_id}" check_aws_cli_installed echo "Attempting to download cmapi from drone: $s3_path" - + aws s3 cp $s3_path/ . --recursive --exclude "*" --include "MariaDB-columnstore-cmapi-*" --exclude "*debug*" --no-sign-request if ! ls MariaDB-columnstore-cmapi* 1> /dev/null 2>&1; then @@ -2241,7 +2269,7 @@ do_ci_yum_install() { create_cross_engine_user configure_columnstore_cross_engine_user fi - + return } @@ -2249,6 +2277,9 @@ do_ci_apt_install() { # list packages echo "++++++++++++++++++++++++++++++++++++++++++++" grep_list="mariadb-backup-*|mariadb-client-*|mariadb-plugin-columnstore-*|mariadb-common*|mariadb-server-*|mariadb-shared-*|galera-enterprise-*|cmapi|libmariadb3|mysql-common" + if $USE_DEV_PACKAGES; then + grep_list="${grep_list}|libmariadb-dev" + fi rpm_list=$(curl -s -u $ci_user:$ci_pwd $ci_url/$os_package/ | grep -oP '(?<=href=").+?\.deb' | sed 's/.*\///' | grep -v debug | grep -E "$grep_list") if [ -z "$rpm_list" ]; then @@ -2256,7 +2287,7 @@ do_ci_apt_install() { echo "command: curl -s -u $ci_user:$ci_pwd $ci_url/$os_package/" exit 2 fi - + for rpm in $rpm_list; do echo "Downloading $rpm..." if ! curl -s --user "$ci_user:$ci_pwd" -o "$rpm" "$ci_url/$os_package/$rpm"; then @@ -2266,12 +2297,17 @@ do_ci_apt_install() { done # Install MariaDB - DEBIAN_FRONTEND=noninteractive apt update + DEBIAN_FRONTEND=noninteractive apt update DEBIAN_FRONTEND=noninteractive apt upgrade -y --quiet apt-get clean DEBIAN_FRONTEND=noninteractive sudo apt install gawk libdbi-perl lsof perl rsync -y --quiet DEBIAN_FRONTEND=noninteractive sudo apt install libdbi-perl socat libhtml-template-perl -y --quiet - if ! DEBIAN_FRONTEND=noninteractive apt install $(pwd)/mysql-common*.deb $(pwd)/mariadb-server*.deb $(pwd)/galera-enterprise-* $(pwd)/mariadb-common*.deb $(pwd)/mariadb-client-*.deb $(pwd)/libmariadb3*.deb -y --quiet; then + # Optionally include libmariadb-dev if it was downloaded + DEV_DEB="" + if $USE_DEV_PACKAGES; then + DEV_DEB="$(ls $(pwd)/libmariadb-dev*.deb 2>/dev/null || true)" + fi + if ! DEBIAN_FRONTEND=noninteractive apt install $(pwd)/mysql-common*.deb $(pwd)/mariadb-server*.deb $(pwd)/galera-enterprise-* $(pwd)/mariadb-common*.deb $(pwd)/mariadb-client-*.deb $(pwd)/libmariadb3*.deb $DEV_DEB -y --quiet; then printf "\n[!] Failed to install mariadb-server \n\n" exit 1; fi @@ -2287,7 +2323,7 @@ do_ci_apt_install() { fi; if ! ls mariadb-columnstore-cmapi*.deb 1> /dev/null 2>&1; then - + # Construct URLs dronePath="s3://$dev_drone_key" branch="stable-23.10" @@ -2296,7 +2332,7 @@ do_ci_apt_install() { s3_path="$dronePath/$branch/$build/$product/$arch/$distro" check_aws_cli_installed echo "Attempting to download cmapi from drone: $s3_path" - + aws s3 cp $s3_path/ . --recursive --exclude "*" --include "mariadb-columnstore-cmapi*" --exclude "*debug*" --no-sign-request if ! ls mariadb-columnstore-cmapi*.deb 1> /dev/null 2>&1; then @@ -2308,10 +2344,10 @@ do_ci_apt_install() { # Install CMAPI if $CONFIGURE_CMAPI ; then - if ! DEBIAN_FRONTEND=noninteractive apt install $(pwd)/mariadb-columnstore-cmapi*.deb jq -y --quiet ; then + if ! DEBIAN_FRONTEND=noninteractive apt install $(pwd)/mariadb-columnstore-cmapi*.deb jq -y --quiet ; then printf "\n[!] Failed to install cmapi \n\n" mariadb -e "show status like '%Columnstore%';" - else + else post_cmapi_install_configuration fi else @@ -2338,14 +2374,14 @@ ci_install() { print_install_variables check_no_mdb_installed - # Construct URLs + # Construct URLs ci_url="https://es-repo.mariadb.net/jenkins/${product_branch_commit}/" echo "CI URL: $ci_url" process_cluster_variables echo "###################################" - + check_ci_build_exists - + if $CONFIGURE_CMAPI && [ -z $dev_drone_key ]; then printf "Missing dev_drone_key: \n"; exit; fi; @@ -2356,9 +2392,9 @@ ci_install() { if [ "$architecture" == "arm64" ]; then os_package+="rhel-$version_id-arm" fi - do_ci_yum_install "$@" + do_ci_yum_install "$@" ;; - + ubuntu | debian ) ci_url="${ci_url}DEB" @@ -2368,12 +2404,12 @@ ci_install() { else os_package="debian-$version_id" fi - - + + if [ "$architecture" == "arm64" ]; then os_package="${os_package}-arm" fi - do_ci_apt_install "$@" + do_ci_apt_install "$@" ;; *) # unknown option printf "\nci_install: os & version not implemented: $distro_info\n" @@ -2382,7 +2418,7 @@ ci_install() { } parse_install_local_additional_args() { - + # Default values rpm_deb_files_directory="/tmp/" @@ -2476,7 +2512,7 @@ check_rpms_debs_exist() { } do_local_rpm_install() { - + extra_packages="" galera_rpm="$(ls ${rpm_deb_files_directory}/*galera*.rpm 1> /dev/null 2>&1)" if [ -n "$galera_rpm" ]; then @@ -2506,11 +2542,11 @@ do_local_rpm_install() { # Install CMAPI if $CONFIGURE_CMAPI ; then - + if ! yum install ${rpm_deb_files_directory}/*columnstore-cmapi*.rpm -y; then printf "\n[!] Failed to install cmapi\n\n" mariadb -e "show status like '%Columnstore%';" - else + else if ! yum install jq -y; then printf "\n[!] Failed to install jq\nNot critical but please manually resolve\n" fi @@ -2524,7 +2560,7 @@ do_local_rpm_install() { } local_install() { - + parse_install_local_additional_args "$@" error_on_unknown_option=false parse_install_cluster_additional_args "$@" @@ -2537,10 +2573,10 @@ local_install() { case $distro_info in centos | rhel | rocky | almalinux ) - do_local_rpm_install "$@" + do_local_rpm_install "$@" ;; ubuntu | debian ) - # do_local_deb_install "$@" + # do_local_deb_install "$@" printf "not implemented\n" exit 0; ;; @@ -2576,7 +2612,7 @@ do_install() { # pull from dev repo - requires dev_drone_key dev_install "$@" ; ;; - ci ) + ci ) # pull from ci repo - requires ci_user_name and ci_password ci_install "$@" ; ;; @@ -2632,7 +2668,7 @@ confirm_cmapi_online_and_configured() { if systemctl start mariadb-columnstore-cmapi; then cmapi_check_ready printf " Pass\n" - else + else echo "[!!] Failed to start CMAPI" exit 1; fi @@ -2640,7 +2676,7 @@ confirm_cmapi_online_and_configured() { printf "systemd is not running - cant start cmapi\n\n" exit 1; fi - else + else # Check if the JSON string is in the expected format if ! echo "$cmapi_current_status" | jq -e '.started | type == "boolean"' >/dev/null; then @@ -2660,7 +2696,7 @@ confirm_cmapi_online_and_configured() { cmapi_check_ready printf " Done\n" fi; - + confirm_nodes_configured } @@ -2676,12 +2712,12 @@ confirm_nodes_configured() { sleep 1; fi else - + if [ "$(curl -k -s https://127.0.0.1:8640/cmapi/0.4.0/cluster/status \ --header 'Content-Type:application/json' \ --header "x-api-key:$api_key" | jq -r '.num_nodes')" == "0" ] ; then echo " - Stopped Columnstore via curl" - else + else add_primary_node_cmapi sleep 1; fi; @@ -2692,10 +2728,10 @@ confirm_nodes_configured() { do_dev_upgrade() { case $package_manager in - yum ) + yum ) s3_path="${s3_path}/$distro" drone_http="${drone_http}/$distro" - + echo "[drone] name=Drone Repository baseurl="$drone_http" @@ -2712,7 +2748,7 @@ enabled=1 echo "Error: No MariaDB-server RPMs were found." exit 1 fi - + # Run the YUM update printf "\nBeginning Update\n" if yum update "$mariadb_rpm" "MariaDB-*" "MariaDB-columnstore-engine" "MariaDB-columnstore-cmapi"; then @@ -2732,7 +2768,7 @@ EOF # Install MariaDB Server apt-get clean - apt-get update + apt-get update # Run the APT update printf "\nBeginning Update\n" @@ -2760,7 +2796,7 @@ EOF # For future release dev_upgrade() { - + # Variables if [ -z $dev_drone_key ]; then printf "[!] Missing dev_drone_key \nvi $0\n"; exit; fi; check_aws_cli_installed @@ -2793,7 +2829,7 @@ dev_upgrade() { # Stop All init_cs_down - wait_cs_down + wait_cs_down stop_mariadb stop_cmapi @@ -2826,7 +2862,7 @@ do_community_upgrade () { fi; case $package_manager in - yum ) + yum ) if [ ! -f "/etc/yum.repos.d/mariadb.repo" ]; then printf "\n[!] enterprise_upgrade: Expected to find mariadb.repo in /etc/yum.repos.d \n\n"; exit 1; fi; # Run the YUM update @@ -2867,7 +2903,7 @@ do_community_upgrade () { } community_upgrade() { - + version=$3 quick_version_check print_upgrade_variables @@ -2879,7 +2915,7 @@ community_upgrade() { # Prechecks printf "\nPrechecks\n" check_gtid_strict_mode - check_mariadb_versions + check_mariadb_versions # Stop All init_cs_down @@ -2891,7 +2927,7 @@ community_upgrade() { pre_upgrade_dbrm_backup pre_upgrade_configuration_backup - # Upgrade + # Upgrade do_community_upgrade # Start All @@ -2917,7 +2953,7 @@ confirm_dbrmctl_ok() { printf "." current_status=$(dbrmctl -v status); if [ $? -ne 0 ]; then - printf "\n[!] Failed to get dbrmctl -v status\n\n" + printf "\n[!] Failed to get dbrmctl -v status\n\n" exit 1 fi if [ $retry_counter -ge $retry_limit ]; then @@ -2937,7 +2973,7 @@ pre_upgrade_dbrm_backup() { if [ ! -f "$mcs_backup_manager_file" ]; then curl -O $url chmod +x "$mcs_backup_manager_file" - fi; + fi; # Check if the download was successful if [[ $? -ne 0 ]]; then @@ -2950,12 +2986,12 @@ pre_upgrade_dbrm_backup() { if grep -q "404: Not Found" "$mcs_backup_manager_file"; then echo "Error: File not found at the URL" printf "$url \n\n" - rm -f "$mcs_backup_manager_file" + rm -f "$mcs_backup_manager_file" exit 1 fi # Source the file - if ! source "$mcs_backup_manager_file" source ;then + if ! source "$mcs_backup_manager_file" source ;then printf "\n[!!] Failed to source $mcs_backup_manager_file\n\n" exit 1; else @@ -2965,7 +3001,7 @@ pre_upgrade_dbrm_backup() { # Confirm the function exists and the source of mcs_backup_manager.sh worked if command -v process_dbrm_backup &> /dev/null; then # Take an automated backup - if ! process_dbrm_backup -r 9999 -nb preupgrade_dbrm_backup --quiet ; then + if ! process_dbrm_backup -r 9999 -nb preupgrade_dbrm_backup --quiet ; then echo "[!!] Failed to take a DBRM backup before restoring" echo "exiting ..." exit 1; @@ -2974,7 +3010,7 @@ pre_upgrade_dbrm_backup() { echo "Error: 'process_dbrm_backup' function not found via $mcs_backup_manager_file"; exit 1; fi - + } pre_upgrade_configuration_backup() { @@ -3009,12 +3045,12 @@ check_mariadb_versions() { if [ -z "$current_mariadb_version" ]; then printf "[!] No current current_mariadb_version detected" exit 2; - fi + fi if [ -z "$version" ]; then printf "[!] No current upgrade version detected" exit 2; - fi + fi printf "%-35s ..." " - Checking MariaDB Version Newer" compare_versions "$current_mariadb_version" "$version" @@ -3024,30 +3060,30 @@ check_mariadb_versions() { check_gtid_strict_mode() { if ! command -v my_print_defaults &> /dev/null; then printf "\n[!] my_print_defaults not found. Ensure gtid_strict_mode=0 \n" - else + else printf "%-35s ..." " - Checking gtid_strict_mode" strict_mode=$(my_print_defaults --mysqld 2>/dev/null | grep "gtid[-_]strict[-_]mode") if [ -n "$strict_mode" ] && [ $strict_mode == "--gtid_strict_mode=1" ]; then echo "my_print_defaults --mysqld | grep gtid[-_]strict[-_]mode Result: $strict_mode" printf "Disable gtid_strict_mode before trying again\n\n" exit 1; - else + else printf " Done\n" fi - fi + fi } run_mariadb_upgrade() { if ! command -v mariadb-upgrade &> /dev/null; then printf "\n[!] mariadb-upgrade not found. Please install mariadb-upgrade\n\n" - exit 1; - fi + exit 1; + fi if [ "$pm_number" == "1" ]; then printf "\nMariaDB Upgrade\n" if ! mariadb-upgrade --write-binlog ; then printf "[!!] Failed to complete mariadb-upgrade \n" - exit 1; + exit 1; fi fi } @@ -3058,21 +3094,21 @@ do_enterprise_upgrade() { printf "\nDownloading Repo Setup\n" rm -rf mariadb_es_repo_setup url="https://dlm.mariadb.com/enterprise-release-helpers/mariadb_es_repo_setup" - if $enterprise_staging; then + if $enterprise_staging; then url="https://dlm.mariadb.com/$enterprise_token/enterprise-release-helpers-staging/mariadb_es_repo_setup" fi curl -LO "$url" -o mariadb_es_repo_setup; - chmod +x mariadb_es_repo_setup; + chmod +x mariadb_es_repo_setup; if ! bash mariadb_es_repo_setup --token="$enterprise_token" --apply --mariadb-server-version="$version"; then printf "\n[!] Failed to apply mariadb_es_repo_setup...\n\n" exit 2; fi; case $package_manager in - yum ) + yum ) if [ ! -f "/etc/yum.repos.d/mariadb.repo" ]; then printf "\n[!] enterprise_upgrade: Expected to find mariadb.repo in /etc/yum.repos.d \n\n"; exit 1; fi; - if $enterprise_staging; then + if $enterprise_staging; then sed -i 's/mariadb-es-main/mariadb-es-staging/g' /etc/yum.repos.d/mariadb.repo sed -i 's/mariadb-enterprise-server/mariadb-enterprise-staging/g' /etc/yum.repos.d/mariadb.repo printf "\n\n[+] Adjusted mariadb.repo to: mariadb-enterprise-staging\n\n" @@ -3090,7 +3126,7 @@ do_enterprise_upgrade() { apt ) if [ ! -f "/etc/apt/sources.list.d/mariadb.list" ]; then printf "\n[!] enterprise_upgrade: Expected to find mariadb.list in /etc/apt/sources.list.d \n\n"; exit 1; fi; - if $enterprise_staging; then + if $enterprise_staging; then sed -i 's/mariadb-enterprise-server/mariadb-enterprise-staging/g' /etc/apt/sources.list.d/mariadb.list apt update printf "\n\n[+] Adjusted mariadb.list to: mariadb-enterprise-staging\n\n" @@ -3130,9 +3166,9 @@ print_upgrade_variables() { } enterprise_upgrade() { - + # Variables - check_set_es_token "$@" + check_set_es_token "$@" version=$3 if [ -z "$version" ]; then printf "[!] Version not defined\n" @@ -3145,7 +3181,7 @@ enterprise_upgrade() { if pgrep -x "mariadbd" > /dev/null; then mariadb -e "show status like '%Columnstore%';" fi - + # Prechecks printf "\nPrechecks\n" check_gtid_strict_mode @@ -3153,7 +3189,7 @@ enterprise_upgrade() { # Stop All init_cs_down - wait_cs_down + wait_cs_down stop_mariadb stop_cmapi @@ -3173,11 +3209,11 @@ enterprise_upgrade() { # Post Upgrade confirm_dbrmctl_ok run_mariadb_upgrade - + } do_upgrade() { - + check_operating_system check_cpu_architecture check_package_managers @@ -3231,17 +3267,17 @@ prompt_user_for_cpu_architecture(){ arch="arm64" break ;; - *) + *) echo "Invalid option, please try again." ;; esac done } -# A quick way when a mac user runs "cs_package_manager.sh check" +# A quick way when a mac user runs "cs_package_manager.sh check" # since theres no /etc/os-release to auto detect what OS & version to search the mariadb repos on mac prompt_user_for_os() { - + # Prompt the user to select an operating system echo "Please select an operating system to search for:" os_options=("centos" "rhel" "rocky" "ubuntu" "debian") @@ -3260,8 +3296,8 @@ prompt_user_for_os() { prompt_user_for_cpu_architecture break ;; - - *) + + *) echo "Invalid option, please try again." ;; esac @@ -3281,8 +3317,8 @@ prompt_user_for_os() { prompt_user_for_cpu_architecture break ;; - - *) + + *) echo "Invalid option, please try again." ;; esac @@ -3290,7 +3326,7 @@ prompt_user_for_os() { break ;; - *) + *) echo "Invalid option, please try again." ;; esac @@ -3301,7 +3337,7 @@ prompt_user_for_os() { echo "Version: $version_id" echo "Architecture: $architecture ($arch)" echo "------------------------------------------------------------------" - + } # Seperated to allow re-use of the function for different path= variables @@ -3309,7 +3345,7 @@ maxscale_yum_minor_version_search() { curl -s "$url_base$minor_link$path" > $dbm_tmp_file package_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep "$path" | grep ".rpm" | grep -v $ignore | grep -i "maxscale-" | grep -v "-experimental" ) if [ ! -z "$package_links" ]; then - + at_least_one=true maxscale_link="$(echo $package_links | cut -f 1 -d " ")" maxscale_basename=$(basename $maxscale_link) @@ -3328,7 +3364,7 @@ maxscale_yum_minor_version_search() { maxscale_apt_minor_version_search() { #echo "searching: $url_base$minor_link$path" curl -s "$url_base$minor_link$path" > $dbm_tmp_file - + maxscale_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep $path | grep -v $ignore | grep $version_codename ) if [ ! -z "$maxscale_links" ]; then at_least_one=true @@ -3347,13 +3383,13 @@ maxscale_apt_minor_version_search() { fi; } -do_maxscale_check() { +do_maxscale_check() { ignore="/login" at_least_one=false curl -s "$url_base$url_page" > $dbm_tmp_file if [ $? -ne 0 ]; then printf "\n[!] Failed to access $url_base$url_page\n\n" - exit 1 + exit 1 fi if grep -q "404 - Page Not Found" $dbm_tmp_file; then printf "\n[!] 404 - Failed to access $url_base$url_page\n" @@ -3390,7 +3426,7 @@ do_maxscale_check() { if ! $at_least_one; then echo "[!] No MaxScale packages found for: $distro_info $version_id $architecture in $url_base$minor_link" fi - + ;; ubuntu | debian ) @@ -3408,7 +3444,7 @@ do_maxscale_check() { if ! $at_least_one; then echo "[!] No MaxScale packages found for: $distro_info $version_id $architecture in $url_base$minor_link" - fi + fi ;; *) # unknown option printf "\ndo_check: Not implemented for: $distro_info\n\n" @@ -3439,7 +3475,7 @@ handle_check_maxscale() { url_base="https://dlm.mariadb.com" case $repo in enterprise ) - check_set_es_token "$@" + check_set_es_token "$@" url_page="/browse/$enterprise_token/mariadb_maxscale_enterprise/" do_maxscale_check ;; @@ -3456,7 +3492,7 @@ handle_check_maxscale() { exit 2; esac exit 1 - fi; + fi; } parse_check_additional_args() { @@ -3481,15 +3517,15 @@ parse_check_additional_args() { } do_check() { - + parse_check_additional_args "$@" check_operating_system - check_cpu_architecture + check_cpu_architecture repo=$2 dbm_tmp_file="mdb-tmp.html" grep=$(which grep) - if [ $distro_info == "mac" ]; then + if [ $distro_info == "mac" ]; then grep=$(which ggrep) mac=true prompt_user_for_os @@ -3499,7 +3535,7 @@ do_check() { echo "Repository: $repo" case $repo in enterprise ) - check_set_es_token "$@" + check_set_es_token "$@" url_base="https://dlm.mariadb.com" url_page="/browse/$enterprise_token/mariadb_enterprise_server/" @@ -3508,7 +3544,7 @@ do_check() { curl -s "$url_base$url_page" > $dbm_tmp_file if [ $? -ne 0 ]; then printf "\n[!] Failed to access $url_base$url_page\n\n" - exit 1 + exit 1 fi if grep -q "404 - Page Not Found" $dbm_tmp_file; then printf "\n[!] 404 - Failed to access $url_base$url_page\n" @@ -3516,7 +3552,7 @@ do_check() { printf "See: https://customers.mariadb.com/downloads/token/ \n\n" exit 1 fi - + major_version_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep $url_page | grep -v $ignore | grep -v -x $url_page ) #echo $major_version_links for major_link in ${major_version_links[@]} @@ -3548,7 +3584,7 @@ do_check() { fi; ;; ubuntu | debian ) - + path="deb/pool/main/m/" curl -s "$url_base$minor_link$path" > $dbm_tmp_file @@ -3572,7 +3608,7 @@ do_check() { printf "%-8s %-12s %-12s %-12s\n" "MariaDB:" "$mariadb_version" "Columnstore:" "$columnstore_version"; fi; done - + ;; *) # unknown option printf "\ndo_check: Not implemented for: $distro_info\n\n" @@ -3645,7 +3681,7 @@ do_check() { fi done done - + if ! $at_least_one; then printf "\n[!] No columnstore packages found for: $distro_short $arch \n\n" fi @@ -3662,7 +3698,7 @@ do_check() { do_local_apt_maxscale_download_loop() { #echo "URL: $url${path}" - curl -s "${url}${path}" > $dbm_tmp_file + curl -s "${url}${path}" > $dbm_tmp_file #major_version_to_search="$(echo $version| cut -d'.' -f1-2)" maxscale_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep "maxscale-" | grep ".deb" | grep $version_codename ) if [ -n "$maxscale_links" ]; then @@ -3677,10 +3713,10 @@ do_local_apt_maxscale_download() { # Validate major version exists url="${url_base}${url_page}${version}" - curl -s "${url}/" > $dbm_tmp_file + curl -s "${url}/" > $dbm_tmp_file if [ $? -ne 0 ]; then printf "\n[!] Failed to access ${url}/\n\n" - exit 1 + exit 1 fi if grep -q "404 - Page Not Found" $dbm_tmp_file; then printf "[!] 404 - Version: ${version} does not exist \n\n" @@ -3707,7 +3743,7 @@ do_local_apt_maxscale_download() { break fi done - + if [ -z "$maxscale_link" ]; then printf "No DEB files found for MaxScale Version: $version OS_CPU: $distro_info $architecture \n\n" exit 1; @@ -3719,7 +3755,7 @@ do_local_apt_maxscale_download() { do_local_yum_maxscale_download_loop() { #echo "URL: $url${path}" - curl -s "${url}${path}" > $dbm_tmp_file + curl -s "${url}${path}" > $dbm_tmp_file #major_version_to_search="$(echo $version| cut -d'.' -f1-2)" maxscale_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | grep "maxscale-" | grep ".rpm" ) if [ -n "$maxscale_links" ]; then @@ -3731,13 +3767,13 @@ do_local_yum_maxscale_download_loop() { } do_local_yum_maxscale_download() { - + # Validate major version exists url="${url_base}${url_page}${version}" - curl -s "${url}/" > $dbm_tmp_file + curl -s "${url}/" > $dbm_tmp_file if [ $? -ne 0 ]; then printf "\n[!] Failed to access ${url}/\n\n" - exit 1 + exit 1 fi if grep -q "404 - Page Not Found" $dbm_tmp_file; then printf "[!] 404 - Version: ${version} does not exist \n\n" @@ -3764,7 +3800,7 @@ do_local_yum_maxscale_download() { break fi done - + if [ -z "$maxscale_link" ]; then printf "[!] No RPM files found for MaxScale Version: $version OS_CPU: $distro_info $architecture \n\n" exit 1; @@ -3777,20 +3813,20 @@ do_local_yum_maxscale_download() { download_maxscale_enterprise() { - check_set_es_token "$@" + check_set_es_token "$@" quick_version_check print_download_variables echo "------------------------------------------------------------------" - + url_base="https://dlm.mariadb.com" url_page="/browse/$enterprise_token/mariadb_maxscale_enterprise/" dbm_tmp_file="mdb-tmp.html" # Validate Enterprise Token Works - curl -s "${url_base}${url_page}/" > $dbm_tmp_file + curl -s "${url_base}${url_page}/" > $dbm_tmp_file if [ $? -ne 0 ]; then printf "\n[!] Failed to access ${url_base}${url_page}/\n\n" - exit 1 + exit 1 fi if grep -q "404 - Page Not Found" $dbm_tmp_file; then printf "\n[!] 404 - Failed to access ${url_base}${url_page}/\n" @@ -3820,7 +3856,7 @@ download_maxscale_community() { quick_version_check print_download_variables echo "------------------------------------------------------------------" - + url_base="https://dlm.mariadb.com" url_page="/browse/mariadbmaxscale/" dbm_tmp_file="mdb-tmp.html" @@ -3834,7 +3870,7 @@ download_maxscale_community() { ;; *) # unknown option printf "\ndownload_community: os & version not implemented: $distro_info\n" - exit 2; + exit 2; esac } @@ -3842,7 +3878,7 @@ download_maxscale_community() { handle_download_maxscale() { repo="$2" version="$3" - + while [[ $# -gt 0 ]]; do parameter="$1" @@ -3873,7 +3909,7 @@ handle_download_maxscale() { exit 2; esac exit 1 - fi; + fi; } parse_download_additional_args() { @@ -3887,6 +3923,10 @@ parse_download_additional_args() { key="$1" case $key in + --with-dev) + USE_DEV_PACKAGES=true + shift # past argument + ;; -t | --token) enterprise_token="$2" shift # past argument @@ -3930,7 +3970,7 @@ print_download_variables() { if [ $repo == "enterprise" ] || [ $repo == "enterprise_staging" ] ; then echo "Token: $enterprise_token" fi - + if [ $repo != "dev" ]; then echo "Version: $version" else @@ -3963,7 +4003,7 @@ version_greater_equal() { # Split versions into arrays read -r -a ver1_parts <<< "$ver1" read -r -a ver2_parts <<< "$ver2" - + # Pad with zeros to ensure both have the same length while (( ${#ver1_parts[@]} < ${#ver2_parts[@]} )); do ver1_parts+=("0") @@ -4002,7 +4042,7 @@ do_local_yum_enterprise_download() { if [ "$download_all" == true ]; then search="" fi - + if [ "$remove_debug" == true ]; then rpm_file_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | $grep "${url_base}/${enterprise_token}" | $grep -E "$search" | $grep -vE "debug|devel" ) else @@ -4013,7 +4053,7 @@ do_local_yum_enterprise_download() { echo "No RPM files found matching the criteria." exit 1 fi - + printf "Downloading RPMs\n"; highest_columnstore_version="" highest_columnstore_version_rpm_link="" @@ -4052,7 +4092,7 @@ do_local_yum_enterprise_download() { elif [[ "$rpm_link" =~ .*columnstore-cmapi-[0-9]+.* ]]; then cmapi_version="${rpm_link#*columnstore-cmapi-}" cmapi_version="${cmapi_version%%-*}" - + if [[ -z "$highest_cmapi_version" ]] || version_greater_equal "$cmapi_version" "$highest_cmapi_version"; then highest_cmapi_version="$cmapi_version" highest_cmapi_version_rpm_link="$rpm_link" @@ -4063,7 +4103,7 @@ do_local_yum_enterprise_download() { fi done - + if [ -n "$highest_columnstore_version" ]; then print_and_download "$highest_columnstore_version_rpm_link" fi @@ -4095,12 +4135,12 @@ do_local_apt_enterprise_download() { cmpai_url="${url_base}${url_page}${version}/deb/pool/main/m/mariadb-columnstore-cmapi/" curl -s "$url" > $dbm_tmp_file curl -s "$cmpai_url" > ${dbm_tmp_file}_cmapi - + search="mariadb-server-|mariadb-plugin-columnstore|mariadb-client-|mariadb-common|mariadb-shared-|mariadb-backup|libmariadb3_|mysql-common|columnstore-cmapi" if [ "$download_all" == true ]; then search="" fi - + if [ "$remove_debug" == true ]; then rpm_file_links=$($grep -oP 'href="\K[^"]+' $dbm_tmp_file | $grep "${url_base}/${enterprise_token}" | $grep -E "$search" | $grep -vE "dbgsym" | $grep "$distro_short" ) rpm_file_links_cmapi=$($grep -oP 'href="\K[^"]+' "${dbm_tmp_file}_cmapi" | $grep "${url_base}/${enterprise_token}" | $grep -E "$search" | $grep -vE "dbgsym" | $grep "$distro_short" ) @@ -4123,7 +4163,7 @@ do_local_apt_enterprise_download() { highest_debug_columnstore_version_rpm_link="" for rpm_link in ${rpm_file_links[@]} do - + # Confirm arch matches if [[ "$rpm_link" != *"$arch"* ]] && [[ "$rpm_link" != *"_all"* ]] ; then continue @@ -4158,7 +4198,7 @@ do_local_apt_enterprise_download() { print_and_download "$rpm_link" fi done - + pivit_character_between_version_and_arch="+" # Backward comaibility of cmapi 10.6.14-9 and prior if [ ${#rpm_file_links_cmapi[@]} -eq 0 ] || [ "${rpm_file_links_cmapi[@]}" == "" ]; then @@ -4211,11 +4251,11 @@ download_enterprise() { quick_version_check print_download_variables echo "------------------------------------------------------------------" - + url_base="https://dlm.mariadb.com" url_page="/browse/$enterprise_token/mariadb_enterprise_server/" dbm_tmp_file="mdb-tmp.html" - + case $distro_info in centos | rhel | rocky | almalinux ) @@ -4234,7 +4274,7 @@ download_enterprise() { } download_dev() { - + # bash cs_package_manager.sh download dev develop-23.02 pull_request/11460 if [ -z $dev_drone_key ]; then printf "Missing dev_drone_key: \n"; exit; fi; check_aws_cli_installed @@ -4244,7 +4284,7 @@ download_dev() { product="10.6-enterprise" if [ -z "$branch" ]; then printf "Missing branch: $branch\n"; exit 2; fi; if [ -z "$build" ]; then printf "Missing build: $branch\n"; exit 2; fi; - + print_download_variables s3_path="$dronePath/$branch/$build/$product/$arch/$distro" drone_http=$(echo "$s3_path" | sed "s|s3://$dev_drone_key/|https://${dev_drone_key}.s3.amazonaws.com/|") @@ -4254,9 +4294,17 @@ download_dev() { echo "------------------------------------------------------------------" check_dev_build_exists + # Optional dev package includes + DEV_RPM_INCLUDE="" + DEV_DEB_INCLUDE="" + if $USE_DEV_PACKAGES; then + DEV_RPM_INCLUDE='--include "MariaDB-devel-*.rpm"' + DEV_DEB_INCLUDE='--include "libmariadb-dev*.deb" --include "libmariadb-dev-compat*.deb"' + fi + case $distro_info in centos | rhel | rocky | almalinux ) - + printf "Removing $(pwd)/*.rpm"; if rm -rf *.rpm; then printf " ... Done\n" @@ -4268,7 +4316,7 @@ download_dev() { if [ "$download_all" == true ]; then aws s3 cp $s3_path/ . --exclude "*" --include "*.rpm" --recursive --no-sign-request fi - + if [ "$remove_debug" == true ]; then aws s3 cp $s3_path/ . --recursive --exclude "*" \ --include "MariaDB-server-*.rpm" \ @@ -4276,18 +4324,20 @@ download_dev() { --include "MariaDB-columnstore-cmapi-*.rpm" \ --include "MariaDB-columnstore-engine-*.rpm" \ --include "MariaDB-shared-*.rpm" \ + $DEV_RPM_INCLUDE \ --include "MariaDB-backup-*.rpm" \ --include "MariaDB-client-*.rpm" \ --include "galera*" \ --include "jemalloc*" \ --exclude "*debug*" --no-sign-request else - aws s3 cp $s3_path/ . --recursive --exclude "*" \ + aws s3 cp $s3_path/ . --recursive --exclude "*" \ --include "MariaDB-server-*.rpm" \ --include "MariaDB-common-*.rpm" \ --include "MariaDB-columnstore-cmapi-*.rpm" \ --include "MariaDB-columnstore-engine-*.rpm" \ --include "MariaDB-shared-*.rpm" \ + $DEV_RPM_INCLUDE \ --include "MariaDB-backup-*.rpm" \ --include "MariaDB-client-*.rpm" \ --include "galera*" \ @@ -4296,7 +4346,7 @@ download_dev() { ;; ubuntu | debian ) - + printf "Removing $(pwd)/*.deb"; if rm -rf *.deb; then printf " ... Done\n" @@ -4309,30 +4359,33 @@ download_dev() { aws s3 cp $s3_path/ . --exclude "*" --include "*.deb" --recursive --no-sign-request fi - if [ "$remove_debug" == true ]; then - aws s3 cp $s3_path/ . --recursive --exclude "*" \ - --include "mariadb-server*.deb" \ - --include "mariadb-common*.deb" \ - --include "mariadb-columnstore-cmapi*.deb" \ - --include "mariadb-plugin-columnstore*.deb" \ - --include "mysql-common*.deb" \ - --include "mariadb-client*.deb" \ - --include "libmariadb3_*.deb" \ - --include "galera*" \ - --include "jemalloc*" \ - --exclude "*debug*" --no-sign-request - else - aws s3 cp $s3_path/ . --recursive --exclude "*" \ - --include "mariadb-server*.deb" \ - --include "mariadb-common*.deb" \ - --include "mariadb-columnstore-cmapi*.deb" \ - --include "mariadb-plugin-columnstore*.deb" \ - --include "mysql-common*.deb" \ - --include "mariadb-client*.deb" \ - --include "libmariadb3_*.deb" \ - --include "galera*" \ - --include "jemalloc*" --no-sign-request + AWS_ARGS=("s3" "cp" "$s3_path/" "." "--recursive" "--exclude" "*") + AWS_ARGS+=( + "--include" "mariadb-server*.deb" + "--include" "mariadb-common*.deb" + "--include" "mariadb-columnstore-cmapi*.deb" + "--include" "mariadb-plugin-columnstore*.deb" + "--include" "mysql-common*.deb" + "--include" "mariadb-client*.deb" + "--include" "libmariadb3_*.deb" + "--include" "galera*" + "--include" "jemalloc*" + ) + # Optional dev headers + if $USE_DEV_PACKAGES; then + AWS_ARGS+=( + "--include" "libmariadb-dev*.deb" + "--include" "libmariadb-dev-compat*.deb" + ) fi + # Exclude debug if requested + if [ "$remove_debug" == true ]; then + AWS_ARGS+=("--exclude" "*debug*") + fi + # Always add no-sign-request + AWS_ARGS+=("--no-sign-request") + + aws "${AWS_ARGS[@]}" ;; *) # unknown option @@ -4356,7 +4409,7 @@ do_download() { check_operating_system check_cpu_architecture grep=$(which grep) - if [ $distro_info == "mac" ]; then + if [ $distro_info == "mac" ]; then grep=$(which ggrep) mac=true prompt_user_for_os @@ -4410,7 +4463,7 @@ check_set_es_token() { esac done - if [ -z $enterprise_token ]; then + if [ -z $enterprise_token ]; then printf "\n[!] Enterprise token empty: $enterprise_token\n" printf "1) edit $0 enterprise_token='xxxxxx' \n" printf "2) add flag --token xxxxxxxxx \n" @@ -4423,12 +4476,12 @@ check_set_es_token() { global_dependencies() { if ! command -v curl &> /dev/null; then printf "\n[!] curl not found. Please install curl\n\n" - exit 1; - fi + exit 1; + fi if ! command -v cut &> /dev/null; then printf "\n[!] cut not found. Please install cut\n\n" - exit 1; - fi + exit 1; + fi } print_cs_pkg_mgr_version_info() { diff --git a/cmapi/scripts/trace_to_plantuml.py b/cmapi/scripts/trace_to_plantuml.py new file mode 100644 index 000000000..c0146c41d --- /dev/null +++ b/cmapi/scripts/trace_to_plantuml.py @@ -0,0 +1,773 @@ +#!/usr/bin/env python3 +""" +Read CMAPI tracing JSONL logs (JsonFormatter) from one or more nodes and render a +PlantUML sequence diagram showing inbound (SERVER) and outbound (CLIENT) requests +between nodes. + +Inputs + - One or more JSONL log files + - Each input log file is assumed to be from a single node. + +Output + - PlantUML written to the specified output file. + +Parsing and correlation + - Detects span_begin/span_end events from JSONL (msg in {"span_begin","span_end"}). + - Per file, reads the "my_addresses" record and takes the first non-loopback IP (preferring IPv4) + as the node's local IP; all spans from that file carry this local IP. + - Merge spans across files by (trace_id, span_id). Duration and status_code are added when present. + +Normalization and aliasing + - Localhost remap: any 127.0.0.1/::1 seen in that file is replaced with the file's local IP. + - Remote host normalization: hostnames with embedded IPs (dashed or dotted) are converted to dotted IPs. + - Aliasing: if an IP is provided via --alias IP=NAME, the diagram uses NAME for that participant. + +Diagram semantics + - SERVER spans: " --> METHOD /path" render as remote -> local (local is the file's node). + - CLIENT spans: "HTTP METHOD https://host/path" render as local -> remote. + - CLIENT spans prefer request_duration_ms; otherwise duration_ms is shown if available. + - SERVER spans include [status_code] when available. + +CLI options + - --filter-trace TRACE_ID Only include the specified trace_id. + - --alias IP=NAME Map an IP to a friendly name (may be specified multiple times). + - --list-ips Print the set of encountered IPs and exit. + +Usage examples + python3 cmapi/scripts/trace_to_plantuml.py json_trace_mcs1 json_trace_mcs2 json_trace_mcs3 --output diagram.puml + python3 cmapi/scripts/trace_to_plantuml.py json_trace_mcs1 json_trace_mcs2 json_trace_mcs3 --output diagram.puml + --filter-trace 1f3b2cb2... --alias 172.31.41.127=mcs2 --alias 172.31.45.34=mcs3 +""" +from __future__ import annotations + +import argparse +import colorsys +import dataclasses +import datetime as dt +import hashlib +import ipaddress +import json +import logging +import os +import re +import sys +from typing import Dict, List, Optional, Tuple +from urllib.parse import urlparse + +JSON_TS_FORMATS = [ + "%Y-%m-%d %H:%M:%S,%f", # default logging formatter + "%Y-%m-%d %H:%M:%S", # without millis +] + +# Minimum processing time to show activation lifeline (in milliseconds) +ACTIVATION_MIN_MS = 1000 + +# dashed IP fragment pattern; will convert '172-31-45-34' -> '172.31.45.34' +DASHED_IP_RE = re.compile(r"(?P\b\d{1,3}(?:-\d{1,3}){3}\b)") + +logger = logging.getLogger("trace_viz") + +def _normalize_cmapi_path(path: str) -> str: + """Remove common CMAPI prefixes like '/cmapi/0.4.0' and '/cmapi/'. + """ + if not path: + return "/" + # normalize double slashes + while '//' in path: + path = path.replace('//', '/') + prefixes = [ + '/cmapi/0.4.0', + '/cmapi/', + ] + for pref in prefixes: + if path.startswith(pref): + path = path[len(pref):] + break + if not path.startswith('/'): + path = '/' + path + return path if path != '//' else '/' + +def _hex_color_for_trace(trace_id: str, depth: int) -> str: + """Derive a stable color from trace_id and vary by depth. + + We map trace_id hash to a base hue and then shift lightness slightly per depth. + Returns a hex color string like '#a1b2c3'. + """ + h = hashlib.sha1(trace_id.encode('utf-8')).digest() + # base hue from first byte, saturation fixed, lightness varies by depth + base_hue = h[0] / 255.0 # 0..1 + sat = 0.55 + base_lightness = 0.55 + # depth shift: +/- 0.06 per level alternating + shift = ((depth % 6) - 3) * 0.02 # -0.06..+0.06 + lightness = max(0.35, min(0.75, base_lightness + shift)) + # convert HSL to RGB + r, g, b = colorsys.hls_to_rgb(base_hue, lightness, sat) + return f"#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}" + + +def _colorized_arrow(arrow: str, color_hex: str) -> str: + """Insert a PlantUML color into an arrow token. + + Examples: + '->' -> '-[#hex]->' + '-->' -> '-[#hex]-->' + '->>' -> '-[#hex]->>' + '-->>' -> '-[#hex]-->>' + """ + if not color_hex: + return arrow + if arrow.startswith('--'): + return f"-[{color_hex}]{arrow}" + return f"-[{color_hex}]{arrow}" + + +def _fmt_secs(ms: float) -> str: + """Format milliseconds as seconds with 2 decimal places.""" + return f"{(ms or 0.0)/1000.0:.2f}s" + +@dataclasses.dataclass +class Span: + trace_id: str + span_id: str + parent_span_id: Optional[str] + kind: str # SERVER or CLIENT + name: str + start_ts: dt.datetime + duration_ms: Optional[float] = None + status_code: Optional[int] = None + request_duration_ms: Optional[float] = None + local_label: str = "local" + local_ip: Optional[str] = None + local_file: Optional[str] = None # full path to source log file + # Structured attributes + http_method: Optional[str] = None + http_path: Optional[str] = None + http_url: Optional[str] = None + client_ip: Optional[str] = None + request_is_sync: Optional[bool] = None + + +@dataclasses.dataclass +class Message: + ts: dt.datetime + src: str + dst: str + label: str + arrow: str # '->', '->>', '-->', '-->>' + + +@dataclasses.dataclass +class Directive: + ts: dt.datetime + text: str # e.g., 'activate mcs1' or 'deactivate mcs1' + + +def parse_json_ts(ts_str: str) -> Optional[dt.datetime]: + for fmt in JSON_TS_FORMATS: + try: + return dt.datetime.strptime(ts_str, fmt) + except Exception: + continue + return None + + +def parse_json_span_event(obj: Dict[str, object]) -> Optional[Tuple[str, Dict[str, object]]]: + """Return (msg, data) if this JSON object is a span_begin/span_end with usable fields.""" + msg = str(obj.get("msg", "")) + if msg not in {"span_begin", "span_end"}: + return None + return msg, obj + + +def local_label_from_path(path: str) -> str: + base = os.path.basename(path) + return base + + +def collect_known_ips_from_span(sp: "Span", known_ips: set[str]) -> None: + """Populate known IPs using structured span attributes only.""" + try: + if sp.kind == "SERVER" and sp.client_ip and is_ip(sp.client_ip): + known_ips.add(sp.client_ip) + elif sp.kind == "CLIENT" and sp.http_url: + try: + host = urlparse(sp.http_url).hostname + except Exception: + host = None + if host and is_ip(host): + known_ips.add(host) + except Exception: + pass + + +def normalize_participant(name: str) -> str: + # PlantUML participant names cannot contain spaces or some punctuation unless quoted. + # We'll use an alias form: participant "label" as alias, where alias is alnum/underscore. + # This function returns a safe alias; the label is kept original elsewhere. + alias = re.sub(r"[^A-Za-z0-9_]", "_", name) + # Avoid alias starting with a digit + if alias and alias[0].isdigit(): + alias = f"n_{alias}" + return alias or "unknown" + + +# Legacy name-parsing helpers removed; rely on structured attributes. + + +def is_ip(value: str) -> bool: + try: + ipaddress.ip_address(value) + return True + except Exception: + return False + + +def parse_jsonl_logs(log_paths: List[str]) -> Tuple[List[Span], List[str]]: + spans_by_key: Dict[Tuple[str, str], Span] = {} + known_ips: set[str] = set() + for path in log_paths: + local_label = local_label_from_path(path) + # Per-file local IP discovered from a special 'my_addresses' record + file_local_ip: Optional[str] = None + if not os.path.exists(path): + logger.error("Input log file does not exist: %s", path) + raise FileNotFoundError(path) + with open(path, "r", encoding="utf-8", errors="ignore") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + except Exception as exc: + logger.error("Failed to parse JSON line in %s: %s", path, line[:256]) + raise ValueError(f"JSON parse error in {path}: {exc}") + # Detect and store local IPs for this file + if str(obj.get("msg", "")) == "my_addresses": + addrs = obj.get("my_addresses") + if isinstance(addrs, list): + # Pick first non-loopback IPv4 if available, else first non-loopback + candidates = [a for a in addrs if isinstance(a, str) and a not in {"127.0.0.1", "::1"}] + # Prefer IPv4 + ipv4 = [a for a in candidates if re.match(r"^\d{1,3}(?:\.\d{1,3}){3}$", a)] + chosen = ipv4[0] if ipv4 else (candidates[0] if candidates else None) + if chosen: + file_local_ip = chosen + known_ips.add(chosen) + else: + logger.error("No non-loopback addresses found in my_addresses for %s: %s", path, addrs) + raise ValueError(f"No non-loopback addresses found in my_addresses for {path}: {addrs}") + # continue scanning; spans may come later + continue + parsed = parse_json_span_event(obj) + if not parsed: + continue + msg, data = parsed + ts_raw = str(data.get("ts", "")) + ts = parse_json_ts(ts_raw) + if ts is None: + logger.error("Unparseable timestamp '%s' in %s; defaulting to epoch", ts_raw, path) + ts = dt.datetime.fromtimestamp(0) + # Fields from flattened span dict + name = str(data.get("span_name", data.get("name", ""))) + kind = str(data.get("kind", "")).upper() + tid = str(data.get("trace_id", "")) + sid = str(data.get("span_id", "")) + psid = data.get("parent_span_id") + key = (tid, sid) + + if msg == "span_begin": + if not tid or not sid or not name or not kind: + logger.error( + "Malformed span_begin in %s (missing fields): tid=%s sid=%s name=%s kind=%s", + path, tid, sid, name, kind, + ) + raise ValueError(f"Malformed span_begin in {path} (missing fields): tid={tid} sid={sid} name={name} kind={kind}") + if kind not in {"SERVER", "CLIENT"}: + logger.error( + "Unexpected span kind in span_begin: kind=%s tid=%s sid=%s name=%s (file=%s)", + kind, tid, sid, name, path, + ) + raise ValueError(f"Unexpected span kind in span_begin: kind={kind} tid={tid} sid={sid} name={name} (file={path})") + sp = Span( + trace_id=tid, + span_id=sid, + parent_span_id=str(psid) if psid else None, + kind=kind, + name=name, + start_ts=ts, + local_label=local_label, + local_ip=file_local_ip, + local_file=path, + ) + # Capture structured fields when present + sp.http_method = str(data.get("http.method")) if data.get("http.method") is not None else sp.http_method + sp.http_path = str(data.get("http.path")) if data.get("http.path") is not None else sp.http_path + sp.http_url = str(data.get("http.url")) if data.get("http.url") is not None else sp.http_url + sp.client_ip = str(data.get("client.ip")) if data.get("client.ip") is not None else sp.client_ip + # Status code if present + if data.get("http.status_code") is not None: + try: + sp.status_code = int(data.get("http.status_code")) + except Exception: + pass + if data.get("request_is_sync") is not None: + try: + sp.request_is_sync = bool(data.get("request_is_sync")) + except Exception: + pass + spans_by_key[key] = sp + collect_known_ips_from_span(sp, known_ips) + elif msg == "span_end": + sp = spans_by_key.get(key) + if sp is None: + # Create minimal span if begin was not seen in provided files + logger.error( + "span_end without matching span_begin (tid=%s sid=%s) in %s; creating synthetic span", + tid, sid, path, + ) + sp = Span( + trace_id=tid, + span_id=sid or f"unknown_{len(spans_by_key)}", + parent_span_id=str(psid) if psid else None, + kind=kind or "SERVER", + name=name, + start_ts=ts, + local_label=local_label, + local_ip=file_local_ip, + local_file=path, + ) + spans_by_key[key] = sp + collect_known_ips_from_span(sp, known_ips) + # Update structured fields from end event (often richer) + try: + if data.get("http.method") is not None: + sp.http_method = str(data.get("http.method")) + if data.get("http.path") is not None: + sp.http_path = str(data.get("http.path")) + if data.get("http.url") is not None: + sp.http_url = str(data.get("http.url")) + if data.get("client.ip") is not None: + sp.client_ip = str(data.get("client.ip")) + if data.get("http.status_code") is not None: + try: + sp.status_code = int(data.get("http.status_code")) + except Exception: + pass + if data.get("request_is_sync") is not None: + sp.request_is_sync = bool(data.get("request_is_sync")) + except Exception: + pass + # Duration may be present as duration_ms attribute in JSON; otherwise leave None + dur = data.get("duration_ms") + try: + sp.duration_ms = float(dur) if dur is not None else sp.duration_ms + if sp.duration_ms is not None and sp.duration_ms < 0: + logger.warning( + "Negative duration_ms for span: tid=%s sid=%s duration_ms=%s", + sp.trace_id, sp.span_id, sp.duration_ms, + ) + except Exception: + pass + # Prefer explicit outbound request duration if present + try: + req_dur = data.get("request_duration_ms") + if req_dur is not None: + sp.request_duration_ms = float(req_dur) + if sp.request_duration_ms < 0: + logger.warning( + "Negative request_duration_ms for CLIENT span: tid=%s sid=%s duration_ms=%s", + sp.trace_id, sp.span_id, sp.request_duration_ms, + ) + except Exception: + pass + # Retroactively assign local_ip to any earlier spans from this file that were created + # before the 'my_addresses' record was seen + if file_local_ip: + for sp in spans_by_key.values(): + if sp.local_file == path and sp.local_ip is None: + sp.local_ip = file_local_ip + else: + logger.error("Did not see my_addresses for file %s; local_ip unknown for its spans", path) + # Sort spans by start_ts + spans = sorted(spans_by_key.values(), key=lambda s: s.start_ts) + # Post-parse validation: check unresolved parent_span_id references + by_trace: Dict[str, set[str]] = {} + for sp in spans: + by_trace.setdefault(sp.trace_id, set()).add(sp.span_id) + for sp in spans: + if sp.parent_span_id and sp.parent_span_id not in by_trace.get(sp.trace_id, set()): + logger.error( + "Unresolved parent_span_id: tid=%s sid=%s parent_sid=%s (no matching span_begin/end in inputs)", + sp.trace_id, sp.span_id, sp.parent_span_id, + ) + return spans, sorted(known_ips) + + +def normalize_hostname_to_ip(host: str, known_ips: List[str], host_map: Dict[str, str]) -> str: + # If host contains a dashed IP that matches a known dotted IP, replace with that IP + # First, use explicit host->ip mappings from the log + if host in host_map: + return host_map[host] + # Prefer explicit matches against our known IPs (dotted or dashed forms) + for ip in known_ips: + dashed_ip = ip.replace('.', '-') + if dashed_ip in host or ip in host: + return ip + # If the hostname includes a dashed-quad anywhere, extract and return only the dotted IP + m = DASHED_IP_RE.search(host) + if m: + dashed = m.group("dashed") + dotted = dashed.replace("-", ".") + return dotted + # If the hostname includes a dotted-quad anywhere, return that IP + m2 = re.search(r"\b(\d{1,3}(?:\.\d{1,3}){3})\b", host) + if m2: + return m2.group(1) + return host + + +def build_plantuml(spans: List[Span], filter_trace: Optional[str], known_ips: List[str], ip_alias_map: Dict[str, str]) -> str: + lines: List[str] = [] + logged_replacements: set[Tuple[str, str]] = set() # (original, replaced) + lines.append("@startuml") + # Collect participants from spans + participants: Dict[str, str] = {} # alias -> label + + msgs: List[Message] = [] + directives: List[Directive] = [] + unaliased_ips_logged: set[str] = set() + + # Build a per-file preferred local label from alias map by looking at peers in the same file + file_preferred_label: Dict[str, str] = {} + file_label_counts: Dict[str, Dict[str, int]] = {} + for sp in spans: + fname = sp.local_file or "" + if not fname: + continue + # Consider both SERVER remote and CLIENT host using structured attributes + if sp.kind == "SERVER" and sp.client_ip: + norm = normalize_hostname_to_ip(sp.client_ip, known_ips, {}) + if norm in ip_alias_map: + label = ip_alias_map[norm] + file_label_counts.setdefault(fname, {}).setdefault(label, 0) + file_label_counts[fname][label] += 1 + elif sp.kind == "CLIENT" and sp.http_url: + try: + host = urlparse(sp.http_url).hostname or "" + except Exception: + host = "" + if host: + norm = normalize_hostname_to_ip(host, known_ips, {}) + if norm in ip_alias_map: + label = ip_alias_map[norm] + file_label_counts.setdefault(fname, {}).setdefault(label, 0) + file_label_counts[fname][label] += 1 + for fname, counts in file_label_counts.items(): + best_label = max(counts.items(), key=lambda kv: kv[1])[0] + file_preferred_label[fname] = best_label + + for sp in spans: + if filter_trace and sp.trace_id != filter_trace: + continue + # Compute depth (nesting) via parent chain length within the same trace + depth = 0 + try: + seen = set() + cur = sp.parent_span_id + while cur: + if (sp.trace_id, cur) in seen: + break + seen.add((sp.trace_id, cur)) + depth += 1 + parent = next((p for p in spans if p.trace_id == sp.trace_id and p.span_id == cur), None) + cur = parent.parent_span_id if parent else None + except Exception: + depth = 0 + color = _hex_color_for_trace(sp.trace_id, depth) + trace_tag = f"[{sp.trace_id[:4]}-{sp.span_id[:2]}]" + time_str = sp.start_ts.strftime("%H:%M:%S") + # Prefer explicit outbound request duration for CLIENT spans if present + if sp.kind == "CLIENT" and sp.request_duration_ms is not None: + dur_str = f" ({_fmt_secs(sp.request_duration_ms)})" + else: + dur_str = f" ({_fmt_secs(sp.duration_ms)})" if sp.duration_ms is not None else "" + + if sp.kind == "SERVER": + # Structured attributes required: client_ip, http_method, http_path + remote = sp.client_ip + method = sp.http_method + path = sp.http_path + if not (remote and method and path): + logger.warning( + "Skipping SERVER span due to missing structured fields: tid=%s sid=%s name=%s client.ip=%s http.method=%s http.path=%s", + sp.trace_id, sp.span_id, sp.name, sp.client_ip, sp.http_method, sp.http_path, + ) + continue + label = f"{method} {_normalize_cmapi_path(path)}" + # Server status code + code_str = f" [{sp.status_code}]" if sp.status_code is not None else "" + if sp.status_code is None: + logger.warning( + "Missing http.status_code for SERVER span at render time: tid=%s sid=%s method=%s path=%s", + sp.trace_id, sp.span_id, method, path, + ) + # Replace loopback with file's real IP if available + if remote in {"127.0.0.1", "::1"} and sp.local_ip: + orig = remote + remote = sp.local_ip + key = (orig, remote) + if key not in logged_replacements: + file_label = os.path.basename(sp.local_file) if sp.local_file else "?" + logger.info("Loopback remapped (SERVER): %s -> %s [file=%s]", orig, remote, file_label) + logged_replacements.add(key) + # do not filter self traffic + # If remote is an IP, keep as-is; else use hostname + # apply aliasing for IPs + # Log unaliased IPs + if is_ip(remote) and remote not in ip_alias_map and remote not in unaliased_ips_logged: + logger.info("Unaliased IP encountered (SERVER remote): %s. Consider providing --alias %s=NAME", remote, remote) + unaliased_ips_logged.add(remote) + remote_label = ip_alias_map.get(remote, remote) + remote_alias = normalize_participant(remote_label) + participants.setdefault(remote_alias, remote_label) + # Prefer local IP alias (e.g., mcs1); otherwise use the local IP string. + if sp.local_ip: + if is_ip(sp.local_ip) and sp.local_ip not in ip_alias_map and sp.local_ip not in unaliased_ips_logged: + logger.info("Unaliased IP encountered (SERVER local): %s. Consider providing --alias %s=NAME", sp.local_ip, sp.local_ip) + unaliased_ips_logged.add(sp.local_ip) + local_label_effective = ip_alias_map.get(sp.local_ip, sp.local_ip) + else: + # As a last resort, use filename label + local_label_effective = sp.local_label + local_alias = normalize_participant(local_label_effective) + participants.setdefault(local_alias, local_label_effective) + arrow = _colorized_arrow('->', color) # SERVER handling is synchronous from remote to local + msgs.append(Message(sp.start_ts, remote_alias, local_alias, f"{trace_tag} [{time_str}] {label}{dur_str}{code_str}", arrow)) + # Add a note for the request with trace tag and start timestamp + directives.append(Directive(sp.start_ts, f"note over {remote_alias}, {local_alias}: {trace_tag} [{time_str}]")) + # Activate server processing lifeline only if duration is known and >= threshold + if sp.duration_ms is not None and sp.duration_ms >= ACTIVATION_MIN_MS: + directives.append(Directive(sp.start_ts, f"activate {local_alias} {color}")) + try: + srv_end = sp.start_ts + dt.timedelta(milliseconds=sp.duration_ms) + directives.append(Directive(srv_end, f"deactivate {local_alias}")) + except Exception: + pass + elif sp.duration_ms is None: + # Log that we couldn't determine server processing duration + logger.warning( + "No duration for SERVER span: tid=%s sid=%s method=%s path=%s local=%s", + sp.trace_id, sp.span_id, method, path, local_label_effective, + ) + elif sp.kind == "CLIENT": + # Structured attributes required: http_url, http_method + if not (sp.http_url and sp.http_method): + logger.warning( + "Skipping CLIENT span due to missing structured fields: tid=%s sid=%s name=%s http.url=%s http.method=%s", + sp.trace_id, sp.span_id, sp.name, sp.http_url, sp.http_method, + ) + continue + # Extract host and path from URL + try: + p = urlparse(sp.http_url) + host = p.hostname or sp.http_url + path = p.path or "/" + except Exception: + logger.warning("Failed to parse URL for CLIENT span (tid=%s sid=%s): %s", sp.trace_id, sp.span_id, sp.http_url) + host = sp.http_url + path = sp.http_url + label = f"{sp.http_method} {_normalize_cmapi_path(path)}" + # Replace loopback with file's real IP if available + if host in {"127.0.0.1", "::1"} and sp.local_ip: + orig = host + host = sp.local_ip + key = (orig, host) + if key not in logged_replacements: + file_label = os.path.basename(sp.local_file) if sp.local_file else "?" + logger.info("Loopback remapped (CLIENT): %s -> %s [file=%s]", orig, host, file_label) + logged_replacements.add(key) + # Normalize hostnames that embed an IP in dashed form to dotted IP when recognized + new_host = normalize_hostname_to_ip(host, known_ips, {}) + if new_host != host: + key = (host, new_host) + if key not in logged_replacements: + file_label = os.path.basename(sp.local_file) if sp.local_file else "?" + logger.info("Host normalized: %s -> %s [file=%s]", host, new_host, file_label) + logged_replacements.add(key) + host = new_host + # do not filter self traffic + # If host resolved to IP, allow user alias to apply + if is_ip(host) and host not in ip_alias_map and host not in unaliased_ips_logged: + logger.info("Unaliased IP encountered (CLIENT host): %s. Consider providing --alias %s=NAME", host, host) + unaliased_ips_logged.add(host) + remote_label = ip_alias_map.get(host, host) + remote_alias = normalize_participant(remote_label) + participants.setdefault(remote_alias, remote_label) + # Prefer local IP alias (e.g., mcs1); otherwise use the local IP string. + if sp.local_ip: + if is_ip(sp.local_ip) and sp.local_ip not in ip_alias_map and sp.local_ip not in unaliased_ips_logged: + logger.info("Unaliased IP encountered (CLIENT local): %s. Consider providing --alias %s=NAME", sp.local_ip, sp.local_ip) + unaliased_ips_logged.add(sp.local_ip) + local_label_effective = ip_alias_map.get(sp.local_ip, sp.local_ip) + else: + # As a last resort, use filename label + local_label_effective = sp.local_label + local_alias = normalize_participant(local_label_effective) + participants.setdefault(local_alias, local_label_effective) + # Use async arrow for async requests + is_sync = sp.request_is_sync + arrow = _colorized_arrow('->' if (is_sync is True) else '->>' if (is_sync is False) else '->', color) + msgs.append(Message(sp.start_ts, local_alias, remote_alias, f"{trace_tag} [{time_str}] {label}{dur_str}", arrow)) + # Add a response arrow back at end time if we know duration + end_ts: Optional[dt.datetime] = None + if sp.request_duration_ms is not None: + try: + end_ts = sp.start_ts + dt.timedelta(milliseconds=sp.request_duration_ms) + except Exception: + end_ts = None + elif sp.duration_ms is not None: + try: + end_ts = sp.start_ts + dt.timedelta(milliseconds=sp.duration_ms) + except Exception: + end_ts = None + if end_ts is not None: + end_time_str = end_ts.strftime("%H:%M:%S") + resp_arrow = '-->' if (is_sync is True or is_sync is None) else '-->>' + # Build details once + dur_val = None + if sp.request_duration_ms is not None: + dur_val = sp.request_duration_ms + elif sp.duration_ms is not None: + dur_val = sp.duration_ms + same_second = (end_time_str == time_str) + # Response label: always include method/path; timestamp first + resp_label_parts = ["Response", f"[{end_time_str}]", label] + if sp.status_code is not None: + code_token = str(sp.status_code) + if sp.status_code != 200: + code_token = f"{code_token}" + resp_label_parts.append(f"[{code_token}]") + else: + logger.error( + "Missing status_code for CLIENT response: tid=%s sid=%s url=%s", + sp.trace_id, sp.span_id, sp.http_url, + ) + resp_label = ' '.join(resp_label_parts) + msgs.append(Message(end_ts, remote_alias, local_alias, f"{trace_tag} {resp_label}", _colorized_arrow(resp_arrow, color))) + if same_second: + # Single combined note at the (end) time with tag and duration only + parts = [f"{trace_tag}"] + if dur_val is not None: + parts.append(f"dur={_fmt_secs(dur_val)}") + note_text = f"note over {local_alias}, {remote_alias}: " + ", ".join(parts) + directives.append(Directive(end_ts, note_text)) + else: + # Two separate notes: request note at start, response note at end + directives.append(Directive(sp.start_ts, f"note over {local_alias}, {remote_alias}: {trace_tag} [{time_str}]")) + parts = [f"{trace_tag}"] + if dur_val is not None: + parts.append(f"dur={_fmt_secs(dur_val)}") + note_text = f"note over {remote_alias}, {local_alias}: " + ", ".join(parts) + directives.append(Directive(end_ts, note_text)) + else: + # No end time available; emit only the request note at start + directives.append(Directive(sp.start_ts, f"note over {local_alias}, {remote_alias}: {trace_tag} [{time_str}]")) + logger.info( + "No duration for CLIENT request: tid=%s sid=%s method=%s url=%s", + sp.trace_id, sp.span_id, sp.http_method, sp.http_url, + ) + + # Emit participants sorted by label (case-insensitive) + items = list(participants.items()) # (alias, label) + items.sort(key=lambda x: x[1].lower()) + + def emit_part(alias: str, label: str): + if alias == label and alias.isidentifier(): + lines.append(f"participant {alias}") + else: + lines.append(f"participant \"{label}\" as {alias}") + + for alias, label in items: + emit_part(alias, label) + + # Sort and emit messages and directives merged by time + msgs.sort(key=lambda m: m.ts) + directives.sort(key=lambda d: d.ts) + i = j = 0 + while i < len(msgs) or j < len(directives): + if j >= len(directives) or (i < len(msgs) and msgs[i].ts <= directives[j].ts): + m = msgs[i] + lines.append(f"{m.src} {m.arrow} {m.dst} : {m.label}") + i += 1 + else: + d = directives[j] + lines.append(d.text) + j += 1 + + lines.append("@enduml") + return "\n".join(lines) + "\n" + + +def main(argv: Optional[List[str]] = None) -> int: + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") + ap = argparse.ArgumentParser(description="Convert CMAPI tracing JSONL logs to PlantUML sequence diagram") + ap.add_argument("paths", nargs="+", help="One or more JSONL log files.") + ap.add_argument("--filter-trace", help="Only include spans for the given trace_id (tid)") + ap.add_argument("--alias", action="append", default=[], metavar="IP=NAME", help="Assign a label NAME to IP. May be used multiple times.") + ap.add_argument("--list-ips", action="store_true", help="Print only the set of IP addresses encountered and exit") + ap.add_argument("--output", default="diagram.puml", help="Output PlantUML path (default: diagram.puml)") + args = ap.parse_args(argv) + + if args.list_ips: + logs = args.paths + if not logs: + ap.error("At least one log path is required with --list-ips") + spans, known_ips = parse_jsonl_logs(logs) + else: + # Determine logs and output path + logs = args.paths + output_path = args.output + if not logs: + ap.error("Provide at least one JSONL log path") + spans, known_ips = parse_jsonl_logs(logs) + + # Build alias map from args.alias entries (used by both list-ips and diagram output) + ip_alias_map: Dict[str, str] = {} + for item in args.alias: + if "=" in item: + ip, name = item.split("=", 1) + ip = ip.strip() + name = name.strip() + if ip: + ip_alias_map[ip] = name + else: + logger.error("Invalid alias format: %s", item) + return 2 + + if args.list_ips: + to_print = sorted(set(known_ips)) + for ip in to_print: + parts = [ip] + if ip in ip_alias_map: + parts.append(f"alias={ip_alias_map[ip]}") + print(" ".join(parts)) + return 0 + + + puml = build_plantuml(spans, args.filter_trace, known_ips, ip_alias_map) + + if args.list_ips: + # Already handled above, but keep structure consistent + return 0 + + # Write to output file + with open(output_path, "w", encoding="utf-8") as f: + f.write(puml) + logger.info("Diagram written to %s", output_path) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/cmapi/tracing/span.py b/cmapi/tracing/span.py index cc1ac8996..163fd1ebc 100644 --- a/cmapi/tracing/span.py +++ b/cmapi/tracing/span.py @@ -43,4 +43,12 @@ class TraceSpan: def record_exception(self, exc: BaseException) -> None: self.tracer._notify_exception(self, exc) + def to_flat_dict(self) -> Dict[str, Any]: + fd = self.__dict__.copy() + fd['span_name'] = fd.pop('name') # name field is reserved in log records + # Remove non-serializable references + fd.pop('tracer', None) + attributes = fd.pop("attributes") + fd.update(attributes) + return fd diff --git a/cmapi/tracing/trace_tool.py b/cmapi/tracing/trace_tool.py index 5d0c278db..826830d91 100644 --- a/cmapi/tracing/trace_tool.py +++ b/cmapi/tracing/trace_tool.py @@ -8,7 +8,6 @@ import cherrypy from tracing.tracer import get_tracer - def _on_request_start() -> None: req = cherrypy.request tracer = get_tracer() @@ -22,20 +21,27 @@ def _on_request_start() -> None: trace_id, parent_span_id = tracer.extract_traceparent(headers) tracer.set_incoming_context(trace_id, parent_span_id) - span_name = f"{getattr(req, 'method', 'HTTP')} {getattr(req, 'path_info', '/')}" + requester_host = getattr(getattr(req, 'remote', None), 'ip', '') + + method = getattr(req, 'method', 'HTTP') + path = getattr(req, 'path_info', '/') + if requester_host: + span_name = f"{requester_host} --> {method} {path}" + else: + span_name = f"{method} {path}" ctx = tracer.start_as_current_span(span_name, kind="SERVER") span = ctx.__enter__() span.set_attribute('http.method', getattr(req, 'method', '')) span.set_attribute('http.path', getattr(req, 'path_info', '')) - span.set_attribute('client.ip', getattr(getattr(req, 'remote', None), 'ip', '')) + span.set_attribute('client.ip', requester_host) span.set_attribute('instance.hostname', socket.gethostname()) safe_headers = {k: v for k, v in headers.items() if k.lower() not in {'authorization', 'x-api-key'}} span.set_attribute('sentry.incoming_headers', safe_headers) req._trace_span_ctx = ctx req._trace_span = span - tracer.inject_traceparent(cherrypy.response.headers) # type: ignore[arg-type] + tracer.inject_traceparent(cherrypy.response.headers) def _on_request_end() -> None: diff --git a/cmapi/tracing/traced_aiohttp.py b/cmapi/tracing/traced_aiohttp.py index d9468e0b1..e71980c39 100644 --- a/cmapi/tracing/traced_aiohttp.py +++ b/cmapi/tracing/traced_aiohttp.py @@ -1,10 +1,17 @@ """Async sibling of TracedSession.""" from typing import Any +import logging +import time import aiohttp from tracing.tracer import get_tracer +# Limit for raw JSON string preview (in characters) +_PREVIEW_MAX_CHARS = 512 + +logger = logging.getLogger("tracer") + class TracedAsyncSession(aiohttp.ClientSession): async def _request( @@ -22,6 +29,7 @@ class TracedAsyncSession(aiohttp.ClientSession): with tracer.start_as_current_span(span_name, kind="CLIENT") as span: span.set_attribute("http.method", method) span.set_attribute("http.url", url_text) + span.set_attribute("request_is_sync", False) tracer.inject_outbound_headers(headers) try: response = await super()._request(method, str_or_url, *args, **kwargs) @@ -30,7 +38,11 @@ class TracedAsyncSession(aiohttp.ClientSession): raise else: span.set_attribute("http.status_code", response.status) + await _record_outbound_json_preview(response, span) return response + finally: + duration_ms = (time.time_ns() - span.start_ns) / 1_000_000.0 + span.set_attribute("request_duration_ms", duration_ms) def create_traced_async_session(**kwargs: Any) -> TracedAsyncSession: @@ -38,3 +50,20 @@ def create_traced_async_session(**kwargs: Any) -> TracedAsyncSession: +async def _record_outbound_json_preview(response: aiohttp.ClientResponse, span) -> None: + """If response is JSON, attach small part of it to span + + We don't use streaming in aiohttp, so reading text is safe here. + """ + try: + content_type = str(response.headers.get('Content-Type', '')).lower() + if 'application/json' not in content_type: + return + text = await response.text() + if text is None: + text = "" + span.set_attribute('http.response.body.size', len(text)) + span.set_attribute('http.response.json', text[:_PREVIEW_MAX_CHARS]) + except Exception: + logger.exception("Could not extract JSON response body") + return None diff --git a/cmapi/tracing/traced_session.py b/cmapi/tracing/traced_session.py index 823023a26..49cb1d869 100644 --- a/cmapi/tracing/traced_session.py +++ b/cmapi/tracing/traced_session.py @@ -1,9 +1,16 @@ """Customized requests.Session that automatically traces outbound HTTP calls.""" from typing import Any, Optional +import logging +import time import requests -from tracing.tracer import get_tracer +from tracing.tracer import get_tracer, TraceSpan + +# Limit for raw JSON string preview (in characters) +_PREVIEW_MAX_CHARS = 512 + +logger = logging.getLogger("tracer") class TracedSession(requests.Session): @@ -19,6 +26,7 @@ class TracedSession(requests.Session): with tracer.start_as_current_span(span_name, kind="CLIENT") as span: span.set_attribute("http.method", method) span.set_attribute("http.url", url) + span.set_attribute("request_is_sync", True) tracer.inject_outbound_headers(headers) try: @@ -28,7 +36,11 @@ class TracedSession(requests.Session): raise else: span.set_attribute("http.status_code", response.status_code) + _record_outbound_json_preview(response, span) return response + finally: + duration_ms = (time.time_ns() - span.start_ns) / 1_000_000.0 + span.set_attribute("request_duration_ms", duration_ms) _default_session: Optional[TracedSession] = None @@ -41,4 +53,14 @@ def get_traced_session() -> TracedSession: return _default_session - +def _record_outbound_json_preview(response: requests.Response, span: TraceSpan) -> None: + """If response is JSON, attach small part of it to span""" + try: + content_type = str(response.headers.get('Content-Type', '')).lower() + if 'application/json' not in content_type: + return + text = response.text # requests will decode using inferred/declared encoding + span.set_attribute('http.response.body.size', len(text)) + span.set_attribute('http.response.json', text[:_PREVIEW_MAX_CHARS]) + except Exception: + logger.exception("Could not extract JSON response body") diff --git a/cmapi/tracing/traceparent_backend.py b/cmapi/tracing/traceparent_backend.py index e486f9bd3..f5f57fb15 100644 --- a/cmapi/tracing/traceparent_backend.py +++ b/cmapi/tracing/traceparent_backend.py @@ -2,37 +2,63 @@ import logging import time from typing import Any, Dict, Optional +import cherrypy +from mcs_node_control.models.node_config import NodeConfig from tracing.tracer import TracerBackend, TraceSpan from tracing.utils import swallow_exceptions -logger = logging.getLogger("tracing") +logger = logging.getLogger("tracer") +json_logger = logging.getLogger("json_trace") class TraceparentBackend(TracerBackend): """Default backend that logs span lifecycle and mirrors events/status.""" + def __init__(self): + my_addresses = list(NodeConfig().get_network_addresses()) + logger.info("My addresses: %s", my_addresses) + json_logger.info("my_addresses", extra={"my_addresses": my_addresses}) @swallow_exceptions def on_span_start(self, span: TraceSpan) -> None: logger.info( - "span_begin name=%s kind=%s trace_id=%s span_id=%s parent=%s attrs=%s", + "span_begin name='%s' kind=%s tid=%s sid=%s%s", span.name, span.kind, span.trace_id, span.span_id, - span.parent_span_id, span.attributes, + f' psid={span.parent_span_id}' if span.parent_span_id else '', ) + json_logger.info("span_begin", extra=span.to_flat_dict()) @swallow_exceptions def on_span_end(self, span: TraceSpan, exc: Optional[BaseException]) -> None: - duration_ms = (time.time_ns() - span.start_ns) / 1_000_000 + end_ns = time.time_ns() + duration_ms = (end_ns - span.start_ns) / 1_000_000 + span.set_attribute("duration_ms", duration_ms) + span.set_attribute("end_ns", end_ns) + + # Try to set status code if not already set + if span.kind == "SERVER" and "http.status_code" not in span.attributes: + try: + status_str = str(cherrypy.response.status) + code = int(status_str.split()[0]) + span.set_attribute("http.status_code", code) + except Exception: + pass + logger.info( - "span_end name=%s kind=%s trace_id=%s span_id=%s parent=%s duration_ms=%.3f attrs=%s", + "span_end name='%s' kind=%s tid=%s sid=%s%s duration_ms=%.3f", span.name, span.kind, span.trace_id, span.span_id, - span.parent_span_id, duration_ms, span.attributes, + f' psid={span.parent_span_id}' if span.parent_span_id else '', + duration_ms, ) + json_logger.info("span_end", extra=span.to_flat_dict()) @swallow_exceptions def on_span_event(self, span: TraceSpan, name: str, attrs: Dict[str, Any]) -> None: logger.info( - "span_event name=%s trace_id=%s span_id=%s attrs=%s", - name, span.trace_id, span.span_id, attrs, + "span_event name='%s' tid=%s sid=%s%s attrs=%s", + name, span.trace_id, span.span_id, + f' psid={span.parent_span_id}' if span.parent_span_id else '', + attrs, ) + json_logger.info("span_event", extra=span.to_flat_dict()) diff --git a/cmapi/tracing/utils.py b/cmapi/tracing/utils.py index 26e9d4a0c..e67e003f0 100644 --- a/cmapi/tracing/utils.py +++ b/cmapi/tracing/utils.py @@ -50,5 +50,3 @@ def parse_traceparent(header: str) -> Optional[Tuple[str, str, str]]: except Exception: logger.exception("Failed to parse traceparent: %s", header) return None - - diff --git a/mysql-test/columnstore/basic/t/cal_named_udfs.test b/mysql-test/columnstore/basic/t/cal_named_udfs.test index bd03af5bc..3858dee8e 100644 --- a/mysql-test/columnstore/basic/t/cal_named_udfs.test +++ b/mysql-test/columnstore/basic/t/cal_named_udfs.test @@ -20,6 +20,7 @@ select calgetversion()=mcsgetversion(); select calviewtablelock("t1"); select calcleartablelock(0); select callastinsertid("t1"); +--replace_regex /(Running SQL statements 1)/Running SQL statements 0/ select calgetsqlcount(); DROP TABLE t1; diff --git a/versioning/BRM/brmtypes.h b/versioning/BRM/brmtypes.h index 336db709f..b17f96028 100644 --- a/versioning/BRM/brmtypes.h +++ b/versioning/BRM/brmtypes.h @@ -429,7 +429,7 @@ EXPORT void log(const std::string& msg, logging::LOG_TYPE = logging::LOG_TYPE_CR EXPORT void log_errno(const std::string& msg, logging::LOG_TYPE = logging::LOG_TYPE_CRITICAL); EXPORT void errString(int rc, std::string& errMsg); -const struct timespec FIVE_MIN_TIMEOUT = {300, 0}; +// Note: Unresponsive timeouts are now configurable in the master and not defined here. /* Function identifiers used for master-slave communication. diff --git a/versioning/BRM/masterdbrmnode.cpp b/versioning/BRM/masterdbrmnode.cpp index 4f2a32bbd..a48b6277a 100644 --- a/versioning/BRM/masterdbrmnode.cpp +++ b/versioning/BRM/masterdbrmnode.cpp @@ -107,6 +107,16 @@ MasterDBRMNode::MasterDBRMNode() MSG_TIMEOUT.tv_sec = secondsToWait; else MSG_TIMEOUT.tv_sec = 20; + + // Configurable unresponsive timeout (default 300 seconds) + haltTimeout = {300, 0}; + std::string unrespStr = config->getConfig("SystemConfig", "DBRMUnresponsiveTimeout"); + int unrespSecs = config->fromText(unrespStr); + if (unrespSecs > 0) + { + haltTimeout.tv_sec = unrespSecs; + haltTimeout.tv_nsec = 0; + } } MasterDBRMNode::~MasterDBRMNode() @@ -534,16 +544,16 @@ void MasterDBRMNode::msgProcessor() retrycmd: uint32_t haltloops = 0; - while (halting && ++haltloops < static_cast(FIVE_MIN_TIMEOUT.tv_sec)) + while (halting && ++haltloops < static_cast(haltTimeout.tv_sec)) sleep(1); slaveLock.lock(); - if (haltloops == FIVE_MIN_TIMEOUT.tv_sec) + if (haltloops == static_cast(haltTimeout.tv_sec)) { ostringstream os; os << "A node is unresponsive for cmd = " << (uint32_t)cmd << ", no reconfigure in at least " - << FIVE_MIN_TIMEOUT.tv_sec << " seconds. Setting read-only mode."; + << haltTimeout.tv_sec << " seconds. Setting read-only mode."; log(os.str()); readOnly = true; halting = false; @@ -832,7 +842,9 @@ int MasterDBRMNode::gatherResponses(uint8_t cmd, uint32_t cmdMsgLength, vector 0) ? (haltTimeout.tv_sec / newtimeout.tv_sec) : 0; + if (ntRetries == 0) + ntRetries = 1; uint32_t retries = 0; while (++retries < ntRetries && tmp->length() == 0 && !halting) diff --git a/versioning/BRM/masterdbrmnode.h b/versioning/BRM/masterdbrmnode.h index a29e60c56..407f0b3dc 100644 --- a/versioning/BRM/masterdbrmnode.h +++ b/versioning/BRM/masterdbrmnode.h @@ -258,6 +258,9 @@ class MasterDBRMNode volatile bool die, halting; bool reloadCmd; mutable bool readOnly; + // Maximum time to wait for worker responses/reconfigure before forcing read-only + // Loaded from Columnstore.xml: SystemConfig/DBRMUnresponsiveTimeout (default: 300 seconds) + struct timespec haltTimeout; mutable bool waitToFinishJobs{false}; struct timespec MSG_TIMEOUT; };