1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

feat(cmapi,cli): MCOL-5618: Add backup, restore, backup-dbrm, restore-dbrm commands for mcs cli tool. (#3130)

* MCOL-5618: Add backup, restore, backup-dbrm, restore-dbrm commands for mcs cli tool.

[add] mcs_cluster_tool/helpers.py file with cook_sh_arg function inside
[add] MCS_BACKUP_MANAGER_SH to mcs_cluster_tool/constants.py
[add] backup and restore commands to "mcs" Typer APP

* MCOL-5618: Move mcs_backup_manager.sh to cmapi/scripts.

* MCOL-5618: Install mcs_backup_manager.sh with CMAPI package.
This commit is contained in:
Alan Mologorsky
2024-04-01 16:51:38 +03:00
committed by GitHub
parent 77cd733a6d
commit 6844923b9a
7 changed files with 667 additions and 4 deletions

View File

@ -84,6 +84,9 @@ INSTALL(FILES mcs_aws
INSTALL(FILES mcs_gsutil INSTALL(FILES mcs_gsutil
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
DESTINATION ${BIN_DIR}) DESTINATION ${BIN_DIR})
INSTALL(FILES scripts/mcs_backup_manager.sh
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
DESTINATION ${BIN_DIR})
OPTION(RPM "Build an RPM" OFF) OPTION(RPM "Build an RPM" OFF)
IF(RPM) IF(RPM)

View File

@ -4,7 +4,9 @@ import sys
import typer import typer
from cmapi_server.logging_management import dict_config, add_logging_level from cmapi_server.logging_management import dict_config, add_logging_level
from mcs_cluster_tool import cluster_app, cmapi_app from mcs_cluster_tool import (
cluster_app, cmapi_app, backup_commands, restore_commands
)
from mcs_cluster_tool.constants import MCS_CLI_LOG_CONF_PATH from mcs_cluster_tool.constants import MCS_CLI_LOG_CONF_PATH
@ -16,11 +18,15 @@ app = typer.Typer(
'MCS services' 'MCS services'
), ),
) )
app.add_typer(cluster_app.app, name="cluster") app.add_typer(cluster_app.app, name='cluster')
app.add_typer(cmapi_app.app, name="cmapi") app.add_typer(cmapi_app.app, name='cmapi')
app.command()(backup_commands.backup)
app.command('backup-dbrm')(backup_commands.backup_dbrm)
app.command()(restore_commands.restore)
app.command('restore-dbrm')(restore_commands.restore_dbrm)
if __name__ == "__main__": if __name__ == '__main__':
add_logging_level('TRACE', 5) #TODO: remove when stadalone mode added. add_logging_level('TRACE', 5) #TODO: remove when stadalone mode added.
dict_config(MCS_CLI_LOG_CONF_PATH) dict_config(MCS_CLI_LOG_CONF_PATH)
logger = logging.getLogger('mcs_cli') logger = logging.getLogger('mcs_cli')

View File

@ -0,0 +1,325 @@
"""Typer application for backup Columnstore data."""
import logging
import sys
from datetime import datetime
from typing_extensions import Annotated
import typer
from cmapi_server.process_dispatchers.base import BaseDispatcher
from mcs_cluster_tool.constants import MCS_BACKUP_MANAGER_SH
from mcs_cluster_tool.decorators import handle_output
from mcs_cluster_tool.helpers import cook_sh_arg
logger = logging.getLogger('mcs_cli')
@handle_output
def backup(
    bl: Annotated[
        str,
        typer.Option(
            '-bl', '--backup-location',
            help=(
                'What directory to store the backups on this machine or the target machine.\n'
                'Consider write permissions of the scp user and the user running this script.\n'
                'Mariadb-backup will use this location as a tmp dir for S3 and remote backups temporarily.\n'
                'Example: /mnt/backups/'
            )
        )
    ] = '/tmp/backups/',
    bd: Annotated[
        str,
        typer.Option(
            '-bd', '--backup-destination',
            help=(
                'Are the backups going to be stored on the same machine this '
                'script is running on or another server - if Remote you need '
                'to setup scp=\n'
                'Options: "Local" or "Remote"'
            )
        )
    ] = 'Local',
    scp: Annotated[
        str,
        typer.Option(
            '-scp',
            help=(
                'Used only if --backup-destination="Remote".\n'
                'The user/credentials that will be used to scp the backup '
                'files\n'
                'Example: "centos@10.14.51.62"'
            )
        )
    ] = '',
    bb: Annotated[
        str,
        typer.Option(
            '-bb', '--backup-bucket',
            help=(
                'Only used if --storage=S3\n'
                'Name of the bucket to store the columnstore backups.\n'
                'Example: "s3://my-cs-backups"'
            )
        )
    ] = '',
    url: Annotated[
        str,
        typer.Option(
            '-url', '--endpoint-url',
            help=(
                'Used by on premise S3 vendors.\n'
                'Example: "http://127.0.0.1:8000"'
            )
        )
    ] = '',
    nv_ssl: Annotated[
        bool,
        typer.Option(
            '-nv-ssl/-v-ssl','--no-verify-ssl/--verify-ssl',
            help='Skips verifying ssl certs, useful for onpremise s3 storage.'
        )
    ] = False,
    s: Annotated[
        str,
        typer.Option(
            '-s', '--storage',
            help=(
                'What storage topogoly is being used by Columnstore - found '
                'in /etc/columnstore/storagemanager.cnf.\n'
                'Options: "LocalStorage" or "S3"'
            )
        )
    ] = 'LocalStorage',
    i: Annotated[
        bool,
        typer.Option(
            '-i/-no-i', '--incremental/--no--incremental',
            help='Adds columnstore deltas to an existing full backup.'
        )
    ] = False,
    P: Annotated[
        int,
        typer.Option(
            '-P', '--parallel',
            help=(
                'Determines if columnstore data directories will have '
                'multiple rsync running at the same time for different '
                'subfolders to parallelize writes.'
            )
        )
    ] = 4,
    ha: Annotated[
        bool,
        typer.Option(
            '-ha/-no-ha', '--highavilability/--no-highavilability',
            help=(
                'Hint wether shared storage is attached @ below on all nodes '
                'to see all data\n'
                '   HA LocalStorage ( /var/lib/columnstore/dataX/ )\n'
                '   HA S3           ( /var/lib/columnstore/storagemanager/ )'
            )
        )
    ] = False,
    f: Annotated[
        str,
        typer.Option(
            '-f', '--config-file',
            help='Path to backup configuration file to load variables from.'
        )
    ] = '.cs-backup-config',
    sbrm: Annotated[
        bool,
        typer.Option(
            '-sbrm/-no-sbrm', '--skip-save-brm/--no-skip-save-brm',
            help=(
                'Skip saving brm prior to running a backup - '
                'ideal for dirty backups.'
            )
        )
    ] = False,
    spoll: Annotated[
        bool,
        typer.Option(
            '-spoll/-no-spoll', '--skip-polls/--no-skip-polls',
            help='Skip sql checks confirming no write/cpimports running.'
        )
    ] = False,
    slock: Annotated[
        bool,
        typer.Option(
            '-slock/-no-slock', '--skip-locks/--no-skip-locks',
            help='Skip issuing write locks - ideal for dirty backups.'
        )
    ] = False,
    smdb: Annotated[
        bool,
        typer.Option(
            '-smdb/-no-smdb', '--skip-mariadb-backup/--no-skip-mariadb-backup',
            help=(
                'Skip running a mariadb-backup for innodb data - ideal for '
                'incremental dirty backups.'
            )
        )
    ] = False,
    sb: Annotated[
        bool,
        typer.Option(
            '-sb/-no-sb', '--skip-bucket-data/--no-skip-bucket-data',
            help='Skip taking a copy of the columnstore data in the bucket.'
        )
    ] = False,
    pi: Annotated[
        int,
        typer.Option(
            '-pi', '--poll-interval',
            help=(
                'Number of seconds between poll checks for active writes & '
                'cpimports.'
            )
        )
    ] = 5,
    pmw: Annotated[
        int,
        typer.Option(
            '-pmw', '--poll-max-wait',
            help=(
                'Max number of minutes for polling checks for writes to wait '
                'before exiting as a failed backup attempt.'
            )
        )
    ] = 60,
    q: Annotated[
        bool,
        typer.Option(
            '-q/-no-q', '--quiet/--no-quiet',
            help='Silence verbose copy command outputs.'
        )
    ] = False,
    c: Annotated[
        str,
        typer.Option(
            '-c', '--compress',
            help='Compress backup in X format - Options: [ pigz ].'
        )
    ] = '',
    nb: Annotated[
        str,
        typer.Option(
            '-nb', '--name-backup',
            help='Define the name of the backup - default: $(date +%m-%d-%Y)'
        )
    # NOTE: this default is computed once, when the module is imported.
    ] = datetime.now().strftime('%m-%d-%Y'),
    m: Annotated[
        str,
        typer.Option(
            '-m', '--mode',
            help=(
                'Modes ["direct","indirect"] - direct backups run on the '
                'columnstore nodes themselves. indirect run on another '
                'machine that has read-only mounts associated with '
                'columnstore/mariadb\n'
            ),
            hidden=True
        )
    ] = 'direct',
):
    """Backup Columnstore and/or MariaDB data."""
    # The docstring is intentionally a single line: Typer displays the
    # callback docstring as the command help text.
    #
    # Snapshot the call arguments before any other local is created, so only
    # the CLI options declared in the signature are forwarded to the script.
    # Every parameter is named after the script's short flag (bl -> -bl, ...).
    options = dict(locals())

    # Usage of the wrapped script ($0 == mcs_backup_manager.sh).
    # Local Storage Examples:
    #   ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage
    #   ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage -P 8
    #   ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage --incremental 02-18-2022
    #   ./$0 backup -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -s LocalStorage
    # S3 Examples:
    #   ./$0 backup -bb s3://my-cs-backups -s S3
    #   ./$0 backup -bb s3://my-cs-backups -c pigz --quiet -sb
    #   ./$0 backup -bb gs://my-cs-backups -s S3 --incremental 02-18-2022
    #   ./$0 backup -bb s3://my-onpremise-bucket -s S3 -url http://127.0.0.1:8000
    # Cron Example:
    #   */60 */24 * * * root bash /root/$0 -bb s3://my-cs-backups -s S3 >> /root/csBackup.log 2>&1
    #
    # NOTE(review): the examples above pass a date after --incremental, while
    # this wrapper declares it as a boolean and forwards a bare "-i" flag.
    # Confirm against the script's argument parser.

    # cook_sh_arg returns None for options that must not be forwarded
    # (empty strings and False flags); everything else, including non-empty
    # defaults, is passed through to the script.
    cooked = (cook_sh_arg(name, value) for name, value in options.items())
    arguments = [sh_arg for sh_arg in cooked if sh_arg is not None]

    # The script multiplexes several actions and expects the action name as
    # its first argument (see the "./$0 backup ..." examples above).
    cmd = f'{MCS_BACKUP_MANAGER_SH} backup {" ".join(arguments)}'
    # Stream the script output straight to the user's terminal.
    success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout)
    return {'success': success}
@handle_output
def backup_dbrm(
    m: Annotated[
        str,
        typer.Option(
            '-m', '--mode',
            help=(
                '"loop" or "once" ; Determines if this script runs in a '
                'forever loop sleeping -i minutes or just once.'
            ),
        )
    ] = 'once',
    i: Annotated[
        int,
        typer.Option(
            '-i', '--interval',
            help='Number of minutes to sleep when --mode=loop.'
        )
    ] = 90,
    r: Annotated[
        int,
        typer.Option(
            '-r', '--retention-days',
            help=(
                'Number of days of dbrm backups to retain - script will '
                'delete based on last update file time.'
            )
        )
    ] = 7,
    p: Annotated[
        str,
        typer.Option(
            '-p', '--path',
            help='Path of where to save the dbrm backups on disk.'
        )
    ] = '/tmp/dbrm_backups',
    nb: Annotated[
        str,
        typer.Option(
            '-nb', '--name-backup',
            help='Custom name to prefex dbrm backups with.'
        )
    ] = 'dbrm_backup',
    q: Annotated[
        bool,
        typer.Option(
            '-q/-no-q', '--quiet/--no-quiet',
            help='Silence verbose copy command outputs.'
        )
    ] = False
):
    """Columnstore DBRM Backup."""
    # The docstring is intentionally a single line: Typer displays the
    # callback docstring as the command help text.
    #
    # Snapshot the call arguments before any other local is created, so only
    # the CLI options declared in the signature are forwarded to the script.
    options = dict(locals())

    # Usage of the wrapped script ($0 == mcs_backup_manager.sh).
    # Default: ./$0 dbrm_backup -m once --retention-days 7 --path /tmp/dbrm_backups
    # Examples:
    #   ./$0 dbrm_backup --mode loop --interval 90 --retention-days 7 --path /mnt/dbrm_backups
    #   ./$0 dbrm_backup --mode once --retention-days 7 --path /mnt/dbrm_backups -nb my-one-off-backup
    # Cron Example:
    #   */60 */3 * * * root bash /root/$0 dbrm_backup -m once --retention-days 7 --path /tmp/dbrm_backups >> /tmp/dbrm_backups/cs_backup.log 2>&1

    # cook_sh_arg returns None for options that must not be forwarded
    # (empty strings and False flags).
    cooked = (cook_sh_arg(name, value) for name, value in options.items())
    arguments = [sh_arg for sh_arg in cooked if sh_arg is not None]

    # The script expects the action name as its first argument
    # (see the "./$0 dbrm_backup ..." examples above).
    cmd = f'{MCS_BACKUP_MANAGER_SH} dbrm_backup {" ".join(arguments)}'
    # Stream the script output straight to the user's terminal.
    success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout)
    return {'success': success}

View File

@ -1,4 +1,9 @@
import os import os
from cmapi_server.constants import MCS_INSTALL_BIN
MCS_CLI_ROOT_PATH = os.path.dirname(__file__) MCS_CLI_ROOT_PATH = os.path.dirname(__file__)
MCS_CLI_LOG_CONF_PATH = os.path.join(MCS_CLI_ROOT_PATH, 'mcs_cli_log.conf') MCS_CLI_LOG_CONF_PATH = os.path.join(MCS_CLI_ROOT_PATH, 'mcs_cli_log.conf')
MCS_BACKUP_MANAGER_SH = os.path.join(MCS_INSTALL_BIN, 'mcs_backup_manager.sh')

View File

@ -0,0 +1,29 @@
"""Module with helper functions for mcs cli tool."""
from typing import Union
def cook_sh_arg(
    arg_name: str, value: Union[str, int, bool, None]
) -> Union[str, None]:
    """Convert argument name and value from function locals to bash argument.

    The argument name is used as a single-dash flag with underscores
    replaced by dashes (``nv_ssl`` -> ``-nv-ssl``).

    :param arg_name: function argument name
    :type arg_name: str
    :param value: function argument value
    :type value: Union[str, int, bool, None]
    :return: bash argument string, or None if the argument must be skipped
    :rtype: Union[str, None]
    """
    # skip "arguments" list and Typer ctx variables from local scope
    if arg_name in ('arguments', 'ctx'):
        return None
    # skip args that carry no value at all
    if value is None or value == '':
        return None
    flag = f'-{arg_name.replace("_", "-")}'
    # boolean args are switches: pass the bare flag for True, nothing for
    # False. This check must precede the generic branch because bool is a
    # subclass of int.
    if isinstance(value, bool):
        return flag if value else None
    # Always emit the value, even when it is falsy (e.g. the integer 0);
    # otherwise "-P 0" would silently degrade to a bare "-P".
    # NOTE(review): the value is not shell-quoted here; values containing
    # whitespace or shell metacharacters depend on how the caller executes
    # the resulting command line.
    return f'{flag} {value}'

View File

@ -0,0 +1,295 @@
"""Typer application for restore Columnstore data."""
import logging
import sys
from typing_extensions import Annotated
import typer
from cmapi_server.process_dispatchers.base import BaseDispatcher
from mcs_cluster_tool.constants import MCS_BACKUP_MANAGER_SH
from mcs_cluster_tool.decorators import handle_output
from mcs_cluster_tool.helpers import cook_sh_arg
logger = logging.getLogger('mcs_cli')
@handle_output
def restore(
    l: Annotated[
        str,
        typer.Option(
            '-l', '--load',
            help='What date folder to load from the backup_location.'
        )
    ] = '',
    bl: Annotated[
        str,
        typer.Option(
            '-bl', '--backup-location',
            help=(
                'Where the backup to load is found.\n'
                'Example: /mnt/backups/'
            )
        )
    ] = '/tmp/backups/',
    bd: Annotated[
        str,
        typer.Option(
            # The hyphenated spelling matches the "backup" command and the
            # help texts; the underscore spelling is kept for compatibility.
            '-bd', '--backup-destination', '--backup_destination',
            help=(
                'Is this backup on the same or remote server compared to '
                'where this script is running.\n'
                'Options: "Local" or "Remote"'
            )
        )
    ] = 'Local',
    scp: Annotated[
        str,
        typer.Option(
            '-scp', '--secure-copy-protocol',
            help=(
                'Used only if --backup-destination=Remote\n'
                'The user/credentials that will be used to scp the backup files.\n'
                'Example: "centos@10.14.51.62"'
            )
        )
    ] = '',
    bb: Annotated[
        str,
        typer.Option(
            '-bb', '--backup-bucket',
            help=(
                'Only used if --storage=S3\n'
                'Name of the bucket to store the columnstore backups.\n'
                'Example: "s3://my-cs-backups"'
            )
        )
    ] = '',
    url: Annotated[
        str,
        typer.Option(
            '-url', '--endpoint-url',
            help=(
                'Used by on premise S3 vendors.\n'
                'Example: "http://127.0.0.1:8000"'
            )
        )
    ] = '',
    s: Annotated[
        str,
        typer.Option(
            '-s', '--storage',
            help=(
                'What storage topogoly is being used by Columnstore - found '
                'in /etc/columnstore/storagemanager.cnf.\n'
                'Options: "LocalStorage" or "S3"'
            )
        )
    ] = 'LocalStorage',
    dbs: Annotated[
        int,
        typer.Option(
            '-dbs', '--dbroots',
            help='Number of database roots in the backup.'
        )
    ] = 1,
    pm: Annotated[
        str,
        typer.Option(
            '-pm', '--nodeid',
            help=(
                'Forces the handling of the restore as this node as opposed '
                'to whats detected on disk.'
            )
        )
    ] = '',
    nb: Annotated[
        str,
        typer.Option(
            '-nb', '--new-bucket',
            help=(
                'Defines the new bucket to copy the s3 data to from the '
                'backup bucket. Use -nb if the new restored cluster should '
                'use a different bucket than the backup bucket itself.'
            )
        )
    ] = '',
    nr: Annotated[
        str,
        typer.Option(
            '-nr', '--new-region',
            help=(
                'Defines the region of the new bucket to copy the s3 data to '
                'from the backup bucket.'
            )
        )
    ] = '',
    nk: Annotated[
        str,
        typer.Option(
            '-nk', '--new-key',
            help='Defines the aws key to connect to the new_bucket.'
        )
    ] = '',
    ns: Annotated[
        str,
        typer.Option(
            '-ns', '--new-secret',
            help=(
                'Defines the aws secret of the aws key to connect to the '
                'new_bucket.'
            )
        )
    ] = '',
    P: Annotated[
        int,
        typer.Option(
            '-P', '--parallel',
            help=(
                'Determines if columnstore data directories will have '
                'multiple rsync running at the same time for different '
                'subfolders to parallelize writes.'
            )
        )
    ] = 4,
    ha: Annotated[
        bool,
        typer.Option(
            '-ha/-no-ha', '--highavilability/--no-highavilability',
            help=(
                'Flag for high available systems (meaning shared storage '
                'exists supporting the topology so that each node sees '
                'all data)'
            )
        )
    ] = False,
    cont: Annotated[
        bool,
        typer.Option(
            '-cont/-no-cont', '--continue/--no-continue',
            help=(
                'This acknowledges data in your --new_bucket is ok to delete '
                'when restoring S3. When set to true skips the enforcement '
                'that new_bucket should be empty prior to starting a restore.'
            )
        )
    ] = False,
    f: Annotated[
        str,
        typer.Option(
            '-f', '--config-file',
            help='Path to backup configuration file to load variables from.'
        )
    ] = '.cs-backup-config',
    smdb: Annotated[
        bool,
        typer.Option(
            '-smdb/-no-smdb', '--skip-mariadb-backup/--no-skip-mariadb-backup',
            help=(
                'Skip restoring mariadb server via mariadb-backup - ideal for '
                'only restoring columnstore.'
            )
        )
    ] = False,
    sb: Annotated[
        bool,
        typer.Option(
            '-sb/-no-sb', '--skip-bucket-data/--no-skip-bucket-data',
            help=(
                'Skip restoring columnstore data in the bucket - ideal if '
                'looking to only restore mariadb server.'
            )
        )
    ] = False,
    m: Annotated[
        str,
        typer.Option(
            '-m', '--mode',
            help=(
                'Modes ["direct","indirect"] - direct backups run on the '
                'columnstore nodes themselves. indirect run on another '
                'machine that has read-only mounts associated with '
                'columnstore/mariadb\n'
            ),
            hidden=True
        )
    ] = 'direct',
    c: Annotated[
        str,
        typer.Option(
            '-c', '--compress',
            help=(
                'Hint that the backup is compressed in X format. '
                'Options: [ pigz ].'
            )
        )
    ] = '',
    q: Annotated[
        bool,
        typer.Option(
            '-q/-no-q', '--quiet/--no-quiet',
            help='Silence verbose copy command outputs.'
        )
    ] = False,
    nv_ssl: Annotated[
        bool,
        typer.Option(
            '-nv-ssl/-v-ssl','--no-verify-ssl/--verify-ssl',
            help='Skips verifying ssl certs, useful for onpremise s3 storage.'
        )
    ] = False,
):
    """Restore Columnstore (and/or MariaDB) data."""
    # The docstring is intentionally a single line: Typer displays the
    # callback docstring as the command help text.
    #
    # Snapshot the call arguments before any other local is created, so only
    # the CLI options declared in the signature are forwarded to the script.
    # Every parameter is named after the script's short flag (bl -> -bl, ...).
    options = dict(locals())

    # Usage of the wrapped script ($0 == mcs_backup_manager.sh).
    # Local Storage Examples:
    #   ./$0 restore -s LocalStorage -bl /tmp/backups/ -bd Local -l 12-29-2021
    #   ./$0 restore -s LocalStorage -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -l 12-29-2021
    # S3 Storage Examples:
    #   ./$0 restore -s S3 -bb s3://my-cs-backups -l 12-29-2021
    #   ./$0 restore -s S3 -bb gs://on-premise-bucket -l 12-29-2021 -url http://127.0.0.1:8000
    #   ./$0 restore -s S3 -bb s3://my-cs-backups -l 08-16-2022 -nb s3://new-data-bucket -nr us-east-1 -nk AKIAxxxxxxx3FHCADF -ns GGGuxxxxxxxxxxnqa72csk5 -ha

    # cook_sh_arg returns None for options that must not be forwarded
    # (empty strings and False flags); everything else, including non-empty
    # defaults, is passed through to the script.
    cooked = (cook_sh_arg(name, value) for name, value in options.items())
    arguments = [sh_arg for sh_arg in cooked if sh_arg is not None]

    # The script multiplexes several actions and expects the action name as
    # its first argument (see the "./$0 restore ..." examples above).
    cmd = f'{MCS_BACKUP_MANAGER_SH} restore {" ".join(arguments)}'
    # Stream the script output straight to the user's terminal.
    success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout)
    return {'success': success}
@handle_output
def restore_dbrm(
    p: Annotated[
        str,
        typer.Option(
            '-p', '--path',
            help='Path of where dbrm backups stored on disk.'
        )
    ] = '/tmp/dbrm_backups',
    d: Annotated[
        str,
        typer.Option(
            '-d', '--directory',
            help='Date or directory chose to restore from.'
        )
    ] = '',
):
    """Restore Columnstore DBRM data."""
    # The docstring is intentionally a single line: Typer displays the
    # callback docstring as the command help text.
    #
    # Snapshot the call arguments before any other local is created, so only
    # the CLI options declared in the signature are forwarded to the script.
    options = dict(locals())

    # Usage of the wrapped script ($0 == mcs_backup_manager.sh).
    # Default: ./$0 dbrm_restore --path /tmp/dbrm_backups
    # Examples:
    #   ./$0 dbrm_restore --path /tmp/dbrm_backups --directory dbrm_backup12252023

    # cook_sh_arg returns None for options that must not be forwarded
    # (empty strings and False flags).
    cooked = (cook_sh_arg(name, value) for name, value in options.items())
    arguments = [sh_arg for sh_arg in cooked if sh_arg is not None]

    # The script expects the action name as its first argument
    # (see the "./$0 dbrm_restore ..." examples above).
    cmd = f'{MCS_BACKUP_MANAGER_SH} dbrm_restore {" ".join(arguments)}'
    # Stream the script output straight to the user's terminal.
    success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout)
    return {'success': success}