1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

feat(mcs): MCOL-5300 review/finetune log collection tools.

* chore(mcs, scripts): extra/columnstore_review.sh with scripts/columnstore_review.sh with 1.4.13 version
* feat(mcs): add review command to the Tools section. It's the wrapper for columnstore_review.sh
* feat(mcs): add review command implementation to tools.py file + constants.py
* chore(mcs): add separator argument to cook_sh_arg function
* docs(mcs): updated README.md and mcs.1 man file
This commit is contained in:
mariadb-AlanMologorsky
2025-04-22 20:36:07 +03:00
committed by Alan Mologorsky
parent d245ef33b1
commit 2c0367ea2b
8 changed files with 851 additions and 105 deletions

View File

@ -86,7 +86,7 @@ INSTALL(FILES mcs_aws
INSTALL(FILES mcs_gsutil
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
DESTINATION ${BIN_DIR})
INSTALL(FILES scripts/mcs_backup_manager.sh scripts/cs_package_manager.sh
INSTALL(FILES scripts/mcs_backup_manager.sh scripts/cs_package_manager.sh scripts/columnstore_review.sh
PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ
DESTINATION ${BIN_DIR})
INSTALL(FILES mcs_cluster_tool/mcs.1 DESTINATION ${MAN_DIR})

View File

@ -10,6 +10,7 @@ $ mcs [OPTIONS] COMMAND [ARGS]...
**Options**:
* `-v, --verbose`: Enable verbose logging to console
* `--help`: Show this message and exit.
**Commands**:
@ -19,7 +20,9 @@ $ mcs [OPTIONS] COMMAND [ARGS]...
* `restore`: Restore Columnstore (and/or MariaDB) data.
* `dbrm_restore`: Restore Columnstore DBRM data.
* `cskeys`: Generates a random AES encryption key and init vector and writes them to disk.
* `cspasswd`: Encrypt a Columnstore plaintext password using the encryption key in the key file.
* `cspasswd`: Encrypt a Columnstore plaintext password.
* `bootstrap-single-node`: Bootstrap a single node (localhost)...
* `review`: Provides useful functions to review and troubleshoot the MCS cluster.
* `help-all`: Show help for all commands in man page style.
* `status`: Get status information.
* `stop`: Stop the Columnstore cluster.
@ -45,8 +48,8 @@ $ mcs backup [OPTIONS]
* `-bl, --backup-location TEXT`: What directory to store the backups on this machine or the target machine.
Consider write permissions of the scp user and the user running this script.
Mariadb-backup will use this location as a tmp dir for S3 and remote backups temporarily.
Example: /mnt/backups/ [default: /tmp/backups/]
* `-bd, --backup-destination TEXT`: Are the backups going to be stored on the same machine this script is running on or another server - if Remote you need to setup scp=Options: "Local" or "Remote" [default: Local]
Example: /mnt/backups/
* `-bd, --backup-destination TEXT`: Are the backups going to be stored on the same machine this script is running on or another server - if Remote you need to setup scp=Options: "Local" or "Remote"
* `-scp TEXT`: Used only if --backup-destination="Remote".
The user/credentials that will be used to scp the backup files
Example: "centos@10.14.51.62"
@ -56,25 +59,25 @@ Example: "s3://my-cs-backups"
* `-url, --endpoint-url TEXT`: Used by on premise S3 vendors.
Example: "http://127.0.0.1:8000"
* `-s, --storage TEXT`: What storage topology is being used by Columnstore - found in /etc/columnstore/storagemanager.cnf.
Options: "LocalStorage" or "S3" [default: LocalStorage]
Options: "LocalStorage" or "S3"
* `-i, --incremental TEXT`: Adds columnstore deltas to an existing full backup. Backup folder to apply increment could be a value or "auto_most_recent" - the incremental backup applies to last full backup.
* `-P, --parallel INTEGER`: Determines if columnstore data directories will have multiple rsync running at the same time for different subfolders to parallelize writes. Ignored if "-c/--compress" argument not set. [default: 4]
* `-ha, --highavilability / -no-ha, --no-highavilability`: Hint wether shared storage is attached @ below on all nodes to see all data
* `-P, --parallel INTEGER`: Enables parallel rsync for faster backups, setting the number of simultaneous rsync processes. With -c/--compress, sets the number of compression threads.
* `-ha, --highavilability`: Hint whether shared storage is attached @ below on all nodes to see all data
HA LocalStorage ( /var/lib/columnstore/dataX/ )
HA S3 ( /var/lib/columnstore/storagemanager/ ) [default: no-ha]
HA S3 ( /var/lib/columnstore/storagemanager/ )
* `-f, --config-file TEXT`: Path to backup configuration file to load variables from - relative or full path accepted.
* `-sbrm, --skip-save-brm / -no-sbrm, --no-skip-save-brm`: Skip saving brm prior to running a backup - ideal for dirty backups. [default: no-sbrm]
* `-spoll, --skip-polls / -no-spoll, --no-skip-polls`: Skip sql checks confirming no write/cpimports running. [default: no-spoll]
* `-slock, --skip-locks / -no-slock, --no-skip-locks`: Skip issuing write locks - ideal for dirty backups. [default: no-slock]
* `-smdb, --skip-mariadb-backup / -no-smdb, --no-skip-mariadb-backup`: Skip running a mariadb-backup for innodb data - ideal for incremental dirty backups. [default: no-smdb]
* `-sb, --skip-bucket-data / -no-sb, --no-skip-bucket-data`: Skip taking a copy of the columnstore data in the bucket. [default: no-sb]
* `-nb, --name-backup TEXT`: Define the name of the backup - default: $(date +%m-%d-%Y) [default: 03-20-2025]
* `-sbrm, --skip-save-brm`: Skip saving brm prior to running a backup - ideal for dirty backups.
* `-spoll, --skip-polls`: Skip sql checks confirming no write/cpimports running.
* `-slock, --skip-locks`: Skip issuing write locks - ideal for dirty backups.
* `-smdb, --skip-mariadb-backup`: Skip running a mariadb-backup for innodb data - ideal for incremental dirty backups.
* `-sb, --skip-bucket-data`: Skip taking a copy of the columnstore data in the bucket.
* `-nb, --name-backup TEXT`: Define the name of the backup - default: $(date +%m-%d-%Y)
* `-c, --compress TEXT`: Compress backup in X format - Options: [ pigz ].
* `-q, --quiet / -no-q, --no-quiet`: Silence verbose copy command outputs. [default: no-q]
* `-nv-ssl, --no-verify-ssl / -v-ssl, --verify-ssl`: Skips verifying ssl certs, useful for onpremise s3 storage. [default: v-ssl]
* `-pi, --poll-interval INTEGER`: Number of seconds between poll checks for active writes & cpimports. [default: 5]
* `-pmw, --poll-max-wait INTEGER`: Max number of minutes for polling checks for writes to wait before exiting as a failed backup attempt. [default: 60]
* `-r, --retention-days INTEGER`: Retain backups created within the last X days, default 0 == keep all backups. [default: 0]
* `-q, --quiet`: Silence verbose copy command outputs.
* `-nv-ssl, --no-verify-ssl`: Skips verifying ssl certs, useful for onpremise s3 storage.
* `-pi, --poll-interval INTEGER`: Number of seconds between poll checks for active writes & cpimports.
* `-pmw, --poll-max-wait INTEGER`: Max number of minutes for polling checks for writes to wait before exiting as a failed backup attempt.
* `-r, --retention-days INTEGER`: Retain backups created within the last X days, default 0 == keep all backups.
* `-aro, --apply-retention-only`: Only apply retention policy to existing backups, does not run a backup.
* `-li, --list`: List backups.
* `--help`: Show this message and exit.
@ -96,8 +99,8 @@ $ mcs dbrm_backup [OPTIONS]
* `-bl, --backup-location TEXT`: Path of where to save the dbrm backups on disk. [default: /tmp/dbrm_backups]
* `-m, --mode TEXT`: "loop" or "once" ; Determines if this script runs in a forever loop sleeping -i minutes or just once. [default: once]
* `-nb, --name-backup TEXT`: Define the prefix of the backup - default: dbrm_backup+date +%Y%m%d_%H%M%S [default: dbrm_backup]
* `-ssm, --skip-storage-manager / -no-ssm, --no-skip-storage-manager`: Skip backing up storagemanager directory. [default: no-ssm]
* `-q, --quiet / -no-q, --no-quiet`: Silence verbose copy command outputs. [default: no-q]
* `-ssm, --skip-storage-manager`: Skip backing up storagemanager directory.
* `-q, --quiet`: Silence verbose copy command outputs.
* `-li, --list`: List backups.
* `--help`: Show this message and exit.
@ -133,14 +136,14 @@ Options: "LocalStorage" or "S3" [default: LocalStorage]
* `-nk, --new-key TEXT`: Defines the aws key to connect to the new_bucket.
* `-ns, --new-secret TEXT`: Defines the aws secret of the aws key to connect to the new_bucket.
* `-P, --parallel INTEGER`: Determines number of decompression and mdbstream threads. Ignored if "-c/--compress" argument not set. [default: 4]
* `-ha, --highavilability / -no-ha, --no-highavilability`: Flag for high available systems (meaning shared storage exists supporting the topology so that each node sees all data) [default: no-ha]
* `-cont, --continue / -no-cont, --no-continue`: This acknowledges data in your --new_bucket is ok to delete when restoring S3. When set to true skips the enforcement that new_bucket should be empty prior to starting a restore. [default: no-cont]
* `-ha, --highavilability`: Flag for high available systems (meaning shared storage exists supporting the topology so that each node sees all data)
* `-cont, --continue`: This acknowledges data in your --new_bucket is ok to delete when restoring S3. When set to true skips the enforcement that new_bucket should be empty prior to starting a restore.
* `-f, --config-file TEXT`: Path to backup configuration file to load variables from - relative or full path accepted.
* `-smdb, --skip-mariadb-backup / -no-smdb, --no-skip-mariadb-backup`: Skip restoring mariadb server via mariadb-backup - ideal for only restoring columnstore. [default: no-smdb]
* `-sb, --skip-bucket-data / -no-sb, --no-skip-bucket-data`: Skip restoring columnstore data in the bucket - ideal if looking to only restore mariadb server. [default: no-sb]
* `-smdb, --skip-mariadb-backup`: Skip restoring mariadb server via mariadb-backup - ideal for only restoring columnstore.
* `-sb, --skip-bucket-data`: Skip restoring columnstore data in the bucket - ideal if looking to only restore mariadb server.
* `-c, --compress TEXT`: Hint that the backup is compressed in X format. Options: [ pigz ].
* `-q, --quiet / -no-q, --no-quiet`: Silence verbose copy command outputs. [default: no-q]
* `-nv-ssl, --no-verify-ssl / -v-ssl, --verify-ssl`: Skips verifying ssl certs, useful for onpremise s3 storage. [default: v-ssl]
* `-q, --quiet`: Silence verbose copy command outputs.
* `-nv-ssl, --no-verify-ssl`: Skips verifying ssl certs, useful for onpremise s3 storage.
* `-li, --list`: List backups.
* `--help`: Show this message and exit.
@ -159,8 +162,8 @@ $ mcs dbrm_restore [OPTIONS]
* `-bl, --backup-location TEXT`: Path of where dbrm backups exist on disk. [default: /tmp/dbrm_backups]
* `-l, --load TEXT`: Name of the directory to restore from -bl
* `-ns, --no-start`: Do not attempt columnstore startup post dbrm_restore.
* `-sdbk, --skip-dbrm-backup / -no-sdbk, --no-skip-dbrm-backup`: Skip backing up dbrms before restoring. [default: sdbk]
* `-ssm, --skip-storage-manager / -no-ssm, --no-skip-storage-manager`: Skip backing up storagemanager directory. [default: ssm]
* `-sdbk, --skip-dbrm-backup`: Skip backing up dbrms before restoring.
* `-ssm, --skip-storage-manager`: Skip backing up storagemanager directory.
* `-li, --list`: List backups.
* `--help`: Show this message and exit.
@ -208,6 +211,62 @@ $ mcs cspasswd [OPTIONS]
* `--decrypt`: Decrypt an encrypted password instead.
* `--help`: Show this message and exit.
## `mcs bootstrap-single-node`
Bootstrap a single node (localhost) Columnstore instance.
**Usage**:
```console
$ mcs bootstrap-single-node [OPTIONS]
```
**Options**:
* `--api-key TEXT`: API key to set.
* `--help`: Show this message and exit.
## `mcs review`
This script performs various maintenance and diagnostic tasks for
MariaDB ColumnStore, including log archiving, extent map backups,
schema and table testing, directory and ownership checks, extent map
validation, S3 storage comparison, process management, table
synchronization, port availability checks, stack dumps, cleanup of
rollback fragments, and graceful process termination.
If database is up, this script will connect as root@localhost via socket.
**Usage**:
```console
$ mcs review [OPTIONS]
```
**Options**:
* `--version`: Only show the header with version information.
* `--logs`: Create a compressed archive of logs for MariaDB Support Ticket
* `--path`: Define the path for where to save files/tarballs and outputs of this script.
* `--backupdbrm`: Takes a compressed backup of extent map files in dbrm directory.
* `--testschema`: Creates a test schema, tables, imports, queries, drops schema.
* `--testschemakeep`: creates a test schema, tables, imports, queries, does not drop.
* `--ldlischema`: Using ldli, creates test schema, tables, imports, queries, drops schema.
* `--ldlischemakeep`: Using ldli, creates test schema, tables, imports, queries, does not drop.
* `--emptydirs`: Searches /var/lib/columnstore for empty directories.
* `--notmysqldirs`: Searches /var/lib/columnstore for directories not owned by mysql.
* `--emcheck`: Checks the extent map for orphaned and missing files.
* `--s3check`: Checks the extent map against S3 storage.
* `--pscs`: Adds the pscs command. pscs lists running columnstore processes.
* `--schemasync`: Fix out-of-sync columnstore tables (CAL0009).
* `--tmpdir`: Ensure owner of temporary dir after reboot (MCOL-4866 & MCOL-5242).
* `--checkports`: Checks if ports needed by Columnstore are opened.
* `--eustack`: Dumps the stack of Columnstore processes.
* `--clearrollback`: Clear any rollback fragments from dbrm files.
* `--killcolumnstore`: Stop columnstore processes gracefully, then kill remaining processes.
* `--color TEXT`: print headers in color. Options: [none,red,blue,green,yellow,magenta,cyan] prefix color with l for light.
* `--help`: Show this message and exit.
## `mcs help-all`
Show help for all commands in man page style.

View File

@ -50,6 +50,12 @@ app.command(
# Tools panel: bootstrap a single-node (localhost) Columnstore instance.
app.command(
    'bootstrap-single-node',
    rich_help_panel='Tools commands',
)(tools_commands.bootstrap_single_node)

# Tools panel: `mcs review`, the wrapper around columnstore_review.sh.
app.command(
    'review',
    rich_help_panel='Tools commands',
    short_help=(
        'Provides useful functions to review and troubleshoot '
        'the MCS cluster.'
    ),
)(tools_commands.review)
@app.command(

View File

@ -7,3 +7,6 @@ MCS_CLI_ROOT_PATH = os.path.dirname(__file__)
MCS_CLI_LOG_CONF_PATH = os.path.join(MCS_CLI_ROOT_PATH, 'mcs_cli_log.conf')
MCS_BACKUP_MANAGER_SH = os.path.join(MCS_INSTALL_BIN, 'mcs_backup_manager.sh')
# Path of the columnstore_review.sh script, located under MCS_INSTALL_BIN.
MCS_COLUMNSTORE_REVIEW_SH = os.path.join(
MCS_INSTALL_BIN, 'columnstore_review.sh'
)

View File

@ -2,13 +2,17 @@
from typing import Optional, Union
def cook_sh_arg(arg_name: str, value: Union[str, int, bool]) -> Optional[str]:
def cook_sh_arg(
arg_name: str, value: Union[str, int, bool], separator: str = ' '
) -> Optional[str]:
"""Convert argument and and value from function locals to bash argument.
:param arg_name: function argument name
:type arg_name: str
:param value: function argument value
:type value: Union[str, int, bool]
:param separator: separator between argument and value
:type separator: str
:return: bash argument string or None
:rtype: Optional[str]
"""
@ -31,4 +35,4 @@ def cook_sh_arg(arg_name: str, value: Union[str, int, bool]) -> Optional[str]:
return None
# if True value presented just pass only arg name without value
value = ''
return f'-{arg_name} {value}' if value else f'-{arg_name}'
return f'-{arg_name}{separator}{value}' if value else f'-{arg_name}'

View File

@ -13,6 +13,8 @@ $ mcs [OPTIONS] COMMAND [ARGS]...
\fBOptions\fP:
.RS
.IP \(bu 2
\fB\fC\-v, \-\-verbose\fR: Enable verbose logging to console
.IP \(bu 2
\fB\fC\-\-help\fR: Show this message and exit.
.RE
.PP
@ -29,7 +31,11 @@ $ mcs [OPTIONS] COMMAND [ARGS]...
.IP \(bu 2
\fB\fCcskeys\fR: Generates a random AES encryption key and init vector and writes them to disk.
.IP \(bu 2
\fB\fCcspasswd\fR: Encrypt a Columnstore plaintext password using the encryption key in the key file.
\fB\fCcspasswd\fR: Encrypt a Columnstore plaintext password.
.IP \(bu 2
\fB\fCbootstrap\-single\-node\fR: Bootstrap a single node (localhost)...
.IP \(bu 2
\fB\fCreview\fR: Provides useful functions to review and troubleshoot the MCS cluster.
.IP \(bu 2
\fB\fChelp\-all\fR: Show help for all commands in man page style.
.IP \(bu 2
@ -67,9 +73,9 @@ $ mcs backup [OPTIONS]
\fB\fC\-bl, \-\-backup\-location TEXT\fR: What directory to store the backups on this machine or the target machine.
Consider write permissions of the scp user and the user running this script.
Mariadb\-backup will use this location as a tmp dir for S3 and remote backups temporarily.
Example: /mnt/backups/ [default: /tmp/backups/]
Example: /mnt/backups/
.IP \(bu 2
\fB\fC\-bd, \-\-backup\-destination TEXT\fR: Are the backups going to be stored on the same machine this script is running on or another server \- if Remote you need to setup scp=Options: \[dq]Local\[dq] or \[dq]Remote\[dq] [default: Local]
\fB\fC\-bd, \-\-backup\-destination TEXT\fR: Are the backups going to be stored on the same machine this script is running on or another server \- if Remote you need to setup scp=Options: \[dq]Local\[dq] or \[dq]Remote\[dq]
.IP \(bu 2
\fB\fC\-scp TEXT\fR: Used only if \-\-backup\-destination=\[dq]Remote\[dq]\&.
The user/credentials that will be used to scp the backup files
@ -83,41 +89,41 @@ Example: \[dq]s3://my\-cs\-backups\[dq]
Example: \[dq]\[la]http://127.0.0.1:8000\[ra]\[dq]
.IP \(bu 2
\fB\fC\-s, \-\-storage TEXT\fR: What storage topology is being used by Columnstore \- found in /etc/columnstore/storagemanager.cnf.
Options: \[dq]LocalStorage\[dq] or \[dq]S3\[dq] [default: LocalStorage]
Options: \[dq]LocalStorage\[dq] or \[dq]S3\[dq]
.IP \(bu 2
\fB\fC\-i, \-\-incremental TEXT\fR: Adds columnstore deltas to an existing full backup. Backup folder to apply increment could be a value or \[dq]auto\fImost\fPrecent\[dq] \- the incremental backup applies to last full backup.
.IP \(bu 2
\fB\fC\-P, \-\-parallel INTEGER\fR: Determines if columnstore data directories will have multiple rsync running at the same time for different subfolders to parallelize writes. Ignored if \[dq]\-c/\-\-compress\[dq] argument not set. [default: 4]
\fB\fC\-P, \-\-parallel INTEGER\fR: Enables parallel rsync for faster backups, setting the number of simultaneous rsync processes. With \-c/\-\-compress, sets the number of compression threads.
.IP \(bu 2
\fB\fC\-ha, \-\-highavilability / \-no\-ha, \-\-no\-highavilability\fR: Hint wether shared storage is attached @ below on all nodes to see all data
\fB\fC\-ha, \-\-highavilability\fR: Hint whether shared storage is attached @ below on all nodes to see all data
HA LocalStorage ( /var/lib/columnstore/dataX/ )
HA S3 ( /var/lib/columnstore/storagemanager/ ) [default: no\-ha]
HA S3 ( /var/lib/columnstore/storagemanager/ )
.IP \(bu 2
\fB\fC\-f, \-\-config\-file TEXT\fR: Path to backup configuration file to load variables from \- relative or full path accepted.
.IP \(bu 2
\fB\fC\-sbrm, \-\-skip\-save\-brm / \-no\-sbrm, \-\-no\-skip\-save\-brm\fR: Skip saving brm prior to running a backup \- ideal for dirty backups. [default: no\-sbrm]
\fB\fC\-sbrm, \-\-skip\-save\-brm\fR: Skip saving brm prior to running a backup \- ideal for dirty backups.
.IP \(bu 2
\fB\fC\-spoll, \-\-skip\-polls / \-no\-spoll, \-\-no\-skip\-polls\fR: Skip sql checks confirming no write/cpimports running. [default: no\-spoll]
\fB\fC\-spoll, \-\-skip\-polls\fR: Skip sql checks confirming no write/cpimports running.
.IP \(bu 2
\fB\fC\-slock, \-\-skip\-locks / \-no\-slock, \-\-no\-skip\-locks\fR: Skip issuing write locks \- ideal for dirty backups. [default: no\-slock]
\fB\fC\-slock, \-\-skip\-locks\fR: Skip issuing write locks \- ideal for dirty backups.
.IP \(bu 2
\fB\fC\-smdb, \-\-skip\-mariadb\-backup / \-no\-smdb, \-\-no\-skip\-mariadb\-backup\fR: Skip running a mariadb\-backup for innodb data \- ideal for incremental dirty backups. [default: no\-smdb]
\fB\fC\-smdb, \-\-skip\-mariadb\-backup\fR: Skip running a mariadb\-backup for innodb data \- ideal for incremental dirty backups.
.IP \(bu 2
\fB\fC\-sb, \-\-skip\-bucket\-data / \-no\-sb, \-\-no\-skip\-bucket\-data\fR: Skip taking a copy of the columnstore data in the bucket. [default: no\-sb]
\fB\fC\-sb, \-\-skip\-bucket\-data\fR: Skip taking a copy of the columnstore data in the bucket.
.IP \(bu 2
\fB\fC\-nb, \-\-name\-backup TEXT\fR: Define the name of the backup \- default: $(date +%m\-%d\-%Y) [default: 03\-20\-2025]
\fB\fC\-nb, \-\-name\-backup TEXT\fR: Define the name of the backup \- default: $(date +%m\-%d\-%Y)
.IP \(bu 2
\fB\fC\-c, \-\-compress TEXT\fR: Compress backup in X format \- Options: [ pigz ].
.IP \(bu 2
\fB\fC\-q, \-\-quiet / \-no\-q, \-\-no\-quiet\fR: Silence verbose copy command outputs. [default: no\-q]
\fB\fC\-q, \-\-quiet\fR: Silence verbose copy command outputs.
.IP \(bu 2
\fB\fC\-nv\-ssl, \-\-no\-verify\-ssl / \-v\-ssl, \-\-verify\-ssl\fR: Skips verifying ssl certs, useful for onpremise s3 storage. [default: v\-ssl]
\fB\fC\-nv\-ssl, \-\-no\-verify\-ssl\fR: Skips verifying ssl certs, useful for onpremise s3 storage.
.IP \(bu 2
\fB\fC\-pi, \-\-poll\-interval INTEGER\fR: Number of seconds between poll checks for active writes & cpimports. [default: 5]
\fB\fC\-pi, \-\-poll\-interval INTEGER\fR: Number of seconds between poll checks for active writes & cpimports.
.IP \(bu 2
\fB\fC\-pmw, \-\-poll\-max\-wait INTEGER\fR: Max number of minutes for polling checks for writes to wait before exiting as a failed backup attempt. [default: 60]
\fB\fC\-pmw, \-\-poll\-max\-wait INTEGER\fR: Max number of minutes for polling checks for writes to wait before exiting as a failed backup attempt.
.IP \(bu 2
\fB\fC\-r, \-\-retention\-days INTEGER\fR: Retain backups created within the last X days, default 0 == keep all backups. [default: 0]
\fB\fC\-r, \-\-retention\-days INTEGER\fR: Retain backups created within the last X days, default 0 == keep all backups.
.IP \(bu 2
\fB\fC\-aro, \-\-apply\-retention\-only\fR: Only apply retention policy to existing backups, does not run a backup.
.IP \(bu 2
@ -150,9 +156,9 @@ $ mcs dbrm_backup [OPTIONS]
.IP \(bu 2
\fB\fC\-nb, \-\-name\-backup TEXT\fR: Define the prefix of the backup \- default: dbrm\fIbackup+date +%Y%m%d\fP%H%M%S [default: dbrm_backup]
.IP \(bu 2
\fB\fC\-ssm, \-\-skip\-storage\-manager / \-no\-ssm, \-\-no\-skip\-storage\-manager\fR: Skip backing up storagemanager directory. [default: no\-ssm]
\fB\fC\-ssm, \-\-skip\-storage\-manager\fR: Skip backing up storagemanager directory.
.IP \(bu 2
\fB\fC\-q, \-\-quiet / \-no\-q, \-\-no\-quiet\fR: Silence verbose copy command outputs. [default: no\-q]
\fB\fC\-q, \-\-quiet\fR: Silence verbose copy command outputs.
.IP \(bu 2
\fB\fC\-li, \-\-list\fR: List backups.
.IP \(bu 2
@ -207,21 +213,21 @@ Options: \[dq]LocalStorage\[dq] or \[dq]S3\[dq] [default: LocalStorage]
.IP \(bu 2
\fB\fC\-P, \-\-parallel INTEGER\fR: Determines number of decompression and mdbstream threads. Ignored if \[dq]\-c/\-\-compress\[dq] argument not set. [default: 4]
.IP \(bu 2
\fB\fC\-ha, \-\-highavilability / \-no\-ha, \-\-no\-highavilability\fR: Flag for high available systems (meaning shared storage exists supporting the topology so that each node sees all data) [default: no\-ha]
\fB\fC\-ha, \-\-highavilability\fR: Flag for high available systems (meaning shared storage exists supporting the topology so that each node sees all data)
.IP \(bu 2
\fB\fC\-cont, \-\-continue / \-no\-cont, \-\-no\-continue\fR: This acknowledges data in your \-\-new\fIbucket is ok to delete when restoring S3. When set to true skips the enforcement that new\fPbucket should be empty prior to starting a restore. [default: no\-cont]
\fB\fC\-cont, \-\-continue\fR: This acknowledges data in your \-\-new\fIbucket is ok to delete when restoring S3. When set to true skips the enforcement that new\fPbucket should be empty prior to starting a restore.
.IP \(bu 2
\fB\fC\-f, \-\-config\-file TEXT\fR: Path to backup configuration file to load variables from \- relative or full path accepted.
.IP \(bu 2
\fB\fC\-smdb, \-\-skip\-mariadb\-backup / \-no\-smdb, \-\-no\-skip\-mariadb\-backup\fR: Skip restoring mariadb server via mariadb\-backup \- ideal for only restoring columnstore. [default: no\-smdb]
\fB\fC\-smdb, \-\-skip\-mariadb\-backup\fR: Skip restoring mariadb server via mariadb\-backup \- ideal for only restoring columnstore.
.IP \(bu 2
\fB\fC\-sb, \-\-skip\-bucket\-data / \-no\-sb, \-\-no\-skip\-bucket\-data\fR: Skip restoring columnstore data in the bucket \- ideal if looking to only restore mariadb server. [default: no\-sb]
\fB\fC\-sb, \-\-skip\-bucket\-data\fR: Skip restoring columnstore data in the bucket \- ideal if looking to only restore mariadb server.
.IP \(bu 2
\fB\fC\-c, \-\-compress TEXT\fR: Hint that the backup is compressed in X format. Options: [ pigz ].
.IP \(bu 2
\fB\fC\-q, \-\-quiet / \-no\-q, \-\-no\-quiet\fR: Silence verbose copy command outputs. [default: no\-q]
\fB\fC\-q, \-\-quiet\fR: Silence verbose copy command outputs.
.IP \(bu 2
\fB\fC\-nv\-ssl, \-\-no\-verify\-ssl / \-v\-ssl, \-\-verify\-ssl\fR: Skips verifying ssl certs, useful for onpremise s3 storage. [default: v\-ssl]
\fB\fC\-nv\-ssl, \-\-no\-verify\-ssl\fR: Skips verifying ssl certs, useful for onpremise s3 storage.
.IP \(bu 2
\fB\fC\-li, \-\-list\fR: List backups.
.IP \(bu 2
@ -248,9 +254,9 @@ $ mcs dbrm_restore [OPTIONS]
.IP \(bu 2
\fB\fC\-ns, \-\-no\-start\fR: Do not attempt columnstore startup post dbrm_restore.
.IP \(bu 2
\fB\fC\-sdbk, \-\-skip\-dbrm\-backup / \-no\-sdbk, \-\-no\-skip\-dbrm\-backup\fR: Skip backing up dbrms before restoring. [default: sdbk]
\fB\fC\-sdbk, \-\-skip\-dbrm\-backup\fR: Skip backing up dbrms before restoring.
.IP \(bu 2
\fB\fC\-ssm, \-\-skip\-storage\-manager / \-no\-ssm, \-\-no\-skip\-storage\-manager\fR: Skip backing up storagemanager directory. [default: ssm]
\fB\fC\-ssm, \-\-skip\-storage\-manager\fR: Skip backing up storagemanager directory.
.IP \(bu 2
\fB\fC\-li, \-\-list\fR: List backups.
.IP \(bu 2
@ -311,6 +317,89 @@ $ mcs cspasswd [OPTIONS]
.IP \(bu 2
\fB\fC\-\-help\fR: Show this message and exit.
.RE
.SH \fB\fCmcs bootstrap\-single\-node\fR
.PP
Bootstrap a single node (localhost) Columnstore instance.
.PP
\fBUsage\fP:
.PP
.RS
.nf
$ mcs bootstrap\-single\-node [OPTIONS]
.fi
.RE
.PP
\fBOptions\fP:
.RS
.IP \(bu 2
\fB\fC\-\-api\-key TEXT\fR: API key to set.
.IP \(bu 2
\fB\fC\-\-help\fR: Show this message and exit.
.RE
.SH \fB\fCmcs review\fR
.PP
This script performs various maintenance and diagnostic tasks for
MariaDB ColumnStore, including log archiving, extent map backups,
schema and table testing, directory and ownership checks, extent map
validation, S3 storage comparison, process management, table
synchronization, port availability checks, stack dumps, cleanup of
rollback fragments, and graceful process termination.
.PP
If database is up, this script will connect as root@localhost via socket.
.PP
\fBUsage\fP:
.PP
.RS
.nf
$ mcs review [OPTIONS]
.fi
.RE
.PP
\fBOptions\fP:
.RS
.IP \(bu 2
\fB\fC\-\-version\fR: Only show the header with version information.
.IP \(bu 2
\fB\fC\-\-logs\fR: Create a compressed archive of logs for MariaDB Support Ticket
.IP \(bu 2
\fB\fC\-\-path\fR: Define the path for where to save files/tarballs and outputs of this script.
.IP \(bu 2
\fB\fC\-\-backupdbrm\fR: Takes a compressed backup of extent map files in dbrm directory.
.IP \(bu 2
\fB\fC\-\-testschema\fR: Creates a test schema, tables, imports, queries, drops schema.
.IP \(bu 2
\fB\fC\-\-testschemakeep\fR: creates a test schema, tables, imports, queries, does not drop.
.IP \(bu 2
\fB\fC\-\-ldlischema\fR: Using ldli, creates test schema, tables, imports, queries, drops schema.
.IP \(bu 2
\fB\fC\-\-ldlischemakeep\fR: Using ldli, creates test schema, tables, imports, queries, does not drop.
.IP \(bu 2
\fB\fC\-\-emptydirs\fR: Searches /var/lib/columnstore for empty directories.
.IP \(bu 2
\fB\fC\-\-notmysqldirs\fR: Searches /var/lib/columnstore for directories not owned by mysql.
.IP \(bu 2
\fB\fC\-\-emcheck\fR: Checks the extent map for orphaned and missing files.
.IP \(bu 2
\fB\fC\-\-s3check\fR: Checks the extent map against S3 storage.
.IP \(bu 2
\fB\fC\-\-pscs\fR: Adds the pscs command. pscs lists running columnstore processes.
.IP \(bu 2
\fB\fC\-\-schemasync\fR: Fix out\-of\-sync columnstore tables (CAL0009).
.IP \(bu 2
\fB\fC\-\-tmpdir\fR: Ensure owner of temporary dir after reboot (MCOL\-4866 & MCOL\-5242).
.IP \(bu 2
\fB\fC\-\-checkports\fR: Checks if ports needed by Columnstore are opened.
.IP \(bu 2
\fB\fC\-\-eustack\fR: Dumps the stack of Columnstore processes.
.IP \(bu 2
\fB\fC\-\-clearrollback\fR: Clear any rollback fragments from dbrm files.
.IP \(bu 2
\fB\fC\-\-killcolumnstore\fR: Stop columnstore processes gracefully, then kill remaining processes.
.IP \(bu 2
\fB\fC\-\-color TEXT\fR: print headers in color. Options: [none,red,blue,green,yellow,magenta,cyan] prefix color with l for light.
.IP \(bu 2
\fB\fC\-\-help\fR: Show this message and exit.
.RE
.SH \fB\fCmcs help\-all\fR
.PP
Show help for all commands in man page style.

View File

@ -1,7 +1,9 @@
import logging
import os
import secrets
from datetime import datetime, timedelta
import sys
from datetime import datetime
from typing import Optional
import typer
from typing_extensions import Annotated
@ -14,7 +16,11 @@ from cmapi_server.controllers.api_clients import ClusterControllerClient
from cmapi_server.exceptions import CEJError
from cmapi_server.handlers.cej import CEJPasswordHandler
from cmapi_server.managers.transaction import TransactionManager
from cmapi_server.process_dispatchers.base import BaseDispatcher
from mcs_cluster_tool.constants import MCS_COLUMNSTORE_REVIEW_SH
from mcs_cluster_tool.decorators import handle_output
from mcs_cluster_tool.helpers import cook_sh_arg
logger = logging.getLogger('mcs_cli')
@ -150,3 +156,227 @@ def bootstrap_single_node(
'add_node_resp': add_node_resp,
}
return result
# `mcs review`: thin CLI wrapper that forwards its options to the
# columnstore_review.sh script (MCS_COLUMNSTORE_REVIEW_SH).
#
# Every parameter is named `_<flag>` and is exposed by Typer as `--<flag>`.
# All of them default to None, i.e. "not supplied"; only supplied options are
# forwarded to the script. The options `--path` and `--color` carry a string
# value, all the others are boolean switches.
#
# The function never returns normally: it raises typer.Exit with code 0 when
# the script run is reported as successful and with code 1 otherwise.
#
# NOTE: the docstring below is the user-facing help text rendered by Typer
# (and copied into README.md / mcs.1), so developer notes live in comments.
@handle_output
def review(
    _version: Annotated[
        Optional[bool],
        typer.Option(
            '--version',
            help='Only show the header with version information.',
            show_default=False
        )
    ] = None,
    _logs: Annotated[
        Optional[bool],
        typer.Option(
            '--logs',
            help=(
                'Create a compressed archive of logs for MariaDB Support '
                'Ticket'
            ),
            show_default=False
        )
    ] = None,
    _path: Annotated[
        Optional[str],
        typer.Option(
            '--path',
            help=(
                'Define the path for where to save files/tarballs and outputs '
                'of this script.'
            ),
            show_default=False
        )
    ] = None,
    _backupdbrm: Annotated[
        Optional[bool],
        typer.Option(
            '--backupdbrm',
            help=(
                'Takes a compressed backup of extent map files in dbrm '
                'directory.'
            ),
            show_default=False
        )
    ] = None,
    _testschema: Annotated[
        Optional[bool],
        typer.Option(
            '--testschema',
            help=(
                'Creates a test schema, tables, imports, queries, drops '
                'schema.'
            ),
            show_default=False
        )
    ] = None,
    _testschemakeep: Annotated[
        Optional[bool],
        typer.Option(
            '--testschemakeep',
            help=(
                'Creates a test schema, tables, imports, queries, does not '
                'drop.'
            ),
            show_default=False
        )
    ] = None,
    _ldlischema: Annotated[
        Optional[bool],
        typer.Option(
            '--ldlischema',
            help=(
                'Using ldli, creates test schema, tables, imports, queries, '
                'drops schema.'
            ),
            show_default=False
        )
    ] = None,
    _ldlischemakeep: Annotated[
        Optional[bool],
        typer.Option(
            '--ldlischemakeep',
            help=(
                'Using ldli, creates test schema, tables, imports, queries, '
                'does not drop.'
            ),
            show_default=False
        )
    ] = None,
    _emptydirs: Annotated[
        Optional[bool],
        typer.Option(
            '--emptydirs',
            help='Searches /var/lib/columnstore for empty directories.',
            show_default=False
        )
    ] = None,
    _notmysqldirs: Annotated[
        Optional[bool],
        typer.Option(
            '--notmysqldirs',
            help=(
                'Searches /var/lib/columnstore for directories not owned by '
                'mysql.'
            ),
            show_default=False
        )
    ] = None,
    _emcheck: Annotated[
        Optional[bool],
        typer.Option(
            '--emcheck',
            help='Checks the extent map for orphaned and missing files.',
            show_default=False
        )
    ] = None,
    _s3check: Annotated[
        Optional[bool],
        typer.Option(
            '--s3check',
            help='Checks the extent map against S3 storage.',
            show_default=False
        )
    ] = None,
    _pscs: Annotated[
        Optional[bool],
        typer.Option(
            '--pscs',
            help=(
                'Adds the pscs command. pscs lists running columnstore '
                'processes.'
            ),
            show_default=False
        )
    ] = None,
    _schemasync: Annotated[
        Optional[bool],
        typer.Option(
            '--schemasync',
            help='Fix out-of-sync columnstore tables (CAL0009).',
            show_default=False
        )
    ] = None,
    _tmpdir: Annotated[
        Optional[bool],
        typer.Option(
            '--tmpdir',
            help=(
                'Ensure owner of temporary dir after reboot (MCOL-4866 & '
                'MCOL-5242).'
            ),
            show_default=False
        )
    ] = None,
    _checkports: Annotated[
        Optional[bool],
        typer.Option(
            '--checkports',
            help='Checks if ports needed by Columnstore are opened.',
            show_default=False
        )
    ] = None,
    _eustack: Annotated[
        Optional[bool],
        typer.Option(
            '--eustack',
            help='Dumps the stack of Columnstore processes.',
            show_default=False
        )
    ] = None,
    _clearrollback: Annotated[
        Optional[bool],
        typer.Option(
            '--clearrollback',
            help='Clear any rollback fragments from dbrm files.',
            show_default=False
        )
    ] = None,
    _killcolumnstore: Annotated[
        Optional[bool],
        typer.Option(
            '--killcolumnstore',
            help=(
                'Stop columnstore processes gracefully, then kill remaining '
                'processes.'
            ),
            show_default=False
        )
    ] = None,
    _color: Annotated[
        Optional[str],
        typer.Option(
            '--color',
            help=(
                'print headers in color. Options: [none,red,blue,green,yellow,'
                'magenta,cyan] prefix color with l for light.'
            ),
            show_default=False
        )
    ] = None,
):
    """
    This script performs various maintenance and diagnostic tasks for
    MariaDB ColumnStore, including log archiving, extent map backups,
    schema and table testing, directory and ownership checks, extent map
    validation, S3 storage comparison, process management, table
    synchronization, port availability checks, stack dumps, cleanup of
    rollback fragments, and graceful process termination.

    If database is up, this script will connect as root@localhost via socket.
    """
    # Snapshot the CLI options FIRST: at this point locals() holds nothing but
    # the function parameters, so working variables created below (and the
    # local import) can never leak into the generated command line.
    cli_options = dict(locals())

    import shlex  # function-scope import keeps this block self-contained

    arguments = []
    for arg_name, value in cli_options.items():
        # columnstore_review.sh accepts only --arg=value format, hence the
        # '=' separator. cook_sh_arg returns None for options that must not
        # be forwarded.
        # NOTE(review): presumably cook_sh_arg turns the leading underscore
        # of the parameter name into the second dash of `--flag` - confirm.
        sh_arg = cook_sh_arg(arg_name, value, separator='=')
        if sh_arg is None:
            continue
        # Quote each argument so values containing whitespace or shell
        # metacharacters (e.g. a --path with spaces) stay a single token.
        # shlex.quote leaves already-safe strings unchanged.
        # NOTE(review): assumes exec_command tokenizes the command string
        # shell-style (shell or shlex.split) - confirm in BaseDispatcher.
        arguments.append(shlex.quote(sh_arg))

    cmd = ' '.join([MCS_COLUMNSTORE_REVIEW_SH, *arguments])
    # The script output is sent to this process' stdout via the stdout
    # argument; the second element of the returned pair is not used here.
    success, _ = BaseDispatcher.exec_command(cmd, stdout=sys.stdout)
    # Mirror the script result in the CLI exit code.
    raise typer.Exit(code=0 if success else 1)

View File

@ -1,15 +1,28 @@
#!/bin/bash
# columnstore_review.sh
# script by Edward Stoever for MariaDB support
VERSION=1.4.3
# Contributors: Allen Herrera
# Patrizio Tamorri
VERSION=1.4.13
function prepare_for_run() {
unset ERR
if [ -n "$USER_PROVIDED_OUTPUT_PATH" ] && [ ! -d "$USER_PROVIDED_OUTPUT_PATH" ]; then
printf "The directory $USER_PROVIDED_OUTPUT_PATH does not exist.\n\n"
exit 1
fi
if [ -n "$USER_PROVIDED_OUTPUT_PATH" ]; then
OUTDIR=$USER_PROVIDED_OUTPUT_PATH/columnstore_review
TARDIR=$USER_PROVIDED_OUTPUT_PATH
else
OUTDIR=/tmp/columnstore_review
TARDIR=/tmp
fi
mkdir -p $OUTDIR
WARNFILE=$OUTDIR/cs_warnings.out
if [ $EM_CHECK ]; then
EMOUTDIR=/tmp/columnstore_review/em; mkdir -p $EMOUTDIR
EMOUTDIR=$OUTDIR/em; mkdir -p $EMOUTDIR
OUTPUTFILE=$EMOUTDIR/$(hostname)_cs_em_check.txt
else
OUTPUTFILE=$OUTDIR/$(hostname)_cs_review.txt
@ -44,6 +57,7 @@ function exists_mariadbd_running() {
}
function exists_columnstore_running() {
if [[ "$(ps -ef | grep -E "(PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode)" | grep -v "grep"|wc -l)" == "0" ]]; then
echo 'There are no Mariadb-Columnstore processes running.' >> $WARNFILE;
else
@ -942,7 +956,15 @@ function dump_log () {
}
function collect_logs() {
if [ -n "$USER_PROVIDED_OUTPUT_PATH" ]; then
TARPATH="$USER_PROVIDED_OUTPUT_PATH"
LOGSOUTDIR="$USER_PROVIDED_OUTPUT_PATH/columnstore_review/logs_$(date +"%m-%d-%H-%M-%S")/$(hostname)"
else
TARPATH=/tmp
LOGSOUTDIR=/tmp/columnstore_review/logs_$(date +"%m-%d-%H-%M-%S")/$(hostname)
fi
mkdir -p $LOGSOUTDIR || ech0 'Cannot create temporary directory for logs.';
mkdir -p $LOGSOUTDIR/system
mkdir -p $LOGSOUTDIR/mariadb
@ -968,25 +990,46 @@ function collect_logs() {
dump_log "mcs-loadbrm" $LOGSOUTDIR/columnstore/
dump_log "mcs-primproc" $LOGSOUTDIR/columnstore/
dump_log "mcs-workernode@1" $LOGSOUTDIR/columnstore/
dump_log "mcs-workernode@2" $LOGSOUTDIR/columnstore/
dump_log "mcs-writeengineserver" $LOGSOUTDIR/columnstore/
dump_log "mcs-controllernode" $LOGSOUTDIR/columnstore/
set_data1dir
ls -lrt $DATA1DIR/systemFiles/dbrm > $LOGSOUTDIR/columnstore/ls_lrt_dbrm.txt
if [ ! -z "$STORAGE_TYPE" ] && [ "$STORAGE_TYPE" == "S3" ]; then
dump_log "mcs-storagemanager" $LOGSOUTDIR/columnstore/
smls /data1/systemFiles/dbrm/ > $LOGSOUTDIR/columnstore/s3_dbrms.txt ;
smcat /data1/systemFiles/dbrm/BRM_saves_current 2>/dev/null > $LOGSOUTDIR/columnstore/s3_BRM_saves_current ;
fi
# System Logs
if [ -f "/proc/sys/kernel/threads-max" ]; then cp /proc/sys/kernel/threads-max $LOGSOUTDIR/system/kernal-threads-max; fi;
if [ -f "/proc/sys/kernel/pid_max" ]; then cp /proc/sys/kernel/pid_max $LOGSOUTDIR/system/kernal-pid_max; fi;
if [ -f "/proc/sys/vm/max_map_count" ]; then cp /proc/sys/vm/max_map_count $LOGSOUTDIR/system/kernal-max_map_count; fi;
# if [ -f "/var/log/messages" ]; then cp /var/log/messages* $LOGSOUTDIR/system; fi; # TOO MUCH COLLECTED...
find /var/log -name "messages*" -mtime -5 -type f -exec cp {} $LOGSOUTDIR/system \; 2>/dev/null
if [ -f "/var/log/syslog" ]; then find /var/log/syslog -name syslog -type f -exec tail -10000 {} > $LOGSOUTDIR/system/syslog \;; fi;
if [ -f "/var/log/daemon.log" ]; then find /var/log/daemon.log -name daemon.log -type f -exec tail -10000 {} > $LOGSOUTDIR/system/daemon.log \;; fi;
if command -v ulimit >/dev/null 2>&1; then
ulimit -a > $LOGSOUTDIR/system/kernal-ulimits.txt
fi
# find /var/log \( -name "messages" -o -name "messages.1" \) -type f -exec cp {} $LOGSOUTDIR/system \;
cp /var/log/messages* $LOGSOUTDIR/system
find /var/log/syslog -name syslog -type f -exec tail -10000 {} > $LOGSOUTDIR/system/syslog \;
find /var/log/daemon.log -name daemon.log -type f -exec tail -10000 {} > $LOGSOUTDIR/system/daemon.log \;
cd /var/log/mariadb
find /usr/lib -name "mcs*service" -exec cp {} $LOGSOUTDIR/systemd \;
find /usr/lib -name "mariadb*service" -exec cp {} $LOGSOUTDIR/systemd \;
ls -1 columnstore/*.log 2>/dev/null | cpio -pd $LOGSOUTDIR/ 2>/dev/null
ls -1 columnstore/*z 2>/dev/null | cpio -pd $LOGSOUTDIR/ 2>/dev/null
find columnstore/archive columnstore/install columnstore/trace -mtime -30 | cpio -pd $LOGSOUTDIR/ 2>/dev/null
find columnstore/cpimport -mtime -1 | cpio -pd $LOGSOUTDIR/ 2>/dev/null
# find columnstore/cpimport -mtime -1 | cpio -pd $LOGSOUTDIR/ 2>/dev/null # COLLECTS TOO MUCH
find columnstore/cpimport -name "*.err" -size +0 -mtime -2 | cpio -pd $LOGSOUTDIR/ 2>/dev/null
#collect ports Status
unset SUPPRESS_CLOSED_PORTS
check_ports > $LOGSOUTDIR/columnstore/$(hostname)_ports_check.txt 2>/dev/null
if [ $CAN_CONNECT ]; then
mariadb -ABNe "show global variables" > $LOGSOUTDIR/mariadb/$(hostname)_global_variables.txt 2>/dev/null
@ -995,18 +1038,18 @@ function collect_logs() {
my_print_defaults --mysqld > $LOGSOUTDIR/mariadb/$(hostname)_my_print_defaults.txt 2>/dev/null
if [ -f $OUTPUTFILE ]; then cp $OUTPUTFILE $LOGSOUTDIR/; fi
cd $LOGSOUTDIR/..
tar -czf /tmp/$COMPRESSFILE ./*
tar -czf $TARPATH/$COMPRESSFILE ./*
cd - 1>/dev/null
print_color "### COLLECTED LOGS FOR SUPPORT TICKET ###\n"
ech0 "Attach the following tar file to your support ticket."
if [ $THISISCLUSTER ]; then
ech0 "Please collect logs with this script from each node in your cluster."
fi
FILE_SIZE=$(stat -c %s /tmp/$COMPRESSFILE)
FILE_SIZE=$(stat -c %s $TARPATH/$COMPRESSFILE)
if (( $FILE_SIZE > 52428800 )); then
print0 "The file /tmp/$COMPRESSFILE is larger than 50MB.\nPlease use MariaDB Large file upload at https://mariadb.com/upload/\nInform us about the upload in the support ticket."
print0 "The file $TARPATH/$COMPRESSFILE is larger than 50MB.\nPlease use MariaDB Large file upload at https://mariadb.com/upload/\nInform us about the upload in the support ticket.\n"
fi
print0 "\nCreated: /tmp/$COMPRESSFILE\n"
print0 "\nCreated: $TARPATH/$COMPRESSFILE\n"
ech0
}
@ -1192,6 +1235,12 @@ fi
}
function backup_dbrm() {
if [ -n "$USER_PROVIDED_OUTPUT_PATH" ]; then
TARPATH="$USER_PROVIDED_OUTPUT_PATH"
else
TARPATH=/tmp
fi
STORAGE_TYPE=$(grep service /etc/columnstore/storagemanager.cnf | grep -v "^\#" | grep "\=" | awk -F= '{print $2}' | xargs)
if [ "$(echo $STORAGE_TYPE | awk '{print tolower($0)}')" == "s3" ]; then print0 "This is node uses S3 storage for Columnstore. Exiting.\n\n"; return; fi
@ -1217,10 +1266,10 @@ fi
fi
set_data1dir
cd $DATA1DIR/systemFiles
tar -czf /tmp/$COMPRESSFILE ./dbrm
tar -czf $TARPATH/$COMPRESSFILE ./dbrm
cd - 1>/dev/null
print_color "### DBRM EXTENT MAP BACKUP ###\n"
ech0 "Files in dbrm directory backed up to compressed archive /tmp/$COMPRESSFILE."
ech0 "Files in dbrm directory backed up to compressed archive $TARPATH/$COMPRESSFILE"
ech0 "Files in /tmp can be deleted on reboot. It is recommended to move the archive to a safe location."
ech0
}
@ -2047,6 +2096,250 @@ function ensure_owner_privs_of_tmp_dir() {
}
# CHECK PORTS FUNCTIONS BY Patrizio Tamorri
function check_ports(){
    # Probe the Columnstore-related TCP ports on every node found in
    # /etc/columnstore/Columnstore.xml using nmap and print one line per
    # ip:port plus a final pass/fail summary.
    #
    # Reads:  SUPPRESS_CLOSED_PORTS - when non-empty, ports that are closed
    #         but not listed in Columnstore.xml are not reported.
    # Note:   every variable assigned here (ports, my_node, hostname,
    #         local_ip, ips, local_ports, pass, ip, port, result, ipadd,
    #         ip_port) is a plain shell global, as are the two helpers.

    # Nothing can be checked without nmap.
    if ! command -v nmap &> /dev/null; then
        printf "nmap is not installed.\n\n"
        return
    fi

    # Comma separated list of ports probed on each node.
    ports="8600,8601,8602,8603,8604,8605,8606,8607,8608,8609,8610,8611,8612,8613,8614,8615,8616,8617,8618,8619,8620,8630,8700,8800,3306,8999"

    # Name of the local module as stored by Columnstore.
    my_node=$(cat /var/lib/columnstore/local/module)

    # Identity of this machine, used below to map it to 127.0.0.1.
    hostname=$(hostname)
    local_ip=$(hostname -I | awk '{print $1}')

    # <IPAddr> values that follow a *_WriteEngineServer tag, with the tags,
    # CR/LF/TAB characters and surrounding blanks removed.
    ips=$(grep -A 1 "_WriteEngineServer" /etc/columnstore/Columnstore.xml \
        | sed "/${my_node}_WriteEngineServer/,+0d" \
        | grep "<IPAddr>" \
        | tr -d '\r\n\t' \
        | sed -e 's/<IPAddr>//g' -e 's/<\/IPAddr>//g' -e 's/^[ \t]*//' -e 's/[ \t]*$//' \
        | sort -u)

    # Every <IPAddr>/<Port> value of the XML file, stripped of its tags and
    # joined pairwise by awk as "first:second".
    local_ports=$(grep -E "<IPAddr>|<Port>" /etc/columnstore/Columnstore.xml \
        | tr -d '\r\n\t' \
        | sed -e 's/<\/\?IPAddr>//g' -e 's/<\/\?Port>//g' \
        | awk 'NR%2{printf "%s:", $0; next;} 1')

    # Flipped to false by the helpers as soon as one probe is an ERROR.
    pass=true

    # Helper for ports that are NOT configured in Columnstore.xml:
    # "closed" is acceptable, "filtered"/unknown is an error.
    check_port_nmap_available_to_use() {
        ip=$1
        port=$2
        # Second column of the nmap line mentioning the port is its state.
        result=$(nmap -T4 -p $port $ip | grep "$port" | awk '{print $2}')
        case "$result" in
            open)
                echo "$ip:$port - Port is open: SUCCESS"
                ;;
            closed)
                if [ ! ${SUPPRESS_CLOSED_PORTS} ]; then echo "$ip:$port - Port is closed and not firewalled"; fi
                ;;
            filtered)
                echo "$ip:$port - Port is filtered (firewalled or blocked): ERROR"
                pass=false
                ;;
            *)
                echo "$ip:$port - Unknown port status: ERROR"
                pass=false
                ;;
        esac
    }

    # Helper for ports that ARE configured in Columnstore.xml:
    # anything other than "open" is an error.
    check_port_nmap_must_be_opened() {
        ip=$1
        port=$2
        # Second column of the nmap line mentioning the port is its state.
        result=$(nmap -T4 -p $port $ip | grep "$port" | awk '{print $2}')
        case "$result" in
            open)
                echo "$ip:$port - Port is open: SUCCESS"
                ;;
            closed)
                echo "$ip:$port - Port is closed and not firewalled: ERROR"
                pass=false
                ;;
            filtered)
                echo "$ip:$port - Port is filtered (firewalled or blocked): ERROR"
                pass=false
                ;;
            *)
                echo "$ip:$port - Unknown port status: ERROR"
                pass=false
                ;;
        esac
    }

    # Walk every node address and every port of the list.
    for ipadd in $ips; do
        echo "Checking ports on $ipadd..."

        # The local machine is probed through the loopback address.
        if [[ "$ipadd" == "$local_ip" || "$ipadd" == "$hostname" ]]; then
            ipadd="127.0.0.1"
        fi

        for port in ${ports//,/ }; do
            ip_port="$ipadd:$port"
            # A pair present in the XML must be open; any other port only
            # has to be reachable (not filtered).
            if [[ " ${local_ports[@]} " =~ " $ip_port " ]]; then
                check_port_nmap_must_be_opened $ipadd $port
            else
                check_port_nmap_available_to_use $ipadd $port
            fi
        done
    done

    # Summary line.
    if [ "$pass" = true ]; then
        printf "All nodes passed the port test.\n\n"
    else
        printf "One or more nodes failed the port test. Please investigate.\n\n"
    fi
}
function clear_rollback() {
    # Interactively clear rollback fragments (the *_vss / *_vbbm files) from
    # the dbrm directory, after taking a backup of them under /tmp.
    #
    # Preconditions checked here: no Columnstore process may be running.
    # The user must confirm by typing "c"; any other key leaves everything
    # untouched.
    #
    # Safety: nothing is truncated or deleted unless the working directory
    # could be entered AND the backup was written. On such a failure the
    # function prints a message and returns 1 without modifying any file.
    #
    # Sets the globals ERR, CLEAR_ROLLBACK_MESSAGE, STORAGE_TYPE, DATA1DIR,
    # BRMSAV, COUNTFILES, RESPONSE, BRM_SAVES_BACKUP_FILE and DBRM_TMP_DIR,
    # and changes the current working directory.
    unset ERR
    CLEAR_ROLLBACK_MESSAGE="It is recommended that you clear rollback files only when instructed to do so by Mariadb Support.\nType c to clear rollback files.\nType any other key to exit.\n"
    STORAGE_TYPE=$(grep service /etc/columnstore/storagemanager.cnf | grep -v "^\#" | grep "\=" | awk -F= '{print $2}' | awk '{print tolower($0)}' | xargs)
    DATA1DIR=$(mcsGetConfig SystemConfig DBRoot1 2>/dev/null) || DATA1DIR=/var/lib/columnstore/data1
    BRMSAV=$(cat $DATA1DIR/systemFiles/dbrm/BRM_saves_current | xargs)

    # Refuse to work while any Columnstore process is alive.
    if [[ ! "$(ps -ef | grep -E "(PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode)" | grep -v "grep"|wc -l)" == "0" ]]; then
        TEMP_COLOR=lred; print_color "Columnstore processes are running.\nYou may clear rollback fragments only when Columnstore processes are stopped."; unset TEMP_COLOR
        print0 "\nExiting.\n\n"; exit 0
    fi

    if [ "$STORAGE_TYPE" == "localstorage" ]; then
        # Only non-empty vss/vbbm files need clearing.
        COUNTFILES=$(find $DATA1DIR/systemFiles \( -name "${BRMSAV}_vss" -o -name "${BRMSAV}_vbbm" \) -size +0 | wc -l)
        if [ "$COUNTFILES" == "0" ]; then
            TEMP_COLOR=lred; print_color "Rollback files are empty."; unset TEMP_COLOR
            print0 "\nExiting.\n\n"; exit 0
        fi
        print0 "$CLEAR_ROLLBACK_MESSAGE"
        read -s -n 1 RESPONSE
        if [ "$RESPONSE" == "c" ]; then
            ech0; ech0
            BRM_SAVES_BACKUP_FILE=$(hostname)_$(date +"%Y-%m-%d-%H-%M-%S")_BRM_saves.tar
            # The find commands below are relative to ".", so a failed cd
            # would make them operate on an unrelated directory.
            if ! cd "$DATA1DIR/systemFiles/dbrm/"; then
                ech0 "Cannot change directory to $DATA1DIR/systemFiles/dbrm/. Nothing was cleared."
                return 1
            fi
            print0 "BRM_saves_current: ${BRMSAV}\n\nBacking up these files:\n"
            # "{} +" makes find report a non-zero status when tar fails,
            # which "\;" would hide.
            find . \( -name "${BRMSAV}_vss" -o -name "${BRMSAV}_vbbm" \) -exec tar -rvf "/tmp/$BRM_SAVES_BACKUP_FILE" {} + || ERR=true
            # Do not truncate anything unless a non-empty archive exists.
            if [ $ERR ] || [ ! -s "/tmp/$BRM_SAVES_BACKUP_FILE" ]; then
                ech0 "Backup to /tmp/$BRM_SAVES_BACKUP_FILE failed. Nothing was cleared."
                return 1
            fi
            ech0
            find . \( -name "${BRMSAV}_vss" -o -name "${BRMSAV}_vbbm" \) -size +0 -exec truncate -s0 {} \; || ERR=true
            # Re-count to confirm that every file is now zero bytes.
            COUNTFILES=$(find $DATA1DIR/systemFiles \( -name "${BRMSAV}_vss" -o -name "${BRMSAV}_vbbm" \) -size +0 | wc -l)
            if [ $ERR ] || [ "$COUNTFILES" != "0" ]; then
                ech0 "Something went wrong. Check the size of files ${BRMSAV}_vss and ${BRMSAV}_vbbm. Each file should be zero bytes in size."
                ls -lrt $DATA1DIR/systemFiles/dbrm
            else
                TEMP_COLOR=lcyan; print_color "BRM_saves files backed up to /tmp/$BRM_SAVES_BACKUP_FILE.\nFiles cleared successfully.\n\n"; unset TEMP_COLOR
            fi
        else
            print0 "\nNothing done.\n\n"
        fi
    fi

    if [ "$STORAGE_TYPE" == "s3" ]; then
        DBRM_TMP_DIR=/tmp/dbrm-before-clearing-$(date +"%Y-%m-%d-%H-%M-%S") || ERR=true
        print0 "$CLEAR_ROLLBACK_MESSAGE"
        read -s -n 1 RESPONSE
        if [ "$RESPONSE" == "c" ]; then
            ## REF: https://mariadbcorp.atlassian.net/wiki/spaces/Support/pages/1600094249/Stuck+load_brm+failed+rollback+of+a+transaction
            # The rm/touch commands below use relative names, so a failed cd
            # must stop the procedure before they run.
            if ! cd /var/lib/columnstore/storagemanager/metadata/data1/systemFiles/dbrm/; then
                ech0 "Cannot change directory to /var/lib/columnstore/storagemanager/metadata/data1/systemFiles/dbrm/. Nothing was cleared."
                return 1
            fi
            # Copy the whole metadata directory aside before deleting.
            mkdir -p "$DBRM_TMP_DIR" || ERR=true
            if [ ! $ERR ]; then
                find . | cpio -pd "$DBRM_TMP_DIR" || ERR=true
            fi
            if [ $ERR ]; then
                ech0 "Backup to $DBRM_TMP_DIR failed. Nothing was cleared."
                return 1
            fi
            # Clear vss and vbbm files
            rm -f BRM_saves_vss.meta BRM_saves_vbbm.meta || ERR=true
            touch BRM_saves_vss.meta || ERR=true; chown mysql:mysql BRM_saves_vss.meta || ERR=true
            touch BRM_saves_vbbm.meta || ERR=true; chown mysql:mysql BRM_saves_vbbm.meta || ERR=true
            # Reset the storagemanager cache for data1.
            rm -rf /var/lib/columnstore/storagemanager/cache/data1/* || ERR=true
            mkdir /var/lib/columnstore/storagemanager/cache/data1/downloading || ERR=true
            chown mysql:mysql -R /var/lib/columnstore/storagemanager/cache || ERR=true
            if [ $ERR ]; then
                ech0 "Something went wrong."
            else
                TEMP_COLOR=lcyan; print_color "BRM_saves files backed up to $DBRM_TMP_DIR.\nFiles cleared successfully.\n\n"; unset TEMP_COLOR
            fi
        else
            print0 "\nNothing done.\n\n"
        fi
    fi
}
function kill_columnstore(){
    # Stop the Columnstore services of this node with systemctl and then
    # "kill -9" whatever matching process is still alive. Requires the user
    # to confirm by pressing "c". When nothing is running (before or after)
    # shared memory is cleared through clearShm.
    #
    # Sets the globals COUNT_ANY_STRAGGLERS, PM1, PM2, RESPONSE and, on a
    # multi node setup, THISISCLUSTER.

    # Extended regex used to find Columnstore processes in "ps -ef".
    local cs_proc_regex='(PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode|load_brm)'

    COUNT_ANY_STRAGGLERS=$(ps -ef | grep -E "$cs_proc_regex" | grep -v "grep" | wc -l)

    # A non-loopback pm1 address together with a configured pm2 is treated
    # as a cluster.
    PM1=$(mcsGetConfig pm1_WriteEngineServer IPAddr)
    PM2=$(mcsGetConfig pm2_WriteEngineServer IPAddr)
    if [ "$PM1" != "127.0.0.1" ] && [ ! -z $PM2 ]; then
        THISISCLUSTER=true
    fi

    # Nothing to stop: only clear shared memory and leave the script.
    if [ "$COUNT_ANY_STRAGGLERS" == "0" ]; then
        TEMP_COLOR=lred; print_color "Columnstore processes are not running.\n"; unset TEMP_COLOR
        clearShm
        TEMP_COLOR=lcyan; print_color "Columnstore shared memory cleared.\n"; unset TEMP_COLOR
        print0 "\nExiting.\n\n"; exit 0
    fi

    if [ $THISISCLUSTER ] && [ "$COUNT_ANY_STRAGGLERS" != "0" ]; then
        TEMP_COLOR=lred; print_color "WARNING: This is a columnstore cluster and it is best to use cmapi commands to stop columnstore processes.\n"; unset TEMP_COLOR
    fi

    TEMP_COLOR=lcyan; print_color "Press c to stop all columnstore processes on this node.\n"; unset TEMP_COLOR
    read -s -n 1 RESPONSE

    # Any key other than "c" cancels.
    if [ "$RESPONSE" != "c" ]; then
        print0 "\nNothing done.\n\n"
        return
    fi

    # First attempt: ask systemd to stop each unit.
    if [ "$COUNT_ANY_STRAGGLERS" != "0" ]; then
        ech0 "Attempting to gracefully stop mcs-ddlproc."
        systemctl stop mcs-ddlproc;
        ech0 "Attempting to gracefully stop mcs-dmlproc."
        systemctl stop mcs-dmlproc;
        systemctl stop mcs-exemgr 2>/dev/null; # if cs 6.4 and prior
        ech0 "Attempting to gracefully stop mcs-controllernode."
        systemctl stop mcs-controllernode;
        ech0 "Attempting to gracefully stop mcs-storagemanager."
        systemctl stop mcs-storagemanager;
        ech0 "Attempting to gracefully stop mcs-primproc."
        systemctl stop mcs-primproc;
        ech0 "Attempting to gracefully stop mcs-writeengineserver."
        systemctl stop mcs-writeengineserver;
        ech0 "Attempting to gracefully stop mcs-workernode@1."
        systemctl stop mcs-workernode@1;
        ech0 "Attempting to gracefully stop mcs-workernode@2."
        systemctl stop mcs-workernode@2;
    fi

    # Second attempt: SIGKILL everything that survived.
    COUNT_ANY_STRAGGLERS=$(ps -ef | grep -E "$cs_proc_regex" | grep -v "grep" | wc -l)
    if [ "$COUNT_ANY_STRAGGLERS" != "0" ]; then
        ech0 "Remaining processes:"
        ps -ef | grep -E "$cs_proc_regex" | grep -v "grep"
        ech0 "Killing them..."
        ps -ef | grep -E "$cs_proc_regex" | grep -v "grep" | awk '{print $2}' | xargs kill -9
    fi

    # Final verification.
    COUNT_ANY_STRAGGLERS=$(ps -ef | grep -E "$cs_proc_regex" | grep -v "grep" | wc -l)
    if [ "$COUNT_ANY_STRAGGLERS" == "0" ]; then
        ech0 "No columnstore processes running."
        clearShm
        TEMP_COLOR=lcyan; print_color "Columnstore shared memory cleared.\n"; unset TEMP_COLOR
    else
        ech0 "After two attempts to kill all Columnstore processes, this is still running:"
        ps -ef | grep -E "$cs_proc_regex" | grep -v "grep"
    fi
}
function display_outputfile_message() {
    # Print the location of the report file ($OUTPUTFILE) followed by one
    # empty line.
    printf '%s\n\n' "The output of this script is saved in the file $OUTPUTFILE"
}
@ -2062,6 +2355,7 @@ Switches:
--help # display this message
--version # only show the header with version information
--logs # create a compressed archive of logs for MariaDB Support Ticket
--path # define the path for where to save files/tarballs and outputs of this script
--backupdbrm # takes a compressed backup of extent map files in dbrm directory
--testschema # creates a test schema, tables, imports, queries, drops schema
--testschemakeep # creates a test schema, tables, imports, queries, does not drop
@ -2074,21 +2368,15 @@ Switches:
--pscs # Adds the pscs command. pscs lists running columnstore processes
--schemasync # Fix out-of-sync columnstore tables (CAL0009)
--tmpdir # Ensure owner of temporary dir after reboot (MCOL-4866 & MCOL-5242)
--checkports # Checks if ports needed by Columnstore are opened
--eustack # Dumps the stack of Columnstore processes
--clearrollback # Clear any rollback fragments from dbrm files
--killcolumnstore # Stop columnstore processes gracefully, then kill remaining processes
Color output switches:
--color=none # print headers without color
--color=red # print headers in red
--color=blue # print headers in blue
--color=green # print headers in green
--color=yellow # print headers in yellow
--color=magenta # print headers in magenta
--color=cyan # print headers in cyan (default color)
--color=lred # print headers in light red
--color=lblue # print headers in light blue
--color=lgreen # print headers in light green
--color=lyellow # print headers in light yellow
--color=lmagenta # print headers in light magenta
--color=lcyan # print headers in light cyan\n"
--color=red # print headers in color
# Options: [none,red,blue,green,yellow,magenta,cyan] prefix color with "l" for light\n"
ech0
}
@ -2143,6 +2431,53 @@ fi
printf "$1" >> $OUTPUTFILE
}
function get_eu_stack() {
    # Dump the stacks of the Columnstore processes (and mariadbd) with
    # eu-stack into $OUTDIR/<host>_<timestamp>_eu_stack/eu-<process>.txt,
    # pack that folder into a .tar.gz and move the archive to $TARDIR.
    #
    # Reads the globals OUTDIR and TARDIR. Sets the globals eu and
    # EU_FOLDER. Exits the script with status 1 when eu-stack is missing,
    # when no Columnstore process is running, or when the archive cannot be
    # produced.
    if ! command -v eu-stack &> /dev/null; then
        printf "\n[!] eu-stack not found. Please install eu-stack\n\n"
        ech0 "example: "
        ech0 " yum install elfutils -y"
        ech0 " apt-get install elfutils"
        ech0
        exit 1;
    fi

    # Confirm CS online
    if [[ "$(ps -ef | grep -E "(PrimProc|ExeMgr|DMLProc|DDLProc|WriteEngineServer|StorageManager|controllernode|workernode)" | grep -v "grep"|wc -l)" == "0" ]]; then
        printf "Columnstore processes are not running. EU Stack will not be collected.\n\n"
        exit 1;
    fi

    eu=$(command -v eu-stack)
    EU_FOLDER="$(hostname)_$(date +"%Y-%m-%d-%H-%M-%S")_eu_stack"
    mkdir -p "$OUTDIR/$EU_FOLDER"

    # pidof may print no PID (process not running) or several PIDs, while
    # "eu-stack -p" is given exactly one PID per call here. Dump each PID
    # separately and append to the per-process file; a process that is not
    # running simply yields an empty file.
    local proc pid stack_file
    for proc in PrimProc DMLProc DDLProc mariadbd WriteEngineServer controllernode workernode; do
        stack_file="$OUTDIR/$EU_FOLDER/eu-$proc.txt"
        : > "$stack_file"
        for pid in $(pidof "$proc"); do
            "$eu" -p "$pid" >> "$stack_file"
        done
    done

    # -C keeps the member paths relative to $OUTDIR without changing the
    # working directory of the calling script.
    tar -czf "$OUTDIR/$EU_FOLDER.tar.gz" -C "$OUTDIR" "$EU_FOLDER"
    if [ -f "$OUTDIR/$EU_FOLDER.tar.gz" ]; then
        print_color "### EU STACK COMPLETE ###\n"
    else
        print0 "EU Stack files not found.\n"
        exit 1;
    fi

    # cleanup
    mv "$OUTDIR/$EU_FOLDER.tar.gz" "$TARDIR"
    if [ -f "$TARDIR/$EU_FOLDER.tar.gz" ]; then
        print0 "Created: $TARDIR/$EU_FOLDER.tar.gz \n\n"
    else
        print0 "EU Stack files not found.\n"
        exit 1;
    fi
}
COLOR=default
for params in "$@"; do
unset VALID;
@ -2163,6 +2498,7 @@ for params in "$@"; do
if [ "$params" == '--help' ]; then HELP=true; VALID=true; fi
if [ "$params" == '--version' ]; then if [ ! $SKIP_REPORT ]; then DISPLAY_VERSION=true; fi; VALID=true; fi
if [ "$params" == '--logs' ]; then if [ ! $SKIP_REPORT ]; then COLLECT_LOGS=true; fi; VALID=true; fi
if [[ "$params" == "--path"* ]]; then USER_PROVIDED_OUTPUT_PATH=$(echo "$params" | awk -F= '{print $2}'); VALID=true; fi
if [ "$params" == '--backupdbrm' ]; then BACKUP_DBRM=true; SKIP_REPORT=true; unset COLLECT_LOGS; VALID=true; fi
if [ "$params" == '--testschema' ]; then TEST_SCHEMA=true; SKIP_REPORT=true; unset COLLECT_LOGS; VALID=true; fi
if [ "$params" == '--testschemakeep' ]; then TEST_SCHEMA_KEEP=true; SKIP_REPORT=true; unset COLLECT_LOGS; VALID=true; fi
@ -2175,13 +2511,17 @@ for params in "$@"; do
if [ "$params" == '--pscs' ]; then PSCS_ALIAS=true; SKIP_REPORT=true; unset COLLECT_LOGS; VALID=true; fi
if [ "$params" == '--schemasync' ]; then SCHEMA_SYNC=true; SKIP_REPORT=true; unset COLLECT_LOGS; VALID=true; fi
if [ "$params" == '--tmpdir' ]; then FIX_TMP_DIR=true; SKIP_REPORT=true; unset COLLECT_LOGS; VALID=true; fi
if [ "$params" == '--clearrollback' ]; then CLEARROLLBACK=true; SKIP_REPORT=true; unset COLLECT_LOGS; VALID=true; fi
if [ "$params" == '--checkports' ]; then SKIP_REPORT=true; CHECKPORTS=true;VALID=true; fi
if [ "$params" == '--killcolumnstore' ]; then KILLCS=true; SKIP_REPORT=true; unset COLLECT_LOGS; VALID=true; fi
if [ "$params" == '--eustack' ]; then SKIP_REPORT=true; COLLECT_EU_STACK=true;VALID=true; fi
if [ ! $VALID ]; then INVALID_INPUT=$params; fi
done
prepare_for_run
exists_client_able_to_connect_with_socket
if [ $DISPLAY_VERSION ]; then exit 0; fi
if [ $INVALID_INPUT ]; then TEMP_COLOR=lred; print_color "Invalid parameter: ";ech0 $INVALID_INPUT; ech0; unset TEMP_COLOR; fi
if [ $INVALID_INPUT ]; then TEMP_COLOR=lred; print_color "Invalid parameter: ";ech0 $INVALID_INPUT; ech0; unset TEMP_COLOR; exit 1; fi
if [ $HELP ]||[ $INVALID_INPUT ]; then
display_help_message
exit 0
@ -2265,6 +2605,7 @@ report_cs_table_locks
report_columnstore_query_count
report_calpontsys_exists
report_columnstore_tables
SUPPRESS_CLOSED_PORTS=true; check_ports
TEMP_COLOR=lblue; print_color "===================== LOGS =====================\n"; unset TEMP_COLOR
report_host_datetime
report_last_10_error_log_error
@ -2327,4 +2668,18 @@ if [ $FIX_TMP_DIR ]; then
ensure_owner_privs_of_tmp_dir
fi
if [ $CLEARROLLBACK ]; then
clear_rollback
fi
if [ $CHECKPORTS ]; then
check_ports
fi
if [ $KILLCS ]; then
kill_columnstore
fi
if [ $COLLECT_EU_STACK ]; then
get_eu_stack
fi