1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-04 04:42:30 +03:00

feat(backup): added timestamps to localstorage backups major sections, new flag backup --apply-retention-only, more verbose logging in poll_check_no_active_sql_writes(), fixes to multinode race conditions when applying apply_backup_retention_policy() (#3422)

This commit is contained in:
Allen Herrera
2025-03-14 07:36:01 -04:00
committed by GitHub
parent d132c1174a
commit 001367e68a

View File

@ -13,7 +13,7 @@
# #
######################################################################## ########################################################################
# Documentation: bash mcs_backup_manager.sh help # Documentation: bash mcs_backup_manager.sh help
# Version: 3.11 # Version: 3.12
# #
# Backup Example # Backup Example
# LocalStorage: sudo ./mcs_backup_manager.sh backup # LocalStorage: sudo ./mcs_backup_manager.sh backup
@ -26,7 +26,7 @@
# S3: sudo ./mcs_backup_manager.sh restore -bb s3://my-cs-backups -l <date> # S3: sudo ./mcs_backup_manager.sh restore -bb s3://my-cs-backups -l <date>
# #
######################################################################## ########################################################################
mcs_bk_manager_version="3.11" mcs_bk_manager_version="3.12"
start=$(date +%s) start=$(date +%s)
action=$1 action=$1
@ -171,6 +171,8 @@ load_default_backup_variables() {
poll_interval=5 poll_interval=5
poll_max_wait=60; poll_max_wait=60;
list_backups=false list_backups=false
retention_file_lock_name=".delete-lock"
apply_retention_only=false
# Compression Variables # Compression Variables
compress_format="" compress_format=""
@ -322,6 +324,10 @@ parse_backup_variables() {
shift # past argument shift # past argument
shift # past value shift # past value
;; ;;
-aro| --apply-retention-only)
apply_retention_only=true
shift # past argument
;;
"list") "list")
list_backups=true list_backups=true
shift # past argument shift # past argument
@ -371,14 +377,15 @@ print_backup_help_text() {
-c | --compress Compress backup in X format - Options: [ pigz ] -c | --compress Compress backup in X format - Options: [ pigz ]
-nb | --name-backup Define the name of the backup - default: date +%m-%d-%Y -nb | --name-backup Define the name of the backup - default: date +%m-%d-%Y
-r | --retention-days Retain backups created within the last X days, the rest are deleted, default 0 = keep all backups -r | --retention-days Retain backups created within the last X days, the rest are deleted, default 0 = keep all backups
-aro | --apply-retention-only Only apply retention policy to existing backups, does not run a backup
-ha | --highavilability Hint wether shared storage is attached @ below on all nodes to see all data -ha | --highavilability Hint wether shared storage is attached @ below on all nodes to see all data
HA LocalStorage ( /var/lib/columnstore/dataX/ ) HA LocalStorage ( /var/lib/columnstore/dataX/ )
HA S3 ( /var/lib/columnstore/storagemanager/ ) HA S3 ( /var/lib/columnstore/storagemanager/ )
Local Storage Examples: Local Storage Examples:
./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage ./$0 backup -bl /tmp/backups/
./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage -P 8 ./$0 backup -bl /tmp/backups/ -P 8
./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage --incremental auto_most_recent ./$0 backup -bl /tmp/backups/ --incremental auto_most_recent
./$0 backup -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -s LocalStorage ./$0 backup -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -s LocalStorage
S3 Examples: S3 Examples:
@ -666,7 +673,7 @@ validation_prechecks_for_backup() {
printf " - Columnstore is OFFLINE \n"; printf " - Columnstore is OFFLINE \n";
export columnstore_online=false; export columnstore_online=false;
else else
printf " - Columnstore is ONLINE - safer if offline \n"; printf " - Columnstore is ONLINE - safer if offline (but not required to be offline) \n";
export columnstore_online=true; export columnstore_online=true;
fi fi
fi; fi;
@ -796,7 +803,26 @@ auto_select_most_recent_backup_for_incremental() {
printf " - Searching for most recent backup ...." printf " - Searching for most recent backup ...."
if [ $storage == "LocalStorage" ]; then if [ $storage == "LocalStorage" ]; then
most_recent_backup=$(ls -td "${backup_location}"* 2>/dev/null | head -n 1)
# Example: find /mnt/backups/ -mindepth 1 -maxdepth 1 -type d ! -exec test -f "{}/.auto-delete-via-retention" \; -printf "%T@ %p\n" | sort -nr | awk '{print $2}' | head -n 1
if [ -f "${backup_location}${retention_file_lock_name}-$pm" ] ; then
handle_early_exit_on_backup "\n[!!!] ${backup_location}${retention_file_lock_name}-$pm exists. are multiple backups running or was there an error? Manually resolve before retrying\n" true
fi
# Primary node is in the middle of applying retention policy, distorting the most recent backup
if [ -f "${backup_location}${retention_file_lock_name}-pm1" ] ; then
most_recent_backup="$(head -n1 "${backup_location}${retention_file_lock_name}-pm1" | awk '{print $2}')"
else
most_recent_backup=$(find "$backup_location" -mindepth 1 -maxdepth 1 -type d -printf "%T@ %p\n" | sort -nr | awk '{print $2}' | head -n 1)
fi
# Debug messaging to understand the most recent backup modified times
if ! $quiet; then
echo ""
find "$backup_location" -mindepth 1 -maxdepth 1 -type d -printf "%T@ %p\n" | sort -nr
fi
# If no backup found, exit
if [[ -z "$most_recent_backup" ]]; then if [[ -z "$most_recent_backup" ]]; then
handle_early_exit_on_backup "\n[!!!] No backup found to increment in '$backup_location', please run a full backup or define a folder that exists --incremental <folder>\n" true handle_early_exit_on_backup "\n[!!!] No backup found to increment in '$backup_location', please run a full backup or define a folder that exists --incremental <folder>\n" true
else else
@ -849,18 +875,41 @@ auto_select_most_recent_backup_for_incremental() {
apply_backup_retention_policy() { apply_backup_retention_policy() {
if [ $retention_days -eq 0 ]; then if [ "$retention_days" -eq 0 ]; then
printf " - Skipping Backup Rentention Policy\n" printf " - Skipping Backup Rentention Policy\n"
return 0; return 0;
fi fi
printf " - Applying Backup Rentention Policy...." # PM1 is in charge of deleting the backup
# Rest of the nodes should wait for the delete to be over if it finds the lock file
applying_retention_start=$(date +%s)
printf "\nApplying Backup Rentention Policy\n"
if [ "$pm" == "pm1" ] || [ -n "${PM_FORCE_DELETE-}" ] ; then
retention_file_lock_name="${retention_file_lock_name}-$pm"
if [ $storage == "LocalStorage" ]; then if [ $storage == "LocalStorage" ]; then
# example: find /tmp/backups/ -mindepth 1 -maxdepth 1 -type d -name "*" -amin +0
find "$backup_location" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec rm -r {} \; # Create a lock file to prevent other PMs from deleting backups
touch "${backup_location}${retention_file_lock_name}"
echo "most_recent_backup: $most_recent_backup" >> "${backup_location}${retention_file_lock_name}"
printf " - Created: ${backup_location}${retention_file_lock_name}\n"
# example1: find /mnt/backups/ -mindepth 1 -maxdepth 1 -type d -name "*" -mmin +10 -exec echo "{}" \;
# example2: find /mnt/backups/ -mindepth 1 -maxdepth 1 -type d -name "*" -mmin +10 -exec sh -c 'printf " - Deleting: {} \n" && echo "{}" >> "/mnt/backups/.delete-lock-pm1" && rm -rf "{}"' \;
# find "$backup_location" -mindepth 1 -maxdepth 1 -type d -name "*" -mmin +$retention_days -exec sh -c 'printf " - Deleting: {} \n" && echo "{}" >> "'"${backup_location}${retention_file_lock_name}"'" && rm -rf "{}"' \;
find "$backup_location" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec sh -c 'printf " - Deleting: {} \n" && echo "{}" >> "'"${backup_location}${retention_file_lock_name}"'" && rm -rf "{}"' \;
# Cleanup the lock file
rm "${backup_location}${retention_file_lock_name}"
printf " - Deleted: ${backup_location}${retention_file_lock_name}\n"
elif [ $storage == "S3" ]; then elif [ $storage == "S3" ]; then
# Create a lock file to prevent other PMs from deleting backups
echo "most_recent_backup: $most_recent_backup" >> "${retention_file_lock_name}"
s3cp "${retention_file_lock_name}" "${backup_bucket}/${retention_file_lock_name}"
current_date=$(date +%s) current_date=$(date +%s)
backups=$(s3ls $backup_bucket) backups=$(s3ls $backup_bucket)
@ -890,14 +939,60 @@ apply_backup_retention_policy() {
fi fi
if $delete_backup; then if $delete_backup; then
echo "${backup_bucket}/${folder}" >> "${retention_file_lock_name}"
s3cp "${retention_file_lock_name}" "${backup_bucket}/${retention_file_lock_name}"
s3rm "${backup_bucket}/${folder}" s3rm "${backup_bucket}/${folder}"
#echo "Deleting ${backup_bucket}/${folder}" #printf "\n - Deleting: ${backup_bucket}/${folder}"
fi fi
printf "." printf "."
done <<< "$backups" done <<< "$backups"
# Cleanup the lock file
s3rm "${backup_bucket}/${retention_file_lock_name}"
fi
applying_retention_end=$(date +%s)
printf "Done $(human_readable_time $((applying_retention_end-applying_retention_start))) \n"
else
local retry_limit=2160
local retry_counter=0
local retry_sleep_interval=5
if [ $storage == "LocalStorage" ]; then
if [ -f "${backup_location}${retention_file_lock_name}-pm1" ]; then
printf " - Waiting for PM1 to finish deleting backups (max: $retry_limit intervals of $retry_sleep_interval sec ) ... \n"
fi;
while [ -f "${backup_location}${retention_file_lock_name}-pm1" ]; do
if [ -f "${backup_location}${retention_file_lock_name}-pm1" ]; then
printf " - Waiting on PM1 to finish deleting %s \n" "$(tail -n 1 "${backup_location}${retention_file_lock_name}-pm1" 2>/dev/null )"
fi;
if [ $retry_counter -ge $retry_limit ]; then
handle_early_exit_on_backup "\n [!] PM1 lock file still exists after $retry_limit sec, deleting backups via retention policy might have failed or taking too long, please investigate, exiting\n" true
fi
sleep $retry_sleep_interval
((retry_counter++))
done
elif [ $storage == "S3" ]; then
if [[ $(s3ls "${backup_bucket}/${retention_file_lock_name}") ]]; then
printf " - Waiting for PM1 to finish deleting backups (max: $retry_limit intervals of $retry_sleep_interval sec ) ... \n"
fi;
while [[ $(s3ls "${backup_bucket}/${retention_file_lock_name}") ]]; do
if [[ $(s3ls "${backup_bucket}/${retention_file_lock_name}") ]]; then
printf " - Waiting on PM1 to finish deleting %s \n" "$( s3cp "${backup_bucket}/${retention_file_lock_name}" "-" | tail -n 1 2>/dev/null )"
fi;
if [ $retry_counter -ge $retry_limit ]; then
handle_early_exit_on_backup "\n [!] PM1 lock file still exists after $retry_limit sec, deleting backups via retention policy might have failed or taking too long, please investigate, exiting\n" true
fi
sleep $retry_sleep_interval
((retry_counter++))
done
fi
applying_retention_end=$(date +%s)
printf "Done $(human_readable_time $((applying_retention_end-applying_retention_start))) \n"
fi fi
printf " Done\n"
} }
@ -994,7 +1089,8 @@ issue_write_locks()
} }
poll_check_no_active_sql_writes() { poll_check_no_active_sql_writes() {
printf " - Polling For Active Writes ... " poll_active_sql_writes_start=$(date +%s)
printf " - Polling for active Writes ... "
query="SELECT COUNT(*) FROM information_schema.processlist where user != 'root' AND command = 'Query' AND ( info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' OR info LIKE '%LOAD DATA%');" query="SELECT COUNT(*) FROM information_schema.processlist where user != 'root' AND command = 'Query' AND ( info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' OR info LIKE '%LOAD DATA%');"
attempts=0 attempts=0
no_writes=false no_writes=false
@ -1005,34 +1101,41 @@ poll_check_no_active_sql_writes() {
fi fi
if [ "$active_writes" -le 0 ]; then if [ "$active_writes" -le 0 ]; then
printf "Done\n" poll_active_sql_writes_end=$(date +%s)
printf "Done $(human_readable_time $((poll_active_sql_writes_start-poll_active_sql_writes_end))) \n"
no_writes=true no_writes=true
break break
else else
# extra_info=$( mariadb -e "SELECT * FROM information_schema.processlist where user != 'root' AND command = 'Query' AND info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' " )
# echo "Active write operations detected: $active_writes" if ! $quiet; then
# echo "$extra_info" printf "\nActive write operations detected: $active_writes\n";
mariadb -e "select ID,USER,TIME,LEFT(INFO, 50) from information_schema.processlist where user != 'root' AND command = 'Query' AND ( info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' OR info LIKE '%LOAD DATA%');"
else
printf "." printf "."
if ! $quiet; then printf "\nActive write operations detected: $active_writes"; fi; fi;
sleep "$poll_interval" sleep "$poll_interval"
((attempts++)) ((attempts++))
fi fi
done done
if ! $no_writes; then if ! $no_writes; then
mariadb -e "select ID,USER,TIME,INFO from information_schema.processlist where user != 'root' AND command = 'Query' AND ( info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' OR info LIKE '%LOAD DATA%');"
handle_early_exit_on_backup "\n[X] Exceeded poll_max_wait: $poll_max_wait minutes\nActive write operations detected: $active_writes \n" true handle_early_exit_on_backup "\n[X] Exceeded poll_max_wait: $poll_max_wait minutes\nActive write operations detected: $active_writes \n" true
fi; fi;
} }
poll_check_no_active_cpimports() { poll_check_no_active_cpimports() {
printf " - Polling For Active cpimports ... " poll_active_cpimports_start=$(date +%s)
printf " - Polling for active cpimports ... "
attempts=0 attempts=0
no_cpimports=false no_cpimports=false
if ! $columnstore_online ; then printf "Skip since offline\n"; return ; fi; if ! $columnstore_online ; then printf "Skip since offline\n"; return ; fi;
while [ "$attempts" -lt "$max_poll_attempts" ]; do while [ "$attempts" -lt "$max_poll_attempts" ]; do
active_cpimports=$(viewtablelock 2>/dev/null ) active_cpimports=$(viewtablelock 2>/dev/null )
if [[ "$active_cpimports" == *"No tables are locked"* ]]; then if [[ "$active_cpimports" == *"No tables are locked"* ]]; then
printf "Done\n" poll_active_cpimports_end=$(date +%s)
printf "Done $(human_readable_time $((poll_active_cpimports_end-poll_active_cpimports_start))) \n"
no_cpimports=true no_cpimports=true
break break
else else
@ -1358,7 +1461,39 @@ deepParallelRsync() {
wait wait
} }
human_readable_time() {
local total_seconds=$1
local days=$((total_seconds / 86400))
local hours=$(( (total_seconds % 86400) / 3600 ))
local minutes=$(( (total_seconds % 3600) / 60 ))
local seconds=$(( total_seconds % 60 ))
local output=""
# Special case for 0 seconds
if (( total_seconds == 0 )); then
printf "<1 sec"
return
fi
if (( days > 0 )); then
output+=" ${days} days"
fi
if (( hours > 0 )); then
output+=" ${hours} hours"
fi
if (( minutes > 0 )); then
output+=" ${minutes} min"
fi
if (( seconds > 0 || total_seconds == 0 )); then
output+=" ${seconds} sec"
fi
printf "${output}" | xargs
}
run_backup() { run_backup() {
backup_start=$(date +%s)
if [ $storage == "LocalStorage" ]; then if [ $storage == "LocalStorage" ]; then
if [ $backup_destination == "Local" ]; then if [ $backup_destination == "Local" ]; then
@ -1383,6 +1518,7 @@ run_backup() {
printf " Done\n" printf " Done\n"
# Backup Columnstore data # Backup Columnstore data
columnstore_backup_start=$(date +%s)
i=1 i=1
while [ $i -le $DBROOT_COUNT ]; do while [ $i -le $DBROOT_COUNT ]; do
if [[ $ASSIGNED_DBROOT == "$i" || $HA == true ]]; then if [[ $ASSIGNED_DBROOT == "$i" || $HA == true ]]; then
@ -1393,16 +1529,20 @@ run_backup() {
elif $parrallel_rsync ; then elif $parrallel_rsync ; then
printf " - Parallel Rsync CS Data$i... \n" printf " - Parallel Rsync CS Data$i... \n"
initiate_rsyncs $i initiate_rsyncs $i
printf " Done\n" columnstore_backup_end=$(date +%s)
printf " Done $(human_readable_time $((columnstore_backup_end-columnstore_backup_start))) \n"
else else
printf " - Syncing Columnstore Data$i... " printf " - Syncing Columnstore Data$i... "
eval "rsync $additional_rysnc_flags /var/lib/columnstore/data$i/* $backup_location$today/data$i/ $xtra_cmd_args"; eval "rsync $additional_rysnc_flags /var/lib/columnstore/data$i/* $backup_location$today/data$i/ $xtra_cmd_args";
printf " Done\n" columnstore_backup_end=$(date +%s)
printf " Done $(human_readable_time $((columnstore_backup_end-columnstore_backup_start))) \n"
fi; fi;
fi fi
((i++)) ((i++))
done done
# Backup MariaDB data # Backup MariaDB data
if [ $ASSIGNED_DBROOT == "1" ]; then if [ $ASSIGNED_DBROOT == "1" ]; then
@ -1419,6 +1559,7 @@ run_backup() {
fi fi
# Run mariadb-backup # Run mariadb-backup
mariadb_backup_start=$(date +%s)
if ! $skip_mdb; then if ! $skip_mdb; then
if [[ -n "$compress_format" ]]; then if [[ -n "$compress_format" ]]; then
printf " - Compressing MariaDB Data... " printf " - Compressing MariaDB Data... "
@ -1426,9 +1567,11 @@ run_backup() {
case $compress_format in case $compress_format in
pigz) pigz)
# Handle Cloud # Handle Cloud
if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile; then mariadb_backup_start=$(date +%s)
if ! mariabackup --user=root --backup --slave-info --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile; then
handle_early_exit_on_backup "\nFailed mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile \n" handle_early_exit_on_backup "\nFailed mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile \n"
fi fi
mariadb_backup_end=$(date +%s)
printf " Done @ $mbd_prefix\n" printf " Done @ $mbd_prefix\n"
;; ;;
*) # unknown option *) # unknown option
@ -1436,7 +1579,12 @@ run_backup() {
esac esac
else else
printf " - Copying MariaDB Data... " printf " - Copying MariaDB Data... "
if eval "mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root $xtra_cmd_args" ; then printf " Done \n"; else printf "\n Failed: mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root\n"; handle_early_exit_on_backup; fi if eval "mariadb-backup --backup --slave-info --target-dir=$backup_location$today/mysql --user=root $xtra_cmd_args" ; then
mariadb_backup_end=$(date +%s)
printf " Done $(human_readable_time $((mariadb_backup_end-mariadb_backup_start))) \n"
else
handle_early_exit_on_backup "\n Failed: mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root\n";
fi
fi fi
else else
echo "[!] Skipping mariadb-backup" echo "[!] Skipping mariadb-backup"
@ -1469,6 +1617,21 @@ run_backup() {
else else
compress_paths+=" $MARIADB_SERVER_CONFIGS_PATH/*" compress_paths+=" $MARIADB_SERVER_CONFIGS_PATH/*"
fi fi
# Backup mariadb server replication details
slave_status=$(mariadb -e "show slave status\G" 2>/dev/null)
if [[ $? -eq 0 ]]; then
if [[ -n "$slave_status" ]]; then
if [[ -z "$compress_format" ]]; then
mariadb -e "show slave status\G" | grep -E "Master_Host|Master_User|Master_Port|Gtid_IO_Pos" > $backup_location$today/configs/mysql/$pm/replication_details.txt
else
mariadb -e "show slave status\G" | grep -E "Master_Host|Master_User|Master_Port|Gtid_IO_Pos" > replication_details.txt
compress_paths+=" replication_details.txt"
fi
fi
else
handle_early_exit_on_backup "[!] - Failed to execute the MariaDB command: mariadb -e 'show slave status\G' \n"
fi
fi fi
# Handle compression for Columnstore Data & Configs # Handle compression for Columnstore Data & Configs
@ -1478,9 +1641,31 @@ run_backup() {
case $compress_format in case $compress_format in
pigz) pigz)
printf " - Compressing CS Data & Configs... " printf " - Compressing CS Data & Configs... "
columnstore_backup_start=$(date +%s)
if ! eval "tar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $cs_prefix 2>> $logfile"; then if ! eval "tar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $cs_prefix 2>> $logfile"; then
handle_early_exit_on_backup "[!] - Compression Failed \ntar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $cs_prefix 2>> $logfile \n" handle_early_exit_on_backup "[!] - Compression Failed \ntar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $cs_prefix 2>> $logfile \n"
fi fi
columnstore_backup_end=$(date +%s)
# Create summary info for list backup
get_latest_em_from_dbrm_directory "$DBRM_PATH"
em_file_name=$(basename "$em_file_full_path")
version_prefix=${em_file_name::-3}
journal_file=$(ls -la "${subdir_dbrms}/BRM_saves_journal" 2>/dev/null | awk 'NR==1 {print $5}' )
vbbm_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vbbm" 2>/dev/null | awk 'NR==1 {print $5}' )
vss_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vss" 2>/dev/null | awk 'NR==1 {print $5}' )
echo "em_file_created: $em_file_created" >> $backup_location$today/backup_summary_info.txt
echo "em_file_name: $em_file_name" >> $backup_location$today/backup_summary_info.txt
echo "em_file_size: $em_file_size" >> $backup_location$today/backup_summary_info.txt
echo "journal_file: $journal_file" >> $backup_location$today/backup_summary_info.txt
echo "vbbm_file: $vbbm_file" >> $backup_location$today/backup_summary_info.txt
echo "vss_file: $vss_file" >> $backup_location$today/backup_summary_info.txt
echo "compression: $compress_format" >> $backup_location$today/backup_summary_info.txt
if [ -f replication_details.txt ]; then
rm -rf replication_details.txt;
fi
printf " Done @ $cs_prefix\n" printf " Done @ $cs_prefix\n"
;; ;;
*) # unknown option *) # unknown option
@ -1488,18 +1673,6 @@ run_backup() {
esac esac
fi fi
# Save replication details
slave_status=$(mariadb -e "show slave status\G" 2>/dev/null)
if [[ $? -eq 0 ]]; then
if [[ -n "$slave_status" ]]; then
mariadb -e "show slave status\G" | grep -E "Master_Host|Master_User|Master_Port" > $backup_location$today/replication_details.txt
else
echo "No replication details to save"
fi
else
handle_early_exit_on_backup "[!] - Failed to execute the MariaDB command: mariadb -e 'show slave status\G' \n"
fi
if $incremental ; then if $incremental ; then
# Log each incremental run # Log each incremental run
now=$(date "+%m-%d-%Y %H:%M:%S"); echo "$pm updated on $now" >> $backup_location$today/incrementallyUpdated.txt now=$(date "+%m-%d-%Y %H:%M:%S"); echo "$pm updated on $now" >> $backup_location$today/incrementallyUpdated.txt
@ -1662,12 +1835,12 @@ run_backup() {
pigz) pigz)
# Handle Cloud # Handle Cloud
if [ $cloud == "gcp" ]; then if [ $cloud == "gcp" ]; then
if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile" - chunk 2>$logfile; then if ! mariabackup --user=root --backup --slave-info --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile" - chunk 2>$logfile; then
handle_early_exit_on_backup "\nFailed mariadb-backup --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter=\"gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile\" - chunk 2>$logfile \n" handle_early_exit_on_backup "\nFailed mariadb-backup --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter=\"gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile\" - chunk 2>$logfile \n"
fi fi
elif [ $cloud == "aws" ]; then elif [ $cloud == "aws" ]; then
if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2" - chunk 2>$logfile; then if ! mariabackup --user=root --backup --slave-info --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2" - chunk 2>$logfile; then
handle_early_exit_on_backup "\nFailed mariadb-backup --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter=\"aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2\" - chunk 2>$logfile\n" handle_early_exit_on_backup "\nFailed mariadb-backup --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter=\"aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2\" - chunk 2>$logfile\n"
fi fi
fi fi
@ -1682,7 +1855,7 @@ run_backup() {
printf " - Syncing MariaDB data ... \n" printf " - Syncing MariaDB data ... \n"
rm -rf $backup_location$today/mysql rm -rf $backup_location$today/mysql
if mkdir -p $backup_location$today/mysql ; then if mkdir -p $backup_location$today/mysql ; then
if eval "mariabackup --user=root --backup --target-dir=$backup_location$today/mysql/ $xtra_cmd_args"; then if eval "mariabackup --user=root --backup --slave-info --target-dir=$backup_location$today/mysql/ $xtra_cmd_args"; then
printf " + mariadb-backup @ $backup_location$today/mysql/ \n" printf " + mariadb-backup @ $backup_location$today/mysql/ \n"
else else
handle_early_exit_on_backup "\nFailed mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root\n" true handle_early_exit_on_backup "\nFailed mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root\n" true
@ -1708,6 +1881,22 @@ run_backup() {
else else
compress_paths+="$MARIADB_SERVER_CONFIGS_PATH" compress_paths+="$MARIADB_SERVER_CONFIGS_PATH"
fi fi
# Backup mariadb server replication details
slave_status=$(mariadb -e "show slave status\G" 2>/dev/null)
if [[ $? -eq 0 ]]; then
if [[ -n "$slave_status" ]]; then
mariadb -e "show slave status\G" | grep -E "Master_Host|Master_User|Master_Port|Gtid_IO_Pos" > replication_details.txt
if [[ -z "$compress_format" ]]; then
s3cp replication_details.txt $backup_bucket/$today/configs/mysql/$pm/replication_details.txt
else
compress_paths+=" replication_details.txt"
fi
fi
else
handle_early_exit_on_backup "[!] - Failed to execute the MariaDB command: mariadb -e 'show slave status\G' \n"
fi
fi fi
# Handles streaming the compressed columnstore local files # Handles streaming the compressed columnstore local files
@ -1727,6 +1916,12 @@ run_backup() {
handle_early_exit_on_backup "[!] - Compression/Split/Upload Failed \n" handle_early_exit_on_backup "[!] - Compression/Split/Upload Failed \n"
fi fi
fi fi
# Cleanup temp files
if [ -f replication_details.txt ]; then
rm -rf replication_details.txt
fi
printf " Done @ $cs_prefix\n" printf " Done @ $cs_prefix\n"
;; ;;
*) # unknown option *) # unknown option
@ -1765,8 +1960,52 @@ run_backup() {
clear_read_lock clear_read_lock
end=$(date +%s) end=$(date +%s)
runtime=$((end-start)) total_runtime=$((end - start))
printf "\nRuntime: $runtime\n" other=$total_runtime
printf "\nRuntime Summary\n"
echo "--------------------"
if [[ -n "$applying_retention_end" && $applying_retention_end -ne 0 ]]; then
applying_retention_time=$((applying_retention_end - applying_retention_start))
printf "%-20s %-15s\n" "Applying Retention:" "$(human_readable_time $applying_retention_time)"
other=$((other - applying_retention_time))
fi
if [[ -n "$poll_active_sql_writes_end" && $poll_active_sql_writes_end -ne 0 ]]; then
poll_sql_writes_time=$((poll_active_sql_writes_end - poll_active_sql_writes_start))
printf "%-20s %-15s\n" "Polling SQL Writes:" "$(human_readable_time $poll_sql_writes_time)"
other=$((other - poll_sql_writes_time))
fi
if [[ -n "$poll_active_cpimports_end" && $poll_active_cpimports_end -ne 0 ]]; then
poll_cpimports_time=$((poll_active_cpimports_end - poll_active_cpimports_start))
printf "%-20s %-15s\n" "Polling cpimports:" "$(human_readable_time $poll_cpimports_time)"
other=$((other - poll_cpimports_time))
fi
if [[ -n "$columnstore_backup_end" && $columnstore_backup_end -ne 0 ]]; then
columnstore_backup_time=$((columnstore_backup_end - columnstore_backup_start))
printf "%-20s %-15s\n" "Columnstore Backup:" "$(human_readable_time $columnstore_backup_time)"
other=$((other - columnstore_backup_time))
fi
if [[ -n "$mariadb_backup_end" && $mariadb_backup_end -ne 0 ]]; then
mariadb_backup_time=$((mariadb_backup_end - mariadb_backup_start))
printf "%-20s %-15s\n" "MariaDB Backup:" "$(human_readable_time $mariadb_backup_time)"
other=$((other - mariadb_backup_time))
fi
if (( other < 0 )); then
printf "%-20s %-15s\n" "Other:" "N/A (invalid data)"
else
if [ $other -gt 0 ]; then
printf "%-20s %-15s\n" "Other:" "<$(human_readable_time $other)"
else
printf "%-20s %-15s\n" "Other:" "<1 sec"
fi
fi
printf "\nTotal Runtime: $(human_readable_time $((end-start))) \n"
printf "$final_message\n\n" printf "$final_message\n\n"
} }
@ -2734,10 +2973,12 @@ run_restore()
if [ $DBROOT_COUNT -gt 1 ]; then echo -e "[!!] Check Replicas to manually configure mariadb replication"; fi if [ $DBROOT_COUNT -gt 1 ]; then echo -e "[!!] Check Replicas to manually configure mariadb replication"; fi
echo -e "systemctl start mariadb " echo -e "systemctl start mariadb "
echo -e "systemctl start mariadb-columnstore-cmapi " echo -e "systemctl start mariadb-columnstore-cmapi "
echo -e "mcs cluster start \n\n" echo -e "mcs cluster start"
echo -e "mariadb -e \"show variables like '%gtid_current_pos%';\"\n\n"
else else
echo -e "[!!] Check this replica to manually configure mariadb replication after primary node is ready" echo -e "[!!] Check this replica to manually configure mariadb replication after primary node is ready"
echo -e "Example:" echo -e "Example:"
echo -e "mariadb -e \"SET GLOBAL gtid_slave_pos = '0-1-14'\""
echo -e "mariadb -e \"stop slave;CHANGE MASTER TO MASTER_HOST='\$pm1' , MASTER_USER='repuser' , MASTER_PASSWORD='aBcd123%123%aBc' , MASTER_USE_GTID = slave_pos;start slave;show slave status\G\"" echo -e "mariadb -e \"stop slave;CHANGE MASTER TO MASTER_HOST='\$pm1' , MASTER_USER='repuser' , MASTER_PASSWORD='aBcd123%123%aBc' , MASTER_USE_GTID = slave_pos;start slave;show slave status\G\""
echo -e "mariadb -e \"show slave status\G\" | grep \"Slave_\" \n" echo -e "mariadb -e \"show slave status\G\" | grep \"Slave_\" \n"
fi fi
@ -3321,7 +3562,7 @@ get_latest_em_from_dbrm_directory() {
fi fi
else else
subdir_dbrms="$1" subdir_dbrms="$1"
latest_em_file=$(find "${subdir_dbrms}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lat {} + | awk 'NR==1 {printf "%-12s %-4s %-2s %-5s %s\n", $5, $6, $7, $8, $9}'| head -n 1) latest_em_file=$(find "${subdir_dbrms}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lat {} + 2>/dev/null | awk 'NR==1 {printf "%-12s %-4s %-2s %-5s %s\n", $5, $6, $7, $8, $9}'| head -n 1)
em_file_size=$(echo "$latest_em_file" | awk '{print $1}' ) em_file_size=$(echo "$latest_em_file" | awk '{print $1}' )
em_file_created=$(echo "$latest_em_file" | awk '{print $2,$3,$4}' ) em_file_created=$(echo "$latest_em_file" | awk '{print $2,$3,$4}' )
em_file_full_path=$(echo $latest_em_file | awk '{print $NF}' ) em_file_full_path=$(echo $latest_em_file | awk '{print $NF}' )
@ -3339,18 +3580,32 @@ list_restore_options_from_backups() {
echo "Retention Policy: $retention_days days" echo "Retention Policy: $retention_days days"
fi; fi;
echo "--------------------------------------------------------------------------" echo "--------------------------------------------------------------------------"
printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s %-10s\n" "Options" "Last-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size" "Days Old" printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s %-10s\n" "Options" "EM-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size" "Backup Days Old"
# Iterate over subdirectories # Iterate over subdirectories
for subdir in "${backup_location}"/*; do for subdir in "${backup_location}"/*; do
if [ -f "${subdir}/cs-localfiles.tar.pigz" ]; then if [ -f "${subdir}/cs-localfiles.tar.pigz" ]; then
em_file_created=$( date -r "$subdir" +"%b %d %H:%M" ) em_file_created=$( date -r "$subdir" +"%b %d %H:%M" )
# parse from ${subdir}/backup_summary_info.txt
if [ -f "${subdir}/backup_summary_info.txt" ]; then
em_file_created="$(grep -m 1 "em_file_created" "${subdir}/backup_summary_info.txt" | awk -F': ' '{print $2}')"
em_file_name="$(grep -m 1 "em_file_name" "${subdir}/backup_summary_info.txt" | awk '{print $2}')"
em_file_size="$(grep -m 1 "em_file_size" "${subdir}/backup_summary_info.txt" | awk '{print $2}')"
journal_file="$(grep -m 1 "journal_file" "${subdir}/backup_summary_info.txt" | awk '{print $2}')"
vbbm_file="$(grep -m 1 "vbbm_file" "${subdir}/backup_summary_info.txt" | awk '{print $2}')"
vss_file="$(grep -m 1 "vss_file" "${subdir}/backup_summary_info.txt" | awk '{print $2}')"
compression="$(grep -m 1 "compression" "${subdir}/backup_summary_info.txt" | awk '{print $2}')"
else
em_file_created="N/A"
em_file_size="N/A" em_file_size="N/A"
em_file_name="N/A" em_file_name="N/A"
journal_file="N/A" journal_file="N/A"
vbbm_file="N/A" vbbm_file="N/A"
vss_file="N/A" vss_file="N/A"
compression="N/A"
fi
file_age=$(($(date +%s) - $(date -r "$subdir" +%s))) file_age=$(($(date +%s) - $(date -r "$subdir" +%s)))
file_age_days=$((file_age / 86400)) file_age_days=$((file_age / 86400))
@ -3359,13 +3614,12 @@ list_restore_options_from_backups() {
file_age_days="$file_age_days (Will Delete)" file_age_days="$file_age_days (Will Delete)"
fi fi
printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s %-10s\n" "$(basename "$subdir")" "$em_file_created" "$em_file_name" "$em_file_size" "$journal_file" "$vbbm_file" "$vss_file" "$file_age_days" printf "%-35s %-9s %-13s %-15s %-12s %-12s %-10s %-10s %-10s\n" "$(basename "$subdir")" "$compression" "$em_file_created" "$em_file_name" "$em_file_size" "$journal_file" "$vbbm_file" "$vss_file" "$file_age_days"
continue continue
fi fi
get_latest_em_from_dbrm_directory "$subdir/data1/systemFiles/dbrm/" get_latest_em_from_dbrm_directory "$subdir/data1/systemFiles/dbrm/"
if [ -f "${subdir_dbrms}/BRM_saves_journal" ]; then if [ -f "${subdir_dbrms}/BRM_saves_journal" ]; then
em_file_name=$(basename "$em_file_full_path") em_file_name=$(basename "$em_file_full_path")
version_prefix=${em_file_name::-3} version_prefix=${em_file_name::-3}
@ -3373,12 +3627,13 @@ list_restore_options_from_backups() {
vbbm_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vbbm" 2>/dev/null | awk 'NR==1 {print $5}' ) vbbm_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vbbm" 2>/dev/null | awk 'NR==1 {print $5}' )
vss_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vss" 2>/dev/null | awk 'NR==1 {print $5}' ) vss_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vss" 2>/dev/null | awk 'NR==1 {print $5}' )
file_age=$(($(date +%s) - $(date -r "$subdir/data1/systemFiles" +%s))) file_age=$(($(date +%s) - $(date -r "$subdir" +%s)))
file_age_days=$((file_age / 86400)) file_age_days=$((file_age / 86400))
# Check if the backup will be deleted given the retention policy defined # Check if the backup will be deleted given the retention policy defined
if [ -n "$retention_days" ] && [ $retention_days -ne 0 ]; then if [ -n "$retention_days" ] && [ $retention_days -ne 0 ]; then
will_delete="$(find "$subdir/data1/systemFiles" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec echo {} \;)" # file_age_days=$((file_age / 60)); will_delete="$(find "$subdir" -mindepth 1 -maxdepth 1 -type d -name "*" -mmin +$retention_days -exec echo {} \;)"
will_delete="$(find "$subdir" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec echo {} \;)"
if [ -n "$will_delete" ]; then if [ -n "$will_delete" ]; then
file_age_days="$file_age_days (Will Delete)" file_age_days="$file_age_days (Will Delete)"
@ -3400,7 +3655,7 @@ list_dbrm_restore_options_from_backups() {
echo "Retention Policy: $retention_days days" echo "Retention Policy: $retention_days days"
fi; fi;
echo "--------------------------------------------------------------------------" echo "--------------------------------------------------------------------------"
printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s\n" "Options" "Last-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size" printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s\n" "Options" "EM-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size"
# Iterate over subdirectories # Iterate over subdirectories
for subdir in "${backup_location}"/*; do for subdir in "${backup_location}"/*; do
@ -3783,6 +4038,7 @@ dynamic_list_backups() {
} }
handle_list_backups () { handle_list_backups () {
if $list_backups ; then if $list_backups ; then
@ -3799,16 +4055,42 @@ handle_empty_restore_folder() {
fi; fi;
} }
handle_apply_retention_only() {
if $apply_retention_only; then
s1=20
s2=20
printf "\nApplying Retention Only Variables\n"
echo "--------------------------------------------------------------------------"
if [ -n "$config_file" ] && [ -f "$config_file" ]; then
printf "%-${s1}s %-${s2}s\n" "Configuration File:" "$config_file";
fi
echo "--------------------------------------------------------------------------"
printf "%-${s1}s %-${s2}s\n" "Backup Location:" "$backup_location";
printf "%-${s1}s %-${s2}s\n" "Timestamp:" "$(date +%m-%d-%Y-%H:%M:%S)";
printf "%-${s1}s %-${s2}s\n" "Retention:" "$retention_days";
echo "--------------------------------------------------------------------------"
if [ $retention_days -eq 0 ]; then
printf "\n[!] Are you sure retention should be set to 0?\n"
fi;
apply_backup_retention_policy
exit 0
fi;
}
process_backup() process_backup()
{ {
load_default_backup_variables; load_default_backup_variables;
parse_backup_variables "$@"; parse_backup_variables "$@";
handle_list_backups "$@" handle_list_backups "$@"
handle_apply_retention_only "$@"
print_backup_variables; print_backup_variables;
check_for_dependancies "backup"; check_for_dependancies "backup";
validation_prechecks_for_backup; validation_prechecks_for_backup;
apply_backup_retention_policy apply_backup_retention_policy;
issue_write_locks; issue_write_locks;
run_save_brm; run_save_brm;
run_backup; run_backup;
@ -3826,11 +4108,20 @@ process_restore()
run_restore; run_restore;
} }
global_dependencies() {
if ! command -v curl &> /dev/null; then
printf "\n[!] curl not found. Please install curl\n\n"
exit 1;
fi
}
print_mcs_bk_mgr_version_info() { print_mcs_bk_mgr_version_info() {
echo "MariaDB Columnstore Backup Manager" echo "MariaDB Columnstore Backup Manager"
echo "Version: $mcs_bk_manager_version" echo "Version: $mcs_bk_manager_version"
} }
global_dependencies
case "$action" in case "$action" in
'help' | '--help' | '-help' | '-h') 'help' | '--help' | '-help' | '-h')
print_action_help_text print_action_help_text