diff --git a/cmapi/scripts/mcs_backup_manager.sh b/cmapi/scripts/mcs_backup_manager.sh index 59355aff5..af1a1f2c0 100644 --- a/cmapi/scripts/mcs_backup_manager.sh +++ b/cmapi/scripts/mcs_backup_manager.sh @@ -13,7 +13,7 @@ # ######################################################################## # Documentation: bash mcs_backup_manager.sh help -# Version: 3.11 +# Version: 3.12 # # Backup Example # LocalStorage: sudo ./mcs_backup_manager.sh backup @@ -26,7 +26,7 @@ # S3: sudo ./mcs_backup_manager.sh restore -bb s3://my-cs-backups -l # ######################################################################## -mcs_bk_manager_version="3.11" +mcs_bk_manager_version="3.12" start=$(date +%s) action=$1 @@ -171,6 +171,8 @@ load_default_backup_variables() { poll_interval=5 poll_max_wait=60; list_backups=false + retention_file_lock_name=".delete-lock" + apply_retention_only=false # Compression Variables compress_format="" @@ -322,6 +324,10 @@ parse_backup_variables() { shift # past argument shift # past value ;; + -aro| --apply-retention-only) + apply_retention_only=true + shift # past argument + ;; "list") list_backups=true shift # past argument @@ -371,14 +377,15 @@ print_backup_help_text() { -c | --compress Compress backup in X format - Options: [ pigz ] -nb | --name-backup Define the name of the backup - default: date +%m-%d-%Y -r | --retention-days Retain backups created within the last X days, the rest are deleted, default 0 = keep all backups + -aro | --apply-retention-only Only apply retention policy to existing backups, does not run a backup -ha | --highavilability Hint wether shared storage is attached @ below on all nodes to see all data HA LocalStorage ( /var/lib/columnstore/dataX/ ) HA S3 ( /var/lib/columnstore/storagemanager/ ) Local Storage Examples: - ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage - ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage -P 8 - ./$0 backup -bl /tmp/backups/ -bd Local -s LocalStorage --incremental auto_most_recent + 
./$0 backup -bl /tmp/backups/ + ./$0 backup -bl /tmp/backups/ -P 8 + ./$0 backup -bl /tmp/backups/ --incremental auto_most_recent ./$0 backup -bl /tmp/backups/ -bd Remote -scp root@172.31.6.163 -s LocalStorage S3 Examples: @@ -666,7 +673,7 @@ validation_prechecks_for_backup() { printf " - Columnstore is OFFLINE \n"; export columnstore_online=false; else - printf " - Columnstore is ONLINE - safer if offline \n"; + printf " - Columnstore is ONLINE - safer if offline (but not required to be offline) \n"; export columnstore_online=true; fi fi; @@ -796,7 +803,26 @@ auto_select_most_recent_backup_for_incremental() { printf " - Searching for most recent backup ...." if [ $storage == "LocalStorage" ]; then - most_recent_backup=$(ls -td "${backup_location}"* 2>/dev/null | head -n 1) + + # Example: find /mnt/backups/ -mindepth 1 -maxdepth 1 -type d ! -exec test -f "{}/.auto-delete-via-retention" \; -printf "%T@ %p\n" | sort -nr | awk '{print $2}' | head -n 1 + if [ -f "${backup_location}${retention_file_lock_name}-$pm" ] ; then + handle_early_exit_on_backup "\n[!!!] ${backup_location}${retention_file_lock_name}-$pm exists. are multiple backups running or was there an error? Manually resolve before retrying\n" true + fi + + # Primary node is in the middle of applying retention policy, distorting the most recent backup + if [ -f "${backup_location}${retention_file_lock_name}-pm1" ] ; then + most_recent_backup="$(head -n1 "${backup_location}${retention_file_lock_name}-pm1" | awk '{print $2}')" + else + most_recent_backup=$(find "$backup_location" -mindepth 1 -maxdepth 1 -type d -printf "%T@ %p\n" | sort -nr | awk '{print $2}' | head -n 1) + fi + + # Debug messaging to understand the most recent backup modified times + if ! $quiet; then + echo "" + find "$backup_location" -mindepth 1 -maxdepth 1 -type d -printf "%T@ %p\n" | sort -nr + fi + + # If no backup found, exit if [[ -z "$most_recent_backup" ]]; then handle_early_exit_on_backup "\n[!!!] 
No backup found to increment in '$backup_location', please run a full backup or define a folder that exists --incremental \n" true else @@ -849,55 +875,124 @@ auto_select_most_recent_backup_for_incremental() { apply_backup_retention_policy() { - if [ $retention_days -eq 0 ]; then + if [ "$retention_days" -eq 0 ]; then printf " - Skipping Backup Rentention Policy\n" return 0; fi - printf " - Applying Backup Rentention Policy...." - if [ $storage == "LocalStorage" ]; then - # example: find /tmp/backups/ -mindepth 1 -maxdepth 1 -type d -name "*" -amin +0 - find "$backup_location" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec rm -r {} \; + # PM1 is in charge of deleting the backup + # Rest of the nodes should wait for the delete to be over if it finds the lock file + applying_retention_start=$(date +%s) + printf "\nApplying Backup Rentention Policy\n" + if [ "$pm" == "pm1" ] || [ -n "${PM_FORCE_DELETE-}" ] ; then + + retention_file_lock_name="${retention_file_lock_name}-$pm" - elif [ $storage == "S3" ]; then + if [ $storage == "LocalStorage" ]; then + + # Create a lock file to prevent other PMs from deleting backups + touch "${backup_location}${retention_file_lock_name}" + echo "most_recent_backup: $most_recent_backup" >> "${backup_location}${retention_file_lock_name}" + printf " - Created: ${backup_location}${retention_file_lock_name}\n" - current_date=$(date +%s) - backups=$(s3ls $backup_bucket) - - while IFS= read -r line; do + # example1: find /mnt/backups/ -mindepth 1 -maxdepth 1 -type d -name "*" -mmin +10 -exec echo "{}" \; + # example2: find /mnt/backups/ -mindepth 1 -maxdepth 1 -type d -name "*" -mmin +10 -exec sh -c 'printf " - Deleting: {} \n" && echo "{}" >> "/mnt/backups/.delete-lock-pm1" && rm -rf "{}"' \; + # find "$backup_location" -mindepth 1 -maxdepth 1 -type d -name "*" -mmin +$retention_days -exec sh -c 'printf " - Deleting: {} \n" && echo "{}" >> "'"${backup_location}${retention_file_lock_name}"'" && rm -rf "{}"' \; + find 
"$backup_location" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec sh -c 'printf " - Deleting: {} \n" && echo "{}" >> "'"${backup_location}${retention_file_lock_name}"'" && rm -rf "{}"' \; + + # Cleanup the lock file + rm "${backup_location}${retention_file_lock_name}" + printf " - Deleted: ${backup_location}${retention_file_lock_name}\n" - delete_backup=false - folder=$(echo "$line" | awk '{print substr($2, 1, length($2)-1)}') - date_time=$(s3ls "${backup_bucket}/${folder}/restore --recursive" | awk '{print $1,$2}') - - if [[ -n "$date_time" ]]; then + elif [ $storage == "S3" ]; then - # Parse the date - backup_date=$(date -d "$date_time" +%s) - - # Calculate the difference in days - days_diff=$(( (current_date - backup_date) / (60*60*24) )) - # echo "line: $line" - # echo "date_time: $date_time" - # echo "backup_date: $backup_date" - # echo "days_diff: $days_diff" + # Create a lock file to prevent other PMs from deleting backups + echo "most_recent_backup: $most_recent_backup" >> "${retention_file_lock_name}" + s3cp "${retention_file_lock_name}" "${backup_bucket}/${retention_file_lock_name}" - if [ $days_diff -gt "$retention_days" ]; then + current_date=$(date +%s) + backups=$(s3ls $backup_bucket) + + while IFS= read -r line; do + + delete_backup=false + folder=$(echo "$line" | awk '{print substr($2, 1, length($2)-1)}') + date_time=$(s3ls "${backup_bucket}/${folder}/restore --recursive" | awk '{print $1,$2}') + + if [[ -n "$date_time" ]]; then + + # Parse the date + backup_date=$(date -d "$date_time" +%s) + + # Calculate the difference in days + days_diff=$(( (current_date - backup_date) / (60*60*24) )) + # echo "line: $line" + # echo "date_time: $date_time" + # echo "backup_date: $backup_date" + # echo "days_diff: $days_diff" + + if [ $days_diff -gt "$retention_days" ]; then + delete_backup=true + fi + else delete_backup=true fi - else - delete_backup=true - fi + + if $delete_backup; then + echo "${backup_bucket}/${folder}" >> 
"${retention_file_lock_name}" + s3cp "${retention_file_lock_name}" "${backup_bucket}/${retention_file_lock_name}" + s3rm "${backup_bucket}/${folder}" + #printf "\n - Deleting: ${backup_bucket}/${folder}" + fi + printf "." + done <<< "$backups" - if $delete_backup; then - s3rm "${backup_bucket}/${folder}" - #echo "Deleting ${backup_bucket}/${folder}" - fi - printf "." + # Cleanup the lock file + s3rm "${backup_bucket}/${retention_file_lock_name}" + fi + + applying_retention_end=$(date +%s) + printf "Done $(human_readable_time $((applying_retention_end-applying_retention_start))) \n" + else - done <<< "$backups" + local retry_limit=2160 + local retry_counter=0 + local retry_sleep_interval=5 + if [ $storage == "LocalStorage" ]; then + + if [ -f "${backup_location}${retention_file_lock_name}-pm1" ]; then + printf " - Waiting for PM1 to finish deleting backups (max: $retry_limit intervals of $retry_sleep_interval sec ) ... \n" + fi; + while [ -f "${backup_location}${retention_file_lock_name}-pm1" ]; do + + if [ -f "${backup_location}${retention_file_lock_name}-pm1" ]; then + printf " - Waiting on PM1 to finish deleting %s \n" "$(tail -n 1 "${backup_location}${retention_file_lock_name}-pm1" 2>/dev/null )" + fi; + + if [ $retry_counter -ge $retry_limit ]; then + handle_early_exit_on_backup "\n [!] PM1 lock file still exists after $retry_limit sec, deleting backups via retention policy might have failed or taking too long, please investigate, exiting\n" true + fi + sleep $retry_sleep_interval + ((retry_counter++)) + done + elif [ $storage == "S3" ]; then + if [[ $(s3ls "${backup_bucket}/${retention_file_lock_name}") ]]; then + printf " - Waiting for PM1 to finish deleting backups (max: $retry_limit intervals of $retry_sleep_interval sec ) ... 
\n" + fi; + while [[ $(s3ls "${backup_bucket}/${retention_file_lock_name}") ]]; do + if [[ $(s3ls "${backup_bucket}/${retention_file_lock_name}") ]]; then + printf " - Waiting on PM1 to finish deleting %s \n" "$( s3cp "${backup_bucket}/${retention_file_lock_name}" "-" | tail -n 1 2>/dev/null )" + fi; + if [ $retry_counter -ge $retry_limit ]; then + handle_early_exit_on_backup "\n [!] PM1 lock file still exists after $retry_limit sec, deleting backups via retention policy might have failed or taking too long, please investigate, exiting\n" true + fi + sleep $retry_sleep_interval + ((retry_counter++)) + done + fi + applying_retention_end=$(date +%s) + printf "Done $(human_readable_time $((applying_retention_end-applying_retention_start))) \n" fi - printf " Done\n" } @@ -994,7 +1089,8 @@ issue_write_locks() } poll_check_no_active_sql_writes() { - printf " - Polling For Active Writes ... " + poll_active_sql_writes_start=$(date +%s) + printf " - Polling for active Writes ... " query="SELECT COUNT(*) FROM information_schema.processlist where user != 'root' AND command = 'Query' AND ( info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' OR info LIKE '%LOAD DATA%');" attempts=0 no_writes=false @@ -1005,34 +1101,41 @@ poll_check_no_active_sql_writes() { fi if [ "$active_writes" -le 0 ]; then - printf "Done\n" + poll_active_sql_writes_end=$(date +%s) + printf "Done $(human_readable_time $((poll_active_sql_writes_end-poll_active_sql_writes_start))) \n" no_writes=true break else - # extra_info=$( mariadb -e "SELECT * FROM information_schema.processlist where user != 'root' AND command = 'Query' AND info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' " ) - # echo "Active write operations detected: $active_writes" - # echo "$extra_info" - printf "." - if ! $quiet; then printf "\nActive write operations detected: $active_writes"; fi; + + if ! 
$quiet; then + printf "\nActive write operations detected: $active_writes\n"; + mariadb -e "select ID,USER,TIME,LEFT(INFO, 50) from information_schema.processlist where user != 'root' AND command = 'Query' AND ( info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' OR info LIKE '%LOAD DATA%');" + else + printf "." + fi; sleep "$poll_interval" ((attempts++)) fi done if ! $no_writes; then + mariadb -e "select ID,USER,TIME,INFO from information_schema.processlist where user != 'root' AND command = 'Query' AND ( info LIKE '%INSERT%' OR info LIKE '%UPDATE%' OR info LIKE '%DELETE%' OR info LIKE '%LOAD DATA%');" handle_early_exit_on_backup "\n[X] Exceeded poll_max_wait: $poll_max_wait minutes\nActive write operations detected: $active_writes \n" true fi; + } poll_check_no_active_cpimports() { - printf " - Polling For Active cpimports ... " + poll_active_cpimports_start=$(date +%s) + printf " - Polling for active cpimports ... " attempts=0 no_cpimports=false if ! $columnstore_online ; then printf "Skip since offline\n"; return ; fi; while [ "$attempts" -lt "$max_poll_attempts" ]; do active_cpimports=$(viewtablelock 2>/dev/null ) if [[ "$active_cpimports" == *"No tables are locked"* ]]; then - printf "Done\n" + poll_active_cpimports_end=$(date +%s) + printf "Done $(human_readable_time $((poll_active_cpimports_end-poll_active_cpimports_start))) \n" no_cpimports=true break else @@ -1358,7 +1461,39 @@ deepParallelRsync() { wait } -run_backup() { +human_readable_time() { + local total_seconds=$1 + local days=$((total_seconds / 86400)) + local hours=$(( (total_seconds % 86400) / 3600 )) + local minutes=$(( (total_seconds % 3600) / 60 )) + local seconds=$(( total_seconds % 60 )) + + local output="" + + # Special case for 0 seconds + if (( total_seconds == 0 )); then + printf "<1 sec" + return + fi + + if (( days > 0 )); then + output+=" ${days} days" + fi + if (( hours > 0 )); then + output+=" ${hours} hours" + fi + if (( minutes > 0 )); then + output+=" ${minutes} 
min" + fi + if (( seconds > 0 || total_seconds == 0 )); then + output+=" ${seconds} sec" + fi + + printf "${output}" | xargs +} + +run_backup() { + backup_start=$(date +%s) if [ $storage == "LocalStorage" ]; then if [ $backup_destination == "Local" ]; then @@ -1383,6 +1518,7 @@ run_backup() { printf " Done\n" # Backup Columnstore data + columnstore_backup_start=$(date +%s) i=1 while [ $i -le $DBROOT_COUNT ]; do if [[ $ASSIGNED_DBROOT == "$i" || $HA == true ]]; then @@ -1393,15 +1529,19 @@ run_backup() { elif $parrallel_rsync ; then printf " - Parallel Rsync CS Data$i... \n" initiate_rsyncs $i - printf " Done\n" + columnstore_backup_end=$(date +%s) + printf " Done $(human_readable_time $((columnstore_backup_end-columnstore_backup_start))) \n" else printf " - Syncing Columnstore Data$i... " eval "rsync $additional_rysnc_flags /var/lib/columnstore/data$i/* $backup_location$today/data$i/ $xtra_cmd_args"; - printf " Done\n" + columnstore_backup_end=$(date +%s) + printf " Done $(human_readable_time $((columnstore_backup_end-columnstore_backup_start))) \n" + fi; fi ((i++)) done + # Backup MariaDB data if [ $ASSIGNED_DBROOT == "1" ]; then @@ -1419,6 +1559,7 @@ run_backup() { fi # Run mariadb-backup + mariadb_backup_start=$(date +%s) if ! $skip_mdb; then if [[ -n "$compress_format" ]]; then printf " - Compressing MariaDB Data... " @@ -1426,9 +1567,11 @@ run_backup() { case $compress_format in pigz) # Handle Cloud - if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile; then + mariadb_backup_start=$(date +%s) + if ! 
mariabackup --user=root --backup --slave-info --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile; then handle_early_exit_on_backup "\nFailed mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $mbd_prefix 2>> $logfile \n" fi + mariadb_backup_end=$(date +%s) printf " Done @ $mbd_prefix\n" ;; *) # unknown option @@ -1436,7 +1579,12 @@ run_backup() { esac else printf " - Copying MariaDB Data... " - if eval "mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root $xtra_cmd_args" ; then printf " Done \n"; else printf "\n Failed: mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root\n"; handle_early_exit_on_backup; fi + if eval "mariadb-backup --backup --slave-info --target-dir=$backup_location$today/mysql --user=root $xtra_cmd_args" ; then + mariadb_backup_end=$(date +%s) + printf " Done $(human_readable_time $((mariadb_backup_end-mariadb_backup_start))) \n" + else + handle_early_exit_on_backup "\n Failed: mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root\n"; + fi fi else echo "[!] Skipping mariadb-backup" @@ -1469,8 +1617,23 @@ run_backup() { else compress_paths+=" $MARIADB_SERVER_CONFIGS_PATH/*" fi + + # Backup mariadb server replication details + slave_status=$(mariadb -e "show slave status\G" 2>/dev/null) + if [[ $? 
-eq 0 ]]; then + if [[ -n "$slave_status" ]]; then + if [[ -z "$compress_format" ]]; then + mariadb -e "show slave status\G" | grep -E "Master_Host|Master_User|Master_Port|Gtid_IO_Pos" > $backup_location$today/configs/mysql/$pm/replication_details.txt + else + mariadb -e "show slave status\G" | grep -E "Master_Host|Master_User|Master_Port|Gtid_IO_Pos" > replication_details.txt + compress_paths+=" replication_details.txt" + fi + fi + else + handle_early_exit_on_backup "[!] - Failed to execute the MariaDB command: mariadb -e 'show slave status\G' \n" + fi fi - + # Handle compression for Columnstore Data & Configs if [[ -n "$compress_format" ]]; then cs_prefix="$backup_location$today/$split_file_cs_prefix.$compress_format" @@ -1478,9 +1641,31 @@ run_backup() { case $compress_format in pigz) printf " - Compressing CS Data & Configs... " + columnstore_backup_start=$(date +%s) if ! eval "tar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $cs_prefix 2>> $logfile"; then handle_early_exit_on_backup "[!] 
- Compression Failed \ntar cf - $compress_paths 2>>$logfile | pigz -p $PARALLEL_THREADS -c > $cs_prefix 2>> $logfile \n" fi + columnstore_backup_end=$(date +%s) + + # Create summary info for list backup + get_latest_em_from_dbrm_directory "$DBRM_PATH" + em_file_name=$(basename "$em_file_full_path") + version_prefix=${em_file_name::-3} + journal_file=$(ls -la "${subdir_dbrms}/BRM_saves_journal" 2>/dev/null | awk 'NR==1 {print $5}' ) + vbbm_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vbbm" 2>/dev/null | awk 'NR==1 {print $5}' ) + vss_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vss" 2>/dev/null | awk 'NR==1 {print $5}' ) + echo "em_file_created: $em_file_created" >> $backup_location$today/backup_summary_info.txt + echo "em_file_name: $em_file_name" >> $backup_location$today/backup_summary_info.txt + echo "em_file_size: $em_file_size" >> $backup_location$today/backup_summary_info.txt + echo "journal_file: $journal_file" >> $backup_location$today/backup_summary_info.txt + echo "vbbm_file: $vbbm_file" >> $backup_location$today/backup_summary_info.txt + echo "vss_file: $vss_file" >> $backup_location$today/backup_summary_info.txt + echo "compression: $compress_format" >> $backup_location$today/backup_summary_info.txt + + if [ -f replication_details.txt ]; then + rm -rf replication_details.txt; + fi + printf " Done @ $cs_prefix\n" ;; *) # unknown option @@ -1488,18 +1673,6 @@ run_backup() { esac fi - # Save replication details - slave_status=$(mariadb -e "show slave status\G" 2>/dev/null) - if [[ $? -eq 0 ]]; then - if [[ -n "$slave_status" ]]; then - mariadb -e "show slave status\G" | grep -E "Master_Host|Master_User|Master_Port" > $backup_location$today/replication_details.txt - else - echo "No replication details to save" - fi - else - handle_early_exit_on_backup "[!] 
- Failed to execute the MariaDB command: mariadb -e 'show slave status\G' \n" - fi - if $incremental ; then # Log each incremental run now=$(date "+%m-%d-%Y %H:%M:%S"); echo "$pm updated on $now" >> $backup_location$today/incrementallyUpdated.txt @@ -1662,12 +1835,12 @@ run_backup() { pigz) # Handle Cloud if [ $cloud == "gcp" ]; then - if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile" - chunk 2>$logfile; then + if ! mariabackup --user=root --backup --slave-info --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile" - chunk 2>$logfile; then handle_early_exit_on_backup "\nFailed mariadb-backup --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter=\"gsutil cp - ${mbd_prefix}_\$FILE 2>$logfile\" - chunk 2>$logfile \n" fi elif [ $cloud == "aws" ]; then - if ! mariabackup --user=root --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2" - chunk 2>$logfile; then + if ! 
mariabackup --user=root --backup --slave-info --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter="aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2" - chunk 2>$logfile; then handle_early_exit_on_backup "\nFailed mariadb-backup --backup --stream xbstream --parallel $PARALLEL_THREADS --ftwrl-wait-timeout=$timeout --ftwrl-wait-threshold=999999 --extra-lsndir=/tmp/checkpoint_out 2>>$logfile | pigz -p $PARALLEL_THREADS 2>> $logfile | split -d -a 5 -b 250M --filter=\"aws s3 cp - ${mbd_prefix}_\$FILE 2>$logfile 1>&2\" - chunk 2>$logfile\n" fi fi @@ -1682,7 +1855,7 @@ run_backup() { printf " - Syncing MariaDB data ... \n" rm -rf $backup_location$today/mysql if mkdir -p $backup_location$today/mysql ; then - if eval "mariabackup --user=root --backup --target-dir=$backup_location$today/mysql/ $xtra_cmd_args"; then + if eval "mariabackup --user=root --backup --slave-info --target-dir=$backup_location$today/mysql/ $xtra_cmd_args"; then printf " + mariadb-backup @ $backup_location$today/mysql/ \n" else handle_early_exit_on_backup "\nFailed mariadb-backup --backup --target-dir=$backup_location$today/mysql --user=root\n" true @@ -1708,6 +1881,22 @@ run_backup() { else compress_paths+="$MARIADB_SERVER_CONFIGS_PATH" fi + + # Backup mariadb server replication details + slave_status=$(mariadb -e "show slave status\G" 2>/dev/null) + if [[ $? -eq 0 ]]; then + if [[ -n "$slave_status" ]]; then + mariadb -e "show slave status\G" | grep -E "Master_Host|Master_User|Master_Port|Gtid_IO_Pos" > replication_details.txt + if [[ -z "$compress_format" ]]; then + s3cp replication_details.txt $backup_bucket/$today/configs/mysql/$pm/replication_details.txt + else + compress_paths+=" replication_details.txt" + fi + fi + else + handle_early_exit_on_backup "[!] 
- Failed to execute the MariaDB command: mariadb -e 'show slave status\G' \n" + fi + fi # Handles streaming the compressed columnstore local files @@ -1727,6 +1916,12 @@ run_backup() { handle_early_exit_on_backup "[!] - Compression/Split/Upload Failed \n" fi fi + + # Cleanup temp files + if [ -f replication_details.txt ]; then + rm -rf replication_details.txt + fi + printf " Done @ $cs_prefix\n" ;; *) # unknown option @@ -1765,8 +1960,52 @@ run_backup() { clear_read_lock end=$(date +%s) - runtime=$((end-start)) - printf "\nRuntime: $runtime\n" + total_runtime=$((end - start)) + other=$total_runtime + + printf "\nRuntime Summary\n" + echo "--------------------" + if [[ -n "$applying_retention_end" && $applying_retention_end -ne 0 ]]; then + applying_retention_time=$((applying_retention_end - applying_retention_start)) + printf "%-20s %-15s\n" "Applying Retention:" "$(human_readable_time $applying_retention_time)" + other=$((other - applying_retention_time)) + fi + + if [[ -n "$poll_active_sql_writes_end" && $poll_active_sql_writes_end -ne 0 ]]; then + poll_sql_writes_time=$((poll_active_sql_writes_end - poll_active_sql_writes_start)) + printf "%-20s %-15s\n" "Polling SQL Writes:" "$(human_readable_time $poll_sql_writes_time)" + other=$((other - poll_sql_writes_time)) + fi + + if [[ -n "$poll_active_cpimports_end" && $poll_active_cpimports_end -ne 0 ]]; then + poll_cpimports_time=$((poll_active_cpimports_end - poll_active_cpimports_start)) + printf "%-20s %-15s\n" "Polling cpimports:" "$(human_readable_time $poll_cpimports_time)" + other=$((other - poll_cpimports_time)) + fi + + if [[ -n "$columnstore_backup_end" && $columnstore_backup_end -ne 0 ]]; then + columnstore_backup_time=$((columnstore_backup_end - columnstore_backup_start)) + printf "%-20s %-15s\n" "Columnstore Backup:" "$(human_readable_time $columnstore_backup_time)" + other=$((other - columnstore_backup_time)) + fi + + if [[ -n "$mariadb_backup_end" && $mariadb_backup_end -ne 0 ]]; then + 
mariadb_backup_time=$((mariadb_backup_end - mariadb_backup_start)) + printf "%-20s %-15s\n" "MariaDB Backup:" "$(human_readable_time $mariadb_backup_time)" + other=$((other - mariadb_backup_time)) + fi + + if (( other < 0 )); then + printf "%-20s %-15s\n" "Other:" "N/A (invalid data)" + else + if [ $other -gt 0 ]; then + printf "%-20s %-15s\n" "Other:" "<$(human_readable_time $other)" + else + printf "%-20s %-15s\n" "Other:" "<1 sec" + fi + fi + + printf "\nTotal Runtime: $(human_readable_time $((end-start))) \n" printf "$final_message\n\n" } @@ -2353,7 +2592,7 @@ validation_prechecks_for_restore() { fi fi; else - handle_early_exit_on_restore " Failed to list backup contents...\n$check5\n" + handle_early_exit_on_restore " Failed to list backup contents...\n$check5\n" fi; fi; @@ -2734,10 +2973,12 @@ run_restore() if [ $DBROOT_COUNT -gt 1 ]; then echo -e "[!!] Check Replicas to manually configure mariadb replication"; fi echo -e "systemctl start mariadb " echo -e "systemctl start mariadb-columnstore-cmapi " - echo -e "mcs cluster start \n\n" + echo -e "mcs cluster start" + echo -e "mariadb -e \"show variables like '%gtid_current_pos%';\"\n\n" else echo -e "[!!] 
Check this replica to manually configure mariadb replication after primary node is ready" echo -e "Example:" + echo -e "mariadb -e \"SET GLOBAL gtid_slave_pos = '0-1-14'\"" echo -e "mariadb -e \"stop slave;CHANGE MASTER TO MASTER_HOST='\$pm1' , MASTER_USER='repuser' , MASTER_PASSWORD='aBcd123%123%aBc' , MASTER_USE_GTID = slave_pos;start slave;show slave status\G\"" echo -e "mariadb -e \"show slave status\G\" | grep \"Slave_\" \n" fi @@ -3321,7 +3562,7 @@ get_latest_em_from_dbrm_directory() { fi else subdir_dbrms="$1" - latest_em_file=$(find "${subdir_dbrms}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lat {} + | awk 'NR==1 {printf "%-12s %-4s %-2s %-5s %s\n", $5, $6, $7, $8, $9}'| head -n 1) + latest_em_file=$(find "${subdir_dbrms}" -maxdepth 1 -type f -name "BRM_saves*_em" -exec ls -lat {} + 2>/dev/null | awk 'NR==1 {printf "%-12s %-4s %-2s %-5s %s\n", $5, $6, $7, $8, $9}'| head -n 1) em_file_size=$(echo "$latest_em_file" | awk '{print $1}' ) em_file_created=$(echo "$latest_em_file" | awk '{print $2,$3,$4}' ) em_file_full_path=$(echo $latest_em_file | awk '{print $NF}' ) @@ -3339,18 +3580,32 @@ list_restore_options_from_backups() { echo "Retention Policy: $retention_days days" fi; echo "--------------------------------------------------------------------------" - printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s %-10s\n" "Options" "Last-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size" "Days Old" + printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s %-10s\n" "Options" "EM-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size" "Backup Days Old" # Iterate over subdirectories for subdir in "${backup_location}"/*; do if [ -f "${subdir}/cs-localfiles.tar.pigz" ]; then em_file_created=$( date -r "$subdir" +"%b %d %H:%M" ) - em_file_size="N/A" - em_file_name="N/A" - journal_file="N/A" - vbbm_file="N/A" - vss_file="N/A" + + # parse from ${subdir}/backup_summary_info.txt + if [ -f "${subdir}/backup_summary_info.txt" ]; then + 
em_file_created="$(grep -m 1 "em_file_created" "${subdir}/backup_summary_info.txt" | awk -F': ' '{print $2}')" + em_file_name="$(grep -m 1 "em_file_name" "${subdir}/backup_summary_info.txt" | awk '{print $2}')" + em_file_size="$(grep -m 1 "em_file_size" "${subdir}/backup_summary_info.txt" | awk '{print $2}')" + journal_file="$(grep -m 1 "journal_file" "${subdir}/backup_summary_info.txt" | awk '{print $2}')" + vbbm_file="$(grep -m 1 "vbbm_file" "${subdir}/backup_summary_info.txt" | awk '{print $2}')" + vss_file="$(grep -m 1 "vss_file" "${subdir}/backup_summary_info.txt" | awk '{print $2}')" + compression="$(grep -m 1 "compression" "${subdir}/backup_summary_info.txt" | awk '{print $2}')" + else + em_file_created="N/A" + em_file_size="N/A" + em_file_name="N/A" + journal_file="N/A" + vbbm_file="N/A" + vss_file="N/A" + compression="N/A" + fi file_age=$(($(date +%s) - $(date -r "$subdir" +%s))) file_age_days=$((file_age / 86400)) @@ -3359,13 +3614,12 @@ list_restore_options_from_backups() { file_age_days="$file_age_days (Will Delete)" fi - printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s %-10s\n" "$(basename "$subdir")" "$em_file_created" "$em_file_name" "$em_file_size" "$journal_file" "$vbbm_file" "$vss_file" "$file_age_days" + printf "%-35s %-9s %-13s %-15s %-12s %-12s %-10s %-10s %-10s\n" "$(basename "$subdir")" "$compression" "$em_file_created" "$em_file_name" "$em_file_size" "$journal_file" "$vbbm_file" "$vss_file" "$file_age_days" continue fi get_latest_em_from_dbrm_directory "$subdir/data1/systemFiles/dbrm/" - if [ -f "${subdir_dbrms}/BRM_saves_journal" ]; then em_file_name=$(basename "$em_file_full_path") version_prefix=${em_file_name::-3} @@ -3373,12 +3627,13 @@ list_restore_options_from_backups() { vbbm_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vbbm" 2>/dev/null | awk 'NR==1 {print $5}' ) vss_file=$(ls -la "${subdir_dbrms}/${version_prefix}_vss" 2>/dev/null | awk 'NR==1 {print $5}' ) - file_age=$(($(date +%s) - $(date -r "$subdir/data1/systemFiles" 
+%s))) + file_age=$(($(date +%s) - $(date -r "$subdir" +%s))) file_age_days=$((file_age / 86400)) # Check if the backup will be deleted given the retention policy defined if [ -n "$retention_days" ] && [ $retention_days -ne 0 ]; then - will_delete="$(find "$subdir/data1/systemFiles" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec echo {} \;)" + # file_age_days=$((file_age / 60)); will_delete="$(find "$subdir" -mindepth 1 -maxdepth 1 -type d -name "*" -mmin +$retention_days -exec echo {} \;)" + will_delete="$(find "$subdir" -mindepth 1 -maxdepth 1 -type d -name "*" -mtime +$retention_days -exec echo {} \;)" if [ -n "$will_delete" ]; then file_age_days="$file_age_days (Will Delete)" @@ -3400,7 +3655,7 @@ list_dbrm_restore_options_from_backups() { echo "Retention Policy: $retention_days days" fi; echo "--------------------------------------------------------------------------" - printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s\n" "Options" "Last-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size" + printf "%-45s %-13s %-15s %-12s %-12s %-10s %-10s\n" "Options" "EM-Updated" "Extent Map" "EM-Size" "Journal-Size" "VBBM-Size" "VSS-Size" # Iterate over subdirectories for subdir in "${backup_location}"/*; do @@ -3783,6 +4038,7 @@ dynamic_list_backups() { } + handle_list_backups () { if $list_backups ; then @@ -3799,16 +4055,42 @@ handle_empty_restore_folder() { fi; } +handle_apply_retention_only() { + + if $apply_retention_only; then + s1=20 + s2=20 + printf "\nApplying Retention Only Variables\n" + echo "--------------------------------------------------------------------------" + if [ -n "$config_file" ] && [ -f "$config_file" ]; then + printf "%-${s1}s %-${s2}s\n" "Configuration File:" "$config_file"; + fi + echo "--------------------------------------------------------------------------" + printf "%-${s1}s %-${s2}s\n" "Backup Location:" "$backup_location"; + printf "%-${s1}s %-${s2}s\n" "Timestamp:" "$(date 
+%m-%d-%Y-%H:%M:%S)"; + printf "%-${s1}s %-${s2}s\n" "Retention:" "$retention_days"; + echo "--------------------------------------------------------------------------" + + if [ $retention_days -eq 0 ]; then + printf "\n[!] Are you sure retention should be set to 0?\n" + fi; + + apply_backup_retention_policy + exit 0 + fi; +} + process_backup() { load_default_backup_variables; parse_backup_variables "$@"; handle_list_backups "$@" + handle_apply_retention_only "$@" print_backup_variables; check_for_dependancies "backup"; validation_prechecks_for_backup; - apply_backup_retention_policy + apply_backup_retention_policy; issue_write_locks; run_save_brm; run_backup; @@ -3826,11 +4108,20 @@ process_restore() run_restore; } +global_dependencies() { + if ! command -v curl &> /dev/null; then + printf "\n[!] curl not found. Please install curl\n\n" + exit 1; + fi +} + print_mcs_bk_mgr_version_info() { echo "MariaDB Columnstore Backup Manager" echo "Version: $mcs_bk_manager_version" } +global_dependencies + case "$action" in 'help' | '--help' | '-help' | '-h') print_action_help_text @@ -3859,4 +4150,4 @@ case "$action" in ;; esac -exit 0; +exit 0; \ No newline at end of file