From 4fff78f00910af0137f9de7532f8eb21d08ab1c3 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 8 Jun 2022 10:53:01 +0900 Subject: [PATCH] Restructure pg_upgrade output directories for better idempotence 38bfae3 has moved the contents written to files by pg_upgrade under a new directory called pg_upgrade_output.d/ located in the new cluster's data folder, and it used a simple structure made of two subdirectories leading to a fixed structure: log/ and dump/. This design has made weaker pg_upgrade on repeated calls, as we could get failures when creating one or more of those directories, while potentially losing the logs of a previous run (logs are retained automatically on failure, and cleaned up on success unless --retain is specified). So a user would need to clean up pg_upgrade_output.d/ as an extra step for any repeated calls of pg_upgrade. The most common scenario here is --check followed by the actual upgrade, but one could see a failure when specifying an incorrect input argument value. Removing entirely the logs would have the disadvantage of removing all the past information, even if --retain was specified at some past step. This result is annoying for a lot of users and automated upgrade flows. So, rather than requiring a manual removal of pg_upgrade_output.d/, this redesigns the set of output directories in a more dynamic way, based on a suggestion from Tom Lane and Daniel Gustafsson. pg_upgrade_output.d/ is still the base path, but a second directory level is added, mostly named after an ISO-8601-formatted timestamp (in short human-readable, with milliseconds appended to the name to avoid any conflicts). The logs and dumps are saved within the same subdirectories as previously, as of log/ and dump/, but these are located inside the subdirectory named after the timestamp. The logs of a given run are removed only after a successful run if --retain is not used, and pg_upgrade_output.d/ is kept if there are any logs from a previous run. Note that previously, pg_upgrade would have kept the logs even after a successful --check but that was inconsistent compared to the case without --check when using --retain. The code in charge of the removal of the output directories is now refactored into a single routine. Two TAP tests are added with some --check commands (one failure case and one success case), to look after the issue fixed here. Note that the tests had to be tweaked a bit to fit with the new directory structure so as it can find any logs generated on failure. This is still going to require a change in the buildfarm client for the case where pg_upgrade is tested without the TAP test, though, but I'll tackle that with a separate patch where needed. Reported-by: Tushar Ahuja Author: Michael Paquier Reviewed-by: Daniel Gustafsson, Justin Pryzby Discussion: https://postgr.es/m/77e6ecaa-2785-97aa-f229-4b6e047cbd2b@enterprisedb.com --- doc/src/sgml/ref/pgupgrade.sgml | 5 +- src/bin/pg_upgrade/check.c | 2 + src/bin/pg_upgrade/pg_upgrade.c | 67 +++++++++++++++++--------- src/bin/pg_upgrade/pg_upgrade.h | 14 ++++-- src/bin/pg_upgrade/t/002_pg_upgrade.pl | 49 ++++++++++++++++++- src/bin/pg_upgrade/util.c | 42 ++++++++++++++++ 6 files changed, 149 insertions(+), 30 deletions(-) diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index 8cda8d16d17..f3eb7fbd338 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -768,7 +768,10 @@ psql --username=postgres --file=script.sql postgres pg_upgrade creates various working files, such as schema dumps, stored within pg_upgrade_output.d in - the directory of the new cluster. + the directory of the new cluster. Each run creates a new subdirectory named + with a timestamp formatted as per ISO 8601 + (%Y%m%dT%H%M%S), where all the generated files are + stored. diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 6114303b527..ace7387edaf 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -210,6 +210,8 @@ report_clusters_compatible(void) pg_log(PG_REPORT, "\n*Clusters are compatible*\n"); /* stops new cluster */ stop_postmaster(false); + + cleanup_output_dirs(); exit(0); } diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index ecb3e1f6474..ccb048ab2e5 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -58,7 +58,6 @@ static void copy_xact_xlog_xid(void); static void set_frozenxids(bool minmxid_only); static void make_outputdirs(char *pgdata); static void setup(char *argv0, bool *live_check); -static void cleanup(void); ClusterInfo old_cluster, new_cluster; @@ -204,7 +203,7 @@ main(int argc, char **argv) pg_free(deletion_script_file_name); - cleanup(); + cleanup_output_dirs(); return 0; } @@ -221,19 +220,54 @@ make_outputdirs(char *pgdata) char **filename; time_t run_time = time(NULL); char filename_path[MAXPGPATH]; + char timebuf[128]; + struct timeval time; + time_t tt; + int len; - log_opts.basedir = (char *) pg_malloc(MAXPGPATH); - snprintf(log_opts.basedir, MAXPGPATH, "%s/%s", pgdata, BASE_OUTPUTDIR); - log_opts.dumpdir = (char *) pg_malloc(MAXPGPATH); - snprintf(log_opts.dumpdir, MAXPGPATH, "%s/%s", pgdata, DUMP_OUTPUTDIR); - log_opts.logdir = (char *) pg_malloc(MAXPGPATH); - snprintf(log_opts.logdir, MAXPGPATH, "%s/%s", pgdata, LOG_OUTPUTDIR); + log_opts.rootdir = (char *) pg_malloc0(MAXPGPATH); + len = snprintf(log_opts.rootdir, MAXPGPATH, "%s/%s", pgdata, BASE_OUTPUTDIR); + if (len >= MAXPGPATH) + pg_fatal("buffer for root directory too small"); - if (mkdir(log_opts.basedir, pg_dir_create_mode)) + /* BASE_OUTPUTDIR/$timestamp/ */ + gettimeofday(&time, NULL); + tt = (time_t) time.tv_sec; + strftime(timebuf, sizeof(timebuf), "%Y%m%dT%H%M%S", localtime(&tt)); + /* append milliseconds */ + snprintf(timebuf + strlen(timebuf), sizeof(timebuf) - strlen(timebuf), + ".%03d", (int) (time.tv_usec / 1000)); + log_opts.basedir = (char *) pg_malloc0(MAXPGPATH); + len = snprintf(log_opts.basedir, MAXPGPATH, "%s/%s", log_opts.rootdir, + timebuf); + if (len >= MAXPGPATH) + pg_fatal("buffer for base directory too small"); + + /* BASE_OUTPUTDIR/$timestamp/dump/ */ + log_opts.dumpdir = (char *) pg_malloc0(MAXPGPATH); + len = snprintf(log_opts.dumpdir, MAXPGPATH, "%s/%s/%s", log_opts.rootdir, + timebuf, DUMP_OUTPUTDIR); + if (len >= MAXPGPATH) + pg_fatal("buffer for dump directory too small"); + + /* BASE_OUTPUTDIR/$timestamp/log/ */ + log_opts.logdir = (char *) pg_malloc0(MAXPGPATH); + len = snprintf(log_opts.logdir, MAXPGPATH, "%s/%s/%s", log_opts.rootdir, + timebuf, LOG_OUTPUTDIR); + if (len >= MAXPGPATH) + pg_fatal("buffer for log directory too small"); + + /* + * Ignore the error case where the root path exists, as it is kept the + * same across runs. + */ + if (mkdir(log_opts.rootdir, pg_dir_create_mode) < 0 && errno != EEXIST) + pg_fatal("could not create directory \"%s\": %m\n", log_opts.rootdir); + if (mkdir(log_opts.basedir, pg_dir_create_mode) < 0) pg_fatal("could not create directory \"%s\": %m\n", log_opts.basedir); - if (mkdir(log_opts.dumpdir, pg_dir_create_mode)) + if (mkdir(log_opts.dumpdir, pg_dir_create_mode) < 0) pg_fatal("could not create directory \"%s\": %m\n", log_opts.dumpdir); - if (mkdir(log_opts.logdir, pg_dir_create_mode)) + if (mkdir(log_opts.logdir, pg_dir_create_mode) < 0) pg_fatal("could not create directory \"%s\": %m\n", log_opts.logdir); snprintf(filename_path, sizeof(filename_path), "%s/%s", log_opts.logdir, @@ -745,14 +779,3 @@ set_frozenxids(bool minmxid_only) check_ok(); } - - -static void -cleanup(void) -{ - fclose(log_opts.internal); - - /* Remove dump and log files? */ - if (!log_opts.retain) - (void) rmtree(log_opts.basedir, true); -} diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 86d3dc46fa5..55de244ac01 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -30,12 +30,14 @@ #define DB_DUMP_FILE_MASK "pg_upgrade_dump_%u.custom" /* - * Base directories that include all the files generated internally, - * from the root path of the new cluster. + * Base directories that include all the files generated internally, from the + * root path of the new cluster. The paths are dynamically built as of + * BASE_OUTPUTDIR/$timestamp/{LOG_OUTPUTDIR,DUMP_OUTPUTDIR} to ensure their + * uniqueness in each run. */ #define BASE_OUTPUTDIR "pg_upgrade_output.d" -#define LOG_OUTPUTDIR BASE_OUTPUTDIR "/log" -#define DUMP_OUTPUTDIR BASE_OUTPUTDIR "/dump" +#define LOG_OUTPUTDIR "log" +#define DUMP_OUTPUTDIR "dump" #define DB_DUMP_LOG_FILE_MASK "pg_upgrade_dump_%u.log" #define SERVER_LOG_FILE "pg_upgrade_server.log" @@ -276,7 +278,8 @@ typedef struct bool verbose; /* true -> be verbose in messages */ bool retain; /* retain log files on success */ /* Set of internal directories for output files */ - char *basedir; /* Base output directory */ + char *rootdir; /* Root directory, aka pg_upgrade_output.d */ + char *basedir; /* Base output directory, with timestamp */ char *dumpdir; /* Dumps */ char *logdir; /* Log files */ bool isatty; /* is stdout a tty */ @@ -432,6 +435,7 @@ void report_status(eLogType type, const char *fmt,...) pg_attribute_printf(2, 3 void pg_log(eLogType type, const char *fmt,...) pg_attribute_printf(2, 3); void pg_fatal(const char *fmt,...) pg_attribute_printf(1, 2) pg_attribute_noreturn(); void end_progress_output(void); +void cleanup_output_dirs(void); void prep_status(const char *fmt,...) pg_attribute_printf(1, 2); void prep_status_progress(const char *fmt,...) pg_attribute_printf(1, 2); unsigned int str2uint(const char *str); diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 55c7354ba2a..3f11540e189 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -5,6 +5,8 @@ use warnings; use Cwd qw(abs_path); use File::Basename qw(dirname); use File::Compare; +use File::Find qw(find); +use File::Path qw(rmtree); use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; @@ -213,6 +215,39 @@ chdir ${PostgreSQL::Test::Utils::tmp_check}; # Upgrade the instance. $oldnode->stop; + +# Cause a failure at the start of pg_upgrade, this should create the logging +# directory pg_upgrade_output.d but leave it around. Keep --check for an +# early exit. +command_fails( + [ + 'pg_upgrade', '--no-sync', + '-d', $oldnode->data_dir, + '-D', $newnode->data_dir, + '-b', $oldbindir . '/does/not/exist/', + '-B', $newbindir, + '-p', $oldnode->port, + '-P', $newnode->port, + '--check' + ], + 'run of pg_upgrade --check for new instance with incorrect binary path'); +ok(-d $newnode->data_dir . "/pg_upgrade_output.d", + "pg_upgrade_output.d/ not removed after pg_upgrade failure"); +rmtree($newnode->data_dir . "/pg_upgrade_output.d"); + +# --check command works here, cleans up pg_upgrade_output.d. +command_ok( + [ + 'pg_upgrade', '--no-sync', '-d', $oldnode->data_dir, + '-D', $newnode->data_dir, '-b', $oldbindir, + '-B', $newbindir, '-p', $oldnode->port, + '-P', $newnode->port, '--check' + ], + 'run of pg_upgrade --check for new instance'); +ok( !-d $newnode->data_dir . "/pg_upgrade_output.d", + "pg_upgrade_output.d/ not removed after pg_upgrade --check success"); + +# Actual run, pg_upgrade_output.d is removed at the end. command_ok( [ 'pg_upgrade', '--no-sync', '-d', $oldnode->data_dir, @@ -221,14 +256,24 @@ command_ok( '-P', $newnode->port ], 'run of pg_upgrade for new instance'); +ok( !-d $newnode->data_dir . "/pg_upgrade_output.d", + "pg_upgrade_output.d/ removed after pg_upgrade success"); + $newnode->start; # Check if there are any logs coming from pg_upgrade, that would only be # retained on failure. -my $log_path = $newnode->data_dir . "/pg_upgrade_output.d/log"; +my $log_path = $newnode->data_dir . "/pg_upgrade_output.d"; if (-d $log_path) { - foreach my $log (glob("$log_path/*")) + my @log_files; + find( + sub { + push @log_files, $File::Find::name + if $File::Find::name =~ m/.*\.log/; + }, + $newnode->data_dir . "/pg_upgrade_output.d"); + foreach my $log (@log_files) { note "=== contents of $log ===\n"; print slurp_file($log); diff --git a/src/bin/pg_upgrade/util.c b/src/bin/pg_upgrade/util.c index 1a328b42700..f25e14c3214 100644 --- a/src/bin/pg_upgrade/util.c +++ b/src/bin/pg_upgrade/util.c @@ -55,6 +55,48 @@ end_progress_output(void) pg_log(PG_REPORT, "%-*s", MESSAGE_WIDTH, ""); } +/* + * Remove any logs generated internally. To be used once when exiting. + */ +void +cleanup_output_dirs(void) +{ + fclose(log_opts.internal); + + /* Remove dump and log files? */ + if (log_opts.retain) + return; + + (void) rmtree(log_opts.basedir, true); + + /* Remove pg_upgrade_output.d only if empty */ + switch (pg_check_dir(log_opts.rootdir)) + { + case 0: /* non-existent */ + case 3: /* exists and contains a mount point */ + Assert(false); + break; + + case 1: /* exists and empty */ + case 2: /* exists and contains only dot files */ + (void) rmtree(log_opts.rootdir, true); + break; + + case 4: /* exists */ + + /* + * Keep the root directory as this includes some past log + * activity. + */ + break; + + default: + /* different failure, just report it */ + pg_log(PG_WARNING, "could not access directory \"%s\": %m", + log_opts.rootdir); + break; + } +} /* * prep_status