diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5f8e5e699ff..b9c130bcccb 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8037,6 +8037,59 @@ StartupXLOG(void) RequestCheckpoint(CHECKPOINT_FORCE); } +/* + * Verify that, in non-test mode, ./pg_tblspc doesn't contain any real + * directories. + * + * Replay of database creation XLOG records for databases that were later + * dropped can create fake directories in pg_tblspc. By the time consistency + * is reached these directories should have been removed; here we verify + * that this did indeed happen. This is to be called at the point where + * consistent state is reached. + * + * allow_in_place_tablespaces turns the PANIC into a WARNING, which is + * useful for testing purposes, and also allows for an escape hatch in case + * things go south. + */ +static void +CheckTablespaceDirectory(void) +{ + DIR *dir; + struct dirent *de; + + dir = AllocateDir("pg_tblspc"); + while ((de = ReadDir(dir, "pg_tblspc")) != NULL) + { + char path[MAXPGPATH + 10]; +#ifndef WIN32 + struct stat st; +#endif + + /* Skip entries of non-oid names */ + if (strspn(de->d_name, "0123456789") != strlen(de->d_name)) + continue; + + snprintf(path, sizeof(path), "pg_tblspc/%s", de->d_name); + +#ifndef WIN32 + if (lstat(path, &st) < 0) + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + path))); + else if (!S_ISLNK(st.st_mode)) +#else /* WIN32 */ + if (!pgwin32_is_junction(path)) +#endif + ereport(allow_in_place_tablespaces ? WARNING : PANIC, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("unexpected directory entry \"%s\" found in %s", + de->d_name, "pg_tblspc/"), + errdetail("All directory entries in pg_tblspc/ should be symbolic links."), + errhint("Remove those directories, or set allow_in_place_tablespaces to ON transiently to let recovery complete."))); + } +} + /* * Checks if recovery has reached a consistent state. When consistency is * reached and we have a valid starting standby snapshot, tell postmaster @@ -8107,6 +8160,14 @@ CheckRecoveryConsistency(void) */ XLogCheckInvalidPages(); + /* + * Check that pg_tblspc doesn't contain any real directories. Replay + * of Database/CREATE_* records may have created ficticious tablespace + * directories that should have been removed by the time consistency + * was reached. + */ + CheckTablespaceDirectory(); + reachedConsistency = true; ereport(LOG, (errmsg("consistent recovery state reached at %X/%X", diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index f27c3fe8c1c..abe524f1121 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -46,6 +46,7 @@ #include "commands/defrem.h" #include "commands/seclabel.h" #include "commands/tablespace.h" +#include "common/file_perm.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "pgstat.h" @@ -2169,6 +2170,45 @@ get_database_name(Oid dbid) return result; } +/* + * recovery_create_dbdir() + * + * During recovery, there's a case where we validly need to recover a missing + * tablespace directory so that recovery can continue. This happens when + * recovery wants to create a database but the holding tablespace has been + * removed before the server stopped. Since we expect that the directory will + * be gone before reaching recovery consistency, and we have no knowledge about + * the tablespace other than its OID here, we create a real directory under + * pg_tblspc here instead of restoring the symlink. + * + * If only_tblspc is true, then the requested directory must be in pg_tblspc/ + */ +static void +recovery_create_dbdir(char *path, bool only_tblspc) +{ + struct stat st; + + Assert(RecoveryInProgress()); + + if (stat(path, &st) == 0) + return; + + if (only_tblspc && strstr(path, "pg_tblspc/") == NULL) + elog(PANIC, "requested to created invalid directory: %s", path); + + if (reachedConsistency && !allow_in_place_tablespaces) + ereport(PANIC, + errmsg("missing directory \"%s\"", path)); + + elog(reachedConsistency ? WARNING : DEBUG1, + "creating missing directory: %s", path); + + if (pg_mkdir_p(path, pg_dir_create_mode) != 0) + ereport(PANIC, + errmsg("could not create missing directory \"%s\": %m", path)); +} + + /* * DATABASE resource manager's routines */ @@ -2185,6 +2225,7 @@ dbase_redo(XLogReaderState *record) xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) XLogRecGetData(record); char *src_path; char *dst_path; + char *parent_path; struct stat st; src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id); @@ -2204,6 +2245,34 @@ dbase_redo(XLogReaderState *record) dst_path))); } + /* + * If the parent of the target path doesn't exist, create it now. This + * enables us to create the target underneath later. Note that if + * the database dir is not in a tablespace, the parent will always + * exist, so this never runs in that case. + */ + parent_path = pstrdup(dst_path); + get_parent_directory(parent_path); + if (stat(parent_path, &st) < 0) + { + if (errno != ENOENT) + ereport(FATAL, + errmsg("could not stat directory \"%s\": %m", + dst_path)); + + recovery_create_dbdir(parent_path, true); + } + pfree(parent_path); + + /* + * There's a case where the copy source directory is missing for the + * same reason above. Create the emtpy source directory so that + * copydir below doesn't fail. The directory will be dropped soon by + * recovery. + */ + if (stat(src_path, &st) < 0 && errno == ENOENT) + recovery_create_dbdir(src_path, false); + /* * Force dirty buffers out to disk, to ensure source database is * up-to-date for the copy. diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index 52f23e7467a..3c830d15f50 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -154,8 +154,6 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo) /* Directory creation failed? */ if (MakePGDirectory(dir) < 0) { - char *parentdir; - /* Failure other than not exists or not in WAL replay? */ if (errno != ENOENT || !isRedo) ereport(ERROR, @@ -164,36 +162,16 @@ TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo) dir))); /* - * Parent directories are missing during WAL replay, so - * continue by creating simple parent directories rather - * than a symlink. + * During WAL replay, it's conceivable that several levels + * of directories are missing if tablespaces are dropped + * further ahead of the WAL stream than we're currently + * replaying. An easy way forward is to create them as + * plain directories and hope they are removed by further + * WAL replay if necessary. If this also fails, there is + * trouble we cannot get out of, so just report that and + * bail out. */ - - /* create two parents up if not exist */ - parentdir = pstrdup(dir); - get_parent_directory(parentdir); - get_parent_directory(parentdir); - /* Can't create parent and it doesn't already exist? */ - if (MakePGDirectory(parentdir) < 0 && errno != EEXIST) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create directory \"%s\": %m", - parentdir))); - pfree(parentdir); - - /* create one parent up if not exist */ - parentdir = pstrdup(dir); - get_parent_directory(parentdir); - /* Can't create parent and it doesn't already exist? */ - if (MakePGDirectory(parentdir) < 0 && errno != EEXIST) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create directory \"%s\": %m", - parentdir))); - pfree(parentdir); - - /* Create database directory */ - if (MakePGDirectory(dir) < 0) + if (pg_mkdir_p(dir, pg_dir_create_mode) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", diff --git a/src/test/recovery/t/033_replay_tsp_drops.pl b/src/test/recovery/t/033_replay_tsp_drops.pl new file mode 100644 index 00000000000..ee6deebf9c7 --- /dev/null +++ b/src/test/recovery/t/033_replay_tsp_drops.pl @@ -0,0 +1,163 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test replay of tablespace/database creation/drop + +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +sub test_tablespace +{ + my $node_primary = PostgreSQL::Test::Cluster->new("primary1"); + $node_primary->init(allows_streaming => 1); + $node_primary->start; + $node_primary->psql( + 'postgres', + qq[ + SET allow_in_place_tablespaces=on; + CREATE TABLESPACE dropme_ts1 LOCATION ''; + CREATE TABLESPACE dropme_ts2 LOCATION ''; + CREATE TABLESPACE source_ts LOCATION ''; + CREATE TABLESPACE target_ts LOCATION ''; + CREATE DATABASE template_db IS_TEMPLATE = true; + ]); + my $backup_name = 'my_backup'; + $node_primary->backup($backup_name); + + my $node_standby = PostgreSQL::Test::Cluster->new("standby2"); + $node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); + $node_standby->append_conf('postgresql.conf', + "allow_in_place_tablespaces = on"); + $node_standby->start; + + # Make sure connection is made + $node_primary->poll_query_until('postgres', + 'SELECT count(*) = 1 FROM pg_stat_replication'); + + $node_standby->safe_psql('postgres', 'CHECKPOINT'); + + # Do immediate shutdown just after a sequence of CREAT DATABASE / DROP + # DATABASE / DROP TABLESPACE. This causes CREATE DATABASE WAL records + # to be applied to already-removed directories. + my $query = q[ + CREATE DATABASE dropme_db1 WITH TABLESPACE dropme_ts1; + CREATE TABLE t (a int) TABLESPACE dropme_ts2; + CREATE DATABASE dropme_db2 WITH TABLESPACE dropme_ts2; + CREATE DATABASE moveme_db TABLESPACE source_ts; + ALTER DATABASE moveme_db SET TABLESPACE target_ts; + CREATE DATABASE newdb TEMPLATE template_db; + ALTER DATABASE template_db IS_TEMPLATE = false; + DROP DATABASE dropme_db1; + DROP TABLE t; + DROP DATABASE dropme_db2; DROP TABLESPACE dropme_ts2; + DROP TABLESPACE source_ts; + DROP DATABASE template_db; + ]; + + $node_primary->safe_psql('postgres', $query); + $node_primary->wait_for_catchup($node_standby, 'replay', + $node_primary->lsn('write')); + + # show "create missing directory" log message + $node_standby->safe_psql('postgres', + "ALTER SYSTEM SET log_min_messages TO debug1;"); + $node_standby->stop('immediate'); + # Should restart ignoring directory creation error. + is($node_standby->start(fail_ok => 1), 1, "standby node started"); + $node_standby->stop('immediate'); +} + +test_tablespace(); + +# Ensure that a missing tablespace directory during create database +# replay immediately causes panic if the standby has already reached +# consistent state (archive recovery is in progress). + +my $node_primary = PostgreSQL::Test::Cluster->new('primary2'); +$node_primary->init(allows_streaming => 1); +$node_primary->start; + +# Create tablespace +$node_primary->safe_psql( + 'postgres', q[ + SET allow_in_place_tablespaces=on; + CREATE TABLESPACE ts1 LOCATION '' + ]); +$node_primary->safe_psql('postgres', + "CREATE DATABASE db1 WITH TABLESPACE ts1"); + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); +my $node_standby = PostgreSQL::Test::Cluster->new('standby3'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->append_conf('postgresql.conf', + "allow_in_place_tablespaces = on"); +$node_standby->start; + +# Make sure standby reached consistency and starts accepting connections +$node_standby->poll_query_until('postgres', 'SELECT 1', '1'); + +# Remove standby tablespace directory so it will be missing when +# replay resumes. +my $tspoid = $node_standby->safe_psql('postgres', + "SELECT oid FROM pg_tablespace WHERE spcname = 'ts1';"); +my $tspdir = $node_standby->data_dir . "/pg_tblspc/$tspoid"; +File::Path::rmtree($tspdir); + +my $logstart = get_log_size($node_standby); + +# Create a database in the tablespace and a table in default tablespace +$node_primary->safe_psql( + 'postgres', + q[ + CREATE TABLE should_not_replay_insertion(a int); + CREATE DATABASE db2 WITH TABLESPACE ts1; + INSERT INTO should_not_replay_insertion VALUES (1); + ]); + +# Standby should fail and should not silently skip replaying the wal +# In this test, PANIC turns into WARNING by allow_in_place_tablespaces. +# Check the log messages instead of confirming standby failure. +my $max_attempts = $PostgreSQL::Test::Utils::timeout_default; +while ($max_attempts-- >= 0) +{ + last + if ( + find_in_log( + $node_standby, "WARNING: creating missing directory: pg_tblspc/", + $logstart)); + sleep 1; +} +ok($max_attempts > 0, "invalid directory creation is detected"); + +done_testing(); + + +# return the size of logfile of $node in bytes +sub get_log_size +{ + my ($node) = @_; + + return (stat $node->logfile)[7]; +} + +# find $pat in logfile of $node after $off-th byte +sub find_in_log +{ + my ($node, $pat, $off) = @_; + + $off = 0 unless defined $off; + my $log = PostgreSQL::Test::Utils::slurp_file($node->logfile); + return 0 if (length($log) <= $off); + + $log = substr($log, $off); + + return $log =~ m/$pat/; +}