diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index d889d69387e..83f605bc974 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8255,6 +8255,12 @@ CheckRecoveryConsistency(void)
 		 */
 		XLogCheckInvalidPages();
 
+		/*
+		 * Check if the XLOG sequence contained any unresolved references to
+		 * missing directories.
+		 */
+		XLogCheckMissingDirs();
+
 		reachedConsistency = true;
 		ereport(LOG,
 				(errmsg("consistent recovery state reached at %X/%X",
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index d17d660f460..8694a0ba302 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -34,6 +34,164 @@
 
 /* GUC variable */
 bool		ignore_invalid_pages = false;
+
+/*
+ * If a create database WAL record is being replayed more than once during
+ * crash recovery on a standby, it is possible that either the tablespace
+ * directory or the template database directory is missing.  This happens when
+ * the directories are removed by replay of subsequent drop records.  Note
+ * that this problem happens only on a standby and not on the primary.  On the
+ * primary, a checkpoint is created at the end of the create database
+ * operation.  On a standby, however, such a strategy (creating restart points
+ * during replay) is not viable because it will slow down WAL replay.
+ *
+ * The alternative is to track references to each missing directory
+ * encountered when performing crash recovery in the following hash table.
+ * Similar to the invalid page table below, the expectation is that each
+ * missing directory entry should be matched with a drop database or drop
+ * tablespace WAL record by the end of crash recovery.
+ */
+typedef struct xl_missing_dir_key
+{
+	Oid			spcNode;		/* tablespace OID; always valid */
+	Oid			dbNode;			/* database OID, or InvalidOid for a tablespace */
+} xl_missing_dir_key;
+
+typedef struct xl_missing_dir
+{
+	xl_missing_dir_key key;		/* hash key */
+	char		path[MAXPGPATH];	/* path as first reported, for messages */
+} xl_missing_dir;
+
+static HTAB *missing_dir_tab = NULL;	/* created on first use */
+
+
+/*
+ * Record that a directory needed by a database creation record is missing.
+ * Each entry must be cancelled by a later database or tablespace removal
+ * record (see XLogForgetMissingDir); XLogCheckMissingDirs verifies that.
+ */
+void
+XLogRememberMissingDir(Oid spcNode, Oid dbNode, char *path)
+{
+	xl_missing_dir_key key;
+	bool		found;
+	xl_missing_dir *entry;
+
+	/*
+	 * Database OID may be invalid but tablespace OID must be valid.  If
+	 * dbNode is InvalidOid, we are logging a missing tablespace directory,
+	 * otherwise we are logging a missing database directory.
+	 */
+	Assert(OidIsValid(spcNode));
+
+	if (missing_dir_tab == NULL)
+	{
+		/* create hash table when first needed */
+		HASHCTL		ctl;
+
+		memset(&ctl, 0, sizeof(ctl));
+		ctl.keysize = sizeof(xl_missing_dir_key);
+		ctl.entrysize = sizeof(xl_missing_dir);
+
+		missing_dir_tab = hash_create("XLOG missing directory table",
+									  100,
+									  &ctl,
+									  HASH_ELEM | HASH_BLOBS);
+	}
+
+	key.spcNode = spcNode;
+	key.dbNode = dbNode;
+
+	entry = hash_search(missing_dir_tab, &key, HASH_ENTER, &found);
+
+	if (found)					/* already remembered; stored path is kept */
+	{
+		if (dbNode == InvalidOid)
+			elog(DEBUG1, "missing directory %s (tablespace %u) already exists: %s",
+				 path, spcNode, entry->path);
+		else
+			elog(DEBUG1, "missing directory %s (tablespace %u database %u) already exists: %s",
+				 path, spcNode, dbNode, entry->path);
+	}
+	else
+	{
+		strlcpy(entry->path, path, sizeof(entry->path));
+		if (dbNode == InvalidOid)
+			elog(DEBUG1, "logged missing dir %s (tablespace %u)",
+				 path, spcNode);
+		else
+			elog(DEBUG1, "logged missing dir %s (tablespace %u database %u)",
+				 path, spcNode, dbNode);
+	}
+}
+
+/*
+ * Remove an entry from the list of directories not found.  This is done when
+ * the matching database or tablespace removal WAL record is replayed.
+ */
+void
+XLogForgetMissingDir(Oid spcNode, Oid dbNode)
+{
+	xl_missing_dir_key key;
+
+	key.spcNode = spcNode;
+	key.dbNode = dbNode;
+
+	/* Database OID may be invalid but tablespace OID must be valid. */
+	Assert(OidIsValid(spcNode));
+
+	if (missing_dir_tab == NULL)
+		return;					/* nothing was ever remembered */
+
+	if (hash_search(missing_dir_tab, &key, HASH_REMOVE, NULL) != NULL)
+	{
+		if (dbNode == InvalidOid)
+		{
+			elog(DEBUG2, "forgot missing dir (tablespace %u)", spcNode);
+		}
+		else
+		{
+			char	   *path = GetDatabasePath(dbNode, spcNode);
+
+			elog(DEBUG2, "forgot missing dir %s (tablespace %u database %u)",
+				 path, spcNode, dbNode);
+			pfree(path);
+		}
+	}
+}
+
+/*
+ * Called from CheckRecoveryConsistency() just before consistency is declared.
+ * Emits a WARNING per unmatched directory, then PANICs if there were any.
+ */
+void
+XLogCheckMissingDirs(void)
+{
+	HASH_SEQ_STATUS status;
+	xl_missing_dir *hentry;
+	bool		foundone = false;
+
+	if (missing_dir_tab == NULL)
+		return;					/* nothing to do */
+
+	hash_seq_init(&status, missing_dir_tab);
+
+	while ((hentry = (xl_missing_dir *) hash_seq_search(&status)) != NULL)
+	{
+		elog(WARNING, "missing directory \"%s\" tablespace %u database %u",
+			 hentry->path, hentry->key.spcNode, hentry->key.dbNode);
+		foundone = true;
+	}
+
+	if (foundone)
+		elog(PANIC, "WAL contains references to missing directories");
+
+	hash_destroy(missing_dir_tab);	/* table is empty here; release it */
+	missing_dir_tab = NULL;
+}
+
+
 /*
  * During XLOG replay, we may see XLOG records for incremental updates of
  * pages that no longer exist, because their relation was later dropped or
@@ -59,7 +217,6 @@ typedef struct xl_invalid_page
 
 static HTAB *invalid_page_tab = NULL;
 
-
 /* Report a reference to an invalid page */
 static void
 report_invalid_page(int elevel, RelFileNode node, ForkNumber forkno,
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 2b159b60ebb..c37065c320c 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -2185,7 +2185,9 @@ dbase_redo(XLogReaderState *record)
 		xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) XLogRecGetData(record);
 		char	   *src_path;
 		char	   *dst_path;
+		char	   *parent_path;	/* parent directory of dst_path */
 		struct stat st;
+		bool		skip = false;	/* set when a needed directory is missing */
 
 		src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
 		dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
@@ -2203,6 +2205,56 @@ dbase_redo(XLogReaderState *record)
 					(errmsg("some useless files may be left behind in old database directory \"%s\"",
 							dst_path)));
 		}
+		else if (!reachedConsistency)
+		{
+			/*
+			 * It is possible that a drop tablespace record appearing later in
+			 * WAL has already been replayed -- in other words, that we are
+			 * replaying the database creation record a second time with no
+			 * intervening checkpoint.  In that case, the tablespace directory
+			 * has already been removed and the create database operation
+			 * cannot be replayed.  Skip the replay itself, but remember the
+			 * fact that the tablespace directory is missing, to be matched
+			 * with the expected tablespace drop record later.
+			 */
+			parent_path = pstrdup(dst_path);
+			get_parent_directory(parent_path);
+			if (!(stat(parent_path, &st) == 0 && S_ISDIR(st.st_mode)))
+			{
+				XLogRememberMissingDir(xlrec->tablespace_id, InvalidOid, parent_path);
+				skip = true;
+				ereport(WARNING,
+						(errmsg("skipping replay of database creation WAL record"),
+						 errdetail("The target tablespace \"%s\" directory was not found.",
+								   parent_path),
+						 errhint("A future WAL record that removes the directory before reaching consistent mode is expected.")));
+			}
+			pfree(parent_path);
+		}
+
+		/*
+		 * If the source directory is missing, skip the copy and make a note of
+		 * it for later.
+		 *
+		 * One possible reason for this is that the template database used for
+		 * creating this database may have been dropped, as noted above.
+		 * Moving a database from one tablespace to another may also be the
+		 * cause.
+		 */
+		if (!(stat(src_path, &st) == 0 && S_ISDIR(st.st_mode)) &&
+			!reachedConsistency)
+		{
+			XLogRememberMissingDir(xlrec->src_tablespace_id, xlrec->src_db_id, src_path);
+			skip = true;
+			ereport(WARNING,
+					(errmsg("skipping replay of database creation WAL record"),
+					 errdetail("The source database directory \"%s\" was not found.",
+							   src_path),
+					 errhint("A future WAL record that removes the directory before reaching consistent mode is expected.")));
+		}
+
+		if (skip)
+			return;
 
 		/*
 		 * Force dirty buffers out to disk, to ensure source database is
@@ -2260,6 +2312,10 @@ dbase_redo(XLogReaderState *record)
 				ereport(WARNING,
 						(errmsg("some useless files may be left behind in old database directory \"%s\"",
 								dst_path)));
+
+			if (!reachedConsistency)
+				XLogForgetMissingDir(xlrec->tablespace_ids[i], xlrec->db_id);
+
 			pfree(dst_path);
 		}
 
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 5411638696b..eedc6162128 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -58,6 +58,7 @@
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
+#include "access/xlogutils.h"
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/indexing.h"
@@ -1529,6 +1530,27 @@ tblspc_redo(XLogReaderState *record)
 	{
 		xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);
 
+		/* This record resolves any missing-directory entry for the tablespace. */
+		if (!reachedConsistency)
+			XLogForgetMissingDir(xlrec->ts_id, InvalidOid);
+
+		/*
+		 * Before we remove the tablespace directory, update minimum recovery
+		 * point to cover this WAL record. Once the tablespace is removed,
+		 * there's no going back. This manually enforces the WAL-first rule.
+		 * Doing this before the removal means that if the removal fails for
+		 * some reason, the directory is left alone and needs to be manually
+		 * removed.  Alternatively we could update the minimum recovery point
+		 * after removal, but that would leave a small window where the
+		 * WAL-first rule could be violated.
+		 *
+		 * Do this regardless of reachedConsistency: the directory removal is
+		 * irreversible in either case, and after a crash recovery must not
+		 * be allowed to declare consistency again before this record has
+		 * been replayed.
+		 */
+		XLogFlush(record->EndRecPtr);
+
 		/*
 		 * If we issued a WAL record for a drop tablespace it implies that
 		 * there were no files in it at all when the DROP was done. That means
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 9ac602b674d..2c2a22e34c3 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -23,6 +23,10 @@ extern void XLogDropDatabase(Oid dbid);
 extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
 								 BlockNumber nblocks);
 
+extern void XLogRememberMissingDir(Oid spcNode, Oid dbNode, char *path);
+extern void XLogForgetMissingDir(Oid spcNode, Oid dbNode);
+extern void XLogCheckMissingDirs(void);
+
 /* Result codes for XLogReadBufferForRedo[Extended] */
 typedef enum
 {
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index d71d1adbecd..ccbc69f15b8 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3700,6 +3700,8 @@ xl_invalid_page
 xl_invalid_page_key
 xl_invalidations
 xl_logical_message
+xl_missing_dir
+xl_missing_dir_key
 xl_multi_insert_tuple
 xl_multixact_create
 xl_multixact_truncate