mirror of
https://github.com/postgres/postgres.git
synced 2025-05-05 09:19:17 +03:00
Handle interleavings between CREATE DATABASE steps and base backup.
Restoring a base backup taken in the middle of CreateDirAndVersionFile() or write_relmap_file() would lose the function's effects. The symptom was the absence of the database directory, PG_VERSION file, or pg_filenode.map. If the directory was missing, recovery would fail. If either file was missing, recovery would not fail, but the new database would be unusable. Fix CreateDirAndVersionFile() with the transam/README "action first and then write a WAL entry" strategy. That has a side benefit of moving filesystem mutations out of a critical section, reducing the ways to PANIC. Fix the write_relmap_file() call with a lock acquisition, so it interacts with checkpoints like non-CREATE DATABASE calls do. Back-patch to v15, where commit 9c08aea6a3090a396be334cc58c511edab05776a introduced STRATEGY=WAL_LOG and made it the default. Discussion: https://postgr.es/m/20240130195003.0a.nmisch@google.com
This commit is contained in:
parent
970b1aeeba
commit
d493bed28f
@ -461,35 +461,12 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
|
|||||||
char buf[16];
|
char buf[16];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Prepare version data before starting a critical section.
|
* Note that we don't have to copy version data from the source database;
|
||||||
*
|
* there's only one legal value.
|
||||||
* Note that we don't have to copy this from the source database; there's
|
|
||||||
* only one legal value.
|
|
||||||
*/
|
*/
|
||||||
sprintf(buf, "%s\n", PG_MAJORVERSION);
|
sprintf(buf, "%s\n", PG_MAJORVERSION);
|
||||||
nbytes = strlen(PG_MAJORVERSION) + 1;
|
nbytes = strlen(PG_MAJORVERSION) + 1;
|
||||||
|
|
||||||
/* If we are not in WAL replay then write the WAL. */
|
|
||||||
if (!isRedo)
|
|
||||||
{
|
|
||||||
xl_dbase_create_wal_log_rec xlrec;
|
|
||||||
XLogRecPtr lsn;
|
|
||||||
|
|
||||||
START_CRIT_SECTION();
|
|
||||||
|
|
||||||
xlrec.db_id = dbid;
|
|
||||||
xlrec.tablespace_id = tsid;
|
|
||||||
|
|
||||||
XLogBeginInsert();
|
|
||||||
XLogRegisterData((char *) (&xlrec),
|
|
||||||
sizeof(xl_dbase_create_wal_log_rec));
|
|
||||||
|
|
||||||
lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
|
|
||||||
|
|
||||||
/* As always, WAL must hit the disk before the data update does. */
|
|
||||||
XLogFlush(lsn);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Create database directory. */
|
/* Create database directory. */
|
||||||
if (MakePGDirectory(dbpath) < 0)
|
if (MakePGDirectory(dbpath) < 0)
|
||||||
{
|
{
|
||||||
@ -533,9 +510,24 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo)
|
|||||||
/* Close the version file. */
|
/* Close the version file. */
|
||||||
CloseTransientFile(fd);
|
CloseTransientFile(fd);
|
||||||
|
|
||||||
/* Critical section done. */
|
/* If we are not in WAL replay then write the WAL. */
|
||||||
if (!isRedo)
|
if (!isRedo)
|
||||||
|
{
|
||||||
|
xl_dbase_create_wal_log_rec xlrec;
|
||||||
|
|
||||||
|
START_CRIT_SECTION();
|
||||||
|
|
||||||
|
xlrec.db_id = dbid;
|
||||||
|
xlrec.tablespace_id = tsid;
|
||||||
|
|
||||||
|
XLogBeginInsert();
|
||||||
|
XLogRegisterData((char *) (&xlrec),
|
||||||
|
sizeof(xl_dbase_create_wal_log_rec));
|
||||||
|
|
||||||
|
(void) XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG);
|
||||||
|
|
||||||
END_CRIT_SECTION();
|
END_CRIT_SECTION();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
16
src/backend/utils/cache/relmapper.c
vendored
16
src/backend/utils/cache/relmapper.c
vendored
@ -298,14 +298,15 @@ RelationMapCopy(Oid dbid, Oid tsid, char *srcdbpath, char *dstdbpath)
|
|||||||
* Write the same data into the destination database's relmap file.
|
* Write the same data into the destination database's relmap file.
|
||||||
*
|
*
|
||||||
* No sinval is needed because no one can be connected to the destination
|
* No sinval is needed because no one can be connected to the destination
|
||||||
* database yet. For the same reason, there is no need to acquire
|
* database yet.
|
||||||
* RelationMappingLock.
|
|
||||||
*
|
*
|
||||||
* There's no point in trying to preserve files here. The new database
|
* There's no point in trying to preserve files here. The new database
|
||||||
* isn't usable yet anyway, and won't ever be if we can't install a relmap
|
* isn't usable yet anyway, and won't ever be if we can't install a relmap
|
||||||
* file.
|
* file.
|
||||||
*/
|
*/
|
||||||
|
LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
|
||||||
write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath);
|
write_relmap_file(&map, true, false, false, dbid, tsid, dstdbpath);
|
||||||
|
LWLockRelease(RelationMappingLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -627,10 +628,12 @@ RelationMapFinishBootstrap(void)
|
|||||||
Assert(pending_local_updates.num_mappings == 0);
|
Assert(pending_local_updates.num_mappings == 0);
|
||||||
|
|
||||||
/* Write the files; no WAL or sinval needed */
|
/* Write the files; no WAL or sinval needed */
|
||||||
|
LWLockAcquire(RelationMappingLock, LW_EXCLUSIVE);
|
||||||
write_relmap_file(&shared_map, false, false, false,
|
write_relmap_file(&shared_map, false, false, false,
|
||||||
InvalidOid, GLOBALTABLESPACE_OID, "global");
|
InvalidOid, GLOBALTABLESPACE_OID, "global");
|
||||||
write_relmap_file(&local_map, false, false, false,
|
write_relmap_file(&local_map, false, false, false,
|
||||||
MyDatabaseId, MyDatabaseTableSpace, DatabasePath);
|
MyDatabaseId, MyDatabaseTableSpace, DatabasePath);
|
||||||
|
LWLockRelease(RelationMappingLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -876,6 +879,15 @@ write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval,
|
|||||||
int fd;
|
int fd;
|
||||||
char mapfilename[MAXPGPATH];
|
char mapfilename[MAXPGPATH];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Even without concurrent use of this map, CheckPointRelationMap() relies
|
||||||
|
* on this locking. Without it, a restore of a base backup taken after
|
||||||
|
* this function's XLogInsert() and before its durable_rename() would not
|
||||||
|
* have the changes. wal_level=minimal doesn't need the lock, but this
|
||||||
|
* isn't performance-critical enough for such a micro-optimization.
|
||||||
|
*/
|
||||||
|
Assert(LWLockHeldByMeInMode(RelationMappingLock, LW_EXCLUSIVE));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fill in the overhead fields and update CRC.
|
* Fill in the overhead fields and update CRC.
|
||||||
*/
|
*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user