mirror of
https://github.com/postgres/postgres.git
synced 2025-05-02 11:44:50 +03:00
Protect against SnapshotNow race conditions in pg_tablespace scans.
Use of SnapshotNow is known to expose us to race conditions if the tuple(s) being sought could be updated by concurrently-committing transactions. CREATE DATABASE and DROP DATABASE are particularly exposed because they do heavyweight filesystem operations during their scans of pg_tablespace, so that the scans run for a very long time compared to most. Furthermore, the potential consequences of a missed or twice-visited row are nastier than average: * createdb() could fail with a bogus "file already exists" error, or silently fail to copy one or more tablespace's worth of files into the new database. * remove_dbtablespaces() could miss one or more tablespaces, thus failing to free filesystem space for the dropped database. * check_db_file_conflict() could likewise miss a tablespace, leading to an OID conflict that could result in data loss either immediately or in future operations. (This seems of very low probability, though, since a duplicate database OID would be unlikely to start with.) Hence, it seems worth fixing these three places to use MVCC snapshots, even though this will someday be superseded by a generic solution to SnapshotNow race conditions. Back-patch to all active branches. Stephen Frost and Tom Lane
This commit is contained in:
parent
8d1fbf947d
commit
d98547eb43
@ -129,6 +129,7 @@ createdb(const CreatedbStmt *stmt)
|
|||||||
int notherbackends;
|
int notherbackends;
|
||||||
int npreparedxacts;
|
int npreparedxacts;
|
||||||
createdb_failure_params fparms;
|
createdb_failure_params fparms;
|
||||||
|
Snapshot snapshot;
|
||||||
|
|
||||||
/* Extract options from the statement node tree */
|
/* Extract options from the statement node tree */
|
||||||
foreach(option, stmt->options)
|
foreach(option, stmt->options)
|
||||||
@ -532,6 +533,29 @@ createdb(const CreatedbStmt *stmt)
|
|||||||
*/
|
*/
|
||||||
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
|
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Take an MVCC snapshot to use while scanning through pg_tablespace. For
|
||||||
|
* safety, register the snapshot (this prevents it from changing if
|
||||||
|
* something else were to request a snapshot during the loop).
|
||||||
|
*
|
||||||
|
* Traversing pg_tablespace with an MVCC snapshot is necessary to provide
|
||||||
|
* us with a consistent view of the tablespaces that exist. Using
|
||||||
|
* SnapshotNow here would risk seeing the same tablespace multiple times,
|
||||||
|
* or worse not seeing a tablespace at all, if its tuple is moved around
|
||||||
|
* by a concurrent update (eg an ACL change).
|
||||||
|
*
|
||||||
|
* Inconsistency of this sort is inherent to all SnapshotNow scans, unless
|
||||||
|
* some lock is held to prevent concurrent updates of the rows being
|
||||||
|
* sought. There should be a generic fix for that, but in the meantime
|
||||||
|
* it's worth fixing this case in particular because we are doing very
|
||||||
|
* heavyweight operations within the scan, so that the elapsed time for
|
||||||
|
* the scan is vastly longer than for most other catalog scans. That
|
||||||
|
* means there's a much wider window for concurrent updates to cause
|
||||||
|
* trouble here than anywhere else. XXX this code should be changed
|
||||||
|
* whenever a generic fix is implemented.
|
||||||
|
*/
|
||||||
|
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Once we start copying subdirectories, we need to be able to clean 'em
|
* Once we start copying subdirectories, we need to be able to clean 'em
|
||||||
* up if we fail. Use an ENSURE block to make sure this happens. (This
|
* up if we fail. Use an ENSURE block to make sure this happens. (This
|
||||||
@ -549,7 +573,7 @@ createdb(const CreatedbStmt *stmt)
|
|||||||
* each one to the new database.
|
* each one to the new database.
|
||||||
*/
|
*/
|
||||||
rel = heap_open(TableSpaceRelationId, AccessShareLock);
|
rel = heap_open(TableSpaceRelationId, AccessShareLock);
|
||||||
scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
|
scan = heap_beginscan(rel, snapshot, 0, NULL);
|
||||||
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
||||||
{
|
{
|
||||||
Oid srctablespace = HeapTupleGetOid(tuple);
|
Oid srctablespace = HeapTupleGetOid(tuple);
|
||||||
@ -653,6 +677,9 @@ createdb(const CreatedbStmt *stmt)
|
|||||||
}
|
}
|
||||||
PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
|
PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
|
||||||
PointerGetDatum(&fparms));
|
PointerGetDatum(&fparms));
|
||||||
|
|
||||||
|
/* Free our snapshot */
|
||||||
|
UnregisterSnapshot(snapshot);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1698,9 +1725,20 @@ remove_dbtablespaces(Oid db_id)
|
|||||||
Relation rel;
|
Relation rel;
|
||||||
HeapScanDesc scan;
|
HeapScanDesc scan;
|
||||||
HeapTuple tuple;
|
HeapTuple tuple;
|
||||||
|
Snapshot snapshot;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* As in createdb(), we'd better use an MVCC snapshot here, since this
|
||||||
|
* scan can run for a long time. Duplicate visits to tablespaces would be
|
||||||
|
* harmless, but missing a tablespace could result in permanently leaked
|
||||||
|
* files.
|
||||||
|
*
|
||||||
|
* XXX change this when a generic fix for SnapshotNow races is implemented
|
||||||
|
*/
|
||||||
|
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
||||||
|
|
||||||
rel = heap_open(TableSpaceRelationId, AccessShareLock);
|
rel = heap_open(TableSpaceRelationId, AccessShareLock);
|
||||||
scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
|
scan = heap_beginscan(rel, snapshot, 0, NULL);
|
||||||
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
||||||
{
|
{
|
||||||
Oid dsttablespace = HeapTupleGetOid(tuple);
|
Oid dsttablespace = HeapTupleGetOid(tuple);
|
||||||
@ -1746,6 +1784,7 @@ remove_dbtablespaces(Oid db_id)
|
|||||||
|
|
||||||
heap_endscan(scan);
|
heap_endscan(scan);
|
||||||
heap_close(rel, AccessShareLock);
|
heap_close(rel, AccessShareLock);
|
||||||
|
UnregisterSnapshot(snapshot);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1767,9 +1806,19 @@ check_db_file_conflict(Oid db_id)
|
|||||||
Relation rel;
|
Relation rel;
|
||||||
HeapScanDesc scan;
|
HeapScanDesc scan;
|
||||||
HeapTuple tuple;
|
HeapTuple tuple;
|
||||||
|
Snapshot snapshot;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* As in createdb(), we'd better use an MVCC snapshot here; missing a
|
||||||
|
* tablespace could result in falsely reporting the OID is unique, with
|
||||||
|
* disastrous future consequences per the comment above.
|
||||||
|
*
|
||||||
|
* XXX change this when a generic fix for SnapshotNow races is implemented
|
||||||
|
*/
|
||||||
|
snapshot = RegisterSnapshot(GetLatestSnapshot());
|
||||||
|
|
||||||
rel = heap_open(TableSpaceRelationId, AccessShareLock);
|
rel = heap_open(TableSpaceRelationId, AccessShareLock);
|
||||||
scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
|
scan = heap_beginscan(rel, snapshot, 0, NULL);
|
||||||
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
||||||
{
|
{
|
||||||
Oid dsttablespace = HeapTupleGetOid(tuple);
|
Oid dsttablespace = HeapTupleGetOid(tuple);
|
||||||
@ -1795,6 +1844,8 @@ check_db_file_conflict(Oid db_id)
|
|||||||
|
|
||||||
heap_endscan(scan);
|
heap_endscan(scan);
|
||||||
heap_close(rel, AccessShareLock);
|
heap_close(rel, AccessShareLock);
|
||||||
|
UnregisterSnapshot(snapshot);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user