1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-03 20:02:46 +03:00

Create a "relation mapping" infrastructure to support changing the relfilenodes
of shared or nailed system catalogs.  This has two key benefits:

* The new CLUSTER-based VACUUM FULL can be applied safely to all catalogs.

* We no longer have to use an unsafe reindex-in-place approach for reindexing
  shared catalogs.

CLUSTER on nailed catalogs now works too, although I left it disabled on
shared catalogs because the resulting pg_index.indisclustered update would
only be visible in one database.

Since reindexing shared system catalogs is now fully transactional and
crash-safe, the former special cases in REINDEX behavior have been removed;
shared catalogs are treated the same as non-shared.

This commit does not do anything about the recently-discussed problem of
deadlocks between VACUUM FULL/CLUSTER on a system catalog and other
concurrent queries; will address that in a separate patch.  As a stopgap,
parallel_schedule has been tweaked to run vacuum.sql by itself, to avoid
such failures during the regression tests.
This commit is contained in:
Tom Lane
2010-02-07 20:48:13 +00:00
parent 7fc30c488f
commit b9b8831ad6
54 changed files with 2316 additions and 585 deletions

View File

@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* cluster.c
* CLUSTER a table on an index.
* CLUSTER a table on an index. This is now also used for VACUUM FULL.
*
* There is hardly anything left of Paul Brown's original implementation...
*
@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.197 2010/02/04 00:09:14 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.198 2010/02/07 20:48:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -44,6 +44,7 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/relcache.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
@ -223,7 +224,8 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
StartTransactionCommand();
/* functions in indexes may want a snapshot set */
PushActiveSnapshot(GetTransactionSnapshot());
cluster_rel(rvtc->tableOid, rvtc->indexOid, true, stmt->verbose, -1, -1);
cluster_rel(rvtc->tableOid, rvtc->indexOid, true, stmt->verbose,
-1, -1);
PopActiveSnapshot();
CommitTransactionCommand();
}
@ -245,13 +247,13 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
* GRANT, inheritance nor references to this table (this was a bug
* in releases thru 7.3).
*
* Also create new indexes and swap the filenodes with the old indexes the
* same way we do for the relation. Since we are effectively bulk-loading
* Indexes are rebuilt too, via REINDEX. Since we are effectively bulk-loading
* the new table, it's better to create the indexes afterwards than to fill
* them incrementally while we load the table.
*
* If indexOid is InvalidOid, the table will be rewritten in physical order
* instead of index order.
* instead of index order. This is the new implementation of VACUUM FULL,
* and error messages should refer to the operation as VACUUM not CLUSTER.
*/
void
cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose,
@ -300,8 +302,7 @@ cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose,
* somebody is executing a database-wide CLUSTER), because there is
* another check in cluster() which will stop any attempt to cluster
* remote temp tables by name. There is another check in
* check_index_is_clusterable which is redundant, but we leave it for
* extra safety.
* cluster_rel which is redundant, but we leave it for extra safety.
*/
if (RELATION_IS_OTHER_TEMP(OldHeap))
{
@ -344,10 +345,44 @@ cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose,
}
}
/* Check heap and index are valid to cluster on */
check_index_is_clusterable(OldHeap, indexOid, recheck);
/*
* We allow VACUUM FULL, but not CLUSTER, on shared catalogs. CLUSTER
* would work in most respects, but the index would only get marked as
* indisclustered in the current database, leading to unexpected behavior
* if CLUSTER were later invoked in another database.
*/
if (OidIsValid(indexOid) && OldHeap->rd_rel->relisshared)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster a shared catalog")));
/* rebuild_relation does all the dirty work */
/*
* Don't process temp tables of other backends ... their local
* buffer manager is not going to cope.
*/
if (RELATION_IS_OTHER_TEMP(OldHeap))
{
if (OidIsValid(indexOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster temporary tables of other sessions")));
else
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot vacuum temporary tables of other sessions")));
}
/*
* Also check for active uses of the relation in the current transaction,
* including open scans and pending AFTER trigger events.
*/
CheckTableNotInUse(OldHeap, OidIsValid(indexOid) ? "CLUSTER" : "VACUUM");
/* Check heap and index are valid to cluster on */
if (OidIsValid(indexOid))
check_index_is_clusterable(OldHeap, indexOid, recheck);
/* Log what we're doing (this could use more effort) */
if (OidIsValid(indexOid))
ereport(verbose ? INFO : DEBUG2,
(errmsg("clustering \"%s.%s\"",
@ -358,6 +393,8 @@ cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose,
(errmsg("vacuuming \"%s.%s\"",
get_namespace_name(RelationGetNamespace(OldHeap)),
RelationGetRelationName(OldHeap))));
/* rebuild_relation does all the dirty work */
rebuild_relation(OldHeap, indexOid, freeze_min_age, freeze_table_age);
/* NB: rebuild_relation does heap_close() on OldHeap */
@ -376,38 +413,6 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck)
{
Relation OldIndex;
/*
* Disallow clustering system relations. This will definitely NOT work
* for shared relations (we have no way to update pg_class rows in other
* databases), nor for nailed-in-cache relations (the relfilenode values
* for those are hardwired, see relcache.c). It might work for other
* system relations, but I ain't gonna risk it.
*/
if (IsSystemRelation(OldHeap))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("\"%s\" is a system catalog",
RelationGetRelationName(OldHeap))));
/*
* Don't allow cluster on temp tables of other backends ... their local
* buffer manager is not going to cope.
*/
if (RELATION_IS_OTHER_TEMP(OldHeap))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster temporary tables of other sessions")));
/*
* Also check for active uses of the relation in the current transaction,
* including open scans and pending AFTER trigger events.
*/
CheckTableNotInUse(OldHeap, "CLUSTER");
/* Skip checks for index if not specified. */
if (!OidIsValid(indexOid))
return;
OldIndex = index_open(indexOid, AccessExclusiveLock);
/*
@ -421,6 +426,13 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck)
RelationGetRelationName(OldIndex),
RelationGetRelationName(OldHeap))));
/* Index AM must allow clustering */
if (!OldIndex->rd_am->amclusterable)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
RelationGetRelationName(OldIndex))));
/*
* Disallow clustering on incomplete indexes (those that might not index
* every row of the relation). We could relax this by making a separate
@ -433,12 +445,6 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck)
errmsg("cannot cluster on partial index \"%s\"",
RelationGetRelationName(OldIndex))));
if (!OldIndex->rd_am->amclusterable)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot cluster on index \"%s\" because access method does not support clustering",
RelationGetRelationName(OldIndex))));
if (!OldIndex->rd_am->amindexnulls)
{
AttrNumber colno;
@ -585,6 +591,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
Oid tableOid = RelationGetRelid(OldHeap);
Oid tableSpace = OldHeap->rd_rel->reltablespace;
Oid OIDNewHeap;
bool is_system_catalog;
bool swap_toast_by_content;
TransactionId frozenXid;
@ -592,6 +599,9 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
if (OidIsValid(indexOid))
mark_index_clustered(OldHeap, indexOid);
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(OldHeap);
/* Close relcache entry, but keep lock until transaction commit */
heap_close(OldHeap, NoLock);
@ -603,12 +613,12 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
freeze_min_age, freeze_table_age,
&swap_toast_by_content, &frozenXid);
/* Swap the physical files of the old and new heaps */
swap_relation_files(tableOid, OIDNewHeap,
swap_toast_by_content, frozenXid);
/* Destroy the new heap, removing the old data along with it */
cleanup_heap_swap(tableOid, OIDNewHeap, swap_toast_by_content);
/*
* Swap the physical files of the target and transient tables, then
* rebuild the target's indexes and throw away the transient table.
*/
finish_heap_swap(tableOid, OIDNewHeap, is_system_catalog,
swap_toast_by_content, frozenXid);
}
@ -619,8 +629,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
* NewTableSpace which might be different from OldHeap's.
*
* After this, the caller should load the new heap with transferred/modified
* data, then call swap_relation_files, and finally call cleanup_heap_swap to
* remove the debris.
* data, then call finish_heap_swap to complete the operation.
*/
Oid
make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
@ -666,6 +675,11 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
* relnames. Working around this seems more trouble than it's worth; in
* particular, we can't create the new heap in a different namespace from
* the old, or we will have problems with the TEMP status of temp tables.
*
* Note: the new heap is not a shared relation, even if we are rebuilding
* a shared rel. However, we do make the new heap mapped if the source
* is mapped. This simplifies swap_relation_files, and is absolutely
* necessary for rebuilding pg_class, for reasons explained there.
*/
snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
@ -679,13 +693,14 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
tupdesc,
NIL,
OldHeap->rd_rel->relkind,
OldHeap->rd_rel->relisshared,
false,
RelationIsMapped(OldHeap),
true,
0,
ONCOMMIT_NOOP,
reloptions,
false,
allowSystemTableMods);
true);
ReleaseSysCache(tuple);
@ -696,14 +711,20 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
CommandCounterIncrement();
/*
* If necessary, create a TOAST table for the new relation. Note that
* AlterTableCreateToastTable ends with CommandCounterIncrement(), so that
* the TOAST table will be visible for insertion.
* If necessary, create a TOAST table for the new relation.
*
* If the relation doesn't have a TOAST table already, we can't need one
* for the new relation. The other way around is possible though: if
* some wide columns have been dropped, AlterTableCreateToastTable
* can decide that no TOAST table is needed for the new table.
*
* Note that AlterTableCreateToastTable ends with CommandCounterIncrement,
* so that the TOAST table will be visible for insertion.
*/
toastid = OldHeap->rd_rel->reltoastrelid;
reloptions = (Datum) 0;
if (OidIsValid(toastid))
{
/* keep the existing toast table's reloptions, if any */
tuple = SearchSysCache(RELOID,
ObjectIdGetDatum(toastid),
0, 0, 0);
@ -713,11 +734,11 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
&isNull);
if (isNull)
reloptions = (Datum) 0;
}
AlterTableCreateToastTable(OIDNewHeap, reloptions);
if (OidIsValid(toastid))
AlterTableCreateToastTable(OIDNewHeap, reloptions);
ReleaseSysCache(tuple);
}
heap_close(OldHeap, NoLock);
@ -747,6 +768,7 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
IndexScanDesc indexScan;
HeapScanDesc heapScan;
bool use_wal;
bool is_system_catalog;
TransactionId OldestXmin;
TransactionId FreezeXid;
RewriteState rwstate;
@ -786,9 +808,14 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
*/
if (!use_wal && !NewHeap->rd_istemp)
{
char reason[NAMEDATALEN + 20];
snprintf(reason, sizeof(reason), "CLUSTER on \"%s\"",
RelationGetRelationName(NewHeap));
char reason[NAMEDATALEN + 32];
if (OldIndex != NULL)
snprintf(reason, sizeof(reason), "CLUSTER on \"%s\"",
RelationGetRelationName(NewHeap));
else
snprintf(reason, sizeof(reason), "VACUUM FULL on \"%s\"",
RelationGetRelationName(NewHeap));
XLogReportUnloggedStatement(reason);
}
@ -841,6 +868,9 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
/* return selected value to caller */
*pFreezeXid = FreezeXid;
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(OldHeap);
/* Initialize the rewrite operation */
rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal);
@ -909,25 +939,31 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
case HEAPTUPLE_INSERT_IN_PROGRESS:
/*
* We should not see this unless it's been inserted earlier in
* our own transaction.
* Since we hold exclusive lock on the relation, normally
* the only way to see this is if it was inserted earlier
* in our own transaction. However, it can happen in system
* catalogs, since we tend to release write lock before commit
* there. Give a warning if neither case applies; but in
* any case we had better copy it.
*/
if (!TransactionIdIsCurrentTransactionId(
HeapTupleHeaderGetXmin(tuple->t_data)))
elog(ERROR, "concurrent insert in progress");
if (!is_system_catalog &&
!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
elog(WARNING, "concurrent insert in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as live */
isdead = false;
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
/*
* We should not see this unless it's been deleted earlier in
* our own transaction.
* Similar situation to INSERT_IN_PROGRESS case.
*/
Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
if (!TransactionIdIsCurrentTransactionId(
HeapTupleHeaderGetXmax(tuple->t_data)))
elog(ERROR, "concurrent delete in progress");
if (!is_system_catalog &&
!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple->t_data)))
elog(WARNING, "concurrent delete in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as recently dead */
isdead = false;
break;
@ -1016,21 +1052,29 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
* table is added or removed altogether.
*
* Additionally, the first relation is marked with relfrozenxid set to
* frozenXid. It seems a bit ugly to have this here, but all callers would
* frozenXid. It seems a bit ugly to have this here, but the caller would
* have to do it anyway, so having it here saves a heap_update. Note: in
* the swap-toast-links case, we assume we don't need to change the toast
* table's relfrozenxid: the new version of the toast table should already
* have relfrozenxid set to RecentXmin, which is good enough.
*
* Lastly, if r2 and its toast table and toast index (if any) are mapped,
* their OIDs are emitted into mapped_tables[]. This is hacky but beats
* having to look the information up again later in finish_heap_swap.
*/
void
swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
TransactionId frozenXid)
static void
swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
bool swap_toast_by_content,
TransactionId frozenXid,
Oid *mapped_tables)
{
Relation relRelation;
HeapTuple reltup1,
reltup2;
Form_pg_class relform1,
relform2;
Oid relfilenode1,
relfilenode2;
Oid swaptemp;
CatalogIndexState indstate;
@ -1051,29 +1095,86 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
elog(ERROR, "cache lookup failed for relation %u", r2);
relform2 = (Form_pg_class) GETSTRUCT(reltup2);
/*
* Actually swap the fields in the two tuples
*/
swaptemp = relform1->relfilenode;
relform1->relfilenode = relform2->relfilenode;
relform2->relfilenode = swaptemp;
relfilenode1 = relform1->relfilenode;
relfilenode2 = relform2->relfilenode;
swaptemp = relform1->reltablespace;
relform1->reltablespace = relform2->reltablespace;
relform2->reltablespace = swaptemp;
if (!swap_toast_by_content)
if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
{
swaptemp = relform1->reltoastrelid;
relform1->reltoastrelid = relform2->reltoastrelid;
relform2->reltoastrelid = swaptemp;
/* Normal non-mapped relations: swap relfilenodes and reltablespaces */
Assert(!target_is_pg_class);
/* we should not swap reltoastidxid */
swaptemp = relform1->relfilenode;
relform1->relfilenode = relform2->relfilenode;
relform2->relfilenode = swaptemp;
swaptemp = relform1->reltablespace;
relform1->reltablespace = relform2->reltablespace;
relform2->reltablespace = swaptemp;
/* Also swap toast links, if we're swapping by links */
if (!swap_toast_by_content)
{
swaptemp = relform1->reltoastrelid;
relform1->reltoastrelid = relform2->reltoastrelid;
relform2->reltoastrelid = swaptemp;
/* we should NOT swap reltoastidxid */
}
}
else
{
/*
* Mapped-relation case. Here we have to swap the relation mappings
* instead of modifying the pg_class columns. Both must be mapped.
*/
if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2))
elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation",
NameStr(relform1->relname));
/*
* We can't change the tablespace of a mapped rel, and we can't handle
* toast link swapping for one either, because we must not apply any
* critical changes to its pg_class row. These cases should be
* prevented by upstream permissions tests, so this check is a
* non-user-facing emergency backstop.
*/
if (relform1->reltablespace != relform2->reltablespace)
elog(ERROR, "cannot change tablespace of mapped relation \"%s\"",
NameStr(relform1->relname));
if (!swap_toast_by_content &&
(relform1->reltoastrelid || relform2->reltoastrelid))
elog(ERROR, "cannot swap toast by links for mapped relation \"%s\"",
NameStr(relform1->relname));
/*
* Fetch the mappings --- shouldn't fail, but be paranoid
*/
relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared);
if (!OidIsValid(relfilenode1))
elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
NameStr(relform1->relname), r1);
relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared);
if (!OidIsValid(relfilenode2))
elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u",
NameStr(relform2->relname), r2);
/*
* Send replacement mappings to relmapper. Note these won't actually
* take effect until CommandCounterIncrement.
*/
RelationMapUpdateMap(r1, relfilenode2, relform1->relisshared, false);
RelationMapUpdateMap(r2, relfilenode1, relform2->relisshared, false);
/* Pass OIDs of mapped r2 tables back to caller */
*mapped_tables++ = r2;
}
/*
* In the case of a shared catalog, these next few steps only affect our
* own database's pg_class row; but that's okay.
* In the case of a shared catalog, these next few steps will only affect
* our own database's pg_class row; but that's okay, because they are
* all noncritical updates. That's also an important fact for the case
* of a mapped catalog, because it's possible that we'll commit the map
* change and then fail to commit the pg_class update.
*/
/* set rel1's frozen Xid */
@ -1097,15 +1198,31 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
relform2->reltuples = swap_tuples;
}
/* Update the tuples in pg_class */
simple_heap_update(relRelation, &reltup1->t_self, reltup1);
simple_heap_update(relRelation, &reltup2->t_self, reltup2);
/*
* Update the tuples in pg_class --- unless the target relation of the
* swap is pg_class itself. In that case, there is zero point in making
* changes because we'd be updating the old data that we're about to
* throw away. Because the real work being done here for a mapped relation
* is just to change the relation map settings, it's all right to not
* update the pg_class rows in this case.
*/
if (!target_is_pg_class)
{
simple_heap_update(relRelation, &reltup1->t_self, reltup1);
simple_heap_update(relRelation, &reltup2->t_self, reltup2);
/* Keep system catalogs current */
indstate = CatalogOpenIndexes(relRelation);
CatalogIndexInsert(indstate, reltup1);
CatalogIndexInsert(indstate, reltup2);
CatalogCloseIndexes(indstate);
/* Keep system catalogs current */
indstate = CatalogOpenIndexes(relRelation);
CatalogIndexInsert(indstate, reltup1);
CatalogIndexInsert(indstate, reltup2);
CatalogCloseIndexes(indstate);
}
else
{
/* no update ... but we do still need relcache inval */
CacheInvalidateRelcacheByTuple(reltup1);
CacheInvalidateRelcacheByTuple(reltup2);
}
/*
* If we have toast tables associated with the relations being swapped,
@ -1120,8 +1237,10 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
/* Recursively swap the contents of the toast tables */
swap_relation_files(relform1->reltoastrelid,
relform2->reltoastrelid,
true,
frozenXid);
target_is_pg_class,
swap_toast_by_content,
frozenXid,
mapped_tables);
}
else
{
@ -1146,6 +1265,15 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
toastobject;
long count;
/*
* We disallow this case for system catalogs, to avoid the
* possibility that the catalog we're rebuilding is one of the
* ones the dependency changes would change. It's too late
* to be making any data changes to the target catalog.
*/
if (IsSystemClass(relform1))
elog(ERROR, "cannot swap toast files by links for system catalogs");
/* Delete old dependencies */
if (relform1->reltoastrelid)
{
@ -1196,30 +1324,35 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
relform1->reltoastidxid && relform2->reltoastidxid)
swap_relation_files(relform1->reltoastidxid,
relform2->reltoastidxid,
true,
InvalidTransactionId);
/*
* Blow away the old relcache entries now. We need this kluge because
* relcache.c keeps a link to the smgr relation for the physical file, and
* that will be out of date as soon as we do CommandCounterIncrement.
* Whichever of the rels is the second to be cleared during cache
* invalidation will have a dangling reference to an already-deleted smgr
* relation. Rather than trying to avoid this by ordering operations just
* so, it's easiest to not have the relcache entries there at all.
* (Fortunately, since one of the entries is local in our transaction,
* it's sufficient to clear out our own relcache this way; the problem
* cannot arise for other backends when they see our update on the
* non-local relation.)
*/
RelationForgetRelation(r1);
RelationForgetRelation(r2);
target_is_pg_class,
swap_toast_by_content,
InvalidTransactionId,
mapped_tables);
/* Clean up. */
heap_freetuple(reltup1);
heap_freetuple(reltup2);
heap_close(relRelation, RowExclusiveLock);
/*
* Close both relcache entries' smgr links. We need this kluge because
* both links will be invalidated during upcoming CommandCounterIncrement.
* Whichever of the rels is the second to be cleared will have a dangling
* reference to the other's smgr entry. Rather than trying to avoid this
* by ordering operations just so, it's easiest to close the links first.
* (Fortunately, since one of the entries is local in our transaction,
* it's sufficient to clear out our own relcache this way; the problem
* cannot arise for other backends when they see our update on the
* non-transient relation.)
*
* Caution: the placement of this step interacts with the decision to
* handle toast rels by recursion. When we are trying to rebuild pg_class
* itself, the smgr close on pg_class must happen after all accesses in
* this function.
*/
RelationCloseSmgrByOid(r1);
RelationCloseSmgrByOid(r2);
}
/*
@ -1227,12 +1360,43 @@ swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
* cleaning up (including rebuilding all indexes on the old heap).
*/
void
cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool swap_toast_by_content)
finish_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
bool is_system_catalog,
bool swap_toast_by_content,
TransactionId frozenXid)
{
ObjectAddress object;
Oid mapped_tables[4];
int i;
/* Make swap_relation_files' changes visible in the catalogs. */
CommandCounterIncrement();
/* Zero out possible results from swap_relation_files */
memset(mapped_tables, 0, sizeof(mapped_tables));
/*
* Swap the contents of the heap relations (including any toast tables).
* Also set old heap's relfrozenxid to frozenXid.
*/
swap_relation_files(OIDOldHeap, OIDNewHeap,
(OIDOldHeap == RelationRelationId),
swap_toast_by_content, frozenXid, mapped_tables);
/*
* If it's a system catalog, queue an sinval message to flush all
* catcaches on the catalog when we reach CommandCounterIncrement.
*/
if (is_system_catalog)
CacheInvalidateCatalog(OIDOldHeap);
/*
* Rebuild each index on the relation (but not the toast table, which is
* all-new at this point). It is important to do this before the DROP
* step because if we are processing a system catalog that will be used
* during DROP, we want to have its indexes available. There is no
* advantage to the other order anyway because this is all transactional,
* so no chance to reclaim disk space before commit. We do not need
* a final CommandCounterIncrement() because reindex_relation does it.
*/
reindex_relation(OIDOldHeap, false, true);
/* Destroy new heap with old filenode */
object.classId = RelationRelationId;
@ -1248,11 +1412,13 @@ cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool swap_toast_by_content)
/* performDeletion does CommandCounterIncrement at end */
/*
* Rebuild each index on the relation (but not the toast table, which is
* all-new at this point). We do not need CommandCounterIncrement()
* because reindex_relation does it.
* Now we must remove any relation mapping entries that we set up for the
* transient table, as well as its toast table and toast index if any.
* If we fail to do this before commit, the relmapper will complain about
* new permanent map entries being added post-bootstrap.
*/
reindex_relation(OIDOldHeap, false);
for (i = 0; OidIsValid(mapped_tables[i]); i++)
RelationMapRemoveMapping(mapped_tables[i]);
/*
* At this point, everything is kosher except that, if we did toast swap