mirror of
https://github.com/postgres/postgres.git
synced 2025-06-14 18:42:34 +03:00
Code review of CLUSTER patch. Clean up problems with relcache getting
confused, toasted data getting lost, etc.
This commit is contained in:
@ -1,29 +1,25 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* cluster.c
|
||||
* Paul Brown's implementation of cluster index.
|
||||
* CLUSTER a table on an index.
|
||||
*
|
||||
* There is hardly anything left of Paul Brown's original implementation...
|
||||
*
|
||||
* I am going to use the rename function as a model for this in the
|
||||
* parser and executor, and the vacuum code as an example in this
|
||||
* file. As I go - in contrast to the rest of postgres - there will
|
||||
* be BUCKETS of comments. This is to allow reviewers to understand
|
||||
* my (probably bogus) assumptions about the way this works.
|
||||
* [pbrown '94]
|
||||
*
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994-5, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.85 2002/08/10 21:00:34 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.86 2002/08/11 21:17:34 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/genam.h"
|
||||
#include "access/heapam.h"
|
||||
#include "catalog/catalog.h"
|
||||
#include "catalog/dependency.h"
|
||||
#include "catalog/heap.h"
|
||||
#include "catalog/index.h"
|
||||
@ -40,25 +36,23 @@
|
||||
/*
|
||||
* We need one of these structs for each index in the relation to be
|
||||
* clustered. It's basically the data needed by index_create() so
|
||||
* we can recreate the indexes after destroying the old heap.
|
||||
* we can rebuild the indexes on the new heap.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
Oid indexOID;
|
||||
char *indexName;
|
||||
IndexInfo *indexInfo;
|
||||
Oid accessMethodOID;
|
||||
Oid *classOID;
|
||||
Oid indexOID;
|
||||
bool isPrimary;
|
||||
} IndexAttrs;
|
||||
|
||||
static Oid copy_heap(Oid OIDOldHeap, const char *NewName);
|
||||
static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
|
||||
static List *get_indexattr_list (Oid OIDOldHeap);
|
||||
static Oid make_new_heap(Oid OIDOldHeap, const char *NewName);
|
||||
static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
|
||||
static List *get_indexattr_list(Relation OldHeap);
|
||||
static void recreate_indexattr(Oid OIDOldHeap, List *indexes);
|
||||
static void swap_relfilenodes(Oid r1, Oid r2);
|
||||
|
||||
Relation RelationIdGetRelation(Oid relationId);
|
||||
|
||||
/*
|
||||
* cluster
|
||||
@ -67,17 +61,14 @@ Relation RelationIdGetRelation(Oid relationId);
|
||||
* swapping the relfilenodes of the new table and the old table, so
|
||||
* the OID of the original table is preserved. Thus we do not lose
|
||||
* GRANT, inheritance nor references to this table (this was a bug
|
||||
* in releases thru 7.3)
|
||||
* in releases thru 7.3).
|
||||
*
|
||||
* Also create new indexes and swap the filenodes with the old indexes
|
||||
* the same way we do for the relation.
|
||||
* Also create new indexes and swap the filenodes with the old indexes the
|
||||
* same way we do for the relation. Since we are effectively bulk-loading
|
||||
* the new table, it's better to create the indexes afterwards than to fill
|
||||
* them incrementally while we load the table.
|
||||
*
|
||||
* TODO:
|
||||
* maybe we can get away with AccessShareLock for the table.
|
||||
* Concurrency would be much improved. Only acquire
|
||||
* AccessExclusiveLock right before swapping the filenodes.
|
||||
* This would allow users to CLUSTER on a regular basis,
|
||||
* practically eliminating the need for auto-clustered indexes.
|
||||
* Permissions checks were done already.
|
||||
*/
|
||||
void
|
||||
cluster(RangeVar *oldrelation, char *oldindexname)
|
||||
@ -105,43 +96,60 @@ cluster(RangeVar *oldrelation, char *oldindexname)
|
||||
RelationGetNamespace(OldHeap));
|
||||
if (!OidIsValid(OIDOldIndex))
|
||||
elog(ERROR, "CLUSTER: cannot find index \"%s\" for table \"%s\"",
|
||||
oldindexname, oldrelation->relname);
|
||||
oldindexname, RelationGetRelationName(OldHeap));
|
||||
OldIndex = index_open(OIDOldIndex);
|
||||
LockRelation(OldIndex, AccessExclusiveLock);
|
||||
|
||||
/*
|
||||
* Check that index is in fact an index on the given relation
|
||||
*/
|
||||
if (OldIndex->rd_index->indrelid != OIDOldHeap)
|
||||
if (OldIndex->rd_index == NULL ||
|
||||
OldIndex->rd_index->indrelid != OIDOldHeap)
|
||||
elog(ERROR, "CLUSTER: \"%s\" is not an index for table \"%s\"",
|
||||
oldindexname, oldrelation->relname);
|
||||
|
||||
/* Drop relcache refcnts, but do NOT give up the locks */
|
||||
heap_close(OldHeap, NoLock);
|
||||
index_close(OldIndex);
|
||||
|
||||
/* Save the information of all indexes on the relation. */
|
||||
indexes = get_indexattr_list(OIDOldHeap);
|
||||
RelationGetRelationName(OldIndex),
|
||||
RelationGetRelationName(OldHeap));
|
||||
|
||||
/*
|
||||
* Create the new heap with a temporary name.
|
||||
* Disallow clustering system relations. This will definitely NOT work
|
||||
* for shared relations (we have no way to update pg_class rows in other
|
||||
* databases), nor for nailed-in-cache relations (the relfilenode values
|
||||
* for those are hardwired, see relcache.c). It might work for other
|
||||
* system relations, but I ain't gonna risk it.
|
||||
*/
|
||||
snprintf(NewHeapName, NAMEDATALEN, "temp_%u", OIDOldHeap);
|
||||
if (IsSystemRelation(OldHeap))
|
||||
elog(ERROR, "CLUSTER: cannot cluster system relation \"%s\"",
|
||||
RelationGetRelationName(OldHeap));
|
||||
|
||||
OIDNewHeap = copy_heap(OIDOldHeap, NewHeapName);
|
||||
/* Save the information of all indexes on the relation. */
|
||||
indexes = get_indexattr_list(OldHeap);
|
||||
|
||||
/* We do not need CommandCounterIncrement() because copy_heap did it. */
|
||||
/* Drop relcache refcnts, but do NOT give up the locks */
|
||||
index_close(OldIndex);
|
||||
heap_close(OldHeap, NoLock);
|
||||
|
||||
/*
|
||||
* Create the new heap, using a temporary name in the same namespace
|
||||
* as the existing table. NOTE: there is some risk of collision with user
|
||||
* relnames. Working around this seems more trouble than it's worth; in
|
||||
* particular, we can't create the new heap in a different namespace from
|
||||
* the old, or we will have problems with the TEMP status of temp tables.
|
||||
*/
|
||||
snprintf(NewHeapName, NAMEDATALEN, "pg_temp_%u", OIDOldHeap);
|
||||
|
||||
OIDNewHeap = make_new_heap(OIDOldHeap, NewHeapName);
|
||||
|
||||
/* We don't need CommandCounterIncrement() because make_new_heap did it. */
|
||||
|
||||
/*
|
||||
* Copy the heap data into the new table in the desired order.
|
||||
*/
|
||||
rebuildheap(OIDNewHeap, OIDOldHeap, OIDOldIndex);
|
||||
copy_heap_data(OIDNewHeap, OIDOldHeap, OIDOldIndex);
|
||||
|
||||
/* To make the new heap's data visible. */
|
||||
/* To make the new heap's data visible (probably not needed?). */
|
||||
CommandCounterIncrement();
|
||||
|
||||
/* Swap the relfilenodes of the old and new heaps. */
|
||||
swap_relfilenodes(OIDNewHeap, OIDOldHeap);
|
||||
swap_relfilenodes(OIDOldHeap, OIDNewHeap);
|
||||
|
||||
CommandCounterIncrement();
|
||||
|
||||
@ -150,21 +158,26 @@ cluster(RangeVar *oldrelation, char *oldindexname)
|
||||
object.objectId = OIDNewHeap;
|
||||
object.objectSubId = 0;
|
||||
|
||||
/* The relation is local to our transaction and we know nothin
|
||||
/*
|
||||
* The new relation is local to our transaction and we know nothing
|
||||
* depends on it, so DROP_RESTRICT should be OK.
|
||||
*/
|
||||
performDeletion(&object, DROP_RESTRICT);
|
||||
|
||||
/* performDeletion does CommandCounterIncrement at end */
|
||||
|
||||
/* Recreate the indexes on the relation. We do not need
|
||||
* CommandCounterIncrement() because recreate_indexattr does it.
|
||||
*/
|
||||
recreate_indexattr(OIDOldHeap, indexes);
|
||||
/*
|
||||
* Recreate each index on the relation. We do not need
|
||||
* CommandCounterIncrement() because recreate_indexattr does it.
|
||||
*/
|
||||
recreate_indexattr(OIDOldHeap, indexes);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create the new table that we will fill with correctly-ordered data.
|
||||
*/
|
||||
static Oid
|
||||
copy_heap(Oid OIDOldHeap, const char *NewName)
|
||||
make_new_heap(Oid OIDOldHeap, const char *NewName)
|
||||
{
|
||||
TupleDesc OldHeapDesc,
|
||||
tupdesc;
|
||||
@ -206,27 +219,29 @@ copy_heap(Oid OIDOldHeap, const char *NewName)
|
||||
return OIDNewHeap;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do the physical copying of heap data.
|
||||
*/
|
||||
static void
|
||||
rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
|
||||
copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
|
||||
{
|
||||
Relation LocalNewHeap,
|
||||
LocalOldHeap,
|
||||
LocalOldIndex;
|
||||
IndexScanDesc ScanDesc;
|
||||
HeapTuple LocalHeapTuple;
|
||||
Relation NewHeap,
|
||||
OldHeap,
|
||||
OldIndex;
|
||||
IndexScanDesc scan;
|
||||
HeapTuple tuple;
|
||||
|
||||
/*
|
||||
* Open the relations I need. Scan through the OldHeap on the OldIndex
|
||||
* and insert each tuple into the NewHeap.
|
||||
*/
|
||||
LocalNewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
|
||||
LocalOldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
|
||||
LocalOldIndex = index_open(OIDOldIndex);
|
||||
NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
|
||||
OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
|
||||
OldIndex = index_open(OIDOldIndex);
|
||||
|
||||
ScanDesc = index_beginscan(LocalOldHeap, LocalOldIndex,
|
||||
SnapshotNow, 0, (ScanKey) NULL);
|
||||
scan = index_beginscan(OldHeap, OldIndex, SnapshotNow, 0, (ScanKey) NULL);
|
||||
|
||||
while ((LocalHeapTuple = index_getnext(ScanDesc, ForwardScanDirection)) != NULL)
|
||||
while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
|
||||
{
|
||||
/*
|
||||
* We must copy the tuple because heap_insert() will overwrite
|
||||
@ -234,199 +249,280 @@ rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
|
||||
* retrieved tuple will actually be in a disk buffer! Thus,
|
||||
* the source relation would get trashed, which is bad news if
|
||||
* we abort later on. (This was a bug in releases thru 7.0)
|
||||
*
|
||||
* Note that the copied tuple will have the original OID, if any,
|
||||
* so this does preserve OIDs.
|
||||
*/
|
||||
HeapTuple copiedTuple = heap_copytuple(LocalHeapTuple);
|
||||
HeapTuple copiedTuple = heap_copytuple(tuple);
|
||||
|
||||
simple_heap_insert(NewHeap, copiedTuple);
|
||||
|
||||
simple_heap_insert(LocalNewHeap, copiedTuple);
|
||||
heap_freetuple(copiedTuple);
|
||||
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
}
|
||||
|
||||
index_endscan(ScanDesc);
|
||||
index_endscan(scan);
|
||||
|
||||
index_close(LocalOldIndex);
|
||||
heap_close(LocalOldHeap, NoLock);
|
||||
heap_close(LocalNewHeap, NoLock);
|
||||
index_close(OldIndex);
|
||||
heap_close(OldHeap, NoLock);
|
||||
heap_close(NewHeap, NoLock);
|
||||
}
|
||||
|
||||
/* Get the necessary info about the indexes in the relation and
|
||||
* return a List of IndexAttrs.
|
||||
/*
|
||||
* Get the necessary info about the indexes of the relation and
|
||||
* return a list of IndexAttrs structures.
|
||||
*/
|
||||
List *
|
||||
get_indexattr_list (Oid OIDOldHeap)
|
||||
static List *
|
||||
get_indexattr_list(Relation OldHeap)
|
||||
{
|
||||
ScanKeyData entry;
|
||||
HeapScanDesc scan;
|
||||
Relation indexRelation;
|
||||
HeapTuple indexTuple;
|
||||
List *indexes = NIL;
|
||||
IndexAttrs *attrs;
|
||||
HeapTuple tuple;
|
||||
Form_pg_index index;
|
||||
|
||||
/* Grab the index tuples by looking into RelationRelationName
|
||||
* by the OID of the old heap.
|
||||
*/
|
||||
indexRelation = heap_openr(IndexRelationName, AccessShareLock);
|
||||
ScanKeyEntryInitialize(&entry, 0, Anum_pg_index_indrelid,
|
||||
F_OIDEQ, ObjectIdGetDatum(OIDOldHeap));
|
||||
scan = heap_beginscan(indexRelation, SnapshotNow, 1, &entry);
|
||||
while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
|
||||
List *indlist;
|
||||
|
||||
/* Ask the relcache to produce a list of the indexes of the old rel */
|
||||
foreach(indlist, RelationGetIndexList(OldHeap))
|
||||
{
|
||||
index = (Form_pg_index) GETSTRUCT(indexTuple);
|
||||
Oid indexOID = (Oid) lfirsti(indlist);
|
||||
HeapTuple indexTuple;
|
||||
HeapTuple classTuple;
|
||||
Form_pg_index indexForm;
|
||||
Form_pg_class classForm;
|
||||
IndexAttrs *attrs;
|
||||
|
||||
indexTuple = SearchSysCache(INDEXRELID,
|
||||
ObjectIdGetDatum(indexOID),
|
||||
0, 0, 0);
|
||||
if (!HeapTupleIsValid(indexTuple))
|
||||
elog(ERROR, "Cache lookup failed for index %u", indexOID);
|
||||
indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
|
||||
Assert(indexForm->indexrelid == indexOID);
|
||||
|
||||
attrs = (IndexAttrs *) palloc(sizeof(IndexAttrs));
|
||||
attrs->indexInfo = BuildIndexInfo(index);
|
||||
attrs->isPrimary = index->indisprimary;
|
||||
attrs->indexOID = index->indexrelid;
|
||||
attrs->indexOID = indexOID;
|
||||
attrs->indexInfo = BuildIndexInfo(indexForm);
|
||||
attrs->classOID = (Oid *)
|
||||
palloc(sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
|
||||
memcpy(attrs->classOID, indexForm->indclass,
|
||||
sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
|
||||
|
||||
/* The opclasses are copied verbatim from the original indexes.
|
||||
*/
|
||||
attrs->classOID = (Oid *)palloc(sizeof(Oid) *
|
||||
attrs->indexInfo->ii_NumIndexAttrs);
|
||||
memcpy(attrs->classOID, index->indclass,
|
||||
sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
|
||||
/* Name and access method of each index come from pg_class */
|
||||
classTuple = SearchSysCache(RELOID,
|
||||
ObjectIdGetDatum(indexOID),
|
||||
0, 0, 0);
|
||||
if (!HeapTupleIsValid(classTuple))
|
||||
elog(ERROR, "Cache lookup failed for index %u", indexOID);
|
||||
classForm = (Form_pg_class) GETSTRUCT(classTuple);
|
||||
|
||||
/* Name and access method of each index come from
|
||||
* RelationRelationName.
|
||||
*/
|
||||
tuple = SearchSysCache(RELOID,
|
||||
ObjectIdGetDatum(attrs->indexOID),
|
||||
0, 0, 0);
|
||||
if (!HeapTupleIsValid(tuple))
|
||||
elog(ERROR, "CLUSTER: cannot find index %u", attrs->indexOID);
|
||||
attrs->indexName = pstrdup(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname));
|
||||
attrs->accessMethodOID = ((Form_pg_class) GETSTRUCT(tuple))->relam;
|
||||
ReleaseSysCache(tuple);
|
||||
attrs->indexName = pstrdup(NameStr(classForm->relname));
|
||||
attrs->accessMethodOID = classForm->relam;
|
||||
|
||||
ReleaseSysCache(classTuple);
|
||||
ReleaseSysCache(indexTuple);
|
||||
|
||||
/* Cons the gathered data into the list. We do not care about
|
||||
* ordering, and this is more efficient than append.
|
||||
*/
|
||||
indexes=lcons((void *)attrs, indexes);
|
||||
indexes = lcons(attrs, indexes);
|
||||
}
|
||||
heap_endscan(scan);
|
||||
heap_close(indexRelation, AccessShareLock);
|
||||
|
||||
return indexes;
|
||||
}
|
||||
|
||||
/* Create new indexes and swap the filenodes with old indexes. Then drop
|
||||
* the new index (carrying the old heap along).
|
||||
/*
|
||||
* Create new indexes and swap the filenodes with old indexes. Then drop
|
||||
* the new index (carrying the old index filenode along).
|
||||
*/
|
||||
void
|
||||
static void
|
||||
recreate_indexattr(Oid OIDOldHeap, List *indexes)
|
||||
{
|
||||
IndexAttrs *attrs;
|
||||
List *elem;
|
||||
Oid newIndexOID;
|
||||
char newIndexName[NAMEDATALEN];
|
||||
ObjectAddress object;
|
||||
|
||||
foreach (elem, indexes)
|
||||
foreach(elem, indexes)
|
||||
{
|
||||
attrs=(IndexAttrs *) lfirst(elem);
|
||||
IndexAttrs *attrs = (IndexAttrs *) lfirst(elem);
|
||||
Oid newIndexOID;
|
||||
char newIndexName[NAMEDATALEN];
|
||||
ObjectAddress object;
|
||||
|
||||
/* Create the new index under a temporary name */
|
||||
snprintf(newIndexName, NAMEDATALEN, "temp_%u", attrs->indexOID);
|
||||
snprintf(newIndexName, NAMEDATALEN, "pg_temp_%u", attrs->indexOID);
|
||||
|
||||
/* The new index will have constraint status set to false,
|
||||
/*
|
||||
* The new index will have primary and constraint status set to false,
|
||||
* but since we will only use its filenode it doesn't matter:
|
||||
* after the filenode swap the index will keep the constraint
|
||||
* status of the old index.
|
||||
*/
|
||||
newIndexOID = index_create(OIDOldHeap, newIndexName,
|
||||
attrs->indexInfo, attrs->accessMethodOID,
|
||||
attrs->classOID, attrs->isPrimary,
|
||||
attrs->classOID, false,
|
||||
false, allowSystemTableMods);
|
||||
CommandCounterIncrement();
|
||||
|
||||
/* Swap the filenodes. */
|
||||
swap_relfilenodes(newIndexOID, attrs->indexOID);
|
||||
setRelhasindex(OIDOldHeap, true, attrs->isPrimary, InvalidOid);
|
||||
swap_relfilenodes(attrs->indexOID, newIndexOID);
|
||||
|
||||
CommandCounterIncrement();
|
||||
|
||||
/* Destroy new index with old filenode */
|
||||
object.classId = RelOid_pg_class;
|
||||
object.objectId = newIndexOID;
|
||||
object.objectSubId = 0;
|
||||
|
||||
/* The relation is local to our transaction and we know
|
||||
/*
|
||||
* The relation is local to our transaction and we know
|
||||
* nothing depends on it, so DROP_RESTRICT should be OK.
|
||||
*/
|
||||
performDeletion(&object, DROP_RESTRICT);
|
||||
|
||||
/* performDeletion does CommandCounterIncrement() at its end */
|
||||
|
||||
pfree(attrs->classOID);
|
||||
pfree(attrs);
|
||||
}
|
||||
freeList(indexes);
|
||||
}
|
||||
|
||||
/* Swap the relfilenodes for two given relations.
|
||||
/*
|
||||
* Swap the relfilenodes for two given relations.
|
||||
*
|
||||
* Also swap any TOAST links, so that the toast data moves along with
|
||||
* the main-table data.
|
||||
*/
|
||||
void
|
||||
static void
|
||||
swap_relfilenodes(Oid r1, Oid r2)
|
||||
{
|
||||
/* I can probably keep RelationRelationName open in the main
|
||||
* function and pass the Relation around so I don't have to open
|
||||
* it every time.
|
||||
*/
|
||||
Relation relRelation,
|
||||
rel;
|
||||
HeapTuple reltup[2];
|
||||
Oid tempRFNode;
|
||||
HeapTuple reltup1,
|
||||
reltup2;
|
||||
Form_pg_class relform1,
|
||||
relform2;
|
||||
Oid swaptemp;
|
||||
int i;
|
||||
CatalogIndexState indstate;
|
||||
|
||||
/* We need both RelationRelationName tuples. */
|
||||
/* We need writable copies of both pg_class tuples. */
|
||||
relRelation = heap_openr(RelationRelationName, RowExclusiveLock);
|
||||
|
||||
reltup[0] = SearchSysCacheCopy(RELOID,
|
||||
ObjectIdGetDatum(r1),
|
||||
0, 0, 0);
|
||||
if (!HeapTupleIsValid(reltup[0]))
|
||||
reltup1 = SearchSysCacheCopy(RELOID,
|
||||
ObjectIdGetDatum(r1),
|
||||
0, 0, 0);
|
||||
if (!HeapTupleIsValid(reltup1))
|
||||
elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r1);
|
||||
reltup[1] = SearchSysCacheCopy(RELOID,
|
||||
ObjectIdGetDatum(r2),
|
||||
0, 0, 0);
|
||||
if (!HeapTupleIsValid(reltup[1]))
|
||||
elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r2);
|
||||
relform1 = (Form_pg_class) GETSTRUCT(reltup1);
|
||||
|
||||
/* The buffer manager gets confused if we swap relfilenodes for
|
||||
reltup2 = SearchSysCacheCopy(RELOID,
|
||||
ObjectIdGetDatum(r2),
|
||||
0, 0, 0);
|
||||
if (!HeapTupleIsValid(reltup2))
|
||||
elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r2);
|
||||
relform2 = (Form_pg_class) GETSTRUCT(reltup2);
|
||||
|
||||
/*
|
||||
* The buffer manager gets confused if we swap relfilenodes for
|
||||
* relations that are not both local or non-local to this transaction.
|
||||
* Flush the buffers on both relations so the buffer manager can
|
||||
* forget about'em.
|
||||
* forget about'em. (XXX this might not be necessary anymore?)
|
||||
*/
|
||||
|
||||
rel = RelationIdGetRelation(r1);
|
||||
rel = relation_open(r1, NoLock);
|
||||
i = FlushRelationBuffers(rel, 0);
|
||||
if (i < 0)
|
||||
elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i);
|
||||
RelationClose(rel);
|
||||
rel = RelationIdGetRelation(r1);
|
||||
relation_close(rel, NoLock);
|
||||
|
||||
rel = relation_open(r2, NoLock);
|
||||
i = FlushRelationBuffers(rel, 0);
|
||||
if (i < 0)
|
||||
elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i);
|
||||
RelationClose(rel);
|
||||
relation_close(rel, NoLock);
|
||||
|
||||
/* Actually swap the filenodes */
|
||||
/*
|
||||
* Actually swap the filenode and TOAST fields in the two tuples
|
||||
*/
|
||||
swaptemp = relform1->relfilenode;
|
||||
relform1->relfilenode = relform2->relfilenode;
|
||||
relform2->relfilenode = swaptemp;
|
||||
|
||||
tempRFNode = ((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode;
|
||||
((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode =
|
||||
((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode;
|
||||
((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode = tempRFNode;
|
||||
swaptemp = relform1->reltoastrelid;
|
||||
relform1->reltoastrelid = relform2->reltoastrelid;
|
||||
relform2->reltoastrelid = swaptemp;
|
||||
|
||||
/* Update the RelationRelationName tuples */
|
||||
simple_heap_update(relRelation, &reltup[1]->t_self, reltup[1]);
|
||||
simple_heap_update(relRelation, &reltup[0]->t_self, reltup[0]);
|
||||
/* we should not swap reltoastidxid */
|
||||
|
||||
/* Update the tuples in pg_class */
|
||||
simple_heap_update(relRelation, &reltup1->t_self, reltup1);
|
||||
simple_heap_update(relRelation, &reltup2->t_self, reltup2);
|
||||
|
||||
/* To keep system catalogs current. */
|
||||
/* Keep system catalogs current */
|
||||
indstate = CatalogOpenIndexes(relRelation);
|
||||
CatalogIndexInsert(indstate, reltup[1]);
|
||||
CatalogIndexInsert(indstate, reltup[0]);
|
||||
CatalogIndexInsert(indstate, reltup1);
|
||||
CatalogIndexInsert(indstate, reltup2);
|
||||
CatalogCloseIndexes(indstate);
|
||||
|
||||
heap_close(relRelation, NoLock);
|
||||
heap_freetuple(reltup[0]);
|
||||
heap_freetuple(reltup[1]);
|
||||
/*
|
||||
* If we have toast tables associated with the relations being swapped,
|
||||
* change their dependency links to re-associate them with their new
|
||||
* owning relations. Otherwise the wrong one will get dropped ...
|
||||
*
|
||||
* NOTE: for now, we can assume the new table will have a TOAST table
|
||||
* if and only if the old one does. This logic might need work if we
|
||||
* get smarter about dropped columns.
|
||||
*
|
||||
* NOTE: at present, a TOAST table's only dependency is the one on
|
||||
* its owning table. If more are ever created, we'd need to use something
|
||||
* more selective than deleteDependencyRecordsFor() to get rid of only
|
||||
* the link we want.
|
||||
*/
|
||||
if (relform1->reltoastrelid || relform2->reltoastrelid)
|
||||
{
|
||||
ObjectAddress baseobject,
|
||||
toastobject;
|
||||
long count;
|
||||
|
||||
if (!(relform1->reltoastrelid && relform2->reltoastrelid))
|
||||
elog(ERROR, "CLUSTER: expected both swapped tables to have TOAST tables");
|
||||
|
||||
/* Delete old dependencies */
|
||||
count = deleteDependencyRecordsFor(RelOid_pg_class,
|
||||
relform1->reltoastrelid);
|
||||
if (count != 1)
|
||||
elog(ERROR, "CLUSTER: expected one dependency record for TOAST table, found %ld",
|
||||
count);
|
||||
count = deleteDependencyRecordsFor(RelOid_pg_class,
|
||||
relform2->reltoastrelid);
|
||||
if (count != 1)
|
||||
elog(ERROR, "CLUSTER: expected one dependency record for TOAST table, found %ld",
|
||||
count);
|
||||
|
||||
/* Register new dependencies */
|
||||
baseobject.classId = RelOid_pg_class;
|
||||
baseobject.objectId = r1;
|
||||
baseobject.objectSubId = 0;
|
||||
toastobject.classId = RelOid_pg_class;
|
||||
toastobject.objectId = relform1->reltoastrelid;
|
||||
toastobject.objectSubId = 0;
|
||||
|
||||
recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
|
||||
|
||||
baseobject.objectId = r2;
|
||||
toastobject.objectId = relform2->reltoastrelid;
|
||||
|
||||
recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Blow away the old relcache entries now. We need this kluge because
|
||||
* relcache.c indexes relcache entries by rd_node as well as OID.
|
||||
* It will get confused if it is asked to (re)build an entry with a new
|
||||
* rd_node value when there is still another entry laying about with
|
||||
* that same rd_node value. (Fortunately, since one of the entries
|
||||
* is local in our transaction, it's sufficient to clear out our own
|
||||
* relcache this way; the problem cannot arise for other backends when
|
||||
* they see our update on the non-local relation.)
|
||||
*/
|
||||
RelationForgetRelation(r1);
|
||||
RelationForgetRelation(r2);
|
||||
|
||||
/* Clean up. */
|
||||
heap_freetuple(reltup1);
|
||||
heap_freetuple(reltup2);
|
||||
|
||||
heap_close(relRelation, RowExclusiveLock);
|
||||
}
|
||||
|
Reference in New Issue
Block a user