Code review of CLUSTER patch. Clean up problems with relcache getting

confused, toasted data getting lost, etc.
2025-11-29 23:43:17 +03:00 · 2002-08-11 21:17:35 +00:00
parent 9bccdf17f7
commit e44beef712
12 changed files with 560 additions and 431 deletions
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1,29 +1,25 @@
 /*-------------------------------------------------------------------------
 *
 * cluster.c
- *	  Paul Brown's implementation of cluster index.
+ *	  CLUSTER a table on an index.
+ *
+ * There is hardly anything left of Paul Brown's original implementation...
 *
- *	  I am going to use the rename function as a model for this in the
- *	  parser and executor, and the vacuum code as an example in this
- *	  file. As I go - in contrast to the rest of postgres - there will
- *	  be BUCKETS of comments. This is to allow reviewers to understand
- *	  my (probably bogus) assumptions about the way this works.
- *														[pbrown '94]
 *
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994-5, Regents of the University of California
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.85 2002/08/10 21:00:34 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/commands/cluster.c,v 1.86 2002/08/11 21:17:34 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
-
 #include "postgres.h"

 #include "access/genam.h"
 #include "access/heapam.h"
+#include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/heap.h"
 #include "catalog/index.h"
@@ -40,25 +36,23 @@
 /*
 * We need one of these structs for each index in the relation to be
 * clustered.  It's basically the data needed by index_create() so
- * we can recreate the indexes after destroying the old heap.
+ * we can rebuild the indexes on the new heap.
 */
 typedef struct
 {
+	Oid			indexOID;
 	char	   *indexName;
 	IndexInfo  *indexInfo;
 	Oid			accessMethodOID;
 	Oid		   *classOID;
-	Oid			indexOID;
-	bool		isPrimary;
 } IndexAttrs;

-static Oid	copy_heap(Oid OIDOldHeap, const char *NewName);
-static void rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
-static List *get_indexattr_list (Oid OIDOldHeap);
+static Oid	make_new_heap(Oid OIDOldHeap, const char *NewName);
+static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
+static List *get_indexattr_list(Relation OldHeap);
 static void recreate_indexattr(Oid OIDOldHeap, List *indexes);
 static void swap_relfilenodes(Oid r1, Oid r2);

-Relation RelationIdGetRelation(Oid relationId);

 /*
 * cluster
@@ -67,17 +61,14 @@ Relation RelationIdGetRelation(Oid relationId);
 * swapping the relfilenodes of the new table and the old table, so
 * the OID of the original table is preserved.  Thus we do not lose
 * GRANT, inheritance nor references to this table (this was a bug
- * in releases thru 7.3)
+ * in releases thru 7.3).
 *
- * Also create new indexes and swap the filenodes with the old indexes
- * the same way we do for the relation.
+ * Also create new indexes and swap the filenodes with the old indexes the
+ * same way we do for the relation.  Since we are effectively bulk-loading
+ * the new table, it's better to create the indexes afterwards than to fill
+ * them incrementally while we load the table.
 *
- * TODO:
- * 		maybe we can get away with AccessShareLock for the table.
- * 		Concurrency would be much improved.  Only acquire
- * 		AccessExclusiveLock right before swapping the filenodes.
- * 		This would allow users to CLUSTER on a regular basis,
- * 		practically eliminating the need for auto-clustered indexes.
+ * Permissions checks were done already.
 */
 void
 cluster(RangeVar *oldrelation, char *oldindexname)
@@ -105,43 +96,60 @@ cluster(RangeVar *oldrelation, char *oldindexname)
 									RelationGetNamespace(OldHeap));
 	if (!OidIsValid(OIDOldIndex))
 		elog(ERROR, "CLUSTER: cannot find index \"%s\" for table \"%s\"",
-			 oldindexname, oldrelation->relname);
+			 oldindexname, RelationGetRelationName(OldHeap));
 	OldIndex = index_open(OIDOldIndex);
 	LockRelation(OldIndex, AccessExclusiveLock);

 	/*
 	 * Check that index is in fact an index on the given relation
 	 */
-	if (OldIndex->rd_index->indrelid != OIDOldHeap)
+	if (OldIndex->rd_index == NULL ||
+		OldIndex->rd_index->indrelid != OIDOldHeap)
 		elog(ERROR, "CLUSTER: \"%s\" is not an index for table \"%s\"",
-			 oldindexname, oldrelation->relname);
-
-	/* Drop relcache refcnts, but do NOT give up the locks */
-	heap_close(OldHeap, NoLock);
-	index_close(OldIndex);
-
-	/* Save the information of all indexes on the relation. */
-	indexes = get_indexattr_list(OIDOldHeap);
+			 RelationGetRelationName(OldIndex),
+			 RelationGetRelationName(OldHeap));

 	/*
-	 * Create the new heap with a temporary name.
+	 * Disallow clustering system relations.  This will definitely NOT work
+	 * for shared relations (we have no way to update pg_class rows in other
+	 * databases), nor for nailed-in-cache relations (the relfilenode values
+	 * for those are hardwired, see relcache.c).  It might work for other
+	 * system relations, but I ain't gonna risk it.
 	 */
-	snprintf(NewHeapName, NAMEDATALEN, "temp_%u", OIDOldHeap);
+	if (IsSystemRelation(OldHeap))
+		elog(ERROR, "CLUSTER: cannot cluster system relation \"%s\"",
+			 RelationGetRelationName(OldHeap));

-	OIDNewHeap = copy_heap(OIDOldHeap, NewHeapName);
+	/* Save the information of all indexes on the relation. */
+	indexes = get_indexattr_list(OldHeap);

-	/* We do not need CommandCounterIncrement() because copy_heap did it. */
+	/* Drop relcache refcnts, but do NOT give up the locks */
+	index_close(OldIndex);
+	heap_close(OldHeap, NoLock);
+
+	/*
+	 * Create the new heap, using a temporary name in the same namespace
+	 * as the existing table.  NOTE: there is some risk of collision with user
+	 * relnames.  Working around this seems more trouble than it's worth; in
+	 * particular, we can't create the new heap in a different namespace from
+	 * the old, or we will have problems with the TEMP status of temp tables.
+	 */
+	snprintf(NewHeapName, NAMEDATALEN, "pg_temp_%u", OIDOldHeap);
+
+	OIDNewHeap = make_new_heap(OIDOldHeap, NewHeapName);
+
+	/* We don't need CommandCounterIncrement() because make_new_heap did it. */

 	/*
 	 * Copy the heap data into the new table in the desired order.
 	 */
-	rebuildheap(OIDNewHeap, OIDOldHeap, OIDOldIndex);
+	copy_heap_data(OIDNewHeap, OIDOldHeap, OIDOldIndex);

-	/* To make the new heap's data visible. */
+	/* To make the new heap's data visible (probably not needed?). */
 	CommandCounterIncrement();

 	/* Swap the relfilenodes of the old and new heaps. */
-	swap_relfilenodes(OIDNewHeap, OIDOldHeap);
+	swap_relfilenodes(OIDOldHeap, OIDNewHeap);

 	CommandCounterIncrement();

@@ -150,21 +158,26 @@ cluster(RangeVar *oldrelation, char *oldindexname)
 	object.objectId = OIDNewHeap;
 	object.objectSubId = 0;

-	/* The relation is local to our transaction and we know nothin
+	/*
+	 * The new relation is local to our transaction and we know nothing
 	 * depends on it, so DROP_RESTRICT should be OK.
 	 */
 	performDeletion(&object, DROP_RESTRICT);

 	/* performDeletion does CommandCounterIncrement at end */

- 	/* Recreate the indexes on the relation.  We do not need
-  	 * CommandCounterIncrement() because recreate_indexattr does it.
-   	 */
-  	recreate_indexattr(OIDOldHeap, indexes);
+	/*
+	 * Recreate each index on the relation.  We do not need
+	 * CommandCounterIncrement() because recreate_indexattr does it.
+	 */
+	recreate_indexattr(OIDOldHeap, indexes);
 }

+/*
+ * Create the new table that we will fill with correctly-ordered data.
+ */
 static Oid
-copy_heap(Oid OIDOldHeap, const char *NewName)
+make_new_heap(Oid OIDOldHeap, const char *NewName)
 {
 	TupleDesc	OldHeapDesc,
 				tupdesc;
@@ -206,27 +219,29 @@ copy_heap(Oid OIDOldHeap, const char *NewName)
 	return OIDNewHeap;
 }

+/*
+ * Do the physical copying of heap data.
+ */
 static void
-rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
+copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
 {
-	Relation	LocalNewHeap,
-				LocalOldHeap,
-				LocalOldIndex;
-	IndexScanDesc ScanDesc;
-	HeapTuple	LocalHeapTuple;
+	Relation	NewHeap,
+				OldHeap,
+				OldIndex;
+	IndexScanDesc scan;
+	HeapTuple	tuple;

 	/*
 	 * Open the relations I need. Scan through the OldHeap on the OldIndex
 	 * and insert each tuple into the NewHeap.
 	 */
-	LocalNewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
-	LocalOldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
-	LocalOldIndex = index_open(OIDOldIndex);
+	NewHeap = heap_open(OIDNewHeap, AccessExclusiveLock);
+	OldHeap = heap_open(OIDOldHeap, AccessExclusiveLock);
+	OldIndex = index_open(OIDOldIndex);

-	ScanDesc = index_beginscan(LocalOldHeap, LocalOldIndex,
-							   SnapshotNow, 0, (ScanKey) NULL);
+	scan = index_beginscan(OldHeap, OldIndex, SnapshotNow, 0, (ScanKey) NULL);

-	while ((LocalHeapTuple = index_getnext(ScanDesc, ForwardScanDirection)) != NULL)
+	while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
 	{
 		/*
 		 * We must copy the tuple because heap_insert() will overwrite
@@ -234,199 +249,280 @@ rebuildheap(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
 		 * retrieved tuple will actually be in a disk buffer!  Thus,
 		 * the source relation would get trashed, which is bad news if
 		 * we abort later on.  (This was a bug in releases thru 7.0)
+		 *
+		 * Note that the copied tuple will have the original OID, if any,
+		 * so this does preserve OIDs.
 		 */
-		HeapTuple	copiedTuple = heap_copytuple(LocalHeapTuple);
+		HeapTuple	copiedTuple = heap_copytuple(tuple);
+
+		simple_heap_insert(NewHeap, copiedTuple);

-		simple_heap_insert(LocalNewHeap, copiedTuple);
 		heap_freetuple(copiedTuple);

 		CHECK_FOR_INTERRUPTS();
 	}

-	index_endscan(ScanDesc);
+	index_endscan(scan);

-	index_close(LocalOldIndex);
-	heap_close(LocalOldHeap, NoLock);
-	heap_close(LocalNewHeap, NoLock);
+	index_close(OldIndex);
+	heap_close(OldHeap, NoLock);
+	heap_close(NewHeap, NoLock);
 }

-/* Get the necessary info about the indexes in the relation and
- * return a List of IndexAttrs.
+/*
+ * Get the necessary info about the indexes of the relation and
+ * return a list of IndexAttrs structures.
 */
-List *
-get_indexattr_list (Oid OIDOldHeap)
+static List *
+get_indexattr_list(Relation OldHeap)
 {
-	ScanKeyData	entry;
-	HeapScanDesc scan;
-	Relation indexRelation;
-	HeapTuple indexTuple;
 	List *indexes = NIL;
-	IndexAttrs *attrs;
-	HeapTuple tuple;
-	Form_pg_index index;
-	
-	/* Grab the index tuples by looking into RelationRelationName
-	 * by the OID of the old heap.
-	 */
-	indexRelation = heap_openr(IndexRelationName, AccessShareLock);
-	ScanKeyEntryInitialize(&entry, 0, Anum_pg_index_indrelid,
-			F_OIDEQ, ObjectIdGetDatum(OIDOldHeap));
-	scan = heap_beginscan(indexRelation, SnapshotNow, 1, &entry);
-	while ((indexTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	List	   *indlist;
+
+	/* Ask the relcache to produce a list of the indexes of the old rel */
+	foreach(indlist, RelationGetIndexList(OldHeap))
 	{
-		index = (Form_pg_index) GETSTRUCT(indexTuple);
+		Oid		indexOID = (Oid) lfirsti(indlist);
+		HeapTuple indexTuple;
+		HeapTuple classTuple;
+		Form_pg_index indexForm;
+		Form_pg_class classForm;
+		IndexAttrs *attrs;
+
+		indexTuple = SearchSysCache(INDEXRELID,
+									ObjectIdGetDatum(indexOID),
+									0, 0, 0);
+		if (!HeapTupleIsValid(indexTuple))
+			elog(ERROR, "Cache lookup failed for index %u", indexOID);
+		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
+		Assert(indexForm->indexrelid == indexOID);

 		attrs = (IndexAttrs *) palloc(sizeof(IndexAttrs));
-		attrs->indexInfo = BuildIndexInfo(index);
-		attrs->isPrimary = index->indisprimary;
-		attrs->indexOID = index->indexrelid;
+		attrs->indexOID = indexOID;
+		attrs->indexInfo = BuildIndexInfo(indexForm);
+		attrs->classOID = (Oid *)
+			palloc(sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
+		memcpy(attrs->classOID, indexForm->indclass,
+			   sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);

-		/* The opclasses are copied verbatim from the original indexes.
-		*/
-		attrs->classOID = (Oid *)palloc(sizeof(Oid) *
-				attrs->indexInfo->ii_NumIndexAttrs);
-		memcpy(attrs->classOID, index->indclass,
-				sizeof(Oid) * attrs->indexInfo->ii_NumIndexAttrs);
+		/* Name and access method of each index come from pg_class */
+		classTuple = SearchSysCache(RELOID,
+									ObjectIdGetDatum(indexOID),
+									0, 0, 0);
+		if (!HeapTupleIsValid(classTuple))
+			elog(ERROR, "Cache lookup failed for index %u", indexOID);
+		classForm = (Form_pg_class) GETSTRUCT(classTuple);

-		/* Name and access method of each index come from
-		 * RelationRelationName.
-		 */
-		tuple = SearchSysCache(RELOID,
-				ObjectIdGetDatum(attrs->indexOID),
-				0, 0, 0);
-		if (!HeapTupleIsValid(tuple))
-			elog(ERROR, "CLUSTER: cannot find index %u", attrs->indexOID);
-		attrs->indexName = pstrdup(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname));
-		attrs->accessMethodOID = ((Form_pg_class) GETSTRUCT(tuple))->relam;
-		ReleaseSysCache(tuple);
+		attrs->indexName = pstrdup(NameStr(classForm->relname));
+		attrs->accessMethodOID = classForm->relam;
+
+		ReleaseSysCache(classTuple);
+		ReleaseSysCache(indexTuple);

 		/* Cons the gathered data into the list.  We do not care about
 		 * ordering, and this is more efficient than append.
 		 */
-		indexes=lcons((void *)attrs, indexes);
+		indexes = lcons(attrs, indexes);
 	}
-	heap_endscan(scan);
-	heap_close(indexRelation, AccessShareLock);
+
 	return indexes;
 }

-/* Create new indexes and swap the filenodes with old indexes.  Then drop
- * the new index (carrying the old heap along).
+/*
+ * Create new indexes and swap the filenodes with old indexes.  Then drop
+ * the new index (carrying the old index filenode along).
 */
-void
+static void
 recreate_indexattr(Oid OIDOldHeap, List *indexes)
 {
-	IndexAttrs *attrs;
 	List 	   *elem;
-	Oid			newIndexOID;
-	char		newIndexName[NAMEDATALEN];
-	ObjectAddress object;

-	foreach (elem, indexes)
+	foreach(elem, indexes)
 	{
-		attrs=(IndexAttrs *) lfirst(elem);
+		IndexAttrs *attrs = (IndexAttrs *) lfirst(elem);
+		Oid			newIndexOID;
+		char		newIndexName[NAMEDATALEN];
+		ObjectAddress object;

 		/* Create the new index under a temporary name */
-		snprintf(newIndexName, NAMEDATALEN, "temp_%u", attrs->indexOID);
+		snprintf(newIndexName, NAMEDATALEN, "pg_temp_%u", attrs->indexOID);

-		/* The new index will have constraint status set to false,
+		/*
+		 * The new index will have primary and constraint status set to false,
 		 * but since we will only use its filenode it doesn't matter:
 		 * after the filenode swap the index will keep the constraint
 		 * status of the old index.
 		 */
 		newIndexOID = index_create(OIDOldHeap, newIndexName,
 								   attrs->indexInfo, attrs->accessMethodOID,
-								   attrs->classOID, attrs->isPrimary,
+								   attrs->classOID, false,
 								   false, allowSystemTableMods);
 		CommandCounterIncrement();

 		/* Swap the filenodes. */
-		swap_relfilenodes(newIndexOID, attrs->indexOID);
-		setRelhasindex(OIDOldHeap, true, attrs->isPrimary, InvalidOid);
+		swap_relfilenodes(attrs->indexOID, newIndexOID);
+
+		CommandCounterIncrement();

 		/* Destroy new index with old filenode */
 		object.classId = RelOid_pg_class;
 		object.objectId = newIndexOID;
 		object.objectSubId = 0;
 		
-		/* The relation is local to our transaction and we know
+		/*
+		 * The relation is local to our transaction and we know
 		 * nothing depends on it, so DROP_RESTRICT should be OK.
 		 */
 		performDeletion(&object, DROP_RESTRICT);
 		
 		/* performDeletion does CommandCounterIncrement() at its end */
-		
-		pfree(attrs->classOID);
-		pfree(attrs);
 	}
-	freeList(indexes);
 }

-/* Swap the relfilenodes for two given relations.
+/*
+ * Swap the relfilenodes for two given relations.
+ *
+ * Also swap any TOAST links, so that the toast data moves along with
+ * the main-table data.
 */
-void
+static void
 swap_relfilenodes(Oid r1, Oid r2)
 {
-	/* I can probably keep RelationRelationName open in the main
-	 * function and pass the Relation around so I don't have to open
-	 * it every time.
-	 */
 	Relation	relRelation,
 				rel;
-	HeapTuple	reltup[2];
-	Oid			tempRFNode;
+	HeapTuple	reltup1,
+				reltup2;
+	Form_pg_class relform1,
+				relform2;
+	Oid			swaptemp;
 	int			i;
 	CatalogIndexState indstate;

-	/* We need both RelationRelationName tuples.  */
+	/* We need writable copies of both pg_class tuples. */
 	relRelation = heap_openr(RelationRelationName, RowExclusiveLock);

-	reltup[0] = SearchSysCacheCopy(RELOID,
-								   ObjectIdGetDatum(r1),
-								   0, 0, 0);
-	if (!HeapTupleIsValid(reltup[0]))
+	reltup1 = SearchSysCacheCopy(RELOID,
+								 ObjectIdGetDatum(r1),
+								 0, 0, 0);
+	if (!HeapTupleIsValid(reltup1))
 		elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r1);
-	reltup[1] = SearchSysCacheCopy(RELOID,
-								   ObjectIdGetDatum(r2),
-								   0, 0, 0);
-	if (!HeapTupleIsValid(reltup[1]))
-		elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r2);
+	relform1 = (Form_pg_class) GETSTRUCT(reltup1);

-	/* The buffer manager gets confused if we swap relfilenodes for
+	reltup2 = SearchSysCacheCopy(RELOID,
+								 ObjectIdGetDatum(r2),
+								 0, 0, 0);
+	if (!HeapTupleIsValid(reltup2))
+		elog(ERROR, "CLUSTER: Cannot find tuple for relation %u", r2);
+	relform2 = (Form_pg_class) GETSTRUCT(reltup2);
+
+	/*
+	 * The buffer manager gets confused if we swap relfilenodes for
 	 * relations that are not both local or non-local to this transaction.
 	 * Flush the buffers on both relations so the buffer manager can
-	 * forget about'em.
+	 * forget about'em.  (XXX this might not be necessary anymore?)
 	 */
-
-	rel = RelationIdGetRelation(r1);
+	rel = relation_open(r1, NoLock);
 	i = FlushRelationBuffers(rel, 0);
 	if (i < 0)
 		elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i);
-	RelationClose(rel);
-	rel = RelationIdGetRelation(r1);
+	relation_close(rel, NoLock);
+
+	rel = relation_open(r2, NoLock);
 	i = FlushRelationBuffers(rel, 0);
 	if (i < 0)
 		elog(ERROR, "CLUSTER: FlushRelationBuffers returned %d", i);
-	RelationClose(rel);
+	relation_close(rel, NoLock);

-	/* Actually swap the filenodes */
+	/*
+	 * Actually swap the filenode and TOAST fields in the two tuples
+	 */
+	swaptemp = relform1->relfilenode;
+	relform1->relfilenode = relform2->relfilenode;
+	relform2->relfilenode = swaptemp;

-	tempRFNode = ((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode;
-	((Form_pg_class) GETSTRUCT(reltup[0]))->relfilenode =
-		((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode;
-	((Form_pg_class) GETSTRUCT(reltup[1]))->relfilenode = tempRFNode;
+	swaptemp = relform1->reltoastrelid;
+	relform1->reltoastrelid = relform2->reltoastrelid;
+	relform2->reltoastrelid = swaptemp;

-	/* Update the RelationRelationName tuples */
-	simple_heap_update(relRelation, &reltup[1]->t_self, reltup[1]);
-	simple_heap_update(relRelation, &reltup[0]->t_self, reltup[0]);
+	/* we should not swap reltoastidxid */
+
+	/* Update the tuples in pg_class */
+	simple_heap_update(relRelation, &reltup1->t_self, reltup1);
+	simple_heap_update(relRelation, &reltup2->t_self, reltup2);
 	
-	/* To keep system catalogs current. */
+	/* Keep system catalogs current */
 	indstate = CatalogOpenIndexes(relRelation);
-	CatalogIndexInsert(indstate, reltup[1]);
-	CatalogIndexInsert(indstate, reltup[0]);
+	CatalogIndexInsert(indstate, reltup1);
+	CatalogIndexInsert(indstate, reltup2);
 	CatalogCloseIndexes(indstate);

-	heap_close(relRelation, NoLock);
-	heap_freetuple(reltup[0]);
-	heap_freetuple(reltup[1]);
+	/*
+	 * If we have toast tables associated with the relations being swapped,
+	 * change their dependency links to re-associate them with their new
+	 * owning relations.  Otherwise the wrong one will get dropped ...
+	 *
+	 * NOTE: for now, we can assume the new table will have a TOAST table
+	 * if and only if the old one does.  This logic might need work if we
+	 * get smarter about dropped columns.
+	 *
+	 * NOTE: at present, a TOAST table's only dependency is the one on
+	 * its owning table.  If more are ever created, we'd need to use something
+	 * more selective than deleteDependencyRecordsFor() to get rid of only
+	 * the link we want.
+	 */
+	if (relform1->reltoastrelid || relform2->reltoastrelid)
+	{
+		ObjectAddress baseobject,
+					toastobject;
+		long		count;
+
+		if (!(relform1->reltoastrelid && relform2->reltoastrelid))
+			elog(ERROR, "CLUSTER: expected both swapped tables to have TOAST tables");
+
+		/* Delete old dependencies */
+		count = deleteDependencyRecordsFor(RelOid_pg_class,
+										   relform1->reltoastrelid);
+		if (count != 1)
+			elog(ERROR, "CLUSTER: expected one dependency record for TOAST table, found %ld",
+				 count);
+		count = deleteDependencyRecordsFor(RelOid_pg_class,
+										   relform2->reltoastrelid);
+		if (count != 1)
+			elog(ERROR, "CLUSTER: expected one dependency record for TOAST table, found %ld",
+				 count);
+
+		/* Register new dependencies */
+		baseobject.classId = RelOid_pg_class;
+		baseobject.objectId = r1;
+		baseobject.objectSubId = 0;
+		toastobject.classId = RelOid_pg_class;
+		toastobject.objectId = relform1->reltoastrelid;
+		toastobject.objectSubId = 0;
+
+		recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
+
+		baseobject.objectId = r2;
+		toastobject.objectId = relform2->reltoastrelid;
+
+		recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
+	}
+
+	/*
+	 * Blow away the old relcache entries now.  We need this kluge because
+	 * relcache.c indexes relcache entries by rd_node as well as OID.
+	 * It will get confused if it is asked to (re)build an entry with a new
+	 * rd_node value when there is still another entry laying about with
+	 * that same rd_node value.  (Fortunately, since one of the entries
+	 * is local in our transaction, it's sufficient to clear out our own
+	 * relcache this way; the problem cannot arise for other backends when
+	 * they see our update on the non-local relation.)
+	 */
+	RelationForgetRelation(r1);
+	RelationForgetRelation(r2);
+
+	/* Clean up. */
+	heap_freetuple(reltup1);
+	heap_freetuple(reltup2);
+
+	heap_close(relRelation, RowExclusiveLock);
 }