1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-31 17:02:12 +03:00

tableam: relation creation, VACUUM FULL/CLUSTER, SET TABLESPACE.

This moves the responsibility for:
- creating the storage necessary for a relation, including creating a
  new relfilenode for a relation with existing storage
- non-transactional truncation of a relation
- VACUUM FULL / CLUSTER's rewrite of a table
below tableam.

This is fairly straight forward, with a bit of complexity smattered in
to move the computation of xid / multixid horizons below the AM, as
they don't make sense for every table AM.

Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
This commit is contained in:
Andres Freund
2019-03-28 20:01:14 -07:00
parent 7e69323bf7
commit d25f519107
13 changed files with 856 additions and 579 deletions

View File

@@ -21,7 +21,6 @@
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/relscan.h"
#include "access/rewriteheap.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/tuptoaster.h"
@@ -45,7 +44,6 @@
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/fmgroids.h"
#include "utils/inval.h"
@@ -71,14 +69,10 @@ typedef struct
static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
bool verbose, bool *pSwapToastByContent,
TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
static List *get_tables_to_cluster(MemoryContext cluster_context);
static void reform_and_rewrite_tuple(HeapTuple tuple,
TupleDesc oldTupDesc, TupleDesc newTupDesc,
Datum *values, bool *isnull,
RewriteState rwstate);
/*---------------------------------------------------------------------------
@@ -619,7 +613,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
AccessExclusiveLock);
/* Copy the heap data into the new table in the desired order */
copy_heap_data(OIDNewHeap, tableOid, indexOid, verbose,
copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
&swap_toast_by_content, &frozenXid, &cutoffMulti);
/*
@@ -762,7 +756,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
}
/*
* Do the physical copying of heap data.
* Do the physical copying of table data.
*
* There are three output parameters:
* *pSwapToastByContent is set true if toast tables must be swapped by content.
@@ -770,9 +764,9 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
* *pCutoffMulti receives the MultiXactId used as a cutoff point.
*/
static void
copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
bool *pSwapToastByContent, TransactionId *pFreezeXid,
MultiXactId *pCutoffMulti)
copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
bool *pSwapToastByContent, TransactionId *pFreezeXid,
MultiXactId *pCutoffMulti)
{
Relation NewHeap,
OldHeap,
@@ -780,30 +774,18 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
Relation relRelation;
HeapTuple reltup;
Form_pg_class relform;
TupleDesc oldTupDesc;
TupleDesc newTupDesc;
int natts;
Datum *values;
bool *isnull;
IndexScanDesc indexScan;
TableScanDesc tableScan;
HeapScanDesc heapScan;
bool use_wal;
bool is_system_catalog;
TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY;
TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY;
TransactionId OldestXmin;
TransactionId FreezeXid;
MultiXactId MultiXactCutoff;
RewriteState rwstate;
bool use_sort;
Tuplesortstate *tuplesort;
double num_tuples = 0,
tups_vacuumed = 0,
tups_recently_dead = 0;
BlockNumber num_pages;
int elevel = verbose ? INFO : DEBUG2;
PGRUsage ru0;
TupleTableSlot *slot;
BufferHeapTupleTableSlot *hslot;
pg_rusage_init(&ru0);
@@ -825,11 +807,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
newTupDesc = RelationGetDescr(NewHeap);
Assert(newTupDesc->natts == oldTupDesc->natts);
/* Preallocate values/isnull arrays */
natts = newTupDesc->natts;
values = (Datum *) palloc(natts * sizeof(Datum));
isnull = (bool *) palloc(natts * sizeof(bool));
/*
* If the OldHeap has a toast table, get lock on the toast table to keep
* it from being vacuumed. This is needed because autovacuum processes
@@ -846,15 +823,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
if (OldHeap->rd_rel->reltoastrelid)
LockRelationOid(OldHeap->rd_rel->reltoastrelid, AccessExclusiveLock);
/*
* We need to log the copied data in WAL iff WAL archiving/streaming is
* enabled AND it's a WAL-logged rel.
*/
use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
/* use_wal off requires smgr_targblock be initially invalid */
Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
/*
* If both tables have TOAST tables, perform toast swap by content. It is
* possible that the old table has a toast table but the new one doesn't,
@@ -915,13 +883,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
*pFreezeXid = FreezeXid;
*pCutoffMulti = MultiXactCutoff;
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(OldHeap);
/* Initialize the rewrite operation */
rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, FreezeXid,
MultiXactCutoff, use_wal);
/*
* Decide whether to use an indexscan or seqscan-and-optional-sort to scan
* the OldHeap. We know how to use a sort to duplicate the ordering of a
@@ -934,63 +895,14 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
else
use_sort = false;
/* Set up sorting if wanted */
if (use_sort)
tuplesort = tuplesort_begin_cluster(oldTupDesc, OldIndex,
maintenance_work_mem,
NULL, false);
else
tuplesort = NULL;
/*
* Prepare to scan the OldHeap. To ensure we see recently-dead tuples
* that still need to be copied, we scan with SnapshotAny and use
* HeapTupleSatisfiesVacuum for the visibility test.
*/
if (OldIndex != NULL && !use_sort)
{
const int ci_index[] = {
PROGRESS_CLUSTER_PHASE,
PROGRESS_CLUSTER_INDEX_RELID
};
int64 ci_val[2];
/* Set phase and OIDOldIndex to columns */
ci_val[0] = PROGRESS_CLUSTER_PHASE_INDEX_SCAN_HEAP;
ci_val[1] = OIDOldIndex;
pgstat_progress_update_multi_param(2, ci_index, ci_val);
tableScan = NULL;
heapScan = NULL;
indexScan = index_beginscan(OldHeap, OldIndex, SnapshotAny, 0, 0);
index_rescan(indexScan, NULL, 0, NULL, 0);
}
else
{
/* In scan-and-sort mode and also VACUUM FULL, set phase */
pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
PROGRESS_CLUSTER_PHASE_SEQ_SCAN_HEAP);
tableScan = table_beginscan(OldHeap, SnapshotAny, 0, (ScanKey) NULL);
heapScan = (HeapScanDesc) tableScan;
indexScan = NULL;
/* Set total heap blocks */
pgstat_progress_update_param(PROGRESS_CLUSTER_TOTAL_HEAP_BLKS,
heapScan->rs_nblocks);
}
slot = table_slot_create(OldHeap, NULL);
hslot = (BufferHeapTupleTableSlot *) slot;
/* Log what we're doing */
if (indexScan != NULL)
if (OldIndex != NULL && !use_sort)
ereport(elevel,
(errmsg("clustering \"%s.%s\" using index scan on \"%s\"",
get_namespace_name(RelationGetNamespace(OldHeap)),
RelationGetRelationName(OldHeap),
RelationGetRelationName(OldIndex))));
else if (tuplesort != NULL)
else if (use_sort)
ereport(elevel,
(errmsg("clustering \"%s.%s\" using sequential scan and sort",
get_namespace_name(RelationGetNamespace(OldHeap)),
@@ -1002,188 +914,13 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
RelationGetRelationName(OldHeap))));
/*
* Scan through the OldHeap, either in OldIndex order or sequentially;
* copy each tuple into the NewHeap, or transiently to the tuplesort
* module. Note that we don't bother sorting dead tuples (they won't get
* to the new table anyway).
* Hand of the actual copying to AM specific function, the generic code
* cannot know how to deal with visibility across AMs.
*/
for (;;)
{
HeapTuple tuple;
Buffer buf;
bool isdead;
CHECK_FOR_INTERRUPTS();
if (indexScan != NULL)
{
if (!index_getnext_slot(indexScan, ForwardScanDirection, slot))
break;
/* Since we used no scan keys, should never need to recheck */
if (indexScan->xs_recheck)
elog(ERROR, "CLUSTER does not support lossy index conditions");
tuple = hslot->base.tuple;
buf = hslot->buffer;
}
else
{
tuple = heap_getnext(tableScan, ForwardScanDirection);
if (tuple == NULL)
break;
buf = heapScan->rs_cbuf;
/* In scan-and-sort mode and also VACUUM FULL, set heap blocks scanned */
pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_BLKS_SCANNED,
heapScan->rs_cblock + 1);
}
LockBuffer(buf, BUFFER_LOCK_SHARE);
switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
{
case HEAPTUPLE_DEAD:
/* Definitely dead */
isdead = true;
break;
case HEAPTUPLE_RECENTLY_DEAD:
tups_recently_dead += 1;
/* fall through */
case HEAPTUPLE_LIVE:
/* Live or recently dead, must copy it */
isdead = false;
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
/*
* Since we hold exclusive lock on the relation, normally the
* only way to see this is if it was inserted earlier in our
* own transaction. However, it can happen in system
* catalogs, since we tend to release write lock before commit
* there. Give a warning if neither case applies; but in any
* case we had better copy it.
*/
if (!is_system_catalog &&
!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
elog(WARNING, "concurrent insert in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as live */
isdead = false;
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
/*
* Similar situation to INSERT_IN_PROGRESS case.
*/
if (!is_system_catalog &&
!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data)))
elog(WARNING, "concurrent delete in progress within table \"%s\"",
RelationGetRelationName(OldHeap));
/* treat as recently dead */
tups_recently_dead += 1;
isdead = false;
break;
default:
elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
isdead = false; /* keep compiler quiet */
break;
}
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
if (isdead)
{
tups_vacuumed += 1;
/* heap rewrite module still needs to see it... */
if (rewrite_heap_dead_tuple(rwstate, tuple))
{
/* A previous recently-dead tuple is now known dead */
tups_vacuumed += 1;
tups_recently_dead -= 1;
}
continue;
}
num_tuples += 1;
if (tuplesort != NULL)
{
tuplesort_putheaptuple(tuplesort, tuple);
/* In scan-and-sort mode, report increase in number of tuples scanned */
pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
num_tuples);
}
else
{
const int ct_index[] = {
PROGRESS_CLUSTER_HEAP_TUPLES_SCANNED,
PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN
};
int64 ct_val[2];
reform_and_rewrite_tuple(tuple,
oldTupDesc, newTupDesc,
values, isnull,
rwstate);
/* In indexscan mode and also VACUUM FULL, report increase in number of tuples scanned and written */
ct_val[0] = num_tuples;
ct_val[1] = num_tuples;
pgstat_progress_update_multi_param(2, ct_index, ct_val);
}
}
if (indexScan != NULL)
index_endscan(indexScan);
if (heapScan != NULL)
table_endscan(tableScan);
if (slot)
ExecDropSingleTupleTableSlot(slot);
/*
* In scan-and-sort mode, complete the sort, then read out all live tuples
* from the tuplestore and write them to the new relation.
*/
if (tuplesort != NULL)
{
double n_tuples = 0;
/* Report that we are now sorting tuples */
pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
PROGRESS_CLUSTER_PHASE_SORT_TUPLES);
tuplesort_performsort(tuplesort);
/* Report that we are now writing new heap */
pgstat_progress_update_param(PROGRESS_CLUSTER_PHASE,
PROGRESS_CLUSTER_PHASE_WRITE_NEW_HEAP);
for (;;)
{
HeapTuple tuple;
CHECK_FOR_INTERRUPTS();
tuple = tuplesort_getheaptuple(tuplesort, true);
if (tuple == NULL)
break;
n_tuples += 1;
reform_and_rewrite_tuple(tuple,
oldTupDesc, newTupDesc,
values, isnull,
rwstate);
/* Report n_tuples */
pgstat_progress_update_param(PROGRESS_CLUSTER_HEAP_TUPLES_WRITTEN,
n_tuples);
}
tuplesort_end(tuplesort);
}
/* Write out any remaining tuples, and fsync if needed */
end_heap_rewrite(rwstate);
table_relation_copy_for_cluster(OldHeap, NewHeap, OldIndex, use_sort,
OldestXmin, FreezeXid, MultiXactCutoff,
&num_tuples, &tups_vacuumed,
&tups_recently_dead);
/* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
NewHeap->rd_toastoid = InvalidOid;
@@ -1201,10 +938,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
tups_recently_dead,
pg_rusage_show(&ru0))));
/* Clean up */
pfree(values);
pfree(isnull);
if (OldIndex != NULL)
index_close(OldIndex, NoLock);
table_close(OldHeap, NoLock);
@@ -1839,46 +1572,3 @@ get_tables_to_cluster(MemoryContext cluster_context)
return rvs;
}
/*
* Reconstruct and rewrite the given tuple
*
* We cannot simply copy the tuple as-is, for several reasons:
*
* 1. We'd like to squeeze out the values of any dropped columns, both
* to save space and to ensure we have no corner-case failures. (It's
* possible for example that the new table hasn't got a TOAST table
* and so is unable to store any large values of dropped cols.)
*
* 2. The tuple might not even be legal for the new table; this is
* currently only known to happen as an after-effect of ALTER TABLE
* SET WITHOUT OIDS (in an older version, via pg_upgrade).
*
* So, we must reconstruct the tuple from component Datums.
*/
static void
reform_and_rewrite_tuple(HeapTuple tuple,
TupleDesc oldTupDesc, TupleDesc newTupDesc,
Datum *values, bool *isnull,
RewriteState rwstate)
{
HeapTuple copiedTuple;
int i;
heap_deform_tuple(tuple, oldTupDesc, values, isnull);
/* Be sure to null out any dropped columns */
for (i = 0; i < newTupDesc->natts; i++)
{
if (TupleDescAttr(newTupDesc, i)->attisdropped)
isnull[i] = true;
}
copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
/* The heap rewrite module does the rest */
rewrite_heap_tuple(rwstate, tuple, copiedTuple);
heap_freetuple(copiedTuple);
}

View File

@@ -312,12 +312,17 @@ ResetSequence(Oid seq_relid)
seq->log_cnt = 0;
/*
* Create a new storage file for the sequence. We want to keep the
* sequence's relfrozenxid at 0, since it won't contain any unfrozen XIDs.
* Same with relminmxid, since a sequence will never contain multixacts.
* Create a new storage file for the sequence.
*/
RelationSetNewRelfilenode(seq_rel, seq_rel->rd_rel->relpersistence,
InvalidTransactionId, InvalidMultiXactId);
RelationSetNewRelfilenode(seq_rel, seq_rel->rd_rel->relpersistence);
/*
* Ensure sequence's relfrozenxid is at 0, since it won't contain any
* unfrozen XIDs. Same with relminmxid, since a sequence will never
* contain multixacts.
*/
Assert(seq_rel->rd_rel->relfrozenxid == InvalidTransactionId);
Assert(seq_rel->rd_rel->relminmxid == InvalidMultiXactId);
/*
* Insert the modified tuple into the new storage file.
@@ -482,12 +487,17 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
/*
* Create a new storage file for the sequence, making the state
* changes transactional. We want to keep the sequence's relfrozenxid
* at 0, since it won't contain any unfrozen XIDs. Same with
* relminmxid, since a sequence will never contain multixacts.
* changes transactional.
*/
RelationSetNewRelfilenode(seqrel, seqrel->rd_rel->relpersistence,
InvalidTransactionId, InvalidMultiXactId);
RelationSetNewRelfilenode(seqrel, seqrel->rd_rel->relpersistence);
/*
* Ensure sequence's relfrozenxid is at 0, since it won't contain any
* unfrozen XIDs. Same with relminmxid, since a sequence will never
* contain multixacts.
*/
Assert(seqrel->rd_rel->relfrozenxid == InvalidTransactionId);
Assert(seqrel->rd_rel->relminmxid == InvalidMultiXactId);
/*
* Insert the modified tuple into the new storage file.

View File

@@ -20,6 +20,7 @@
#include "access/multixact.h"
#include "access/reloptions.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/sysattr.h"
#include "access/tableam.h"
#include "access/tupconvert.h"
@@ -473,8 +474,7 @@ static void ATExecEnableRowSecurity(Relation rel);
static void ATExecDisableRowSecurity(Relation rel);
static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls);
static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
ForkNumber forkNum, char relpersistence);
static void index_copy_data(Relation rel, RelFileNode newrnode);
static const char *storage_name(char c);
static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
@@ -1697,7 +1697,6 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
{
Oid heap_relid;
Oid toast_relid;
MultiXactId minmulti;
/*
* This effectively deletes all rows in the table, and may be done
@@ -1707,8 +1706,6 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
*/
CheckTableForSerializableConflictIn(rel);
minmulti = GetOldestMultiXactId();
/*
* Need the full transaction-safe pushups.
*
@@ -1716,10 +1713,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
* as the relfilenode value. The old storage file is scheduled for
* deletion at commit.
*/
RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence,
RecentXmin, minmulti);
if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
heap_create_init_fork(rel);
RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence);
heap_relid = RelationGetRelid(rel);
@@ -1731,12 +1725,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
{
Relation toastrel = relation_open(toast_relid,
AccessExclusiveLock);
RelationSetNewRelfilenode(toastrel,
toastrel->rd_rel->relpersistence,
RecentXmin, minmulti);
if (toastrel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
heap_create_init_fork(toastrel);
toastrel->rd_rel->relpersistence);
table_close(toastrel, NoLock);
}
@@ -4928,13 +4918,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
/* Write the tuple out to the new relation */
if (newrel)
{
HeapTuple tuple;
tuple = ExecFetchSlotHeapTuple(newslot, true, NULL);
heap_insert(newrel, tuple, mycid, hi_options, bistate);
ItemPointerCopy(&tuple->t_self, &newslot->tts_tid);
}
table_insert(newrel, insertslot, mycid, hi_options, bistate);
ResetExprContext(econtext);
@@ -11492,11 +11476,9 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
Oid reltoastrelid;
Oid newrelfilenode;
RelFileNode newrnode;
SMgrRelation dstrel;
Relation pg_class;
HeapTuple tuple;
Form_pg_class rd_rel;
ForkNumber forkNum;
List *reltoastidxids = NIL;
ListCell *lc;
@@ -11581,46 +11563,19 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
newrnode = rel->rd_node;
newrnode.relNode = newrelfilenode;
newrnode.spcNode = newTableSpace;
dstrel = smgropen(newrnode, rel->rd_backend);
RelationOpenSmgr(rel);
/*
* Create and copy all forks of the relation, and schedule unlinking of
* old physical files.
*
* NOTE: any conflict in relfilenode value will be caught in
* RelationCreateStorage().
*/
RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);
/* copy main fork */
copy_relation_data(rel->rd_smgr, dstrel, MAIN_FORKNUM,
rel->rd_rel->relpersistence);
/* copy those extra forks that exist */
for (forkNum = MAIN_FORKNUM + 1; forkNum <= MAX_FORKNUM; forkNum++)
/* hand off to AM to actually create the new filenode and copy the data */
if (rel->rd_rel->relkind == RELKIND_INDEX)
{
if (smgrexists(rel->rd_smgr, forkNum))
{
smgrcreate(dstrel, forkNum, false);
/*
* WAL log creation if the relation is persistent, or this is the
* init fork of an unlogged relation.
*/
if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
forkNum == INIT_FORKNUM))
log_smgrcreate(&newrnode, forkNum);
copy_relation_data(rel->rd_smgr, dstrel, forkNum,
rel->rd_rel->relpersistence);
}
index_copy_data(rel, newrnode);
}
else
{
Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
rel->rd_rel->relkind == RELKIND_MATVIEW ||
rel->rd_rel->relkind == RELKIND_TOASTVALUE);
table_relation_copy_data(rel, newrnode);
}
/* drop old relation, and close new one */
RelationDropStorage(rel);
smgrclose(dstrel);
/* update the pg_class row */
rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace;
@@ -11882,90 +11837,51 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
return new_tablespaceoid;
}
/*
* Copy data, block by block
*/
static void
copy_relation_data(SMgrRelation src, SMgrRelation dst,
ForkNumber forkNum, char relpersistence)
index_copy_data(Relation rel, RelFileNode newrnode)
{
PGAlignedBlock buf;
Page page;
bool use_wal;
bool copying_initfork;
BlockNumber nblocks;
BlockNumber blkno;
SMgrRelation dstrel;
page = (Page) buf.data;
dstrel = smgropen(newrnode, rel->rd_backend);
RelationOpenSmgr(rel);
/*
* The init fork for an unlogged relation in many respects has to be
* treated the same as normal relation, changes need to be WAL logged and
* it needs to be synced to disk.
* Create and copy all forks of the relation, and schedule unlinking of
* old physical files.
*
* NOTE: any conflict in relfilenode value will be caught in
* RelationCreateStorage().
*/
copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
forkNum == INIT_FORKNUM;
RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);
/*
* We need to log the copied data in WAL iff WAL archiving/streaming is
* enabled AND it's a permanent relation.
*/
use_wal = XLogIsNeeded() &&
(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
/* copy main fork */
RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM,
rel->rd_rel->relpersistence);
nblocks = smgrnblocks(src, forkNum);
for (blkno = 0; blkno < nblocks; blkno++)
/* copy those extra forks that exist */
for (ForkNumber forkNum = MAIN_FORKNUM + 1;
forkNum <= MAX_FORKNUM; forkNum++)
{
/* If we got a cancel signal during the copy of the data, quit */
CHECK_FOR_INTERRUPTS();
if (smgrexists(rel->rd_smgr, forkNum))
{
smgrcreate(dstrel, forkNum, false);
smgrread(src, forkNum, blkno, buf.data);
if (!PageIsVerified(page, blkno))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid page in block %u of relation %s",
blkno,
relpathbackend(src->smgr_rnode.node,
src->smgr_rnode.backend,
forkNum))));
/*
* WAL-log the copied page. Unfortunately we don't know what kind of a
* page this is, so we have to log the full page including any unused
* space.
*/
if (use_wal)
log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
PageSetChecksumInplace(page, blkno);
/*
* Now write the page. We say isTemp = true even if it's not a temp
* rel, because there's no need for smgr to schedule an fsync for this
* write; we'll do it ourselves below.
*/
smgrextend(dst, forkNum, blkno, buf.data, true);
/*
* WAL log creation if the relation is persistent, or this is the
* init fork of an unlogged relation.
*/
if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
forkNum == INIT_FORKNUM))
log_smgrcreate(&newrnode, forkNum);
RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
rel->rd_rel->relpersistence);
}
}
/*
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
* to ensure that the toast table gets fsync'd too. (For a temp or
* unlogged rel we don't care since the data will be gone after a crash
* anyway.)
*
* It's obvious that we must do this when not WAL-logging the copy. It's
* less obvious that we have to do it even if we did WAL-log the copied
* pages. The reason is that since we're copying outside shared buffers, a
* CHECKPOINT occurring during the copy has no way to flush the previously
* written data to disk (indeed it won't know the new rel even exists). A
* crash later on would replay WAL from the checkpoint, therefore it
* wouldn't replay our earlier WAL entries. If we do not fsync those pages
* here, they might still not be on disk when the crash occurs.
*/
if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
smgrimmedsync(dst, forkNum);
/* drop old relation, and close new one */
RelationDropStorage(rel);
smgrclose(dstrel);
}
/*