mirror of
https://github.com/postgres/postgres.git
synced 2025-06-19 04:21:08 +03:00
tableam: relation creation, VACUUM FULL/CLUSTER, SET TABLESPACE.
This moves the responsibility for:
- creating the storage necessary for a relation, including creating a new relfilenode for a relation with existing storage
- non-transactional truncation of a relation
- VACUUM FULL / CLUSTER's rewrite of a table
below tableam.

This is fairly straightforward, with a bit of complexity smattered in to move the computation of xid / multixid horizons below the AM, as they don't make sense for every table AM.

Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
This commit is contained in:
@ -20,6 +20,7 @@
|
||||
#include "access/multixact.h"
|
||||
#include "access/reloptions.h"
|
||||
#include "access/relscan.h"
|
||||
#include "access/tableam.h"
|
||||
#include "access/sysattr.h"
|
||||
#include "access/tableam.h"
|
||||
#include "access/tupconvert.h"
|
||||
@ -473,8 +474,7 @@ static void ATExecEnableRowSecurity(Relation rel);
|
||||
static void ATExecDisableRowSecurity(Relation rel);
|
||||
static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls);
|
||||
|
||||
static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
|
||||
ForkNumber forkNum, char relpersistence);
|
||||
static void index_copy_data(Relation rel, RelFileNode newrnode);
|
||||
static const char *storage_name(char c);
|
||||
|
||||
static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
|
||||
@ -1697,7 +1697,6 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
|
||||
{
|
||||
Oid heap_relid;
|
||||
Oid toast_relid;
|
||||
MultiXactId minmulti;
|
||||
|
||||
/*
|
||||
* This effectively deletes all rows in the table, and may be done
|
||||
@ -1707,8 +1706,6 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
|
||||
*/
|
||||
CheckTableForSerializableConflictIn(rel);
|
||||
|
||||
minmulti = GetOldestMultiXactId();
|
||||
|
||||
/*
|
||||
* Need the full transaction-safe pushups.
|
||||
*
|
||||
@ -1716,10 +1713,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
|
||||
* as the relfilenode value. The old storage file is scheduled for
|
||||
* deletion at commit.
|
||||
*/
|
||||
RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence,
|
||||
RecentXmin, minmulti);
|
||||
if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
|
||||
heap_create_init_fork(rel);
|
||||
RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence);
|
||||
|
||||
heap_relid = RelationGetRelid(rel);
|
||||
|
||||
@ -1731,12 +1725,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged,
|
||||
{
|
||||
Relation toastrel = relation_open(toast_relid,
|
||||
AccessExclusiveLock);
|
||||
|
||||
RelationSetNewRelfilenode(toastrel,
|
||||
toastrel->rd_rel->relpersistence,
|
||||
RecentXmin, minmulti);
|
||||
if (toastrel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
|
||||
heap_create_init_fork(toastrel);
|
||||
toastrel->rd_rel->relpersistence);
|
||||
table_close(toastrel, NoLock);
|
||||
}
|
||||
|
||||
@ -4928,13 +4918,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
|
||||
|
||||
/* Write the tuple out to the new relation */
|
||||
if (newrel)
|
||||
{
|
||||
HeapTuple tuple;
|
||||
|
||||
tuple = ExecFetchSlotHeapTuple(newslot, true, NULL);
|
||||
heap_insert(newrel, tuple, mycid, hi_options, bistate);
|
||||
ItemPointerCopy(&tuple->t_self, &newslot->tts_tid);
|
||||
}
|
||||
table_insert(newrel, insertslot, mycid, hi_options, bistate);
|
||||
|
||||
ResetExprContext(econtext);
|
||||
|
||||
@ -11492,11 +11476,9 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
|
||||
Oid reltoastrelid;
|
||||
Oid newrelfilenode;
|
||||
RelFileNode newrnode;
|
||||
SMgrRelation dstrel;
|
||||
Relation pg_class;
|
||||
HeapTuple tuple;
|
||||
Form_pg_class rd_rel;
|
||||
ForkNumber forkNum;
|
||||
List *reltoastidxids = NIL;
|
||||
ListCell *lc;
|
||||
|
||||
@ -11581,46 +11563,19 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
|
||||
newrnode = rel->rd_node;
|
||||
newrnode.relNode = newrelfilenode;
|
||||
newrnode.spcNode = newTableSpace;
|
||||
dstrel = smgropen(newrnode, rel->rd_backend);
|
||||
|
||||
RelationOpenSmgr(rel);
|
||||
|
||||
/*
|
||||
* Create and copy all forks of the relation, and schedule unlinking of
|
||||
* old physical files.
|
||||
*
|
||||
* NOTE: any conflict in relfilenode value will be caught in
|
||||
* RelationCreateStorage().
|
||||
*/
|
||||
RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);
|
||||
|
||||
/* copy main fork */
|
||||
copy_relation_data(rel->rd_smgr, dstrel, MAIN_FORKNUM,
|
||||
rel->rd_rel->relpersistence);
|
||||
|
||||
/* copy those extra forks that exist */
|
||||
for (forkNum = MAIN_FORKNUM + 1; forkNum <= MAX_FORKNUM; forkNum++)
|
||||
/* hand off to AM to actually create the new filenode and copy the data */
|
||||
if (rel->rd_rel->relkind == RELKIND_INDEX)
|
||||
{
|
||||
if (smgrexists(rel->rd_smgr, forkNum))
|
||||
{
|
||||
smgrcreate(dstrel, forkNum, false);
|
||||
|
||||
/*
|
||||
* WAL log creation if the relation is persistent, or this is the
|
||||
* init fork of an unlogged relation.
|
||||
*/
|
||||
if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
|
||||
(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
|
||||
forkNum == INIT_FORKNUM))
|
||||
log_smgrcreate(&newrnode, forkNum);
|
||||
copy_relation_data(rel->rd_smgr, dstrel, forkNum,
|
||||
rel->rd_rel->relpersistence);
|
||||
}
|
||||
index_copy_data(rel, newrnode);
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
|
||||
rel->rd_rel->relkind == RELKIND_MATVIEW ||
|
||||
rel->rd_rel->relkind == RELKIND_TOASTVALUE);
|
||||
table_relation_copy_data(rel, newrnode);
|
||||
}
|
||||
|
||||
/* drop old relation, and close new one */
|
||||
RelationDropStorage(rel);
|
||||
smgrclose(dstrel);
|
||||
|
||||
/* update the pg_class row */
|
||||
rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace;
|
||||
@ -11882,90 +11837,51 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
|
||||
return new_tablespaceoid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy data, block by block
|
||||
*/
|
||||
static void
|
||||
copy_relation_data(SMgrRelation src, SMgrRelation dst,
|
||||
ForkNumber forkNum, char relpersistence)
|
||||
index_copy_data(Relation rel, RelFileNode newrnode)
|
||||
{
|
||||
PGAlignedBlock buf;
|
||||
Page page;
|
||||
bool use_wal;
|
||||
bool copying_initfork;
|
||||
BlockNumber nblocks;
|
||||
BlockNumber blkno;
|
||||
SMgrRelation dstrel;
|
||||
|
||||
page = (Page) buf.data;
|
||||
dstrel = smgropen(newrnode, rel->rd_backend);
|
||||
RelationOpenSmgr(rel);
|
||||
|
||||
/*
|
||||
* The init fork for an unlogged relation in many respects has to be
|
||||
* treated the same as normal relation, changes need to be WAL logged and
|
||||
* it needs to be synced to disk.
|
||||
* Create and copy all forks of the relation, and schedule unlinking of
|
||||
* old physical files.
|
||||
*
|
||||
* NOTE: any conflict in relfilenode value will be caught in
|
||||
* RelationCreateStorage().
|
||||
*/
|
||||
copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
|
||||
forkNum == INIT_FORKNUM;
|
||||
RelationCreateStorage(newrnode, rel->rd_rel->relpersistence);
|
||||
|
||||
/*
|
||||
* We need to log the copied data in WAL iff WAL archiving/streaming is
|
||||
* enabled AND it's a permanent relation.
|
||||
*/
|
||||
use_wal = XLogIsNeeded() &&
|
||||
(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
|
||||
/* copy main fork */
|
||||
RelationCopyStorage(rel->rd_smgr, dstrel, MAIN_FORKNUM,
|
||||
rel->rd_rel->relpersistence);
|
||||
|
||||
nblocks = smgrnblocks(src, forkNum);
|
||||
|
||||
for (blkno = 0; blkno < nblocks; blkno++)
|
||||
/* copy those extra forks that exist */
|
||||
for (ForkNumber forkNum = MAIN_FORKNUM + 1;
|
||||
forkNum <= MAX_FORKNUM; forkNum++)
|
||||
{
|
||||
/* If we got a cancel signal during the copy of the data, quit */
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
if (smgrexists(rel->rd_smgr, forkNum))
|
||||
{
|
||||
smgrcreate(dstrel, forkNum, false);
|
||||
|
||||
smgrread(src, forkNum, blkno, buf.data);
|
||||
|
||||
if (!PageIsVerified(page, blkno))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_CORRUPTED),
|
||||
errmsg("invalid page in block %u of relation %s",
|
||||
blkno,
|
||||
relpathbackend(src->smgr_rnode.node,
|
||||
src->smgr_rnode.backend,
|
||||
forkNum))));
|
||||
|
||||
/*
|
||||
* WAL-log the copied page. Unfortunately we don't know what kind of a
|
||||
* page this is, so we have to log the full page including any unused
|
||||
* space.
|
||||
*/
|
||||
if (use_wal)
|
||||
log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
|
||||
|
||||
PageSetChecksumInplace(page, blkno);
|
||||
|
||||
/*
|
||||
* Now write the page. We say isTemp = true even if it's not a temp
|
||||
* rel, because there's no need for smgr to schedule an fsync for this
|
||||
* write; we'll do it ourselves below.
|
||||
*/
|
||||
smgrextend(dst, forkNum, blkno, buf.data, true);
|
||||
/*
|
||||
* WAL log creation if the relation is persistent, or this is the
|
||||
* init fork of an unlogged relation.
|
||||
*/
|
||||
if (rel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT ||
|
||||
(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
|
||||
forkNum == INIT_FORKNUM))
|
||||
log_smgrcreate(&newrnode, forkNum);
|
||||
RelationCopyStorage(rel->rd_smgr, dstrel, forkNum,
|
||||
rel->rd_rel->relpersistence);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
|
||||
* to ensure that the toast table gets fsync'd too. (For a temp or
|
||||
* unlogged rel we don't care since the data will be gone after a crash
|
||||
* anyway.)
|
||||
*
|
||||
* It's obvious that we must do this when not WAL-logging the copy. It's
|
||||
* less obvious that we have to do it even if we did WAL-log the copied
|
||||
* pages. The reason is that since we're copying outside shared buffers, a
|
||||
* CHECKPOINT occurring during the copy has no way to flush the previously
|
||||
* written data to disk (indeed it won't know the new rel even exists). A
|
||||
* crash later on would replay WAL from the checkpoint, therefore it
|
||||
* wouldn't replay our earlier WAL entries. If we do not fsync those pages
|
||||
* here, they might still not be on disk when the crash occurs.
|
||||
*/
|
||||
if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
|
||||
smgrimmedsync(dst, forkNum);
|
||||
/* drop old relation, and close new one */
|
||||
RelationDropStorage(rel);
|
||||
smgrclose(dstrel);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user