1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-24 00:23:06 +03:00

tableam: relation creation, VACUUM FULL/CLUSTER, SET TABLESPACE.

This moves the responsibility for:
- creating the storage necessary for a relation, including creating a
  new relfilenode for a relation with existing storage
- non-transactional truncation of a relation
- VACUUM FULL / CLUSTER's rewrite of a table
below tableam.

This is fairly straight forward, with a bit of complexity smattered in
to move the computation of xid / multixid horizons below the AM, as
they don't make sense for every table AM.

Author: Andres Freund
Discussion: https://postgr.es/m/20180703070645.wchpu5muyto5n647@alap3.anarazel.de
This commit is contained in:
Andres Freund
2019-03-28 20:01:14 -07:00
parent 7e69323bf7
commit d25f519107
13 changed files with 856 additions and 579 deletions

View File

@@ -19,6 +19,8 @@
#include "postgres.h"
#include "miscadmin.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
@@ -290,6 +292,92 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks);
}
/*
* Copy a fork's data, block by block.
*/
void
RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
ForkNumber forkNum, char relpersistence)
{
PGAlignedBlock buf;
Page page;
bool use_wal;
bool copying_initfork;
BlockNumber nblocks;
BlockNumber blkno;
page = (Page) buf.data;
/*
* The init fork for an unlogged relation in many respects has to be
* treated the same as normal relation, changes need to be WAL logged and
* it needs to be synced to disk.
*/
copying_initfork = relpersistence == RELPERSISTENCE_UNLOGGED &&
forkNum == INIT_FORKNUM;
/*
* We need to log the copied data in WAL iff WAL archiving/streaming is
* enabled AND it's a permanent relation.
*/
use_wal = XLogIsNeeded() &&
(relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
nblocks = smgrnblocks(src, forkNum);
for (blkno = 0; blkno < nblocks; blkno++)
{
/* If we got a cancel signal during the copy of the data, quit */
CHECK_FOR_INTERRUPTS();
smgrread(src, forkNum, blkno, buf.data);
if (!PageIsVerified(page, blkno))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid page in block %u of relation %s",
blkno,
relpathbackend(src->smgr_rnode.node,
src->smgr_rnode.backend,
forkNum))));
/*
* WAL-log the copied page. Unfortunately we don't know what kind of a
* page this is, so we have to log the full page including any unused
* space.
*/
if (use_wal)
log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
PageSetChecksumInplace(page, blkno);
/*
* Now write the page. We say isTemp = true even if it's not a temp
* rel, because there's no need for smgr to schedule an fsync for this
* write; we'll do it ourselves below.
*/
smgrextend(dst, forkNum, blkno, buf.data, true);
}
/*
* If the rel is WAL-logged, must fsync before commit. We use heap_sync
* to ensure that the toast table gets fsync'd too. (For a temp or
* unlogged rel we don't care since the data will be gone after a crash
* anyway.)
*
* It's obvious that we must do this when not WAL-logging the copy. It's
* less obvious that we have to do it even if we did WAL-log the copied
* pages. The reason is that since we're copying outside shared buffers, a
* CHECKPOINT occurring during the copy has no way to flush the previously
* written data to disk (indeed it won't know the new rel even exists). A
* crash later on would replay WAL from the checkpoint, therefore it
* wouldn't replay our earlier WAL entries. If we do not fsync those pages
* here, they might still not be on disk when the crash occurs.
*/
if (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork)
smgrimmedsync(dst, forkNum);
}
/*
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
*