mirror of
https://github.com/postgres/postgres.git
synced 2025-07-05 07:21:24 +03:00
Optimize vacuuming of relations with no indexes.
If there are no indexes on a relation, items can be marked LP_UNUSED instead of LP_DEAD when pruning. This significantly reduces WAL volume, since we no longer need to emit one WAL record for pruning and a second to change the LP_DEAD line pointers thus created to LP_UNUSED.

Melanie Plageman, reviewed by Andres Freund, Peter Geoghegan, and me

Discussion: https://postgr.es/m/CAAKRu_bgvb_k0gKOXWzNKWHt560R0smrGe3E8zewKPs8fiMKkw%40mail.gmail.com
This commit is contained in:
@ -35,6 +35,8 @@ typedef struct
|
|||||||
|
|
||||||
/* tuple visibility test, initialized for the relation */
|
/* tuple visibility test, initialized for the relation */
|
||||||
GlobalVisState *vistest;
|
GlobalVisState *vistest;
|
||||||
|
/* whether or not dead items can be set LP_UNUSED during pruning */
|
||||||
|
bool mark_unused_now;
|
||||||
|
|
||||||
TransactionId new_prune_xid; /* new prune hint value for page */
|
TransactionId new_prune_xid; /* new prune hint value for page */
|
||||||
TransactionId snapshotConflictHorizon; /* latest xid removed */
|
TransactionId snapshotConflictHorizon; /* latest xid removed */
|
||||||
@ -67,6 +69,7 @@ static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
|
|||||||
static void heap_prune_record_redirect(PruneState *prstate,
|
static void heap_prune_record_redirect(PruneState *prstate,
|
||||||
OffsetNumber offnum, OffsetNumber rdoffnum);
|
OffsetNumber offnum, OffsetNumber rdoffnum);
|
||||||
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
|
static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
|
||||||
|
static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum);
|
||||||
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
|
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
|
||||||
static void page_verify_redirects(Page page);
|
static void page_verify_redirects(Page page);
|
||||||
|
|
||||||
@ -148,7 +151,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
|
|||||||
{
|
{
|
||||||
PruneResult presult;
|
PruneResult presult;
|
||||||
|
|
||||||
heap_page_prune(relation, buffer, vistest, &presult, NULL);
|
/*
|
||||||
|
* For now, pass mark_unused_now as false regardless of whether or
|
||||||
|
* not the relation has indexes, since we cannot safely determine
|
||||||
|
* that during on-access pruning with the current implementation.
|
||||||
|
*/
|
||||||
|
heap_page_prune(relation, buffer, vistest, false,
|
||||||
|
&presult, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Report the number of tuples reclaimed to pgstats. This is
|
* Report the number of tuples reclaimed to pgstats. This is
|
||||||
@ -193,6 +202,9 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
|
|||||||
* (see heap_prune_satisfies_vacuum and
|
* (see heap_prune_satisfies_vacuum and
|
||||||
* HeapTupleSatisfiesVacuum).
|
* HeapTupleSatisfiesVacuum).
|
||||||
*
|
*
|
||||||
|
* mark_unused_now indicates whether or not dead items can be set LP_UNUSED during
|
||||||
|
* pruning.
|
||||||
|
*
|
||||||
* off_loc is the offset location required by the caller to use in error
|
* off_loc is the offset location required by the caller to use in error
|
||||||
* callback.
|
* callback.
|
||||||
*
|
*
|
||||||
@ -203,6 +215,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
|
|||||||
void
|
void
|
||||||
heap_page_prune(Relation relation, Buffer buffer,
|
heap_page_prune(Relation relation, Buffer buffer,
|
||||||
GlobalVisState *vistest,
|
GlobalVisState *vistest,
|
||||||
|
bool mark_unused_now,
|
||||||
PruneResult *presult,
|
PruneResult *presult,
|
||||||
OffsetNumber *off_loc)
|
OffsetNumber *off_loc)
|
||||||
{
|
{
|
||||||
@ -227,6 +240,7 @@ heap_page_prune(Relation relation, Buffer buffer,
|
|||||||
prstate.new_prune_xid = InvalidTransactionId;
|
prstate.new_prune_xid = InvalidTransactionId;
|
||||||
prstate.rel = relation;
|
prstate.rel = relation;
|
||||||
prstate.vistest = vistest;
|
prstate.vistest = vistest;
|
||||||
|
prstate.mark_unused_now = mark_unused_now;
|
||||||
prstate.snapshotConflictHorizon = InvalidTransactionId;
|
prstate.snapshotConflictHorizon = InvalidTransactionId;
|
||||||
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
|
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
|
||||||
memset(prstate.marked, 0, sizeof(prstate.marked));
|
memset(prstate.marked, 0, sizeof(prstate.marked));
|
||||||
@ -306,9 +320,9 @@ heap_page_prune(Relation relation, Buffer buffer,
|
|||||||
if (off_loc)
|
if (off_loc)
|
||||||
*off_loc = offnum;
|
*off_loc = offnum;
|
||||||
|
|
||||||
/* Nothing to do if slot is empty or already dead */
|
/* Nothing to do if slot is empty */
|
||||||
itemid = PageGetItemId(page, offnum);
|
itemid = PageGetItemId(page, offnum);
|
||||||
if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
|
if (!ItemIdIsUsed(itemid))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Process this item or chain of items */
|
/* Process this item or chain of items */
|
||||||
@ -581,7 +595,17 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
|
|||||||
* function.)
|
* function.)
|
||||||
*/
|
*/
|
||||||
if (ItemIdIsDead(lp))
|
if (ItemIdIsDead(lp))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If the caller set mark_unused_now true, we can set dead line
|
||||||
|
* pointers LP_UNUSED now. We don't increment ndeleted here since
|
||||||
|
* the LP was already marked dead.
|
||||||
|
*/
|
||||||
|
if (unlikely(prstate->mark_unused_now))
|
||||||
|
heap_prune_record_unused(prstate, offnum);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
Assert(ItemIdIsNormal(lp));
|
Assert(ItemIdIsNormal(lp));
|
||||||
htup = (HeapTupleHeader) PageGetItem(dp, lp);
|
htup = (HeapTupleHeader) PageGetItem(dp, lp);
|
||||||
@ -715,7 +739,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
|
|||||||
* redirect the root to the correct chain member.
|
* redirect the root to the correct chain member.
|
||||||
*/
|
*/
|
||||||
if (i >= nchain)
|
if (i >= nchain)
|
||||||
heap_prune_record_dead(prstate, rootoffnum);
|
heap_prune_record_dead_or_unused(prstate, rootoffnum);
|
||||||
else
|
else
|
||||||
heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
|
heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
|
||||||
}
|
}
|
||||||
@ -726,9 +750,9 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
|
|||||||
* item. This can happen if the loop in heap_page_prune caused us to
|
* item. This can happen if the loop in heap_page_prune caused us to
|
||||||
* visit the dead successor of a redirect item before visiting the
|
* visit the dead successor of a redirect item before visiting the
|
||||||
* redirect item. We can clean up by setting the redirect item to
|
* redirect item. We can clean up by setting the redirect item to
|
||||||
* DEAD state.
|
* DEAD state or LP_UNUSED if the caller indicated.
|
||||||
*/
|
*/
|
||||||
heap_prune_record_dead(prstate, rootoffnum);
|
heap_prune_record_dead_or_unused(prstate, rootoffnum);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ndeleted;
|
return ndeleted;
|
||||||
@ -774,6 +798,27 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
|
|||||||
prstate->marked[offnum] = true;
|
prstate->marked[offnum] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Depending on whether or not the caller set mark_unused_now to true, record that a
|
||||||
|
* line pointer should be marked LP_DEAD or LP_UNUSED. There are other cases in
|
||||||
|
* which we will mark line pointers LP_UNUSED, but we will not mark line
|
||||||
|
* pointers LP_DEAD if mark_unused_now is true.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If the caller set mark_unused_now to true, we can remove dead tuples
|
||||||
|
* during pruning instead of marking their line pointers dead. Set this
|
||||||
|
* tuple's line pointer LP_UNUSED. We hint that this option is less
|
||||||
|
* likely.
|
||||||
|
*/
|
||||||
|
if (unlikely(prstate->mark_unused_now))
|
||||||
|
heap_prune_record_unused(prstate, offnum);
|
||||||
|
else
|
||||||
|
heap_prune_record_dead(prstate, offnum);
|
||||||
|
}
|
||||||
|
|
||||||
/* Record line pointer to be marked unused */
|
/* Record line pointer to be marked unused */
|
||||||
static void
|
static void
|
||||||
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
|
heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
|
||||||
@ -903,13 +948,24 @@ heap_page_prune_execute(Buffer buffer,
|
|||||||
#ifdef USE_ASSERT_CHECKING
|
#ifdef USE_ASSERT_CHECKING
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Only heap-only tuples can become LP_UNUSED during pruning. They
|
* When heap_page_prune() was called, mark_unused_now may have been
|
||||||
* don't need to be left in place as LP_DEAD items until VACUUM gets
|
* passed as true, which allows would-be LP_DEAD items to be made
|
||||||
* around to doing index vacuuming.
|
* LP_UNUSED instead. This is only possible if the relation has no
|
||||||
|
* indexes. If there are any dead items, then mark_unused_now was not
|
||||||
|
* true and every item being marked LP_UNUSED must refer to a
|
||||||
|
* heap-only tuple.
|
||||||
*/
|
*/
|
||||||
|
if (ndead > 0)
|
||||||
|
{
|
||||||
Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
|
Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
|
||||||
htup = (HeapTupleHeader) PageGetItem(page, lp);
|
htup = (HeapTupleHeader) PageGetItem(page, lp);
|
||||||
Assert(HeapTupleHeaderIsHeapOnly(htup));
|
Assert(HeapTupleHeaderIsHeapOnly(htup));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Assert(ItemIdIsUsed(lp));
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ItemIdSetUnused(lp);
|
ItemIdSetUnused(lp);
|
||||||
|
@ -1036,69 +1036,6 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
|
|
||||||
Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
|
Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
|
||||||
|
|
||||||
if (vacrel->nindexes == 0)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Consider the need to do page-at-a-time heap vacuuming when
|
|
||||||
* using the one-pass strategy now.
|
|
||||||
*
|
|
||||||
* The one-pass strategy will never call lazy_vacuum(). The steps
|
|
||||||
* performed here can be thought of as the one-pass equivalent of
|
|
||||||
* a call to lazy_vacuum().
|
|
||||||
*/
|
|
||||||
if (prunestate.has_lpdead_items)
|
|
||||||
{
|
|
||||||
Size freespace;
|
|
||||||
|
|
||||||
lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
|
|
||||||
|
|
||||||
/* Forget the LP_DEAD items that we just vacuumed */
|
|
||||||
dead_items->num_items = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Now perform FSM processing for blkno, and move on to next
|
|
||||||
* page.
|
|
||||||
*
|
|
||||||
* Our call to lazy_vacuum_heap_page() will have considered if
|
|
||||||
* it's possible to set all_visible/all_frozen independently
|
|
||||||
* of lazy_scan_prune(). Note that prunestate was invalidated
|
|
||||||
* by lazy_vacuum_heap_page() call.
|
|
||||||
*/
|
|
||||||
freespace = PageGetHeapFreeSpace(page);
|
|
||||||
|
|
||||||
UnlockReleaseBuffer(buf);
|
|
||||||
RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Periodically perform FSM vacuuming to make newly-freed
|
|
||||||
* space visible on upper FSM pages. FreeSpaceMapVacuumRange()
|
|
||||||
* vacuums the portion of the freespace map covering heap
|
|
||||||
* pages from start to end - 1. Include the block we just
|
|
||||||
* vacuumed by passing it blkno + 1. Overflow isn't an issue
|
|
||||||
* because MaxBlockNumber + 1 is InvalidBlockNumber which
|
|
||||||
* causes FreeSpaceMapVacuumRange() to vacuum freespace map
|
|
||||||
* pages covering the remainder of the relation.
|
|
||||||
*/
|
|
||||||
if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
|
|
||||||
{
|
|
||||||
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
|
|
||||||
blkno + 1);
|
|
||||||
next_fsm_block_to_vacuum = blkno + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* There was no call to lazy_vacuum_heap_page() because pruning
|
|
||||||
* didn't encounter/create any LP_DEAD items that needed to be
|
|
||||||
* vacuumed. Prune state has not been invalidated, so proceed
|
|
||||||
* with prunestate-driven visibility map and FSM steps (just like
|
|
||||||
* the two-pass strategy).
|
|
||||||
*/
|
|
||||||
Assert(dead_items->num_items == 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Handle setting visibility map bit based on information from the VM
|
* Handle setting visibility map bit based on information from the VM
|
||||||
* (as of last lazy_scan_skip() call), and from prunestate
|
* (as of last lazy_scan_skip() call), and from prunestate
|
||||||
@ -1209,39 +1146,46 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Final steps for block: drop cleanup lock, record free space in the
|
* Final steps for block: drop cleanup lock, record free space in the
|
||||||
* FSM
|
* FSM.
|
||||||
*/
|
*
|
||||||
if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
|
* If we will likely do index vacuuming, wait until
|
||||||
{
|
* lazy_vacuum_heap_rel() to save free space. This doesn't just save
|
||||||
/*
|
* us some cycles; it also allows us to record any additional free
|
||||||
* Wait until lazy_vacuum_heap_rel() to save free space. This
|
* space that lazy_vacuum_heap_page() will make available in cases
|
||||||
* doesn't just save us some cycles; it also allows us to record
|
* where it's possible to truncate the page's line pointer array.
|
||||||
* any additional free space that lazy_vacuum_heap_page() will
|
|
||||||
* make available in cases where it's possible to truncate the
|
|
||||||
* page's line pointer array.
|
|
||||||
*
|
*
|
||||||
* Note: It's not in fact 100% certain that we really will call
|
* Note: It's not in fact 100% certain that we really will call
|
||||||
* lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
|
* lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip index
|
||||||
* index vacuuming (and so must skip heap vacuuming). This is
|
* vacuuming (and so must skip heap vacuuming). This is deemed okay
|
||||||
* deemed okay because it only happens in emergencies, or when
|
* because it only happens in emergencies, or when there is very
|
||||||
* there is very little free space anyway. (Besides, we start
|
* little free space anyway. (Besides, we start recording free space
|
||||||
* recording free space in the FSM once index vacuuming has been
|
* in the FSM once index vacuuming has been abandoned.)
|
||||||
* abandoned.)
|
|
||||||
*
|
|
||||||
* Note: The one-pass (no indexes) case is only supposed to make
|
|
||||||
* it this far when there were no LP_DEAD items during pruning.
|
|
||||||
*/
|
*/
|
||||||
Assert(vacrel->nindexes > 0);
|
if (vacrel->nindexes == 0
|
||||||
UnlockReleaseBuffer(buf);
|
|| !vacrel->do_index_vacuuming
|
||||||
}
|
|| !prunestate.has_lpdead_items)
|
||||||
else
|
|
||||||
{
|
{
|
||||||
Size freespace = PageGetHeapFreeSpace(page);
|
Size freespace = PageGetHeapFreeSpace(page);
|
||||||
|
|
||||||
UnlockReleaseBuffer(buf);
|
UnlockReleaseBuffer(buf);
|
||||||
RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
|
RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Periodically perform FSM vacuuming to make newly-freed space
|
||||||
|
* visible on upper FSM pages. This is done after vacuuming if the
|
||||||
|
* table has indexes.
|
||||||
|
*/
|
||||||
|
if (vacrel->nindexes == 0 && prunestate.has_lpdead_items &&
|
||||||
|
blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
|
||||||
|
{
|
||||||
|
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
|
||||||
|
blkno);
|
||||||
|
next_fsm_block_to_vacuum = blkno;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
UnlockReleaseBuffer(buf);
|
||||||
|
}
|
||||||
|
|
||||||
vacrel->blkno = InvalidBlockNumber;
|
vacrel->blkno = InvalidBlockNumber;
|
||||||
if (BufferIsValid(vmbuffer))
|
if (BufferIsValid(vmbuffer))
|
||||||
@ -1596,8 +1540,13 @@ lazy_scan_prune(LVRelState *vacrel,
|
|||||||
* in presult.ndeleted. It should not be confused with lpdead_items;
|
* in presult.ndeleted. It should not be confused with lpdead_items;
|
||||||
* lpdead_items's final value can be thought of as the number of tuples
|
* lpdead_items's final value can be thought of as the number of tuples
|
||||||
* that were deleted from indexes.
|
* that were deleted from indexes.
|
||||||
|
*
|
||||||
|
* If the relation has no indexes, we can immediately mark would-be dead
|
||||||
|
* items LP_UNUSED, so mark_unused_now should be true if no indexes and
|
||||||
|
* false otherwise.
|
||||||
*/
|
*/
|
||||||
heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum);
|
heap_page_prune(rel, buf, vacrel->vistest, vacrel->nindexes == 0,
|
||||||
|
&presult, &vacrel->offnum);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now scan the page to collect LP_DEAD items and check for tuples
|
* Now scan the page to collect LP_DEAD items and check for tuples
|
||||||
@ -2520,7 +2469,7 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
|
|||||||
bool all_frozen;
|
bool all_frozen;
|
||||||
LVSavedErrInfo saved_err_info;
|
LVSavedErrInfo saved_err_info;
|
||||||
|
|
||||||
Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
|
Assert(vacrel->do_index_vacuuming);
|
||||||
|
|
||||||
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
|
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
|
||||||
|
|
||||||
|
@ -320,6 +320,7 @@ struct GlobalVisState;
|
|||||||
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
|
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
|
||||||
extern void heap_page_prune(Relation relation, Buffer buffer,
|
extern void heap_page_prune(Relation relation, Buffer buffer,
|
||||||
struct GlobalVisState *vistest,
|
struct GlobalVisState *vistest,
|
||||||
|
bool mark_unused_now,
|
||||||
PruneResult *presult,
|
PruneResult *presult,
|
||||||
OffsetNumber *off_loc);
|
OffsetNumber *off_loc);
|
||||||
extern void heap_page_prune_execute(Buffer buffer,
|
extern void heap_page_prune_execute(Buffer buffer,
|
||||||
|
Reference in New Issue
Block a user