
More infrastructure for btree compaction project. Tree-traversal code
now knows what to do upon hitting a dead page (in theory anyway, it's
untested...).  Add a post-VACUUM-cleanup entry point for index AMs, to
provide a place for dead-page scavenging to happen.
Also, fix oversight that broke btpo_prev links in temporary indexes.
initdb forced due to additions in pg_am.
Tom Lane
2003-02-22 00:45:05 +00:00
parent 4fff132d1b
commit 799bc58dc7
18 changed files with 709 additions and 345 deletions
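As context for the nbtree.c hunks below: the new cleanup hook is an ordinary fmgr-callable index AM entry point. The skeleton that follows is a hedged sketch of that calling convention, inferred only from the btvacuumcleanup() body in this diff; "myam_vacuumcleanup" is a made-up name, and the pg_am / indexam.c wiring (changed in other files of this commit) is not shown. It assumes the usual postgres.h and access/genam.h includes.

Datum
myam_vacuumcleanup(PG_FUNCTION_ARGS)
{
	/* argument convention matches btvacuumcleanup() below */
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);

	(void) info;				/* unused in this sketch */

	/* scavenge dead pages here; report whatever statistics are cheap */
	stats->num_pages = RelationGetNumberOfBlocks(rel);
	stats->pages_free = 0;

	/* the stats struct is handed back to VACUUM for its report */
	PG_RETURN_POINTER(stats);
}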

src/backend/access/nbtree/nbtree.c

@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.95 2003/02/21 00:06:21 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.96 2003/02/22 00:45:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,6 +23,7 @@
#include "access/nbtree.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/freespace.h"
/* Working state for btbuild and its callback */
@@ -44,7 +45,6 @@ typedef struct
} BTBuildState;
bool BuildingBtree = false; /* see comment in btbuild() */
bool FastBuild = true; /* use SORT instead of insertion build */
/*
@@ -68,13 +68,7 @@ static void btbuildCallback(Relation index,
void
AtEOXact_nbtree(void)
{
/*
* Note: these actions should only be necessary during xact abort; but
* they can't hurt during a commit.
*/
/* If we were building a btree, we ain't anymore. */
BuildingBtree = false;
/* nothing to do at the moment */
}
@@ -95,9 +89,6 @@ btbuild(PG_FUNCTION_ARGS)
double reltuples;
BTBuildState buildstate;
/* set flag to disable locking */
BuildingBtree = true;
/*
* bootstrap processing does something strange, so don't use
* sort/build for initial catalog indices. at some point i need to
@@ -172,9 +163,6 @@
}
#endif /* BTREE_BUILD_STATS */
/* all done */
BuildingBtree = false;
/*
* Since we just counted the tuples in the heap, we update its stats
* in pg_class to guarantee that the planner takes advantage of the
@@ -689,10 +677,6 @@ btbulkdelete(PG_FUNCTION_ARGS)
* We now need to back up the scan one item, so that the next
* cycle will re-examine the same offnum on this page (which
* now holds the next item).
*
* For now, just hack the current-item index. Will need to
* be smarter when deletion includes removal of empty
* index pages.
*/
current->ip_posid--;
}
@@ -708,12 +692,89 @@
result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
result->num_pages = num_pages;
result->tuples_removed = tuples_removed;
result->num_index_tuples = num_index_tuples;
result->tuples_removed = tuples_removed;
result->pages_free = 0; /* not computed here */
PG_RETURN_POINTER(result);
}
/*
* Post-VACUUM cleanup.
*
* Here, we scan looking for pages we can delete or return to the freelist.
*
* Result: a palloc'd struct containing statistical info for VACUUM displays.
*/
Datum
btvacuumcleanup(PG_FUNCTION_ARGS)
{
Relation rel = (Relation) PG_GETARG_POINTER(0);
#ifdef NOT_USED
IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
#endif
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
BlockNumber num_pages;
BlockNumber blkno;
PageFreeSpaceInfo *pageSpaces;
int nFreePages,
maxFreePages;
Assert(stats != NULL);
num_pages = RelationGetNumberOfBlocks(rel);
/* No point in remembering more than MaxFSMPages pages */
maxFreePages = MaxFSMPages;
if ((BlockNumber) maxFreePages > num_pages)
maxFreePages = (int) num_pages + 1; /* +1 to avoid palloc(0) */
pageSpaces = (PageFreeSpaceInfo *) palloc(maxFreePages * sizeof(PageFreeSpaceInfo));
nFreePages = 0;
/*
* Scan through all pages of index, except metapage. (Any pages added
* after we start the scan will not be examined; this should be fine,
* since they can't possibly be empty.)
*/
for (blkno = BTREE_METAPAGE+1; blkno < num_pages; blkno++)
{
Buffer buf;
Page page;
BTPageOpaque opaque;
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (P_ISDELETED(opaque))
{
/* XXX if safe-to-reclaim... */
if (nFreePages < maxFreePages)
{
pageSpaces[nFreePages].blkno = blkno;
/* The avail-space value is bogus, but must be < BLCKSZ */
pageSpaces[nFreePages].avail = BLCKSZ-1;
nFreePages++;
}
}
_bt_relbuf(rel, buf);
}
/*
* Update the shared Free Space Map with the info we now have about
* free space in the index, discarding any old info the map may have.
* We do not need to sort the page numbers; they're in order already.
*/
MultiRecordFreeSpace(&rel->rd_node, 0, nFreePages, pageSpaces);
pfree(pageSpaces);
/* update statistics */
stats->num_pages = num_pages;
stats->pages_free = nFreePages;
PG_RETURN_POINTER(stats);
}
/*
* Restore scan position when btgettuple is called to continue a scan.
*
@@ -739,7 +800,7 @@ _bt_restscan(IndexScanDesc scan)
maxoff;
BTPageOpaque opaque;
Buffer nextbuf;
ItemPointerData target = so->curHeapIptr;
ItemPointer target = &(so->curHeapIptr);
BTItem item;
BlockNumber blkno;
@@ -759,7 +820,7 @@ _bt_restscan(IndexScanDesc scan)
* current->ip_posid before first index tuple on the current page
* (_bt_step will move it right)... XXX still needed?
*/
if (!ItemPointerIsValid(&target))
if (!ItemPointerIsValid(target))
{
ItemPointerSetOffsetNumber(current,
OffsetNumberPrev(P_FIRSTDATAKEY(opaque)));
@@ -778,11 +839,7 @@ _bt_restscan(IndexScanDesc scan)
offnum = OffsetNumberNext(offnum))
{
item = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
if (item->bti_itup.t_tid.ip_blkid.bi_hi ==
target.ip_blkid.bi_hi &&
item->bti_itup.t_tid.ip_blkid.bi_lo ==
target.ip_blkid.bi_lo &&
item->bti_itup.t_tid.ip_posid == target.ip_posid)
if (BTTidSame(item->bti_itup.t_tid, *target))
{
/* Found it */
current->ip_posid = offnum;
@@ -793,22 +850,33 @@ _bt_restscan(IndexScanDesc scan)
/*
* The item we're looking for moved right at least one page, so
* move right. We are careful here to pin and read-lock the next
* page before releasing the current one. This ensures that a
* concurrent btbulkdelete scan cannot pass our position --- if it
* non-dead page before releasing the current one. This ensures that
* a concurrent btbulkdelete scan cannot pass our position --- if it
* did, it might be able to reach and delete our target item before
* we can find it again.
*/
if (P_RIGHTMOST(opaque))
elog(FATAL, "_bt_restscan: my bits moved right off the end of the world!"
elog(ERROR, "_bt_restscan: my bits moved right off the end of the world!"
"\n\tRecreate index %s.", RelationGetRelationName(rel));
blkno = opaque->btpo_next;
nextbuf = _bt_getbuf(rel, blkno, BT_READ);
/* Advance to next non-dead page --- there must be one */
nextbuf = InvalidBuffer;
for (;;)
{
blkno = opaque->btpo_next;
if (nextbuf != InvalidBuffer)
_bt_relbuf(rel, nextbuf);
nextbuf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(nextbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (!P_IGNORE(opaque))
break;
if (P_RIGHTMOST(opaque))
elog(ERROR, "_bt_restscan: fell off the end of %s",
RelationGetRelationName(rel));
}
_bt_relbuf(rel, buf);
so->btso_curbuf = buf = nextbuf;
page = BufferGetPage(buf);
maxoff = PageGetMaxOffsetNumber(page);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
offnum = P_FIRSTDATAKEY(opaque);
ItemPointerSet(current, blkno, offnum);
}