mirror of
https://github.com/postgres/postgres.git
synced 2025-04-20 00:42:27 +03:00
More infrastructure for btree compaction project. Tree-traversal code
now knows what to do upon hitting a dead page (in theory anyway, it's untested...). Add a post-VACUUM-cleanup entry point for index AMs, to provide a place for dead-page scavenging to happen. Also, fix oversight that broke btpo_prev links in temporary indexes. initdb forced due to additions in pg_am.
This commit is contained in:
parent
4fff132d1b
commit
799bc58dc7
@ -1,6 +1,6 @@
|
||||
<!--
|
||||
Documentation of the system catalogs, directed toward PostgreSQL developers
|
||||
$Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.65 2003/01/19 00:13:28 momjian Exp $
|
||||
$Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.66 2003/02/22 00:45:03 tgl Exp $
|
||||
-->
|
||||
|
||||
<chapter id="catalogs">
|
||||
@ -446,6 +446,13 @@
|
||||
<entry>bulk-delete function</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>amvacuumcleanup</entry>
|
||||
<entry><type>regproc</type></entry>
|
||||
<entry>pg_proc.oid</entry>
|
||||
<entry>post-VACUUM cleanup function</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>amcostestimate</entry>
|
||||
<entry><type>regproc</type></entry>
|
||||
|
@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.99 2002/11/13 00:39:46 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.100 2003/02/22 00:45:03 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -1650,8 +1650,9 @@ gistbulkdelete(PG_FUNCTION_ARGS)
|
||||
|
||||
result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
|
||||
result->num_pages = num_pages;
|
||||
result->tuples_removed = tuples_removed;
|
||||
result->num_index_tuples = num_index_tuples;
|
||||
result->tuples_removed = tuples_removed;
|
||||
result->pages_free = 0;
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.60 2002/09/04 20:31:09 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.61 2003/02/22 00:45:03 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* This file contains only the public interface routines.
|
||||
@ -491,8 +491,9 @@ hashbulkdelete(PG_FUNCTION_ARGS)
|
||||
|
||||
result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
|
||||
result->num_pages = num_pages;
|
||||
result->tuples_removed = tuples_removed;
|
||||
result->num_index_tuples = num_index_tuples;
|
||||
result->tuples_removed = tuples_removed;
|
||||
result->pages_free = 0;
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.63 2003/01/08 19:41:40 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/index/indexam.c,v 1.64 2003/02/22 00:45:03 tgl Exp $
|
||||
*
|
||||
* INTERFACE ROUTINES
|
||||
* index_open - open an index relation by relation OID
|
||||
@ -23,6 +23,7 @@
|
||||
* index_restrpos - restore a scan position
|
||||
* index_getnext - get the next tuple from a scan
|
||||
* index_bulk_delete - bulk deletion of index tuples
|
||||
* index_vacuum_cleanup - post-deletion cleanup of an index
|
||||
* index_cost_estimator - fetch amcostestimate procedure OID
|
||||
* index_getprocid - get a support procedure OID
|
||||
*
|
||||
@ -579,6 +580,37 @@ index_bulk_delete(Relation indexRelation,
|
||||
return result;
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* index_vacuum_cleanup - do post-deletion cleanup of an index
|
||||
*
* indexRelation: the index whose access method should do the cleanup
* info: passed through, uninterpreted, to the AM's amvacuumcleanup procedure
* stats: statistics struct passed through to the AM's procedure
|
||||
* return value is an optional palloc'd struct of statistics
* (if the AM has no amvacuumcleanup procedure, the caller's stats pointer
* is returned unchanged; otherwise the result is whatever pointer the AM's
* procedure returns)
|
||||
* ----------------
|
||||
*/
|
||||
IndexBulkDeleteResult *
|
||||
index_vacuum_cleanup(Relation indexRelation,
|
||||
IndexVacuumCleanupInfo *info,
|
||||
IndexBulkDeleteResult *stats)
|
||||
{
|
||||
RegProcedure procedure;
|
||||
IndexBulkDeleteResult *result;
|
||||
|
||||
/* NOTE(review): RELATION_CHECKS is a macro defined outside this view; presumably sanity-checks indexRelation */
RELATION_CHECKS;
|
||||
|
||||
/* It's okay for an index AM not to have a vacuumcleanup procedure */
|
||||
if (!RegProcedureIsValid(indexRelation->rd_am->amvacuumcleanup))
|
||||
return stats;
|
||||
|
||||
/* NOTE(review): macro defined outside this view; presumably sets the local "procedure" to the AM's amvacuumcleanup function */
GET_REL_PROCEDURE(vacuum_cleanup, amvacuumcleanup);
|
||||
|
||||
/* Call the AM's procedure with (index, info, stats), each passed as a pointer Datum */
result = (IndexBulkDeleteResult *)
|
||||
DatumGetPointer(OidFunctionCall3(procedure,
|
||||
PointerGetDatum(indexRelation),
|
||||
PointerGetDatum((Pointer) info),
|
||||
PointerGetDatum((Pointer) stats)));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* index_cost_estimator
|
||||
*
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.97 2003/02/21 00:06:21 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.98 2003/02/22 00:45:03 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -280,12 +280,21 @@ _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
|
||||
if (!_bt_isequal(itupdesc, page, P_HIKEY,
|
||||
natts, itup_scankey))
|
||||
break;
|
||||
/* Advance to next non-dead page --- there must be one */
|
||||
for (;;)
|
||||
{
|
||||
nblkno = opaque->btpo_next;
|
||||
if (nbuf != InvalidBuffer)
|
||||
_bt_relbuf(rel, nbuf);
|
||||
nbuf = _bt_getbuf(rel, nblkno, BT_READ);
|
||||
page = BufferGetPage(nbuf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (!P_IGNORE(opaque))
|
||||
break;
|
||||
if (P_RIGHTMOST(opaque))
|
||||
elog(ERROR, "_bt_check_unique: fell off the end of %s",
|
||||
RelationGetRelationName(rel));
|
||||
}
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
offset = P_FIRSTDATAKEY(opaque);
|
||||
}
|
||||
@ -414,20 +423,34 @@ _bt_insertonpg(Relation rel,
|
||||
_bt_compare(rel, keysz, scankey, page, P_HIKEY) == 0 &&
|
||||
random() > (MAX_RANDOM_VALUE / 100))
|
||||
{
|
||||
/* step right one page */
|
||||
BlockNumber rblkno = lpageop->btpo_next;
|
||||
Buffer rbuf;
|
||||
|
||||
/*
|
||||
* must write-lock next page before releasing write lock on
|
||||
* step right to next non-dead page
|
||||
*
|
||||
* must write-lock that page before releasing write lock on
|
||||
* current page; else someone else's _bt_check_unique scan
|
||||
* could fail to see our insertion.
|
||||
* could fail to see our insertion. write locks on intermediate
|
||||
* dead pages won't do because we don't know when they will get
|
||||
* de-linked from the tree.
|
||||
*/
|
||||
Buffer rbuf = InvalidBuffer;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
BlockNumber rblkno = lpageop->btpo_next;
|
||||
|
||||
if (rbuf != InvalidBuffer)
|
||||
_bt_relbuf(rel, rbuf);
|
||||
rbuf = _bt_getbuf(rel, rblkno, BT_WRITE);
|
||||
page = BufferGetPage(rbuf);
|
||||
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (!P_IGNORE(lpageop))
|
||||
break;
|
||||
if (P_RIGHTMOST(lpageop))
|
||||
elog(ERROR, "_bt_insertonpg: fell off the end of %s",
|
||||
RelationGetRelationName(rel));
|
||||
}
|
||||
_bt_relbuf(rel, buf);
|
||||
buf = rbuf;
|
||||
page = BufferGetPage(buf);
|
||||
lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
movedright = true;
|
||||
}
|
||||
|
||||
@ -633,8 +656,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
|
||||
BTPageOpaque ropaque,
|
||||
lopaque,
|
||||
oopaque;
|
||||
Buffer sbuf = 0;
|
||||
Page spage = 0;
|
||||
Buffer sbuf = InvalidBuffer;
|
||||
Page spage = NULL;
|
||||
BTPageOpaque sopaque = NULL;
|
||||
Size itemsz;
|
||||
ItemId itemid;
|
||||
BTItem item;
|
||||
@ -792,6 +816,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
|
||||
{
|
||||
sbuf = _bt_getbuf(rel, ropaque->btpo_next, BT_WRITE);
|
||||
spage = BufferGetPage(sbuf);
|
||||
sopaque = (BTPageOpaque) PageGetSpecialPointer(spage);
|
||||
if (sopaque->btpo_prev != ropaque->btpo_prev)
|
||||
elog(PANIC, "btree: right sibling's left-link doesn't match");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -802,6 +829,9 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
|
||||
*/
|
||||
START_CRIT_SECTION();
|
||||
|
||||
if (!P_RIGHTMOST(ropaque))
|
||||
sopaque->btpo_prev = BufferGetBlockNumber(rbuf);
|
||||
|
||||
/* XLOG stuff */
|
||||
if (!rel->rd_istemp)
|
||||
{
|
||||
@ -847,10 +877,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
|
||||
|
||||
if (!P_RIGHTMOST(ropaque))
|
||||
{
|
||||
BTPageOpaque sopaque = (BTPageOpaque) PageGetSpecialPointer(spage);
|
||||
|
||||
sopaque->btpo_prev = BufferGetBlockNumber(rbuf);
|
||||
|
||||
rdata[2].next = &(rdata[3]);
|
||||
rdata[3].buffer = sbuf;
|
||||
rdata[3].data = NULL;
|
||||
@ -1250,15 +1276,19 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
Buffer buf;
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
|
||||
buf = _bt_getbuf(rel, blkno, access);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
if (!P_IGNORE(opaque))
|
||||
{
|
||||
OffsetNumber offnum,
|
||||
minoff,
|
||||
maxoff;
|
||||
ItemId itemid;
|
||||
BTItem item;
|
||||
|
||||
buf = _bt_getbuf(rel, blkno, access);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
minoff = P_FIRSTDATAKEY(opaque);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
|
||||
@ -1271,9 +1301,9 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
start = minoff;
|
||||
|
||||
/*
|
||||
* These loops will check every item on the page --- but in an order
|
||||
* that's attuned to the probability of where it actually is. Scan
|
||||
* to the right first, then to the left.
|
||||
* These loops will check every item on the page --- but in an
|
||||
* order that's attuned to the probability of where it actually
|
||||
* is. Scan to the right first, then to the left.
|
||||
*/
|
||||
for (offnum = start;
|
||||
offnum <= maxoff;
|
||||
@ -1304,6 +1334,7 @@ _bt_getstackbuf(Relation rel, BTStack stack, int access)
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The item we're looking for moved right at least one page.
|
||||
@ -1365,6 +1396,8 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
|
||||
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
|
||||
rootpage = BufferGetPage(rootbuf);
|
||||
rootblknum = BufferGetBlockNumber(rootbuf);
|
||||
|
||||
/* acquire lock on the metapage */
|
||||
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
|
||||
metapg = BufferGetPage(metabuf);
|
||||
metad = BTPageGetMeta(metapg);
|
||||
|
@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.59 2003/02/21 00:06:21 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.60 2003/02/22 00:45:04 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Postgres btree pages look like ordinary relation pages. The opaque
|
||||
@ -22,34 +22,17 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#include "access/nbtree.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/lmgr.h"
|
||||
|
||||
extern bool FixBTree; /* comments in nbtree.c */
|
||||
extern Buffer _bt_fixroot(Relation rel, Buffer oldrootbuf, bool release);
|
||||
|
||||
/*
|
||||
* We use high-concurrency locking on btrees. There are two cases in
|
||||
* which we don't do locking. One is when we're building the btree.
|
||||
* Since the creating transaction has not committed, no one can see
|
||||
* the index, and there's no reason to share locks. The second case
|
||||
* is when we're just starting up the database system. We use some
|
||||
* special-purpose initialization code in the relation cache manager
|
||||
* (see utils/cache/relcache.c) to allow us to do indexed scans on
|
||||
* the system catalogs before we'd normally be able to. This happens
|
||||
* before the lock table is fully initialized, so we can't use it.
|
||||
* Strictly speaking, this violates 2pl, but we don't do 2pl on the
|
||||
* system catalogs anyway, so I declare this to be okay.
|
||||
*/
|
||||
|
||||
#define USELOCKING (!BuildingBtree && !IsInitProcessingMode())
|
||||
|
||||
|
||||
/*
|
||||
* _bt_metapinit() -- Initialize the metadata page of a new btree.
|
||||
*
|
||||
* Note: there's no real need for any locking here. Since the transaction
|
||||
* creating the index hasn't committed yet, no one else can even see the index
|
||||
* much less be trying to use it.
|
||||
*/
|
||||
void
|
||||
_bt_metapinit(Relation rel)
|
||||
@ -59,10 +42,6 @@ _bt_metapinit(Relation rel)
|
||||
BTMetaPageData *metad;
|
||||
BTPageOpaque op;
|
||||
|
||||
/* can't be sharing this with anyone, now... */
|
||||
if (USELOCKING)
|
||||
LockRelation(rel, AccessExclusiveLock);
|
||||
|
||||
if (RelationGetNumberOfBlocks(rel) != 0)
|
||||
elog(ERROR, "Cannot initialize non-empty btree %s",
|
||||
RelationGetRelationName(rel));
|
||||
@ -114,10 +93,6 @@ _bt_metapinit(Relation rel)
|
||||
END_CRIT_SECTION();
|
||||
|
||||
WriteBuffer(buf);
|
||||
|
||||
/* all done */
|
||||
if (USELOCKING)
|
||||
UnlockRelation(rel, AccessExclusiveLock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -142,7 +117,8 @@ _bt_metapinit(Relation rel)
|
||||
* what we will return is the old root, which is now just the leftmost
|
||||
* page on a probably-not-very-wide level. For most purposes this is
|
||||
* as good as or better than the true root, so we do not bother to
|
||||
* insist on finding the true root.
|
||||
* insist on finding the true root. We do, however, guarantee to
|
||||
* return a live (not deleted or half-dead) page.
|
||||
*
|
||||
* On successful return, the root page is pinned and read-locked.
|
||||
* The metadata page is not locked or pinned on exit.
|
||||
@ -157,6 +133,7 @@ _bt_getroot(Relation rel, int access)
|
||||
Page rootpage;
|
||||
BTPageOpaque rootopaque;
|
||||
BlockNumber rootblkno;
|
||||
uint32 rootlevel;
|
||||
BTMetaPageData *metad;
|
||||
|
||||
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
|
||||
@ -164,6 +141,7 @@ _bt_getroot(Relation rel, int access)
|
||||
metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg);
|
||||
metad = BTPageGetMeta(metapg);
|
||||
|
||||
/* sanity-check the metapage */
|
||||
if (!(metaopaque->btpo_flags & BTP_META) ||
|
||||
metad->btm_magic != BTREE_MAGIC)
|
||||
elog(ERROR, "Index %s is not a btree",
|
||||
@ -191,10 +169,20 @@ _bt_getroot(Relation rel, int access)
|
||||
/*
|
||||
* Race condition: if someone else initialized the metadata
|
||||
* between the time we released the read lock and acquired the
|
||||
* write lock, above, we must avoid doing it again.
|
||||
* write lock, we must avoid doing it again.
|
||||
*/
|
||||
if (metad->btm_root == P_NONE)
|
||||
if (metad->btm_root != P_NONE)
|
||||
{
|
||||
/*
|
||||
* Metadata initialized by someone else. In order to
|
||||
* guarantee no deadlocks, we have to release the metadata
|
||||
* page and start all over again. (Is that really true?
|
||||
* But it's hardly worth trying to optimize this case.)
|
||||
*/
|
||||
_bt_relbuf(rel, metabuf);
|
||||
return _bt_getroot(rel, access);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get, initialize, write, and leave a lock of the appropriate
|
||||
* type on the new root page. Since this is the first page in
|
||||
@ -258,23 +246,36 @@ _bt_getroot(Relation rel, int access)
|
||||
_bt_wrtbuf(rel, metabuf);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Metadata initialized by someone else. In order to
|
||||
* guarantee no deadlocks, we have to release the metadata
|
||||
* page and start all over again.
|
||||
*/
|
||||
_bt_relbuf(rel, metabuf);
|
||||
return _bt_getroot(rel, access);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
rootblkno = metad->btm_fastroot;
|
||||
Assert(rootblkno != P_NONE);
|
||||
rootlevel = metad->btm_fastlevel;
|
||||
|
||||
_bt_relbuf(rel, metabuf); /* done with the meta page */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
|
||||
rootpage = BufferGetPage(rootbuf);
|
||||
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
|
||||
|
||||
if (!P_IGNORE(rootopaque))
|
||||
break;
|
||||
|
||||
/* it's dead, Jim. step right one page */
|
||||
if (P_RIGHTMOST(rootopaque))
|
||||
elog(ERROR, "No live root page found in %s",
|
||||
RelationGetRelationName(rel));
|
||||
rootblkno = rootopaque->btpo_next;
|
||||
|
||||
_bt_relbuf(rel, rootbuf);
|
||||
}
|
||||
|
||||
/* Note: can't check btpo.level on deleted pages */
|
||||
if (rootopaque->btpo.level != rootlevel)
|
||||
elog(ERROR, "Root page %u of %s has level %u, expected %u",
|
||||
rootblkno, RelationGetRelationName(rel),
|
||||
rootopaque->btpo.level, rootlevel);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -305,7 +306,10 @@ _bt_gettrueroot(Relation rel)
|
||||
Page metapg;
|
||||
BTPageOpaque metaopaque;
|
||||
Buffer rootbuf;
|
||||
Page rootpage;
|
||||
BTPageOpaque rootopaque;
|
||||
BlockNumber rootblkno;
|
||||
uint32 rootlevel;
|
||||
BTMetaPageData *metad;
|
||||
|
||||
metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
|
||||
@ -331,10 +335,33 @@ _bt_gettrueroot(Relation rel)
|
||||
}
|
||||
|
||||
rootblkno = metad->btm_root;
|
||||
rootlevel = metad->btm_level;
|
||||
|
||||
_bt_relbuf(rel, metabuf); /* done with the meta page */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
rootbuf = _bt_getbuf(rel, rootblkno, BT_READ);
|
||||
rootpage = BufferGetPage(rootbuf);
|
||||
rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
|
||||
|
||||
if (!P_IGNORE(rootopaque))
|
||||
break;
|
||||
|
||||
/* it's dead, Jim. step right one page */
|
||||
if (P_RIGHTMOST(rootopaque))
|
||||
elog(ERROR, "No live root page found in %s",
|
||||
RelationGetRelationName(rel));
|
||||
rootblkno = rootopaque->btpo_next;
|
||||
|
||||
_bt_relbuf(rel, rootbuf);
|
||||
}
|
||||
|
||||
/* Note: can't check btpo.level on deleted pages */
|
||||
if (rootopaque->btpo.level != rootlevel)
|
||||
elog(ERROR, "Root page %u of %s has level %u, expected %u",
|
||||
rootblkno, RelationGetRelationName(rel),
|
||||
rootopaque->btpo.level, rootlevel);
|
||||
|
||||
return rootbuf;
|
||||
}
|
||||
@ -342,6 +369,8 @@ _bt_gettrueroot(Relation rel)
|
||||
/*
|
||||
* _bt_getbuf() -- Get a buffer by block number for read or write.
|
||||
*
|
||||
* blkno == P_NEW means to get an unallocated index page.
|
||||
*
|
||||
* When this routine returns, the appropriate lock is set on the
|
||||
* requested buffer and its reference count has been incremented
|
||||
* (ie, the buffer is "locked and pinned").
|
||||
@ -359,19 +388,36 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
|
||||
}
|
||||
else
|
||||
{
|
||||
bool needLock;
|
||||
Page page;
|
||||
|
||||
/* XXX soon: ask FSM about free space */
|
||||
|
||||
/*
|
||||
* Extend the relation by one page.
|
||||
*
|
||||
* Extend bufmgr code is unclean and so we have to use extra locking
|
||||
* here.
|
||||
* We have to use a lock to ensure no one else is extending the rel at
|
||||
* the same time, else we will both try to initialize the same new
|
||||
* page. We can skip locking for new or temp relations, however,
|
||||
* since no one else could be accessing them.
|
||||
*/
|
||||
needLock = !(rel->rd_isnew || rel->rd_istemp);
|
||||
|
||||
if (needLock)
|
||||
LockPage(rel, 0, ExclusiveLock);
|
||||
buf = ReadBuffer(rel, blkno);
|
||||
LockBuffer(buf, access);
|
||||
|
||||
buf = ReadBuffer(rel, P_NEW);
|
||||
|
||||
/*
|
||||
* Release the file-extension lock; it's now OK for someone else to
|
||||
* extend the relation some more.
|
||||
*/
|
||||
if (needLock)
|
||||
UnlockPage(rel, 0, ExclusiveLock);
|
||||
|
||||
/* Acquire appropriate buffer lock on new page */
|
||||
LockBuffer(buf, access);
|
||||
|
||||
/* Initialize the new page before returning it */
|
||||
page = BufferGetPage(buf);
|
||||
_bt_pageinit(page, BufferGetPageSize(buf));
|
||||
@ -403,10 +449,9 @@ _bt_relbuf(Relation rel, Buffer buf)
|
||||
* and a pin on the buffer.
|
||||
*
|
||||
* NOTE: actually, the buffer manager just marks the shared buffer page
|
||||
* dirty here, the real I/O happens later. Since we can't persuade the
|
||||
* Unix kernel to schedule disk writes in a particular order, there's not
|
||||
* much point in worrying about this. The most we can say is that all the
|
||||
* writes will occur before commit.
|
||||
* dirty here; the real I/O happens later. This is okay since we are not
|
||||
* relying on write ordering anyway. The WAL mechanism is responsible for
|
||||
* guaranteeing correctness after a crash.
|
||||
*/
|
||||
void
|
||||
_bt_wrtbuf(Relation rel, Buffer buf)
|
||||
@ -455,8 +500,9 @@ _bt_pageinit(Page page, Size size)
|
||||
* mistake. On exit, metapage data is correct and we no longer have
|
||||
* a pin or lock on the metapage.
|
||||
*
|
||||
* XXX this is not used for splitting anymore, only in nbtsort.c at the
|
||||
* completion of btree building.
|
||||
* Actually this is not used for splitting on-the-fly anymore. It's only used
|
||||
* in nbtsort.c at the completion of btree building, where we know we have
|
||||
* sole access to the index anyway.
|
||||
*/
|
||||
void
|
||||
_bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level)
|
||||
@ -512,6 +558,10 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level)
|
||||
/*
|
||||
* Delete an item from a btree page.
|
||||
*
|
||||
* This must only be used for deleting leaf items. Deleting an item on a
|
||||
* non-leaf page has to be done as part of an atomic action that includes
|
||||
* deleting the page it points to.
|
||||
*
|
||||
* This routine assumes that the caller has pinned and locked the buffer,
|
||||
* and will write the buffer afterwards.
|
||||
*/
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.95 2003/02/21 00:06:21 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.96 2003/02/22 00:45:04 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -23,6 +23,7 @@
|
||||
#include "access/nbtree.h"
|
||||
#include "catalog/index.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/freespace.h"
|
||||
|
||||
|
||||
/* Working state for btbuild and its callback */
|
||||
@ -44,7 +45,6 @@ typedef struct
|
||||
} BTBuildState;
|
||||
|
||||
|
||||
bool BuildingBtree = false; /* see comment in btbuild() */
|
||||
bool FastBuild = true; /* use SORT instead of insertion build */
|
||||
|
||||
/*
|
||||
@ -68,13 +68,7 @@ static void btbuildCallback(Relation index,
|
||||
void
|
||||
AtEOXact_nbtree(void)
|
||||
{
|
||||
/*
|
||||
* Note: these actions should only be necessary during xact abort; but
|
||||
* they can't hurt during a commit.
|
||||
*/
|
||||
|
||||
/* If we were building a btree, we ain't anymore. */
|
||||
BuildingBtree = false;
|
||||
/* nothing to do at the moment */
|
||||
}
|
||||
|
||||
|
||||
@ -95,9 +89,6 @@ btbuild(PG_FUNCTION_ARGS)
|
||||
double reltuples;
|
||||
BTBuildState buildstate;
|
||||
|
||||
/* set flag to disable locking */
|
||||
BuildingBtree = true;
|
||||
|
||||
/*
|
||||
* bootstrap processing does something strange, so don't use
|
||||
* sort/build for initial catalog indices. at some point i need to
|
||||
@ -172,9 +163,6 @@ btbuild(PG_FUNCTION_ARGS)
|
||||
}
|
||||
#endif /* BTREE_BUILD_STATS */
|
||||
|
||||
/* all done */
|
||||
BuildingBtree = false;
|
||||
|
||||
/*
|
||||
* Since we just counted the tuples in the heap, we update its stats
|
||||
* in pg_class to guarantee that the planner takes advantage of the
|
||||
@ -689,10 +677,6 @@ btbulkdelete(PG_FUNCTION_ARGS)
|
||||
* We now need to back up the scan one item, so that the next
|
||||
* cycle will re-examine the same offnum on this page (which
|
||||
* now holds the next item).
|
||||
*
|
||||
* For now, just hack the current-item index. Will need to
|
||||
* be smarter when deletion includes removal of empty
|
||||
* index pages.
|
||||
*/
|
||||
current->ip_posid--;
|
||||
}
|
||||
@ -708,12 +692,89 @@ btbulkdelete(PG_FUNCTION_ARGS)
|
||||
|
||||
result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
|
||||
result->num_pages = num_pages;
|
||||
result->tuples_removed = tuples_removed;
|
||||
result->num_index_tuples = num_index_tuples;
|
||||
result->tuples_removed = tuples_removed;
|
||||
result->pages_free = 0; /* not computed here */
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Post-VACUUM cleanup.
|
||||
*
|
||||
* Here, we scan looking for pages we can delete or return to the freelist.
* (As written, this routine deletes nothing itself: it only collects the
* block numbers of pages already flagged deleted and reports them to the
* free space map.)
*
* Arguments (fmgr convention):
*   0: Relation -- the btree index
*   1: IndexVacuumCleanupInfo * -- currently unused (compiled out)
*   2: IndexBulkDeleteResult * -- stats struct to update; must not be NULL
|
||||
*
|
||||
* Result: a palloc'd struct containing statistical info for VACUUM displays.
* (This is the same struct that was passed in as argument 2, with its
* num_pages and pages_free fields overwritten.)
|
||||
*/
|
||||
Datum
|
||||
btvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Relation rel = (Relation) PG_GETARG_POINTER(0);
|
||||
/* the "info" argument is not consulted yet, so it is only fetched under NOT_USED */
#ifdef NOT_USED
|
||||
IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
|
||||
#endif
|
||||
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
|
||||
BlockNumber num_pages;
|
||||
BlockNumber blkno;
|
||||
PageFreeSpaceInfo *pageSpaces;
|
||||
int nFreePages,
|
||||
maxFreePages;
|
||||
|
||||
/* we update the caller's struct in place, so it has to exist */
Assert(stats != NULL);
|
||||
|
||||
/* index length is sampled once; the scan below stops at this bound */
num_pages = RelationGetNumberOfBlocks(rel);
|
||||
|
||||
/* No point in remembering more than MaxFSMPages pages */
|
||||
maxFreePages = MaxFSMPages;
|
||||
/* ... nor more entries than the index has pages */
if ((BlockNumber) maxFreePages > num_pages)
|
||||
maxFreePages = (int) num_pages + 1; /* +1 to avoid palloc(0) */
|
||||
pageSpaces = (PageFreeSpaceInfo *) palloc(maxFreePages * sizeof(PageFreeSpaceInfo));
|
||||
nFreePages = 0;
|
||||
|
||||
/*
|
||||
* Scan through all pages of index, except metapage. (Any pages added
|
||||
* after we start the scan will not be examined; this should be fine,
|
||||
* since they can't possibly be empty.)
|
||||
*/
|
||||
for (blkno = BTREE_METAPAGE+1; blkno < num_pages; blkno++)
|
||||
{
|
||||
Buffer buf;
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
|
||||
/* read access is enough: we only look at the page's opaque data */
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (P_ISDELETED(opaque))
|
||||
{
|
||||
/* XXX if safe-to-reclaim... */
|
||||
/* once the array is full, further deleted pages are silently left unrecorded */
if (nFreePages < maxFreePages)
|
||||
{
|
||||
pageSpaces[nFreePages].blkno = blkno;
|
||||
/* The avail-space value is bogus, but must be < BLCKSZ */
|
||||
pageSpaces[nFreePages].avail = BLCKSZ-1;
|
||||
nFreePages++;
|
||||
}
|
||||
}
|
||||
/* done with this page, whether or not it was recorded */
_bt_relbuf(rel, buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the shared Free Space Map with the info we now have about
|
||||
* free space in the index, discarding any old info the map may have.
|
||||
* We do not need to sort the page numbers; they're in order already.
* (They were collected by the ascending-blkno loop above.)
* NOTE(review): the meaning of the literal 0 second argument is defined
* by MultiRecordFreeSpace, which is not visible here -- confirm.
|
||||
*/
|
||||
MultiRecordFreeSpace(&rel->rd_node, 0, nFreePages, pageSpaces);
|
||||
|
||||
pfree(pageSpaces);
|
||||
|
||||
/* update statistics */
|
||||
stats->num_pages = num_pages;
|
||||
stats->pages_free = nFreePages;
|
||||
|
||||
PG_RETURN_POINTER(stats);
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore scan position when btgettuple is called to continue a scan.
|
||||
*
|
||||
@ -739,7 +800,7 @@ _bt_restscan(IndexScanDesc scan)
|
||||
maxoff;
|
||||
BTPageOpaque opaque;
|
||||
Buffer nextbuf;
|
||||
ItemPointerData target = so->curHeapIptr;
|
||||
ItemPointer target = &(so->curHeapIptr);
|
||||
BTItem item;
|
||||
BlockNumber blkno;
|
||||
|
||||
@ -759,7 +820,7 @@ _bt_restscan(IndexScanDesc scan)
|
||||
* current->ip_posid before first index tuple on the current page
|
||||
* (_bt_step will move it right)... XXX still needed?
|
||||
*/
|
||||
if (!ItemPointerIsValid(&target))
|
||||
if (!ItemPointerIsValid(target))
|
||||
{
|
||||
ItemPointerSetOffsetNumber(current,
|
||||
OffsetNumberPrev(P_FIRSTDATAKEY(opaque)));
|
||||
@ -778,11 +839,7 @@ _bt_restscan(IndexScanDesc scan)
|
||||
offnum = OffsetNumberNext(offnum))
|
||||
{
|
||||
item = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
|
||||
if (item->bti_itup.t_tid.ip_blkid.bi_hi ==
|
||||
target.ip_blkid.bi_hi &&
|
||||
item->bti_itup.t_tid.ip_blkid.bi_lo ==
|
||||
target.ip_blkid.bi_lo &&
|
||||
item->bti_itup.t_tid.ip_posid == target.ip_posid)
|
||||
if (BTTidSame(item->bti_itup.t_tid, *target))
|
||||
{
|
||||
/* Found it */
|
||||
current->ip_posid = offnum;
|
||||
@ -793,22 +850,33 @@ _bt_restscan(IndexScanDesc scan)
|
||||
/*
|
||||
* The item we're looking for moved right at least one page, so
|
||||
* move right. We are careful here to pin and read-lock the next
|
||||
* page before releasing the current one. This ensures that a
|
||||
* concurrent btbulkdelete scan cannot pass our position --- if it
|
||||
* non-dead page before releasing the current one. This ensures that
|
||||
* a concurrent btbulkdelete scan cannot pass our position --- if it
|
||||
* did, it might be able to reach and delete our target item before
|
||||
* we can find it again.
|
||||
*/
|
||||
if (P_RIGHTMOST(opaque))
|
||||
elog(FATAL, "_bt_restscan: my bits moved right off the end of the world!"
|
||||
elog(ERROR, "_bt_restscan: my bits moved right off the end of the world!"
|
||||
"\n\tRecreate index %s.", RelationGetRelationName(rel));
|
||||
|
||||
/* Advance to next non-dead page --- there must be one */
|
||||
nextbuf = InvalidBuffer;
|
||||
for (;;)
|
||||
{
|
||||
blkno = opaque->btpo_next;
|
||||
if (nextbuf != InvalidBuffer)
|
||||
_bt_relbuf(rel, nextbuf);
|
||||
nextbuf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(nextbuf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (!P_IGNORE(opaque))
|
||||
break;
|
||||
if (P_RIGHTMOST(opaque))
|
||||
elog(ERROR, "_bt_restscan: fell off the end of %s",
|
||||
RelationGetRelationName(rel));
|
||||
}
|
||||
_bt_relbuf(rel, buf);
|
||||
so->btso_curbuf = buf = nextbuf;
|
||||
page = BufferGetPage(buf);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
offnum = P_FIRSTDATAKEY(opaque);
|
||||
ItemPointerSet(current, blkno, offnum);
|
||||
}
|
||||
|
@ -1,14 +1,14 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* nbtsearch.c
|
||||
* search code for postgres btrees.
|
||||
* Search code for postgres btrees.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.73 2003/02/21 00:06:21 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.74 2003/02/22 00:45:04 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -19,6 +19,7 @@
|
||||
#include "access/nbtree.h"
|
||||
|
||||
|
||||
static Buffer _bt_walk_left(Relation rel, Buffer buf);
|
||||
static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
|
||||
|
||||
|
||||
@ -79,10 +80,11 @@ _bt_search(Relation rel, int keysz, ScanKey scankey,
|
||||
par_blkno = BufferGetBlockNumber(*bufP);
|
||||
|
||||
/*
|
||||
* We need to save the bit image of the index entry we chose in
|
||||
* We need to save the location of the index entry we chose in
|
||||
* the parent page on a stack. In case we split the tree, we'll
|
||||
* use this bit image to figure out what our real parent page is,
|
||||
* in case the parent splits while we're working lower in the
|
||||
* use the stack to work back up to the parent page. We also save
|
||||
* the actual downlink (TID) to uniquely identify the index entry,
|
||||
* in case it moves right while we're working lower in the
|
||||
* tree. See the paper by Lehman and Yao for how this is detected
|
||||
* and handled. (We use the child link to disambiguate duplicate
|
||||
* keys in the index -- Lehman and Yao disallow duplicate keys.)
|
||||
@ -114,7 +116,7 @@ _bt_search(Relation rel, int keysz, ScanKey scankey,
|
||||
/*
|
||||
* _bt_moveright() -- move right in the btree if necessary.
|
||||
*
|
||||
* When we drop and reacquire a pointer to a page, it is possible that
|
||||
* When we follow a pointer to reach a page, it is possible that
|
||||
* the page has changed in the meanwhile. If this happens, we're
|
||||
* guaranteed that the page has "split right" -- that is, that any
|
||||
* data that appeared on the page originally is either on the page
|
||||
@ -148,9 +150,13 @@ _bt_moveright(Relation rel,
|
||||
* right. (If the scan key is equal to the high key, we might or
|
||||
* might not need to move right; have to scan the page first anyway.)
|
||||
* It could even have split more than once, so scan as far as needed.
|
||||
*
|
||||
* We also have to move right if we followed a link that brought us to
|
||||
* a dead page.
|
||||
*/
|
||||
while (!P_RIGHTMOST(opaque) &&
|
||||
_bt_compare(rel, keysz, scankey, page, P_HIKEY) > 0)
|
||||
(P_IGNORE(opaque) ||
|
||||
_bt_compare(rel, keysz, scankey, page, P_HIKEY) > 0))
|
||||
{
|
||||
/* step right one page */
|
||||
BlockNumber rblkno = opaque->btpo_next;
|
||||
@ -161,6 +167,10 @@ _bt_moveright(Relation rel,
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
}
|
||||
|
||||
if (P_IGNORE(opaque))
|
||||
elog(ERROR, "_bt_moveright: fell off the end of %s",
|
||||
RelationGetRelationName(rel));
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
@ -796,7 +806,6 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
OffsetNumber offnum,
|
||||
maxoff;
|
||||
BlockNumber blkno;
|
||||
BlockNumber obknum;
|
||||
|
||||
/*
|
||||
* Don't use ItemPointerGetOffsetNumber or you risk to get assertion
|
||||
@ -814,7 +823,7 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
offnum = OffsetNumberNext(offnum);
|
||||
else
|
||||
{
|
||||
/* walk right to the next page with data */
|
||||
/* Walk right to the next page with data */
|
||||
for (;;)
|
||||
{
|
||||
/* if we're at end of scan, release the buffer and return */
|
||||
@ -831,6 +840,8 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
*bufP = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(*bufP);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (!P_IGNORE(opaque))
|
||||
{
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
/* done if it's not empty */
|
||||
offnum = P_FIRSTDATAKEY(opaque);
|
||||
@ -839,53 +850,49 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
}
|
||||
else /* backwards scan */
|
||||
{
|
||||
if (offnum > P_FIRSTDATAKEY(opaque))
|
||||
offnum = OffsetNumberPrev(offnum);
|
||||
else
|
||||
{
|
||||
/* walk left to the next page with data */
|
||||
/*
|
||||
* Walk left to the next page with data. This is much more
|
||||
* complex than the walk-right case because of the possibility
|
||||
* that the page to our left splits while we are in flight to it,
|
||||
* plus the possibility that the page we were on gets deleted
|
||||
* after we leave it. See nbtree/README for details.
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
/* if we're at end of scan, release the buffer and return */
|
||||
if (P_LEFTMOST(opaque))
|
||||
*bufP = _bt_walk_left(rel, *bufP);
|
||||
|
||||
/* if we're at end of scan, return failure */
|
||||
if (*bufP == InvalidBuffer)
|
||||
{
|
||||
_bt_relbuf(rel, *bufP);
|
||||
ItemPointerSetInvalid(current);
|
||||
*bufP = so->btso_curbuf = InvalidBuffer;
|
||||
so->btso_curbuf = InvalidBuffer;
|
||||
return false;
|
||||
}
|
||||
/* step left */
|
||||
obknum = BufferGetBlockNumber(*bufP);
|
||||
blkno = opaque->btpo_prev;
|
||||
_bt_relbuf(rel, *bufP);
|
||||
*bufP = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(*bufP);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/*
|
||||
* If the adjacent page just split, then we have to walk
|
||||
* right to find the block that's now adjacent to where we
|
||||
* were. Because pages only split right, we don't have to
|
||||
* worry about this failing to terminate.
|
||||
* Okay, we managed to move left to a non-deleted page.
|
||||
* Done if it's not half-dead and not empty. Else loop back
|
||||
* and do it all again.
|
||||
*/
|
||||
while (opaque->btpo_next != obknum)
|
||||
if (!P_IGNORE(opaque))
|
||||
{
|
||||
blkno = opaque->btpo_next;
|
||||
_bt_relbuf(rel, *bufP);
|
||||
*bufP = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(*bufP);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
}
|
||||
/* done if it's not empty */
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
offnum = maxoff;
|
||||
if (!PageIsEmpty(page) && maxoff >= P_FIRSTDATAKEY(opaque))
|
||||
if (!PageIsEmpty(page) &&
|
||||
maxoff >= P_FIRSTDATAKEY(opaque))
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Update scan state */
|
||||
so->btso_curbuf = *bufP;
|
||||
@ -895,11 +902,133 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* _bt_walk_left() -- step left one page, if possible
|
||||
*
|
||||
* The given buffer must be pinned and read-locked. The pin and lock are dropped
|
||||
* before stepping left. On return, we have pin and read lock on the
|
||||
* returned page, instead.
|
||||
*
|
||||
* Returns InvalidBuffer if there is no page to the left (no lock is held
|
||||
* in that case).
|
||||
*
|
||||
* When working on a non-leaf level, it is possible for the returned page
|
||||
* to be half-dead; the caller should check that condition and step left
|
||||
* again if it's important.
|
||||
*/
|
||||
static Buffer
|
||||
_bt_walk_left(Relation rel, Buffer buf)
|
||||
{
|
||||
Page page;
|
||||
BTPageOpaque opaque;
|
||||
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
/* Each pass of this loop attempts one step left from the page held in buf */
for (;;)
|
||||
{
|
||||
BlockNumber obknum;
|
||||
BlockNumber lblkno;
|
||||
BlockNumber blkno;
|
||||
/* counts the right-hops taken in the bounded search below */
int tries;
|
||||
|
||||
/* if we're at end of tree, release buf and return failure */
|
||||
if (P_LEFTMOST(opaque))
|
||||
{
|
||||
_bt_relbuf(rel, buf);
|
||||
break;
|
||||
}
|
||||
/* remember original page we are stepping left from */
|
||||
obknum = BufferGetBlockNumber(buf);
|
||||
/* step left */
|
||||
/* lblkno keeps the left-link we started from, for the sanity check at the bottom of the loop */
blkno = lblkno = opaque->btpo_prev;
|
||||
_bt_relbuf(rel, buf);
|
||||
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
/*
|
||||
* If this isn't the page we want, walk right till we find
|
||||
* what we want --- but go no more than four hops (an
|
||||
* arbitrary limit). If we don't find the correct page by then,
|
||||
* the most likely bet is that the original page got deleted
|
||||
* and isn't in the sibling chain at all anymore, not that its
|
||||
* left sibling got split more than four times.
|
||||
*
|
||||
* Note that it is correct to test P_ISDELETED not P_IGNORE
|
||||
* here, because half-dead pages are still in the sibling
|
||||
* chain. Caller must reject half-dead pages if wanted.
|
||||
*/
|
||||
tries = 0;
|
||||
for (;;)
|
||||
{
|
||||
/* the page we want is the non-deleted one whose right-link points at obknum */
if (!P_ISDELETED(opaque) && opaque->btpo_next == obknum)
|
||||
{
|
||||
/* Found desired page, return it */
|
||||
return buf;
|
||||
}
|
||||
if (P_RIGHTMOST(opaque) || ++tries > 4)
|
||||
break;
|
||||
blkno = opaque->btpo_next;
|
||||
_bt_relbuf(rel, buf);
|
||||
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
}
|
||||
|
||||
/* Return to the original page to see what's up */
|
||||
_bt_relbuf(rel, buf);
|
||||
buf = _bt_getbuf(rel, obknum, BT_READ);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (P_ISDELETED(opaque))
|
||||
{
|
||||
/*
|
||||
* It was deleted. Move right to first nondeleted page
|
||||
* (there must be one); that is the page that has acquired the
|
||||
* deleted one's keyspace, so stepping left from it will take
|
||||
* us where we want to be.
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
if (P_RIGHTMOST(opaque))
|
||||
elog(ERROR, "_bt_walk_left: fell off the end of %s",
|
||||
RelationGetRelationName(rel));
|
||||
blkno = opaque->btpo_next;
|
||||
_bt_relbuf(rel, buf);
|
||||
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(buf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
if (!P_ISDELETED(opaque))
|
||||
break;
|
||||
}
|
||||
/*
|
||||
* Now return to top of loop, resetting obknum to
|
||||
* point to this nondeleted page, and try again.
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* It wasn't deleted; the explanation had better be
|
||||
* that the page to the left got split or deleted.
|
||||
* Without this check, we'd go into an infinite loop
|
||||
* if there's anything wrong.
|
||||
*/
|
||||
if (opaque->btpo_prev == lblkno)
|
||||
elog(ERROR, "_bt_walk_left: can't find left sibling in %s",
|
||||
RelationGetRelationName(rel));
|
||||
/* Okay to try again with new lblkno value */
|
||||
}
|
||||
}
|
||||
|
||||
/* reached only via the P_LEFTMOST break above, after buf has been released */
return InvalidBuffer;
|
||||
}
|
||||
|
||||
/*
|
||||
* _bt_get_endpoint() -- Find the first or last page on a given tree level
|
||||
*
|
||||
* If the index is empty, we will return InvalidBuffer; any other failure
|
||||
* condition causes elog().
|
||||
* condition causes elog(). We will not return a dead page.
|
||||
*
|
||||
* The returned buffer is pinned and read-locked.
|
||||
*/
|
||||
@ -941,12 +1070,13 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
|
||||
* step right if needed to get to it (this could happen if the
|
||||
* page split since we obtained a pointer to it).
|
||||
*/
|
||||
while (P_ISDELETED(opaque) ||
|
||||
while (P_IGNORE(opaque) ||
|
||||
(rightmost && !P_RIGHTMOST(opaque)))
|
||||
{
|
||||
blkno = opaque->btpo_next;
|
||||
if (blkno == P_NONE)
|
||||
elog(ERROR, "_bt_get_endpoint: ran off end of btree");
|
||||
elog(ERROR, "_bt_get_endpoint: fell off the end of %s",
|
||||
RelationGetRelationName(rel));
|
||||
_bt_relbuf(rel, buf);
|
||||
buf = _bt_getbuf(rel, blkno, BT_READ);
|
||||
page = BufferGetPage(buf);
|
||||
@ -959,7 +1089,7 @@ _bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
|
||||
if (opaque->btpo.level < level)
|
||||
elog(ERROR, "_bt_get_endpoint: btree level %u not found", level);
|
||||
|
||||
/* Step to leftmost or rightmost child page */
|
||||
/* Descend to leftmost or rightmost child page */
|
||||
if (rightmost)
|
||||
offnum = PageGetMaxOffsetNumber(page);
|
||||
else
|
||||
|
@ -1,4 +1,5 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* nbtsort.c
|
||||
* Build a btree from sorted input by loading leaf pages sequentially.
|
||||
*
|
||||
@ -35,7 +36,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.71 2003/02/21 00:06:21 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.72 2003/02/22 00:45:04 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -164,8 +165,8 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
|
||||
ResetUsage();
|
||||
}
|
||||
#endif /* BTREE_BUILD_STATS */
|
||||
tuplesort_performsort(btspool->sortstate);
|
||||
|
||||
tuplesort_performsort(btspool->sortstate);
|
||||
if (btspool2)
|
||||
tuplesort_performsort(btspool2->sortstate);
|
||||
_bt_load(btspool->index, btspool, btspool2);
|
||||
@ -331,7 +332,7 @@ _bt_sortaddtup(Page page,
|
||||
|
||||
if (PageAddItem(page, (Item) btitem, itemsize, itup_off,
|
||||
LP_USED) == InvalidOffsetNumber)
|
||||
elog(FATAL, "btree: failed to add item to the page in _bt_sort");
|
||||
elog(ERROR, "btree: failed to add item to the page in _bt_sort");
|
||||
}
|
||||
|
||||
/*----------
|
||||
@ -470,8 +471,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti)
|
||||
|
||||
/*
|
||||
* Write out the old page. We never want to see it again, so we
|
||||
* can give up our lock (if we had one; most likely BuildingBtree
|
||||
* is set, so we aren't locking).
|
||||
* can give up our lock.
|
||||
*/
|
||||
_bt_blwritepage(index, obuf);
|
||||
|
||||
@ -534,7 +534,7 @@ _bt_uppershutdown(Relation index, BTPageState *state)
|
||||
if (s->btps_next == (BTPageState *) NULL)
|
||||
{
|
||||
opaque->btpo_flags |= BTP_ROOT;
|
||||
_bt_metaproot(index, blkno, s->btps_level + 1);
|
||||
_bt_metaproot(index, blkno, s->btps_level);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.75 2002/09/04 20:31:13 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.76 2003/02/22 00:45:04 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -1250,8 +1250,9 @@ rtbulkdelete(PG_FUNCTION_ARGS)
|
||||
|
||||
result = (IndexBulkDeleteResult *) palloc(sizeof(IndexBulkDeleteResult));
|
||||
result->num_pages = num_pages;
|
||||
result->tuples_removed = tuples_removed;
|
||||
result->num_index_tuples = num_index_tuples;
|
||||
result->tuples_removed = tuples_removed;
|
||||
result->pages_free = 0;
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.247 2003/02/09 06:56:27 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.248 2003/02/22 00:45:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -2603,17 +2603,25 @@ static void
|
||||
scan_index(Relation indrel, double num_tuples)
|
||||
{
|
||||
IndexBulkDeleteResult *stats;
|
||||
IndexVacuumCleanupInfo vcinfo;
|
||||
VacRUsage ru0;
|
||||
|
||||
vac_init_rusage(&ru0);
|
||||
|
||||
/*
|
||||
* Even though we're not planning to delete anything, use the
|
||||
* ambulkdelete call, so that the scan happens within the index AM for
|
||||
* more speed.
|
||||
* Even though we're not planning to delete anything, we use the
|
||||
* ambulkdelete call, because (a) the scan happens within the index AM
|
||||
* for more speed, and (b) it may want to pass private statistics to
|
||||
* the amvacuumcleanup call.
|
||||
*/
|
||||
stats = index_bulk_delete(indrel, dummy_tid_reaped, NULL);
|
||||
|
||||
/* Do post-VACUUM cleanup, even though we deleted nothing */
|
||||
vcinfo.vacuum_full = true;
|
||||
vcinfo.message_level = elevel;
|
||||
|
||||
stats = index_vacuum_cleanup(indrel, &vcinfo, stats);
|
||||
|
||||
if (!stats)
|
||||
return;
|
||||
|
||||
@ -2622,9 +2630,9 @@ scan_index(Relation indrel, double num_tuples)
|
||||
stats->num_pages, stats->num_index_tuples,
|
||||
false);
|
||||
|
||||
elog(elevel, "Index %s: Pages %u; Tuples %.0f.\n\t%s",
|
||||
elog(elevel, "Index %s: Pages %u, %u free; Tuples %.0f.\n\t%s",
|
||||
RelationGetRelationName(indrel),
|
||||
stats->num_pages, stats->num_index_tuples,
|
||||
stats->num_pages, stats->pages_free, stats->num_index_tuples,
|
||||
vac_show_rusage(&ru0));
|
||||
|
||||
/*
|
||||
@ -2661,6 +2669,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
|
||||
double num_tuples, int keep_tuples)
|
||||
{
|
||||
IndexBulkDeleteResult *stats;
|
||||
IndexVacuumCleanupInfo vcinfo;
|
||||
VacRUsage ru0;
|
||||
|
||||
vac_init_rusage(&ru0);
|
||||
@ -2668,6 +2677,12 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
|
||||
/* Do bulk deletion */
|
||||
stats = index_bulk_delete(indrel, tid_reaped, (void *) vacpagelist);
|
||||
|
||||
/* Do post-VACUUM cleanup */
|
||||
vcinfo.vacuum_full = true;
|
||||
vcinfo.message_level = elevel;
|
||||
|
||||
stats = index_vacuum_cleanup(indrel, &vcinfo, stats);
|
||||
|
||||
if (!stats)
|
||||
return;
|
||||
|
||||
@ -2676,8 +2691,9 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
|
||||
stats->num_pages, stats->num_index_tuples,
|
||||
false);
|
||||
|
||||
elog(elevel, "Index %s: Pages %u; Tuples %.0f: Deleted %.0f.\n\t%s",
|
||||
RelationGetRelationName(indrel), stats->num_pages,
|
||||
elog(elevel, "Index %s: Pages %u, %u free; Tuples %.0f: Deleted %.0f.\n\t%s",
|
||||
RelationGetRelationName(indrel),
|
||||
stats->num_pages, stats->pages_free,
|
||||
stats->num_index_tuples - keep_tuples, stats->tuples_removed,
|
||||
vac_show_rusage(&ru0));
|
||||
|
||||
|
@ -31,7 +31,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.23 2002/11/13 00:39:46 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/commands/vacuumlazy.c,v 1.24 2003/02/22 00:45:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -200,7 +200,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
||||
tups_vacuumed,
|
||||
nkeep,
|
||||
nunused;
|
||||
bool did_vacuum_index = false;
|
||||
int i;
|
||||
VacRUsage ru0;
|
||||
|
||||
@ -244,7 +243,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
||||
/* Remove index entries */
|
||||
for (i = 0; i < nindexes; i++)
|
||||
lazy_vacuum_index(Irel[i], vacrelstats);
|
||||
did_vacuum_index = true;
|
||||
/* Remove tuples from heap */
|
||||
lazy_vacuum_heap(onerel, vacrelstats);
|
||||
/* Forget the now-vacuumed tuples, and press on */
|
||||
@ -415,7 +413,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
||||
vacrelstats->rel_tuples = num_tuples;
|
||||
|
||||
/* If any tuples need to be deleted, perform final vacuum cycle */
|
||||
/* XXX put a threshold on min nuber of tuples here? */
|
||||
/* XXX put a threshold on min number of tuples here? */
|
||||
if (vacrelstats->num_dead_tuples > 0)
|
||||
{
|
||||
/* Remove index entries */
|
||||
@ -424,9 +422,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
||||
/* Remove tuples from heap */
|
||||
lazy_vacuum_heap(onerel, vacrelstats);
|
||||
}
|
||||
else if (!did_vacuum_index)
|
||||
else
|
||||
{
|
||||
/* Scan indexes just to update pg_class statistics about them */
|
||||
/* Must do post-vacuum cleanup and statistics update anyway */
|
||||
for (i = 0; i < nindexes; i++)
|
||||
lazy_scan_index(Irel[i], vacrelstats);
|
||||
}
|
||||
@ -551,42 +549,36 @@ static void
|
||||
lazy_scan_index(Relation indrel, LVRelStats *vacrelstats)
|
||||
{
|
||||
IndexBulkDeleteResult *stats;
|
||||
IndexVacuumCleanupInfo vcinfo;
|
||||
VacRUsage ru0;
|
||||
|
||||
vac_init_rusage(&ru0);
|
||||
|
||||
/*
|
||||
* If the index is not partial, skip the scan, and just assume it has
|
||||
* the same number of tuples as the heap.
|
||||
*/
|
||||
if (!vac_is_partial_index(indrel))
|
||||
{
|
||||
vac_update_relstats(RelationGetRelid(indrel),
|
||||
RelationGetNumberOfBlocks(indrel),
|
||||
vacrelstats->rel_tuples,
|
||||
false);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If index is unsafe for concurrent access, must lock it; but a
|
||||
* shared lock should be sufficient.
|
||||
* If index is unsafe for concurrent access, must lock it.
|
||||
*/
|
||||
if (!indrel->rd_am->amconcurrent)
|
||||
LockRelation(indrel, AccessShareLock);
|
||||
LockRelation(indrel, AccessExclusiveLock);
|
||||
|
||||
/*
|
||||
* Even though we're not planning to delete anything, use the
|
||||
* ambulkdelete call, so that the scan happens within the index AM for
|
||||
* more speed.
|
||||
* Even though we're not planning to delete anything, we use the
|
||||
* ambulkdelete call, because (a) the scan happens within the index AM
|
||||
* for more speed, and (b) it may want to pass private statistics to
|
||||
* the amvacuumcleanup call.
|
||||
*/
|
||||
stats = index_bulk_delete(indrel, dummy_tid_reaped, NULL);
|
||||
|
||||
/* Do post-VACUUM cleanup, even though we deleted nothing */
|
||||
vcinfo.vacuum_full = false;
|
||||
vcinfo.message_level = elevel;
|
||||
|
||||
stats = index_vacuum_cleanup(indrel, &vcinfo, stats);
|
||||
|
||||
/*
|
||||
* Release lock acquired above.
|
||||
*/
|
||||
if (!indrel->rd_am->amconcurrent)
|
||||
UnlockRelation(indrel, AccessShareLock);
|
||||
UnlockRelation(indrel, AccessExclusiveLock);
|
||||
|
||||
if (!stats)
|
||||
return;
|
||||
@ -596,9 +588,9 @@ lazy_scan_index(Relation indrel, LVRelStats *vacrelstats)
|
||||
stats->num_pages, stats->num_index_tuples,
|
||||
false);
|
||||
|
||||
elog(elevel, "Index %s: Pages %u; Tuples %.0f.\n\t%s",
|
||||
elog(elevel, "Index %s: Pages %u, %u free; Tuples %.0f.\n\t%s",
|
||||
RelationGetRelationName(indrel),
|
||||
stats->num_pages, stats->num_index_tuples,
|
||||
stats->num_pages, stats->pages_free, stats->num_index_tuples,
|
||||
vac_show_rusage(&ru0));
|
||||
|
||||
pfree(stats);
|
||||
@ -617,6 +609,7 @@ static void
|
||||
lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats)
|
||||
{
|
||||
IndexBulkDeleteResult *stats;
|
||||
IndexVacuumCleanupInfo vcinfo;
|
||||
VacRUsage ru0;
|
||||
|
||||
vac_init_rusage(&ru0);
|
||||
@ -630,26 +623,33 @@ lazy_vacuum_index(Relation indrel, LVRelStats *vacrelstats)
|
||||
/* Do bulk deletion */
|
||||
stats = index_bulk_delete(indrel, lazy_tid_reaped, (void *) vacrelstats);
|
||||
|
||||
/* Do post-VACUUM cleanup */
|
||||
vcinfo.vacuum_full = false;
|
||||
vcinfo.message_level = elevel;
|
||||
|
||||
stats = index_vacuum_cleanup(indrel, &vcinfo, stats);
|
||||
|
||||
/*
|
||||
* Release lock acquired above.
|
||||
*/
|
||||
if (!indrel->rd_am->amconcurrent)
|
||||
UnlockRelation(indrel, AccessExclusiveLock);
|
||||
|
||||
if (!stats)
|
||||
return;
|
||||
|
||||
/* now update statistics in pg_class */
|
||||
if (stats)
|
||||
{
|
||||
vac_update_relstats(RelationGetRelid(indrel),
|
||||
stats->num_pages, stats->num_index_tuples,
|
||||
false);
|
||||
|
||||
elog(elevel, "Index %s: Pages %u; Tuples %.0f: Deleted %.0f.\n\t%s",
|
||||
RelationGetRelationName(indrel), stats->num_pages,
|
||||
elog(elevel, "Index %s: Pages %u, %u free; Tuples %.0f: Deleted %.0f.\n\t%s",
|
||||
RelationGetRelationName(indrel),
|
||||
stats->num_pages, stats->pages_free,
|
||||
stats->num_index_tuples, stats->tuples_removed,
|
||||
vac_show_rusage(&ru0));
|
||||
|
||||
pfree(stats);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: genam.h,v 1.37 2002/09/04 20:31:36 momjian Exp $
|
||||
* $Id: genam.h,v 1.38 2003/02/22 00:45:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -20,17 +20,32 @@
|
||||
#include "nodes/primnodes.h"
|
||||
|
||||
|
||||
/* Struct for statistics returned by bulk-delete operation */
|
||||
/*
|
||||
* Struct for statistics returned by bulk-delete operation
|
||||
*
|
||||
* This is now also passed to the index AM's vacuum-cleanup operation,
|
||||
* if it has one, which can modify the results as needed. Note that
|
||||
* an index AM could choose to have bulk-delete return a larger struct
|
||||
* of which this is just the first field; this provides a way for bulk-delete
|
||||
* to communicate additional private data to vacuum-cleanup.
|
||||
*/
|
||||
typedef struct IndexBulkDeleteResult
|
||||
{
|
||||
BlockNumber num_pages; /* pages remaining in index */
|
||||
double num_index_tuples; /* tuples remaining */
|
||||
double tuples_removed; /* # removed by bulk-delete operation */
|
||||
double num_index_tuples; /* # remaining */
|
||||
BlockNumber pages_free; /* # unused pages in index */
|
||||
} IndexBulkDeleteResult;
|
||||
|
||||
/* Typedef for callback function to determine if a tuple is bulk-deletable */
|
||||
typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr, void *state);
|
||||
|
||||
/*
 * Struct for additional arguments passed to vacuum-cleanup operation
 *
 * The caller fills this in and passes its address to index_vacuum_cleanup()
 * along with the stats struct previously returned by index_bulk_delete().
 */
|
||||
typedef struct IndexVacuumCleanupInfo
|
||||
{
|
||||
bool vacuum_full; /* VACUUM FULL (we have exclusive lock) */
|
||||
int message_level; /* elog level for progress messages */
|
||||
} IndexVacuumCleanupInfo;
|
||||
|
||||
/* Struct for heap-or-index scans of system tables */
|
||||
typedef struct SysScanDescData
|
||||
@ -72,6 +87,9 @@ extern bool index_getnext_indexitem(IndexScanDesc scan,
|
||||
extern IndexBulkDeleteResult *index_bulk_delete(Relation indexRelation,
|
||||
IndexBulkDeleteCallback callback,
|
||||
void *callback_state);
|
||||
extern IndexBulkDeleteResult *index_vacuum_cleanup(Relation indexRelation,
|
||||
IndexVacuumCleanupInfo *info,
|
||||
IndexBulkDeleteResult *stats);
|
||||
extern RegProcedure index_cost_estimator(Relation indexRelation);
|
||||
extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
|
||||
uint16 procnum);
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: nbtree.h,v 1.64 2003/02/21 00:06:22 tgl Exp $
|
||||
* $Id: nbtree.h,v 1.65 2003/02/22 00:45:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -54,6 +54,7 @@ typedef BTPageOpaqueData *BTPageOpaque;
|
||||
#define BTP_ROOT (1 << 1) /* root page (has no parent) */
|
||||
#define BTP_DELETED (1 << 2) /* page has been deleted from tree */
|
||||
#define BTP_META (1 << 3) /* meta-page */
|
||||
#define BTP_HALF_DEAD (1 << 4) /* empty, but still in tree */
|
||||
|
||||
|
||||
/*
|
||||
@ -124,12 +125,13 @@ typedef BTItemData *BTItem;
|
||||
#define SizeOfBTItem sizeof(BTItemData)
|
||||
|
||||
/* Test whether items are the "same" per the above notes */
|
||||
#define BTItemSame(i1, i2) ( (i1)->bti_itup.t_tid.ip_blkid.bi_hi == \
|
||||
(i2)->bti_itup.t_tid.ip_blkid.bi_hi && \
|
||||
(i1)->bti_itup.t_tid.ip_blkid.bi_lo == \
|
||||
(i2)->bti_itup.t_tid.ip_blkid.bi_lo && \
|
||||
(i1)->bti_itup.t_tid.ip_posid == \
|
||||
(i2)->bti_itup.t_tid.ip_posid )
|
||||
#define BTTidSame(i1, i2) \
|
||||
( (i1).ip_blkid.bi_hi == (i2).ip_blkid.bi_hi && \
|
||||
(i1).ip_blkid.bi_lo == (i2).ip_blkid.bi_lo && \
|
||||
(i1).ip_posid == (i2).ip_posid )
|
||||
#define BTItemSame(i1, i2) \
|
||||
BTTidSame((i1)->bti_itup.t_tid, (i2)->bti_itup.t_tid)
|
||||
|
||||
|
||||
/*
|
||||
* In general, the btree code tries to localize its knowledge about
|
||||
@ -150,6 +152,7 @@ typedef BTItemData *BTItem;
|
||||
#define P_ISLEAF(opaque) ((opaque)->btpo_flags & BTP_LEAF)
|
||||
#define P_ISROOT(opaque) ((opaque)->btpo_flags & BTP_ROOT)
|
||||
#define P_ISDELETED(opaque) ((opaque)->btpo_flags & BTP_DELETED)
|
||||
#define P_IGNORE(opaque) ((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD))
|
||||
|
||||
/*
|
||||
* Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
|
||||
@ -412,8 +415,6 @@ typedef BTScanOpaqueData *BTScanOpaque;
|
||||
/*
|
||||
* prototypes for functions in nbtree.c (external entry points for btree)
|
||||
*/
|
||||
extern bool BuildingBtree; /* in nbtree.c */
|
||||
|
||||
extern void AtEOXact_nbtree(void);
|
||||
|
||||
extern Datum btbuild(PG_FUNCTION_ARGS);
|
||||
@ -426,6 +427,7 @@ extern Datum btendscan(PG_FUNCTION_ARGS);
|
||||
extern Datum btmarkpos(PG_FUNCTION_ARGS);
|
||||
extern Datum btrestrpos(PG_FUNCTION_ARGS);
|
||||
extern Datum btbulkdelete(PG_FUNCTION_ARGS);
|
||||
extern Datum btvacuumcleanup(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
|
||||
* prototypes for functions in nbtinsert.c
|
||||
|
@ -6,7 +6,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: xlog.h,v 1.41 2003/02/21 00:06:22 tgl Exp $
|
||||
* $Id: xlog.h,v 1.42 2003/02/22 00:45:05 tgl Exp $
|
||||
*/
|
||||
#ifndef XLOG_H
|
||||
#define XLOG_H
|
||||
@ -56,17 +56,18 @@ typedef struct XLogRecord
|
||||
#define XLR_INFO_MASK 0x0F
|
||||
|
||||
/*
|
||||
* We support backup of up to 2 disk blocks per XLOG record (could support
|
||||
* more if we cared to dedicate more xl_info bits for this purpose; currently
|
||||
* do not need more than 2 anyway). If we backed up any disk blocks then we
|
||||
* use flag bits in xl_info to signal it.
|
||||
* If we backed up any disk blocks with the XLOG record, we use flag bits in
|
||||
* xl_info to signal it. We support backup of up to 3 disk blocks per XLOG
|
||||
* record. (Could support 4 if we cared to dedicate all the xl_info bits for
|
||||
* this purpose; currently bit 0 of xl_info is unused and available.)
|
||||
*/
|
||||
#define XLR_BKP_BLOCK_MASK 0x0C /* all info bits used for bkp
|
||||
#define XLR_BKP_BLOCK_MASK 0x0E /* all info bits used for bkp
|
||||
* blocks */
|
||||
#define XLR_MAX_BKP_BLOCKS 2
|
||||
#define XLR_MAX_BKP_BLOCKS 3
|
||||
#define XLR_SET_BKP_BLOCK(iblk) (0x08 >> (iblk))
|
||||
#define XLR_BKP_BLOCK_1 XLR_SET_BKP_BLOCK(0) /* 0x08 */
|
||||
#define XLR_BKP_BLOCK_2 XLR_SET_BKP_BLOCK(1) /* 0x04 */
|
||||
#define XLR_BKP_BLOCK_3 XLR_SET_BKP_BLOCK(2) /* 0x02 */
|
||||
|
||||
/*
|
||||
* Sometimes we log records which are out of transaction control.
|
||||
|
@ -37,7 +37,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: catversion.h,v 1.178 2003/02/21 00:06:22 tgl Exp $
|
||||
* $Id: catversion.h,v 1.179 2003/02/22 00:45:05 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 200302171
|
||||
#define CATALOG_VERSION_NO 200302211
|
||||
|
||||
#endif
|
||||
|
@ -8,7 +8,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: pg_am.h,v 1.23 2002/07/29 22:14:11 tgl Exp $
|
||||
* $Id: pg_am.h,v 1.24 2003/02/22 00:45:05 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* the genbki.sh script reads this file and generates .bki
|
||||
@ -58,6 +58,7 @@ CATALOG(pg_am)
|
||||
regproc amrestrpos; /* "restore marked scan position" function */
|
||||
regproc ambuild; /* "build new index" function */
|
||||
regproc ambulkdelete; /* bulk-delete function */
|
||||
regproc amvacuumcleanup; /* post-VACUUM cleanup function */
|
||||
regproc amcostestimate; /* estimate cost of an indexscan */
|
||||
} FormData_pg_am;
|
||||
|
||||
@ -72,7 +73,7 @@ typedef FormData_pg_am *Form_pg_am;
|
||||
* compiler constants for pg_am
|
||||
* ----------------
|
||||
*/
|
||||
#define Natts_pg_am 19
|
||||
#define Natts_pg_am 20
|
||||
#define Anum_pg_am_amname 1
|
||||
#define Anum_pg_am_amowner 2
|
||||
#define Anum_pg_am_amstrategies 3
|
||||
@ -91,21 +92,22 @@ typedef FormData_pg_am *Form_pg_am;
|
||||
#define Anum_pg_am_amrestrpos 16
|
||||
#define Anum_pg_am_ambuild 17
|
||||
#define Anum_pg_am_ambulkdelete 18
|
||||
#define Anum_pg_am_amcostestimate 19
|
||||
#define Anum_pg_am_amvacuumcleanup 19
|
||||
#define Anum_pg_am_amcostestimate 20
|
||||
|
||||
/* ----------------
|
||||
* initial contents of pg_am
|
||||
* ----------------
|
||||
*/
|
||||
|
||||
DATA(insert OID = 402 ( rtree PGUID 8 3 0 f f f f rtgettuple rtinsert rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos rtbuild rtbulkdelete rtcostestimate ));
|
||||
DATA(insert OID = 402 ( rtree PGUID 8 3 0 f f f f rtgettuple rtinsert rtbeginscan rtrescan rtendscan rtmarkpos rtrestrpos rtbuild rtbulkdelete - rtcostestimate ));
|
||||
DESCR("r-tree index access method");
|
||||
DATA(insert OID = 403 ( btree PGUID 5 1 1 t t t t btgettuple btinsert btbeginscan btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btcostestimate ));
|
||||
DATA(insert OID = 403 ( btree PGUID 5 1 1 t t t t btgettuple btinsert btbeginscan btrescan btendscan btmarkpos btrestrpos btbuild btbulkdelete btvacuumcleanup btcostestimate ));
|
||||
DESCR("b-tree index access method");
|
||||
#define BTREE_AM_OID 403
|
||||
DATA(insert OID = 405 ( hash PGUID 1 1 0 f f f t hashgettuple hashinsert hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashcostestimate ));
|
||||
DATA(insert OID = 405 ( hash PGUID 1 1 0 f f f t hashgettuple hashinsert hashbeginscan hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete - hashcostestimate ));
|
||||
DESCR("hash index access method");
|
||||
DATA(insert OID = 783 ( gist PGUID 100 7 0 f t f f gistgettuple gistinsert gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistcostestimate ));
|
||||
DATA(insert OID = 783 ( gist PGUID 100 7 0 f t f f gistgettuple gistinsert gistbeginscan gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete - gistcostestimate ));
|
||||
DESCR("GiST index access method");
|
||||
#define GIST_AM_OID 783
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Id: pg_proc.h,v 1.283 2003/02/13 05:24:02 momjian Exp $
|
||||
* $Id: pg_proc.h,v 1.284 2003/02/22 00:45:05 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* The script catalog/genbki.sh reads this file and generates .bki
|
||||
@ -710,6 +710,8 @@ DATA(insert OID = 338 ( btbuild PGNSP PGUID 12 f f t f v 3 2278 "2281 2281
|
||||
DESCR("btree(internal)");
|
||||
DATA(insert OID = 332 ( btbulkdelete PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" btbulkdelete - _null_ ));
|
||||
DESCR("btree(internal)");
|
||||
DATA(insert OID = 972 ( btvacuumcleanup PGNSP PGUID 12 f f t f v 3 2281 "2281 2281 2281" btvacuumcleanup - _null_ ));
|
||||
DESCR("btree(internal)");
|
||||
DATA(insert OID = 1268 ( btcostestimate PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" btcostestimate - _null_ ));
|
||||
DESCR("btree(internal)");
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user