1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

First steps towards index scans with heap access decoupled from index

access: define new index access method functions 'amgetmulti' that can
fetch multiple TIDs per call.  (The functions exist but are totally
untested as yet.)  Since I was modifying pg_am anyway, remove the
no-longer-needed 'rel' parameter from amcostestimate functions, and
also remove the vestigial amowner column that was creating useless
work for Alvaro's shared-object-dependencies project.
Initdb forced due to changes in pg_am.
This commit is contained in:
Tom Lane
2005-03-27 23:53:05 +00:00
parent 351519affc
commit bf3dbb5881
20 changed files with 535 additions and 174 deletions

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.44 2005/02/05 19:38:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.45 2005/03/27 23:52:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -47,6 +47,33 @@ gistgettuple(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res);
}
Datum
gistgetmulti(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
bool res = true;
int32 ntids = 0;
/* XXX generic implementation: loop around guts of gistgettuple */
while (ntids < max_tids)
{
if (ItemPointerIsValid(&(s->currentItemData)))
res = gistnext(s, ForwardScanDirection);
else
res = gistfirst(s, ForwardScanDirection);
if (!res)
break;
tids[ntids] = s->xs_ctup.t_self;
ntids++;
}
*returned_tids = ntids;
PG_RETURN_BOOL(res);
}
static bool
gistfirst(IndexScanDesc s, ScanDirection dir)
{

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.77 2005/03/21 01:23:57 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.78 2005/03/27 23:52:57 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
@@ -264,6 +264,75 @@ hashgettuple(PG_FUNCTION_ARGS)
}
/*
* hashgetmulti() -- get multiple tuples at once
*
* This is a somewhat generic implementation: it avoids lock reacquisition
* overhead, but there's no smarts about picking especially good stopping
* points such as index page boundaries.
*/
Datum
hashgetmulti(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
HashScanOpaque so = (HashScanOpaque) scan->opaque;
Relation rel = scan->indexRelation;
bool res = true;
int32 ntids = 0;
/*
* We hold pin but not lock on current buffer while outside the hash
* AM. Reacquire the read lock here.
*/
if (BufferIsValid(so->hashso_curbuf))
_hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ);
while (ntids < max_tids)
{
/*
* Start scan, or advance to next tuple.
*/
if (ItemPointerIsValid(&(scan->currentItemData)))
res = _hash_next(scan, ForwardScanDirection);
else
res = _hash_first(scan, ForwardScanDirection);
/*
* Skip killed tuples if asked to.
*/
if (scan->ignore_killed_tuples)
{
while (res)
{
Page page;
OffsetNumber offnum;
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->hashso_curbuf);
if (!ItemIdDeleted(PageGetItemId(page, offnum)))
break;
res = _hash_next(scan, ForwardScanDirection);
}
}
if (!res)
break;
/* Save tuple ID, and continue scanning */
tids[ntids] = scan->xs_ctup.t_self;
ntids++;
}
/* Release read lock on current buffer, but keep it pinned */
if (BufferIsValid(so->hashso_curbuf))
_hash_chgbufaccess(rel, so->hashso_curbuf, HASH_READ, HASH_NOLOCK);
*returned_tids = ntids;
PG_RETURN_BOOL(res);
}
/*
* hashbeginscan() -- start a scan on a hash index
*/

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.184 2005/03/20 23:40:23 neilc Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.185 2005/03/27 23:52:58 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -933,18 +933,35 @@ heap_release_fetch(Relation relation,
* Need share lock on buffer to examine tuple commit status.
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
dp = (PageHeader) BufferGetPage(buffer);
/*
* We'd better check for out-of-range offnum in case of VACUUM since
* the TID was obtained.
*/
offnum = ItemPointerGetOffsetNumber(tid);
if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(dp))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
if (keep_buf)
*userbuf = buffer;
else
{
ReleaseBuffer(buffer);
*userbuf = InvalidBuffer;
}
tuple->t_datamcxt = NULL;
tuple->t_data = NULL;
return false;
}
/*
* get the item line pointer corresponding to the requested tid
*/
dp = (PageHeader) BufferGetPage(buffer);
offnum = ItemPointerGetOffsetNumber(tid);
lp = PageGetItemId(dp, offnum);
/*
* must check for deleted tuple (see for example analyze.c, which is
* careful to pass an offnum in range, but doesn't know if the offnum
* actually corresponds to an undeleted tuple).
* Must check for deleted tuple.
*/
if (!ItemIdIsUsed(lp))
{

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.44 2004/12/31 21:59:19 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.45 2005/03/27 23:52:59 tgl Exp $
*
* NOTES
* many of the old access method routines have been turned into
@@ -103,6 +103,7 @@ RelationGetIndexScan(Relation indexRelation,
/* mark cached function lookup data invalid; it will be set later */
scan->fn_getnext.fn_oid = InvalidOid;
scan->fn_getmulti.fn_oid = InvalidOid;
scan->unique_tuple_pos = 0;
scan->unique_tuple_mark = 0;

View File

@@ -8,20 +8,22 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.78 2005/03/21 01:23:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.79 2005/03/27 23:52:59 tgl Exp $
*
* INTERFACE ROUTINES
* index_open - open an index relation by relation OID
* index_openrv - open an index relation specified by a RangeVar
* index_openr - open a system index relation by name
* index_close - close an index relation
* index_beginscan - start a scan of an index
* index_beginscan - start a scan of an index with amgettuple
* index_beginscan_multi - start a scan of an index with amgetmulti
* index_rescan - restart a scan of an index
* index_endscan - end a scan
* index_insert - insert an index tuple into a relation
* index_markpos - mark a scan position
* index_restrpos - restore a scan position
* index_getnext - get the next tuple from a scan
* index_getmulti - get multiple tuples from a scan
* index_bulk_delete - bulk deletion of index tuples
* index_vacuum_cleanup - post-deletion cleanup of an index
* index_cost_estimator - fetch amcostestimate procedure OID
@@ -85,24 +87,25 @@
AssertMacro(PointerIsValid(scan->indexRelation->rd_am)) \
)
#define GET_REL_PROCEDURE(x,y) \
#define GET_REL_PROCEDURE(pname) \
( \
procedure = indexRelation->rd_am->y, \
procedure = indexRelation->rd_am->pname, \
(!RegProcedureIsValid(procedure)) ? \
elog(ERROR, "index_%s: invalid %s regproc", \
CppAsString(x), CppAsString(y)) \
elog(ERROR, "invalid %s regproc", CppAsString(pname)) \
: (void)NULL \
)
#define GET_SCAN_PROCEDURE(x,y) \
#define GET_SCAN_PROCEDURE(pname) \
( \
procedure = scan->indexRelation->rd_am->y, \
procedure = scan->indexRelation->rd_am->pname, \
(!RegProcedureIsValid(procedure)) ? \
elog(ERROR, "index_%s: invalid %s regproc", \
CppAsString(x), CppAsString(y)) \
elog(ERROR, "invalid %s regproc", CppAsString(pname)) \
: (void)NULL \
)
static IndexScanDesc index_beginscan_internal(Relation indexRelation,
int nkeys, ScanKey key);
/* ----------------------------------------------------------------
* index_ interface functions
@@ -222,7 +225,7 @@ index_insert(Relation indexRelation,
RegProcedure procedure;
RELATION_CHECKS;
GET_REL_PROCEDURE(insert, aminsert);
GET_REL_PROCEDURE(aminsert);
/*
* have the am's insert proc do all the work.
@@ -236,15 +239,14 @@ index_insert(Relation indexRelation,
BoolGetDatum(check_uniqueness)));
}
/* ----------------
* index_beginscan - start a scan of an index
/*
* index_beginscan - start a scan of an index with amgettuple
*
* Note: heapRelation may be NULL if there is no intention of calling
* index_getnext on this scan; index_getnext_indexitem will not use the
* heapRelation link (nor the snapshot). However, the caller had better
* be holding some kind of lock on the heap relation in any case, to ensure
* no one deletes it (or the index) out from under us.
* ----------------
*/
IndexScanDesc
index_beginscan(Relation heapRelation,
@@ -255,8 +257,71 @@ index_beginscan(Relation heapRelation,
IndexScanDesc scan;
RegProcedure procedure;
scan = index_beginscan_internal(indexRelation, nkeys, key);
/*
* Save additional parameters into the scandesc. Everything else was
* set up by RelationGetIndexScan.
*/
scan->heapRelation = heapRelation;
scan->xs_snapshot = snapshot;
/*
* We want to look up the amgettuple procedure just once per scan, not
* once per index_getnext call. So do it here and save the fmgr info
* result in the scan descriptor.
*/
GET_SCAN_PROCEDURE(amgettuple);
fmgr_info(procedure, &scan->fn_getnext);
return scan;
}
/*
 * index_beginscan_multi - begin an index scan that will be driven through
 * the access method's amgetmulti entry point
 *
 * No heap relation is passed in, but as with index_beginscan the caller
 * is expected to hold some lock on the parent heap relation.
 */
IndexScanDesc
index_beginscan_multi(Relation indexRelation,
					  Snapshot snapshot,
					  int nkeys, ScanKey key)
{
	IndexScanDesc scan = index_beginscan_internal(indexRelation, nkeys, key);
	RegProcedure procedure;

	/*
	 * Only the snapshot remains to be stored in the scan descriptor;
	 * RelationGetIndexScan filled in everything else.
	 */
	scan->xs_snapshot = snapshot;

	/*
	 * Resolve amgetmulti once, here, and cache the fmgr lookup in the scan
	 * descriptor so that index_getmulti need not repeat it on every call.
	 */
	GET_SCAN_PROCEDURE(amgetmulti);
	fmgr_info(procedure, &scan->fn_getmulti);

	return scan;
}
/*
* index_beginscan_internal --- common code for index_beginscan variants
*/
static IndexScanDesc
index_beginscan_internal(Relation indexRelation,
int nkeys, ScanKey key)
{
IndexScanDesc scan;
RegProcedure procedure;
RELATION_CHECKS;
GET_REL_PROCEDURE(beginscan, ambeginscan);
GET_REL_PROCEDURE(ambeginscan);
RelationIncrementReferenceCount(indexRelation);
@@ -278,21 +343,6 @@ index_beginscan(Relation heapRelation,
Int32GetDatum(nkeys),
PointerGetDatum(key)));
/*
* Save additional parameters into the scandesc. Everything else was
* set up by RelationGetIndexScan.
*/
scan->heapRelation = heapRelation;
scan->xs_snapshot = snapshot;
/*
* We want to look up the amgettuple procedure just once per scan, not
* once per index_getnext call. So do it here and save the fmgr info
* result in the scan descriptor.
*/
GET_SCAN_PROCEDURE(beginscan, amgettuple);
fmgr_info(procedure, &scan->fn_getnext);
return scan;
}
@@ -314,7 +364,7 @@ index_rescan(IndexScanDesc scan, ScanKey key)
RegProcedure procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(rescan, amrescan);
GET_SCAN_PROCEDURE(amrescan);
/* Release any held pin on a heap page */
if (BufferIsValid(scan->xs_cbuf))
@@ -346,7 +396,7 @@ index_endscan(IndexScanDesc scan)
RegProcedure procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(endscan, amendscan);
GET_SCAN_PROCEDURE(amendscan);
/* Release any held pin on a heap page */
if (BufferIsValid(scan->xs_cbuf))
@@ -378,7 +428,7 @@ index_markpos(IndexScanDesc scan)
RegProcedure procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(markpos, ammarkpos);
GET_SCAN_PROCEDURE(ammarkpos);
scan->unique_tuple_mark = scan->unique_tuple_pos;
@@ -395,7 +445,7 @@ index_restrpos(IndexScanDesc scan)
RegProcedure procedure;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(restrpos, amrestrpos);
GET_SCAN_PROCEDURE(amrestrpos);
scan->kill_prior_tuple = false; /* for safety */
@@ -525,9 +575,9 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
&scan->xs_pgstat_info))
break;
/* Skip if no tuple at this location */
/* Skip if no undeleted tuple at this location */
if (heapTuple->t_data == NULL)
continue; /* should we raise an error instead? */
continue;
/*
* If we can't see it, maybe no one else can either. Check to see
@@ -595,6 +645,44 @@ index_getnext_indexitem(IndexScanDesc scan,
return found;
}
/* ----------------
 *		index_getmulti - fetch a batch of heap TIDs from an index scan
 *
 * Gathers the TIDs of several heap tuples that satisfy the scan keys.
 * Nothing interlocks the index scan with the later heap access, so the
 * heap item slot may already hold a newer tuple by the time it is visited;
 * for that reason this is safe only with MVCC-based snapshots.
 *
 * Returns TRUE if the caller should call again, FALSE if the scan is
 * finished.  Either way *returned_tids may be zero or nonzero.
 * ----------------
 */
bool
index_getmulti(IndexScanDesc scan,
			   ItemPointer tids, int32 max_tids,
			   int32 *returned_tids)
{
	SCAN_CHECKS;

	/* make certain no kill request is pending */
	scan->kill_prior_tuple = false;

	/*
	 * The access method's getmulti procedure does the real work; its fmgr
	 * info was cached in fn_getmulti by index_beginscan_multi.
	 */
	return DatumGetBool(FunctionCall4(&scan->fn_getmulti,
									  PointerGetDatum(scan),
									  PointerGetDatum(tids),
									  Int32GetDatum(max_tids),
									  PointerGetDatum(returned_tids)));
}
/* ----------------
* index_bulk_delete - do mass deletion of index entries
*
@@ -613,7 +701,7 @@ index_bulk_delete(Relation indexRelation,
IndexBulkDeleteResult *result;
RELATION_CHECKS;
GET_REL_PROCEDURE(bulk_delete, ambulkdelete);
GET_REL_PROCEDURE(ambulkdelete);
result = (IndexBulkDeleteResult *)
DatumGetPointer(OidFunctionCall3(procedure,
@@ -644,7 +732,7 @@ index_vacuum_cleanup(Relation indexRelation,
if (!RegProcedureIsValid(indexRelation->rd_am->amvacuumcleanup))
return stats;
GET_REL_PROCEDURE(vacuum_cleanup, amvacuumcleanup);
GET_REL_PROCEDURE(amvacuumcleanup);
result = (IndexBulkDeleteResult *)
DatumGetPointer(OidFunctionCall3(procedure,
@@ -671,7 +759,7 @@ index_cost_estimator(Relation indexRelation)
RegProcedure procedure;
RELATION_CHECKS;
GET_REL_PROCEDURE(cost_estimator, amcostestimate);
GET_REL_PROCEDURE(amcostestimate);
return procedure;
}

View File

@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.126 2005/03/21 01:23:59 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.127 2005/03/27 23:53:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -340,6 +340,79 @@ btgettuple(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(res);
}
/*
* btgetmulti() -- get multiple tuples at once
*
* This is a somewhat generic implementation: it avoids the _bt_restscan
* overhead, but there's no smarts about picking especially good stopping
* points such as index page boundaries.
*/
Datum
btgetmulti(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
BTScanOpaque so = (BTScanOpaque) scan->opaque;
bool res = true;
int32 ntids = 0;
/*
* Restore prior state if we were already called at least once.
*/
if (ItemPointerIsValid(&(scan->currentItemData)))
_bt_restscan(scan);
while (ntids < max_tids)
{
/*
* Start scan, or advance to next tuple.
*/
if (ItemPointerIsValid(&(scan->currentItemData)))
res = _bt_next(scan, ForwardScanDirection);
else
res = _bt_first(scan, ForwardScanDirection);
/*
* Skip killed tuples if asked to.
*/
if (scan->ignore_killed_tuples)
{
while (res)
{
Page page;
OffsetNumber offnum;
offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
page = BufferGetPage(so->btso_curbuf);
if (!ItemIdDeleted(PageGetItemId(page, offnum)))
break;
res = _bt_next(scan, ForwardScanDirection);
}
}
if (!res)
break;
/* Save tuple ID, and continue scanning */
tids[ntids] = scan->xs_ctup.t_self;
ntids++;
}
/*
* Save heap TID to use it in _bt_restscan. Then release the read
* lock on the buffer so that we aren't blocking other backends.
*/
if (res)
{
((BTScanOpaque) scan->opaque)->curHeapIptr = scan->xs_ctup.t_self;
LockBuffer(((BTScanOpaque) scan->opaque)->btso_curbuf,
BUFFER_LOCK_UNLOCK);
}
*returned_tids = ntids;
PG_RETURN_BOOL(res);
}
/*
* btbeginscan() -- start a scan on a btree index
*/

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/rtree/rtget.c,v 1.34 2005/01/18 23:25:43 neilc Exp $
* $PostgreSQL: pgsql/src/backend/access/rtree/rtget.c,v 1.35 2005/03/27 23:53:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -29,15 +29,13 @@ rtgettuple(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
RTreeScanOpaque so = (RTreeScanOpaque) s->opaque;
Page page;
OffsetNumber offnum;
RTreeScanOpaque so;
so = (RTreeScanOpaque) s->opaque;
/*
* If we've already produced a tuple and the executor has informed
* us that it should be marked "killed", do so know.
* us that it should be marked "killed", do so now.
*/
if (s->kill_prior_tuple && ItemPointerIsValid(&(s->currentItemData)))
{
@@ -57,7 +55,7 @@ rtgettuple(PG_FUNCTION_ARGS)
{
bool res = rtnext(s, dir);
if (res == true && s->ignore_killed_tuples)
if (res && s->ignore_killed_tuples)
{
offnum = ItemPointerGetOffsetNumber(&(s->currentItemData));
page = BufferGetPage(so->curbuf);
@@ -69,6 +67,42 @@ rtgettuple(PG_FUNCTION_ARGS)
}
}
Datum
rtgetmulti(PG_FUNCTION_ARGS)
{
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
int32 max_tids = PG_GETARG_INT32(2);
int32 *returned_tids = (int32 *) PG_GETARG_POINTER(3);
RTreeScanOpaque so = (RTreeScanOpaque) s->opaque;
bool res = true;
int32 ntids = 0;
/* XXX generic implementation: loop around guts of rtgettuple */
while (ntids < max_tids)
{
res = rtnext(s, ForwardScanDirection);
if (res && s->ignore_killed_tuples)
{
Page page;
OffsetNumber offnum;
offnum = ItemPointerGetOffsetNumber(&(s->currentItemData));
page = BufferGetPage(so->curbuf);
if (ItemIdDeleted(PageGetItemId(page, offnum)))
continue;
}
if (!res)
break;
tids[ntids] = s->xs_ctup.t_self;
ntids++;
}
*returned_tids = ntids;
PG_RETURN_BOOL(res);
}
static bool
rtnext(IndexScanDesc s, ScanDirection dir)
{