mirror of
https://github.com/postgres/postgres.git
synced 2025-07-18 17:42:25 +03:00
430 lines
10 KiB
C
430 lines
10 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* rtscan.c
|
|
* routines to manage scans on index relations
|
|
*
|
|
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* $PostgreSQL: pgsql/src/backend/access/rtree/rtscan.c,v 1.50 2003/11/29 19:51:40 pgsql Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "access/genam.h"
|
|
#include "access/rtree.h"
|
|
#include "utils/lsyscache.h"
|
|
|
|
|
|
/* routines defined and used here */
static void rtregscan(IndexScanDesc s);		/* add scan to private RTScans list */
static void rtdropscan(IndexScanDesc s);	/* remove scan from RTScans list */
static void rtadjone(IndexScanDesc s, int op, BlockNumber blkno,
		 OffsetNumber offnum);				/* fix one scan after an index update */
static void adjuststack(RTSTACK *stk, BlockNumber blkno);	/* reset stack entries on split page */
static void adjustiptr(IndexScanDesc s, ItemPointer iptr,
		   int op, BlockNumber blkno, OffsetNumber offnum);	/* fix a saved item pointer */
|
|
|
|
/*
|
|
* Whenever we start an rtree scan in a backend, we register it in private
|
|
* space. Then if the rtree index gets updated, we check all registered
|
|
* scans and adjust them if the tuple they point at got moved by the
|
|
* update. We only need to do this in private space, because when we update
|
|
* an rtree we have a write lock on the tree, so no other process can have
|
|
* any locks at all on it. A single transaction can have write and read
|
|
* locks on the same object, so that's why we need to handle this case.
|
|
*/
|
|
|
|
/*
 * One cell of the backend-private list of active rtree scans.  The list
 * exists so rtadjscans() can locate scans on an index that is being
 * updated within the same transaction (see comment above).
 */
typedef struct RTScanListData
{
	IndexScanDesc rtsl_scan;			/* the registered scan */
	struct RTScanListData *rtsl_next;	/* next list cell, or NULL */
} RTScanListData;

typedef RTScanListData *RTScanList;

/* pointer to list of local scans on rtrees */
static RTScanList RTScans = (RTScanList) NULL;
|
|
|
|
Datum
|
|
rtbeginscan(PG_FUNCTION_ARGS)
|
|
{
|
|
Relation r = (Relation) PG_GETARG_POINTER(0);
|
|
int nkeys = PG_GETARG_INT32(1);
|
|
ScanKey key = (ScanKey) PG_GETARG_POINTER(2);
|
|
IndexScanDesc s;
|
|
|
|
s = RelationGetIndexScan(r, nkeys, key);
|
|
|
|
rtregscan(s);
|
|
|
|
PG_RETURN_POINTER(s);
|
|
}
|
|
|
|
/*
 * rtrescan() -- (re)start a scan, optionally with a new set of scan keys.
 *
 * On first call after rtbeginscan() the opaque per-scan state is built;
 * on a true rescan the existing state is reset in place.  When a key
 * array is supplied, it is copied into the scan and a parallel
 * "internal" key set is derived for use on non-leaf pages.
 */
Datum
rtrescan(PG_FUNCTION_ARGS)
{
	IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
	ScanKey		key = (ScanKey) PG_GETARG_POINTER(1);
	RTreeScanOpaque p;
	int			i;

	/*
	 * Clear all the pointers.
	 */
	ItemPointerSetInvalid(&s->currentItemData);
	ItemPointerSetInvalid(&s->currentMarkData);

	p = (RTreeScanOpaque) s->opaque;
	if (p != (RTreeScanOpaque) NULL)
	{
		/* rescan an existing indexscan --- reset state */
		freestack(p->s_stack);
		freestack(p->s_markstk);
		p->s_stack = p->s_markstk = (RTSTACK *) NULL;
		p->s_flags = 0x0;
	}
	else
	{
		/* initialize opaque data */
		p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData));
		p->s_stack = p->s_markstk = (RTSTACK *) NULL;
		p->s_internalNKey = s->numberOfKeys;
		p->s_flags = 0x0;
		s->opaque = p;
		/* s_internalKey is allocated only for keyed scans; note it is
		 * left uninitialized when numberOfKeys == 0 */
		if (s->numberOfKeys > 0)
			p->s_internalKey = (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys);
	}

	/* Update scan key, if a new one is given */
	if (key && s->numberOfKeys > 0)
	{
		memmove(s->keyData,
				key,
				s->numberOfKeys * sizeof(ScanKeyData));

		/*
		 * Scans on internal pages use different operators than they do on
		 * leaf pages.  For example, if the user wants all boxes that
		 * exactly match (x1,y1,x2,y2), then on internal pages we need to
		 * find all boxes that contain (x1,y1,x2,y2).
		 */
		for (i = 0; i < s->numberOfKeys; i++)
		{
			AttrNumber	attno = s->keyData[i].sk_attno;
			Oid			opclass;
			StrategyNumber int_strategy;
			Oid			int_oper;
			RegProcedure int_proc;

			/* look up the internal-page operator for this key's opclass */
			opclass = s->indexRelation->rd_index->indclass[attno - 1];
			int_strategy = RTMapToInternalOperator(s->keyData[i].sk_strategy);
			int_oper = get_opclass_member(opclass,
										  s->keyData[i].sk_subtype,
										  int_strategy);
			int_proc = get_opcode(int_oper);
			ScanKeyEntryInitialize(&(p->s_internalKey[i]),
								   s->keyData[i].sk_flags,
								   attno,
								   int_strategy,
								   s->keyData[i].sk_subtype,
								   int_proc,
								   s->keyData[i].sk_argument);
		}
	}

	PG_RETURN_VOID();
}
|
|
|
|
Datum
|
|
rtmarkpos(PG_FUNCTION_ARGS)
|
|
{
|
|
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
|
|
RTreeScanOpaque p;
|
|
RTSTACK *o,
|
|
*n,
|
|
*tmp;
|
|
|
|
s->currentMarkData = s->currentItemData;
|
|
p = (RTreeScanOpaque) s->opaque;
|
|
if (p->s_flags & RTS_CURBEFORE)
|
|
p->s_flags |= RTS_MRKBEFORE;
|
|
else
|
|
p->s_flags &= ~RTS_MRKBEFORE;
|
|
|
|
o = (RTSTACK *) NULL;
|
|
n = p->s_stack;
|
|
|
|
/* copy the parent stack from the current item data */
|
|
while (n != (RTSTACK *) NULL)
|
|
{
|
|
tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
|
|
tmp->rts_child = n->rts_child;
|
|
tmp->rts_blk = n->rts_blk;
|
|
tmp->rts_parent = o;
|
|
o = tmp;
|
|
n = n->rts_parent;
|
|
}
|
|
|
|
freestack(p->s_markstk);
|
|
p->s_markstk = o;
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
Datum
|
|
rtrestrpos(PG_FUNCTION_ARGS)
|
|
{
|
|
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
|
|
RTreeScanOpaque p;
|
|
RTSTACK *o,
|
|
*n,
|
|
*tmp;
|
|
|
|
s->currentItemData = s->currentMarkData;
|
|
p = (RTreeScanOpaque) s->opaque;
|
|
if (p->s_flags & RTS_MRKBEFORE)
|
|
p->s_flags |= RTS_CURBEFORE;
|
|
else
|
|
p->s_flags &= ~RTS_CURBEFORE;
|
|
|
|
o = (RTSTACK *) NULL;
|
|
n = p->s_markstk;
|
|
|
|
/* copy the parent stack from the current item data */
|
|
while (n != (RTSTACK *) NULL)
|
|
{
|
|
tmp = (RTSTACK *) palloc(sizeof(RTSTACK));
|
|
tmp->rts_child = n->rts_child;
|
|
tmp->rts_blk = n->rts_blk;
|
|
tmp->rts_parent = o;
|
|
o = tmp;
|
|
n = n->rts_parent;
|
|
}
|
|
|
|
freestack(p->s_stack);
|
|
p->s_stack = o;
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
Datum
|
|
rtendscan(PG_FUNCTION_ARGS)
|
|
{
|
|
IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
|
|
RTreeScanOpaque p;
|
|
|
|
p = (RTreeScanOpaque) s->opaque;
|
|
|
|
if (p != (RTreeScanOpaque) NULL)
|
|
{
|
|
freestack(p->s_stack);
|
|
freestack(p->s_markstk);
|
|
pfree(s->opaque);
|
|
}
|
|
|
|
rtdropscan(s);
|
|
/* XXX don't unset read lock -- two-phase locking */
|
|
|
|
PG_RETURN_VOID();
|
|
}
|
|
|
|
static void
|
|
rtregscan(IndexScanDesc s)
|
|
{
|
|
RTScanList l;
|
|
|
|
l = (RTScanList) palloc(sizeof(RTScanListData));
|
|
l->rtsl_scan = s;
|
|
l->rtsl_next = RTScans;
|
|
RTScans = l;
|
|
}
|
|
|
|
static void
|
|
rtdropscan(IndexScanDesc s)
|
|
{
|
|
RTScanList l;
|
|
RTScanList prev;
|
|
|
|
prev = (RTScanList) NULL;
|
|
|
|
for (l = RTScans;
|
|
l != (RTScanList) NULL && l->rtsl_scan != s;
|
|
l = l->rtsl_next)
|
|
prev = l;
|
|
|
|
if (l == (RTScanList) NULL)
|
|
elog(ERROR, "rtree scan list corrupted -- could not find 0x%p",
|
|
(void *) s);
|
|
|
|
if (prev == (RTScanList) NULL)
|
|
RTScans = l->rtsl_next;
|
|
else
|
|
prev->rtsl_next = l->rtsl_next;
|
|
|
|
pfree(l);
|
|
}
|
|
|
|
/*
|
|
* AtEOXact_rtree() --- clean up rtree subsystem at xact abort or commit.
|
|
*
|
|
* This is here because it needs to touch this module's static var RTScans.
|
|
*/
|
|
void
|
|
AtEOXact_rtree(void)
|
|
{
|
|
/*
|
|
* Note: these actions should only be necessary during xact abort; but
|
|
* they can't hurt during a commit.
|
|
*/
|
|
|
|
/*
|
|
* Reset the active-scans list to empty. We do not need to free the
|
|
* list elements, because they're all palloc()'d, so they'll go away
|
|
* at end of transaction anyway.
|
|
*/
|
|
RTScans = NULL;
|
|
}
|
|
|
|
void
|
|
rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum)
|
|
{
|
|
RTScanList l;
|
|
Oid relid;
|
|
|
|
relid = RelationGetRelid(r);
|
|
for (l = RTScans; l != (RTScanList) NULL; l = l->rtsl_next)
|
|
{
|
|
if (RelationGetRelid(l->rtsl_scan->indexRelation) == relid)
|
|
rtadjone(l->rtsl_scan, op, blkno, offnum);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* rtadjone() -- adjust one scan for update.
|
|
*
|
|
* By here, the scan passed in is on a modified relation. Op tells
|
|
* us what the modification is, and blkno and offind tell us what
|
|
* block and offset index were affected. This routine checks the
|
|
* current and marked positions, and the current and marked stacks,
|
|
* to see if any stored location needs to be changed because of the
|
|
* update. If so, we make the change here.
|
|
*/
|
|
static void
|
|
rtadjone(IndexScanDesc s,
|
|
int op,
|
|
BlockNumber blkno,
|
|
OffsetNumber offnum)
|
|
{
|
|
RTreeScanOpaque so;
|
|
|
|
adjustiptr(s, &(s->currentItemData), op, blkno, offnum);
|
|
adjustiptr(s, &(s->currentMarkData), op, blkno, offnum);
|
|
|
|
so = (RTreeScanOpaque) s->opaque;
|
|
|
|
if (op == RTOP_SPLIT)
|
|
{
|
|
adjuststack(so->s_stack, blkno);
|
|
adjuststack(so->s_markstk, blkno);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* adjustiptr() -- adjust current and marked item pointers in the scan
|
|
*
|
|
* Depending on the type of update and the place it happened, we
|
|
* need to do nothing, to back up one record, or to start over on
|
|
* the same page.
|
|
*/
|
|
static void
|
|
adjustiptr(IndexScanDesc s,
|
|
ItemPointer iptr,
|
|
int op,
|
|
BlockNumber blkno,
|
|
OffsetNumber offnum)
|
|
{
|
|
OffsetNumber curoff;
|
|
RTreeScanOpaque so;
|
|
|
|
if (ItemPointerIsValid(iptr))
|
|
{
|
|
if (ItemPointerGetBlockNumber(iptr) == blkno)
|
|
{
|
|
curoff = ItemPointerGetOffsetNumber(iptr);
|
|
so = (RTreeScanOpaque) s->opaque;
|
|
|
|
switch (op)
|
|
{
|
|
case RTOP_DEL:
|
|
/* back up one if we need to */
|
|
if (curoff >= offnum)
|
|
{
|
|
|
|
if (curoff > FirstOffsetNumber)
|
|
{
|
|
/* just adjust the item pointer */
|
|
ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff));
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* remember that we're before the current
|
|
* tuple
|
|
*/
|
|
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
|
|
if (iptr == &(s->currentItemData))
|
|
so->s_flags |= RTS_CURBEFORE;
|
|
else
|
|
so->s_flags |= RTS_MRKBEFORE;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case RTOP_SPLIT:
|
|
/* back to start of page on split */
|
|
ItemPointerSet(iptr, blkno, FirstOffsetNumber);
|
|
if (iptr == &(s->currentItemData))
|
|
so->s_flags &= ~RTS_CURBEFORE;
|
|
else
|
|
so->s_flags &= ~RTS_MRKBEFORE;
|
|
break;
|
|
|
|
default:
|
|
elog(ERROR, "unrecognized operation in rtree scan adjust: %d", op);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* adjuststack() -- adjust the supplied stack for a split on a page in
|
|
* the index we're scanning.
|
|
*
|
|
* If a page on our parent stack has split, we need to back up to the
|
|
* beginning of the page and rescan it. The reason for this is that
|
|
* the split algorithm for rtrees doesn't order tuples in any useful
|
|
* way on a single page. This means on that a split, we may wind up
|
|
* looking at some heap tuples more than once. This is handled in the
|
|
* access method update code for heaps; if we've modified the tuple we
|
|
* are looking at already in this transaction, we ignore the update
|
|
* request.
|
|
*/
|
|
/*ARGSUSED*/
|
|
static void
|
|
adjuststack(RTSTACK *stk,
|
|
BlockNumber blkno)
|
|
{
|
|
while (stk != (RTSTACK *) NULL)
|
|
{
|
|
if (stk->rts_blk == blkno)
|
|
stk->rts_child = FirstOffsetNumber;
|
|
|
|
stk = stk->rts_parent;
|
|
}
|
|
}
|