1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-03 20:02:46 +03:00

KNNGIST, otherwise known as order-by-operator support for GIST.

This commit represents a rather heavily editorialized version of
Teodor's builtin_knngist_itself-0.8.2 and builtin_knngist_proc-0.8.1
patches.  I redid the opclass API to add a separate Distance method
instead of turning the Consistent method into an illogical mess,
fixed some bit-rot in the rbtree interfaces, and generally worked over
the code style and comments.

There's still no non-code documentation to speak of, but I'll work on
that separately.  Some contrib-module changes are also yet to come
(right now, point <-> point is the only KNN-ified operator).

Teodor Sigaev and Tom Lane
This commit is contained in:
Tom Lane
2010-12-03 20:52:18 -05:00
parent c0a4d3e051
commit 554506871b
16 changed files with 981 additions and 507 deletions

View File

@ -20,8 +20,84 @@
#include "access/relscan.h"
#include "storage/bufmgr.h"
#include "utils/memutils.h"
#include "utils/rel.h"
static void gistfreestack(GISTSearchStack *s);
/*
* RBTree support functions for the GISTSearchTreeItem queue
*/
static int
GISTSearchTreeItemComparator(const RBNode *a, const RBNode *b, void *arg)
{
const GISTSearchTreeItem *sa = (const GISTSearchTreeItem *) a;
const GISTSearchTreeItem *sb = (const GISTSearchTreeItem *) b;
IndexScanDesc scan = (IndexScanDesc) arg;
int i;
/* Order according to distance comparison */
for (i = 0; i < scan->numberOfOrderBys; i++)
{
if (sa->distances[i] != sb->distances[i])
return (sa->distances[i] > sb->distances[i]) ? 1 : -1;
}
return 0;
}
static void
GISTSearchTreeItemCombiner(RBNode *existing, const RBNode *newrb, void *arg)
{
GISTSearchTreeItem *scurrent = (GISTSearchTreeItem *) existing;
const GISTSearchTreeItem *snew = (const GISTSearchTreeItem *) newrb;
GISTSearchItem *newitem = snew->head;
/* snew should have just one item in its chain */
Assert(newitem && newitem->next == NULL);
/*
* If new item is heap tuple, it goes to front of chain; otherwise insert
* it before the first index-page item, so that index pages are visited
* in LIFO order, ensuring depth-first search of index pages. See
* comments in gist_private.h.
*/
if (GISTSearchItemIsHeap(*newitem))
{
newitem->next = scurrent->head;
scurrent->head = newitem;
if (scurrent->lastHeap == NULL)
scurrent->lastHeap = newitem;
}
else if (scurrent->lastHeap == NULL)
{
newitem->next = scurrent->head;
scurrent->head = newitem;
}
else
{
newitem->next = scurrent->lastHeap->next;
scurrent->lastHeap->next = newitem;
}
}
static RBNode *
GISTSearchTreeItemAllocator(void *arg)
{
IndexScanDesc scan = (IndexScanDesc) arg;
return palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys);
}
static void
GISTSearchTreeItemDeleter(RBNode *rb, void *arg)
{
pfree(rb);
}
/*
* Index AM API functions for scanning GiST indexes
*/
Datum
gistbeginscan(PG_FUNCTION_ARGS)
@ -32,18 +108,22 @@ gistbeginscan(PG_FUNCTION_ARGS)
IndexScanDesc scan;
GISTScanOpaque so;
/* no order by operators allowed */
Assert(norderbys == 0);
scan = RelationGetIndexScan(r, nkeys, norderbys);
/* initialize opaque data */
so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData));
so->stack = NULL;
so = (GISTScanOpaque) palloc0(sizeof(GISTScanOpaqueData));
so->queueCxt = AllocSetContextCreate(CurrentMemoryContext,
"GiST queue context",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
so->tempCxt = createTempGistContext();
so->curbuf = InvalidBuffer;
so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
initGISTstate(so->giststate, scan->indexRelation);
/* workspaces with size dependent on numberOfOrderBys: */
so->tmpTreeItem = palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys);
so->distances = palloc(sizeof(double) * scan->numberOfOrderBys);
so->qual_ok = true; /* in case there are zero keys */
scan->opaque = so;
@ -55,27 +135,27 @@ gistrescan(PG_FUNCTION_ARGS)
{
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
ScanKey key = (ScanKey) PG_GETARG_POINTER(1);
/* remaining arguments are ignored */
ScanKey orderbys = (ScanKey) PG_GETARG_POINTER(3);
/* nkeys and norderbys arguments are ignored */
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
int i;
MemoryContext oldCxt;
/* rescan an existing indexscan --- reset state */
gistfreestack(so->stack);
so->stack = NULL;
/* drop pins on buffers -- no locks held */
if (BufferIsValid(so->curbuf))
{
ReleaseBuffer(so->curbuf);
so->curbuf = InvalidBuffer;
}
MemoryContextReset(so->queueCxt);
so->curTreeItem = NULL;
/*
* Clear all the pointers.
*/
ItemPointerSetInvalid(&so->curpos);
so->nPageData = so->curPageData = 0;
/* create new, empty RBTree for search queue */
oldCxt = MemoryContextSwitchTo(so->queueCxt);
so->queue = rb_create(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys,
GISTSearchTreeItemComparator,
GISTSearchTreeItemCombiner,
GISTSearchTreeItemAllocator,
GISTSearchTreeItemDeleter,
scan);
MemoryContextSwitchTo(oldCxt);
so->qual_ok = true;
so->firstCall = true;
/* Update scan key, if a new one is given */
if (key && scan->numberOfKeys > 0)
@ -84,7 +164,7 @@ gistrescan(PG_FUNCTION_ARGS)
scan->numberOfKeys * sizeof(ScanKeyData));
/*
* Modify the scan key so that all the Consistent method is called for
* Modify the scan key so that the Consistent method is called for
* all comparisons. The original operator is passed to the Consistent
* function in the form of its strategy number, which is available
* from the sk_strategy field, and its subtype from the sk_subtype
@ -94,9 +174,11 @@ gistrescan(PG_FUNCTION_ARGS)
* SK_SEARCHNULL/SK_SEARCHNOTNULL then nothing can be found (ie, we
* assume all indexable operators are strict).
*/
so->qual_ok = true;
for (i = 0; i < scan->numberOfKeys; i++)
{
ScanKey skey = &(scan->keyData[i]);
ScanKey skey = scan->keyData + i;
skey->sk_func = so->giststate->consistentFn[skey->sk_attno - 1];
@ -108,6 +190,33 @@ gistrescan(PG_FUNCTION_ARGS)
}
}
/* Update order-by key, if a new one is given */
if (orderbys && scan->numberOfOrderBys > 0)
{
memmove(scan->orderByData, orderbys,
scan->numberOfOrderBys * sizeof(ScanKeyData));
/*
* Modify the order-by key so that the Distance method is called for
* all comparisons. The original operator is passed to the Distance
* function in the form of its strategy number, which is available
* from the sk_strategy field, and its subtype from the sk_subtype
* field.
*/
for (i = 0; i < scan->numberOfOrderBys; i++)
{
ScanKey skey = scan->orderByData + i;
skey->sk_func = so->giststate->distanceFn[skey->sk_attno - 1];
/* Check we actually have a distance function ... */
if (!OidIsValid(skey->sk_func.fn_oid))
elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
GIST_DISTANCE_PROC, skey->sk_attno,
RelationGetRelationName(scan->indexRelation));
}
}
PG_RETURN_VOID();
}
@ -131,26 +240,12 @@ gistendscan(PG_FUNCTION_ARGS)
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
gistfreestack(so->stack);
if (so->giststate != NULL)
freeGISTstate(so->giststate);
/* drop pins on buffers -- we aren't holding any locks */
if (BufferIsValid(so->curbuf))
ReleaseBuffer(so->curbuf);
freeGISTstate(so->giststate);
MemoryContextDelete(so->queueCxt);
MemoryContextDelete(so->tempCxt);
pfree(so->tmpTreeItem);
pfree(so->distances);
pfree(so);
PG_RETURN_VOID();
}
static void
gistfreestack(GISTSearchStack *s)
{
while (s != NULL)
{
GISTSearchStack *p = s->next;
pfree(s);
s = p;
}
}