1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-09 22:41:56 +03:00

Allow GiST distance function to return merely a lower-bound.

The distance function can now set *recheck = false, like index quals. The
executor will then re-check the ORDER BY expressions, and use a queue to
reorder the results on the fly.

This makes it possible to do kNN-searches on polygons and circles, which
don't store the exact value in the index, but just a bounding box.

Alexander Korotkov and me
This commit is contained in:
Heikki Linnakangas
2015-05-15 14:26:51 +03:00
parent ecd222e770
commit 35fcb1b3d0
19 changed files with 699 additions and 40 deletions

@ -30,10 +30,10 @@
* The index tuple might represent either a heap tuple or a lower index page,
* depending on whether the containing page is a leaf page or not.
*
* On success return for a heap tuple, *recheck_p is set to indicate
* whether recheck is needed. We recheck if any of the consistent() functions
* request it. recheck is not interesting when examining a non-leaf entry,
* since we must visit the lower index page if there's any doubt.
* On success return for a heap tuple, *recheck_p is set to indicate whether
* recheck is needed. We recheck if any of the consistent() or distance()
* functions request it. recheck is not interesting when examining a non-leaf
* entry, since we must visit the lower index page if there's any doubt.
*
* If we are doing an ordered scan, so->distances[] is filled with distance
* data from the distance() functions before returning success.
@ -176,6 +176,7 @@ gistindex_keytest(IndexScanDesc scan,
else
{
Datum dist;
bool recheck;
GISTENTRY de;
gistdentryinit(giststate, key->sk_attno - 1, &de,
@ -192,16 +193,21 @@ gistindex_keytest(IndexScanDesc scan,
* always be zero, but might as well pass it for possible future
* use.)
*
* Note that Distance functions don't get a recheck argument. We
* can't tolerate lossy distance calculations on leaf tuples;
* there is no opportunity to re-sort the tuples afterwards.
* Distance functions get a recheck argument as well. In this
* case the returned distance is the lower bound of distance
* and needs to be rechecked. We return single recheck flag
* which means that both quals and distances are to be
* rechecked.
*/
dist = FunctionCall4Coll(&key->sk_func,
dist = FunctionCall5Coll(&key->sk_func,
key->sk_collation,
PointerGetDatum(&de),
key->sk_argument,
Int32GetDatum(key->sk_strategy),
ObjectIdGetDatum(key->sk_subtype));
ObjectIdGetDatum(key->sk_subtype),
PointerGetDatum(&recheck));
*recheck_p |= recheck;
*distance_p = DatumGetFloat8(dist);
}
@ -434,6 +440,7 @@ getNextNearest(IndexScanDesc scan)
{
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
bool res = false;
int i;
if (scan->xs_itup)
{
@ -454,6 +461,11 @@ getNextNearest(IndexScanDesc scan)
/* found a heap item at currently minimal distance */
scan->xs_ctup.t_self = item->data.heap.heapPtr;
scan->xs_recheck = item->data.heap.recheck;
for (i = 0; i < scan->numberOfOrderBys; i++)
{
scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]);
scan->xs_orderbynulls[i] = false;
}
/* in an index-only scan, also return the reconstructed tuple. */
if (scan->xs_want_itup)

@ -1478,3 +1478,40 @@ gist_point_distance(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(distance);
}
/*
* The inexact GiST distance method for geometric types that store bounding
* boxes.
*
* Compute lossy distance from point to index entries. The result is inexact
* because index entries are bounding boxes, not the exact shapes of the
* indexed geometric types. We use distance from point to MBR of index entry.
* This is a lower bound estimate of distance from point to indexed geometric
* type.
*/
Datum
gist_bbox_distance(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
bool *recheck = (bool *) PG_GETARG_POINTER(4);
double distance;
StrategyNumber strategyGroup = strategy / GeoStrategyNumberOffset;
/* Bounding box distance is always inexact. */
*recheck = true;
switch (strategyGroup)
{
case PointStrategyNumberGroup:
distance = computeDistance(false,
DatumGetBoxP(entry->key),
PG_GETARG_POINT_P(1));
break;
default:
elog(ERROR, "unknown strategy number: %d", strategy);
distance = 0.0; /* keep compiler quiet */
}
PG_RETURN_FLOAT8(distance);
}

@ -85,6 +85,11 @@ gistbeginscan(PG_FUNCTION_ARGS)
/* workspaces with size dependent on numberOfOrderBys: */
so->distances = palloc(sizeof(double) * scan->numberOfOrderBys);
so->qual_ok = true; /* in case there are zero keys */
if (scan->numberOfOrderBys > 0)
{
scan->xs_orderbyvals = palloc(sizeof(Datum) * scan->numberOfOrderBys);
scan->xs_orderbynulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
}
scan->opaque = so;