mirror of
https://github.com/postgres/postgres.git
synced 2025-07-09 22:41:56 +03:00
Allow GiST distance function to return merely a lower-bound.
The distance function can now set *recheck = true, like index quals. The executor will then re-check the ORDER BY expressions, and use a queue to reorder the results on the fly. This makes it possible to do kNN-searches on polygons and circles, which don't store the exact value in the index, but just a bounding box. Alexander Korotkov and me
This commit is contained in:
doc/src/sgml
src
backend
access
executor
optimizer
plan
utils
adt
include
access
catalog
nodes
utils
test
regress
@ -30,10 +30,10 @@
|
||||
* The index tuple might represent either a heap tuple or a lower index page,
|
||||
* depending on whether the containing page is a leaf page or not.
|
||||
*
|
||||
* On success return for a heap tuple, *recheck_p is set to indicate
|
||||
* whether recheck is needed. We recheck if any of the consistent() functions
|
||||
* request it. recheck is not interesting when examining a non-leaf entry,
|
||||
* since we must visit the lower index page if there's any doubt.
|
||||
* On success return for a heap tuple, *recheck_p is set to indicate whether
|
||||
* recheck is needed. We recheck if any of the consistent() or distance()
|
||||
* functions request it. recheck is not interesting when examining a non-leaf
|
||||
* entry, since we must visit the lower index page if there's any doubt.
|
||||
*
|
||||
* If we are doing an ordered scan, so->distances[] is filled with distance
|
||||
* data from the distance() functions before returning success.
|
||||
@ -176,6 +176,7 @@ gistindex_keytest(IndexScanDesc scan,
|
||||
else
|
||||
{
|
||||
Datum dist;
|
||||
bool recheck;
|
||||
GISTENTRY de;
|
||||
|
||||
gistdentryinit(giststate, key->sk_attno - 1, &de,
|
||||
@ -192,16 +193,21 @@ gistindex_keytest(IndexScanDesc scan,
|
||||
* always be zero, but might as well pass it for possible future
|
||||
* use.)
|
||||
*
|
||||
* Note that Distance functions don't get a recheck argument. We
|
||||
* can't tolerate lossy distance calculations on leaf tuples;
|
||||
* there is no opportunity to re-sort the tuples afterwards.
|
||||
* Distance functions get a recheck argument as well. In this
|
||||
* case the returned distance is the lower bound of distance
|
||||
* and needs to be rechecked. We return a single recheck flag
|
||||
* which means that both quals and distances are to be
|
||||
* rechecked.
|
||||
*/
|
||||
dist = FunctionCall4Coll(&key->sk_func,
|
||||
dist = FunctionCall5Coll(&key->sk_func,
|
||||
key->sk_collation,
|
||||
PointerGetDatum(&de),
|
||||
key->sk_argument,
|
||||
Int32GetDatum(key->sk_strategy),
|
||||
ObjectIdGetDatum(key->sk_subtype));
|
||||
ObjectIdGetDatum(key->sk_subtype),
|
||||
PointerGetDatum(&recheck));
|
||||
|
||||
*recheck_p |= recheck;
|
||||
|
||||
*distance_p = DatumGetFloat8(dist);
|
||||
}
|
||||
@ -434,6 +440,7 @@ getNextNearest(IndexScanDesc scan)
|
||||
{
|
||||
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
|
||||
bool res = false;
|
||||
int i;
|
||||
|
||||
if (scan->xs_itup)
|
||||
{
|
||||
@ -454,6 +461,11 @@ getNextNearest(IndexScanDesc scan)
|
||||
/* found a heap item at currently minimal distance */
|
||||
scan->xs_ctup.t_self = item->data.heap.heapPtr;
|
||||
scan->xs_recheck = item->data.heap.recheck;
|
||||
for (i = 0; i < scan->numberOfOrderBys; i++)
|
||||
{
|
||||
scan->xs_orderbyvals[i] = Float8GetDatum(item->distances[i]);
|
||||
scan->xs_orderbynulls[i] = false;
|
||||
}
|
||||
|
||||
/* in an index-only scan, also return the reconstructed tuple. */
|
||||
if (scan->xs_want_itup)
|
||||
|
@ -1478,3 +1478,40 @@ gist_point_distance(PG_FUNCTION_ARGS)
|
||||
|
||||
PG_RETURN_FLOAT8(distance);
|
||||
}
|
||||
|
||||
/*
|
||||
* The inexact GiST distance method for geometric types that store bounding
|
||||
* boxes.
|
||||
*
|
||||
* Compute lossy distance from point to index entries. The result is inexact
|
||||
* because index entries are bounding boxes, not the exact shapes of the
|
||||
* indexed geometric types. We use distance from point to MBR of index entry.
|
||||
* This is a lower bound estimate of distance from point to indexed geometric
|
||||
* type.
|
||||
*/
|
||||
Datum
|
||||
gist_bbox_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
||||
bool *recheck = (bool *) PG_GETARG_POINTER(4);
|
||||
double distance;
|
||||
StrategyNumber strategyGroup = strategy / GeoStrategyNumberOffset;
|
||||
|
||||
/* Bounding box distance is always inexact. */
|
||||
*recheck = true;
|
||||
|
||||
switch (strategyGroup)
|
||||
{
|
||||
case PointStrategyNumberGroup:
|
||||
distance = computeDistance(false,
|
||||
DatumGetBoxP(entry->key),
|
||||
PG_GETARG_POINT_P(1));
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "unknown strategy number: %d", strategy);
|
||||
distance = 0.0; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
PG_RETURN_FLOAT8(distance);
|
||||
}
|
||||
|
@ -85,6 +85,11 @@ gistbeginscan(PG_FUNCTION_ARGS)
|
||||
/* workspaces with size dependent on numberOfOrderBys: */
|
||||
so->distances = palloc(sizeof(double) * scan->numberOfOrderBys);
|
||||
so->qual_ok = true; /* in case there are zero keys */
|
||||
if (scan->numberOfOrderBys > 0)
|
||||
{
|
||||
scan->xs_orderbyvals = palloc(sizeof(Datum) * scan->numberOfOrderBys);
|
||||
scan->xs_orderbynulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
|
||||
}
|
||||
|
||||
scan->opaque = so;
|
||||
|
||||
|
Reference in New Issue
Block a user