1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-11 10:01:57 +03:00

Allow GiST distance function to return merely a lower-bound.

The distance function can now set *recheck = false, like index quals. The
executor will then re-check the ORDER BY expressions, and use a queue to
reorder the results on the fly.

This makes it possible to do kNN-searches on polygons and circles, which
don't store the exact value in the index, but just a bounding box.

Alexander Korotkov and me
This commit is contained in:
Heikki Linnakangas
2015-05-15 14:26:51 +03:00
parent ecd222e770
commit 35fcb1b3d0
19 changed files with 699 additions and 40 deletions

View File

@ -16,6 +16,7 @@
* INTERFACE ROUTINES
* ExecIndexScan scans a relation using an index
* IndexNext retrieve next tuple using index
* IndexNextWithReorder same, but recheck ORDER BY expressions
* ExecInitIndexScan creates and initializes state info.
* ExecReScanIndexScan rescans the indexed relation.
* ExecEndIndexScan releases all storage.
@ -28,14 +29,38 @@
#include "access/relscan.h"
#include "executor/execdebug.h"
#include "executor/nodeIndexscan.h"
#include "lib/pairingheap.h"
#include "optimizer/clauses.h"
#include "utils/array.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
/*
* When an ordering operator is used, tuples fetched from the index that
* need to be reordered are queued in a pairing heap, as ReorderTuples.
*/
typedef struct
{
pairingheap_node ph_node;
HeapTuple htup;
Datum *orderbyvals;
bool *orderbynulls;
} ReorderTuple;
static TupleTableSlot *IndexNext(IndexScanState *node);
static TupleTableSlot *IndexNextWithReorder(IndexScanState *node);
static void EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext);
static bool IndexRecheck(IndexScanState *node, TupleTableSlot *slot);
static int cmp_orderbyvals(const Datum *adist, const bool *anulls,
const Datum *bdist, const bool *bnulls,
IndexScanState *node);
static int reorderqueue_cmp(const pairingheap_node *a,
const pairingheap_node *b, void *arg);
static void reorderqueue_push(IndexScanState *node, HeapTuple tuple,
Datum *orderbyvals, bool *orderbynulls);
static HeapTuple reorderqueue_pop(IndexScanState *node);
/* ----------------------------------------------------------------
@ -110,9 +135,199 @@ IndexNext(IndexScanState *node)
* if we get here it means the index scan failed so we are at the end of
* the scan..
*/
node->iss_ReachedEnd = true;
return ExecClearTuple(slot);
}
/* ----------------------------------------------------------------
* IndexNextWithReorder
*
* Like IndexNext, but his version can also re-check any
* ORDER BY expressions, and reorder the tuples as necessary.
* ----------------------------------------------------------------
*/
static TupleTableSlot *
IndexNextWithReorder(IndexScanState *node)
{
ExprContext *econtext;
IndexScanDesc scandesc;
HeapTuple tuple;
TupleTableSlot *slot;
ReorderTuple *topmost = NULL;
bool was_exact;
Datum *lastfetched_vals;
bool *lastfetched_nulls;
int cmp;
/* only forward scan is supported with reordering. */
Assert(!ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir));
Assert(ScanDirectionIsForward(node->ss.ps.state->es_direction));
scandesc = node->iss_ScanDesc;
econtext = node->ss.ps.ps_ExprContext;
slot = node->ss.ss_ScanTupleSlot;
for (;;)
{
/*
* Check the reorder queue first. If the topmost tuple in the queue
* has an ORDER BY value smaller than (or equal to) the value last
* returned by the index, we can return it now.
*/
if (!pairingheap_is_empty(node->iss_ReorderQueue))
{
topmost = (ReorderTuple *) pairingheap_first(node->iss_ReorderQueue);
if (node->iss_ReachedEnd ||
cmp_orderbyvals(topmost->orderbyvals,
topmost->orderbynulls,
scandesc->xs_orderbyvals,
scandesc->xs_orderbynulls,
node) <= 0)
{
tuple = reorderqueue_pop(node);
/* Pass 'true', as the tuple in the queue is a palloc'd copy */
ExecStoreTuple(tuple, slot, InvalidBuffer, true);
return slot;
}
}
else if (node->iss_ReachedEnd)
{
/* Queue is empty, and no more tuples from index. We're done. */
return ExecClearTuple(slot);
}
/*
* Fetch next tuple from the index.
*/
next_indextuple:
tuple = index_getnext(scandesc, ForwardScanDirection);
if (!tuple)
{
/*
* No more tuples from the index. But we still need to drain any
* remaining tuples from the queue before we're done.
*/
node->iss_ReachedEnd = true;
continue;
}
/*
* Store the scanned tuple in the scan tuple slot of the scan state.
* Note: we pass 'false' because tuples returned by amgetnext are
* pointers onto disk pages and must not be pfree()'d.
*/
ExecStoreTuple(tuple, /* tuple to store */
slot, /* slot to store in */
scandesc->xs_cbuf, /* buffer containing tuple */
false); /* don't pfree */
/*
* If the index was lossy, we have to recheck the index quals and
* ORDER BY expressions using the fetched tuple.
*/
if (scandesc->xs_recheck)
{
econtext->ecxt_scantuple = slot;
ResetExprContext(econtext);
if (!ExecQual(node->indexqualorig, econtext, false))
{
/* Fails recheck, so drop it and loop back for another */
InstrCountFiltered2(node, 1);
goto next_indextuple;
}
EvalOrderByExpressions(node, econtext);
/*
* Was the ORDER BY value returned by the index accurate? The
* recheck flag means that the index can return inaccurate values,
* but then again, the value returned for any particular tuple
* could also be exactly correct. Compare the value returned by
* the index with the recalculated value. (If the value returned
* by the index happened to be exact right, we can often avoid
* pushing the tuple to the queue, just to pop it back out again.)
*/
cmp = cmp_orderbyvals(node->iss_OrderByValues,
node->iss_OrderByNulls,
scandesc->xs_orderbyvals,
scandesc->xs_orderbynulls,
node);
if (cmp < 0)
elog(ERROR, "index returned tuples in wrong order");
else if (cmp == 0)
was_exact = true;
else
was_exact = false;
lastfetched_vals = node->iss_OrderByValues;
lastfetched_nulls = node->iss_OrderByNulls;
}
else
{
was_exact = true;
lastfetched_vals = scandesc->xs_orderbyvals;
lastfetched_nulls = scandesc->xs_orderbynulls;
}
/*
* Can we return this tuple immediately, or does it need to be pushed
* to the reorder queue? If the ORDER BY expression values returned
* by the index were inaccurate, we can't return it yet, because the
* next tuple from the index might need to come before this one.
* Also, we can't return it yet if there are any smaller tuples in the
* queue already.
*/
if (!was_exact || (topmost && cmp_orderbyvals(lastfetched_vals,
lastfetched_nulls,
topmost->orderbyvals,
topmost->orderbynulls,
node) > 0))
{
/* Put this tuple to the queue */
reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls);
continue;
}
else
{
/* Can return this tuple immediately. */
return slot;
}
}
/*
* if we get here it means the index scan failed so we are at the end of
* the scan..
*/
return ExecClearTuple(slot);
}
/*
* Calculate the expressions in the ORDER BY clause, based on the heap tuple.
*/
static void
EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext)
{
int i;
ListCell *l;
MemoryContext oldContext;
oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
i = 0;
foreach(l, node->indexorderbyorig)
{
ExprState *orderby = (ExprState *) lfirst(l);
node->iss_OrderByValues[i] = ExecEvalExpr(orderby,
econtext,
&node->iss_OrderByNulls[i],
NULL);
i++;
}
MemoryContextSwitchTo(oldContext);
}
/*
* IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
*/
@ -134,6 +349,109 @@ IndexRecheck(IndexScanState *node, TupleTableSlot *slot)
return ExecQual(node->indexqualorig, econtext, false);
}
/*
* Compare ORDER BY expression values.
*/
static int
cmp_orderbyvals(const Datum *adist, const bool *anulls,
const Datum *bdist, const bool *bnulls,
IndexScanState *node)
{
int i;
int result;
for (i = 0; i < node->iss_NumOrderByKeys; i++)
{
SortSupport ssup = &node->iss_SortSupport[i];
/* Handle nulls. We only support NULLS LAST. */
if (anulls[i] && !bnulls[i])
return 1;
else if (!anulls[i] && bnulls[i])
return -1;
else if (anulls[i] && bnulls[i])
return 0;
result = ssup->comparator(adist[i], bdist[i], ssup);
if (result != 0)
return result;
}
return 0;
}
/*
* Pairing heap provides getting topmost (greatest) element while KNN provides
* ascending sort. That's why we inverse the sort order.
*/
static int
reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b,
void *arg)
{
ReorderTuple *rta = (ReorderTuple *) a;
ReorderTuple *rtb = (ReorderTuple *) b;
IndexScanState *node = (IndexScanState *) arg;
return -cmp_orderbyvals(rta->orderbyvals, rta->orderbynulls,
rtb->orderbyvals, rtb->orderbynulls,
node);
}
/*
* Helper function to push a tuple to the reorder queue.
*/
static void
reorderqueue_push(IndexScanState *node, HeapTuple tuple,
Datum *orderbyvals, bool *orderbynulls)
{
IndexScanDesc scandesc = node->iss_ScanDesc;
EState *estate = node->ss.ps.state;
MemoryContext oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
ReorderTuple *rt;
int i;
rt = (ReorderTuple *) palloc(sizeof(ReorderTuple));
rt->htup = heap_copytuple(tuple);
rt->orderbyvals =
(Datum *) palloc(sizeof(Datum) * scandesc->numberOfOrderBys);
rt->orderbynulls =
(bool *) palloc(sizeof(bool) * scandesc->numberOfOrderBys);
for (i = 0; i < node->iss_NumOrderByKeys; i++)
{
if (!orderbynulls[i])
rt->orderbyvals[i] = datumCopy(orderbyvals[i],
node->iss_OrderByTypByVals[i],
node->iss_OrderByTypLens[i]);
else
rt->orderbyvals[i] = (Datum) 0;
rt->orderbynulls[i] = orderbynulls[i];
}
pairingheap_add(node->iss_ReorderQueue, &rt->ph_node);
MemoryContextSwitchTo(oldContext);
}
/*
* Helper function to pop the next tuple from the reorder queue.
*/
static HeapTuple
reorderqueue_pop(IndexScanState *node)
{
HeapTuple result;
ReorderTuple *topmost;
topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue);
result = topmost->htup;
pfree(topmost->orderbyvals);
pfree(topmost->orderbynulls);
pfree(topmost);
return result;
}
/* ----------------------------------------------------------------
* ExecIndexScan(node)
* ----------------------------------------------------------------
@ -147,9 +465,14 @@ ExecIndexScan(IndexScanState *node)
if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
ExecReScan((PlanState *) node);
return ExecScan(&node->ss,
(ExecScanAccessMtd) IndexNext,
(ExecScanRecheckMtd) IndexRecheck);
if (node->iss_NumOrderByKeys > 0)
return ExecScan(&node->ss,
(ExecScanAccessMtd) IndexNextWithReorder,
(ExecScanRecheckMtd) IndexRecheck);
else
return ExecScan(&node->ss,
(ExecScanAccessMtd) IndexNext,
(ExecScanRecheckMtd) IndexRecheck);
}
/* ----------------------------------------------------------------
@ -465,6 +788,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
IndexScanState *indexstate;
Relation currentRelation;
bool relistarget;
int i;
/*
* create state structure
@ -501,6 +825,9 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
indexstate->indexqualorig = (List *)
ExecInitExpr((Expr *) node->indexqualorig,
(PlanState *) indexstate);
indexstate->indexorderbyorig = (List *)
ExecInitExpr((Expr *) node->indexorderbyorig,
(PlanState *) indexstate);
/*
* tuple table initialization
@ -581,6 +908,52 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
NULL, /* no ArrayKeys */
NULL);
/* Initialize sort support, if we need to re-check ORDER BY exprs */
if (indexstate->iss_NumOrderByKeys > 0)
{
int numOrderByKeys = indexstate->iss_NumOrderByKeys;
/*
* Prepare sort support, and look up the distance type for each ORDER
* BY expression.
*/
indexstate->iss_SortSupport =
palloc0(numOrderByKeys * sizeof(SortSupportData));
indexstate->iss_OrderByTypByVals =
palloc(numOrderByKeys * sizeof(bool));
indexstate->iss_OrderByTypLens =
palloc(numOrderByKeys * sizeof(int16));
for (i = 0; i < indexstate->iss_NumOrderByKeys; i++)
{
Oid orderbyType;
Oid opfamily;
int16 strategy;
PrepareSortSupportFromOrderingOp(node->indexorderbyops[i],
&indexstate->iss_SortSupport[i]);
if (!get_ordering_op_properties(node->indexorderbyops[i],
&opfamily, &orderbyType, &strategy))
{
elog(LOG, "operator %u is not a valid ordering operator",
node->indexorderbyops[i]);
}
get_typlenbyval(orderbyType,
&indexstate->iss_OrderByTypLens[i],
&indexstate->iss_OrderByTypByVals[i]);
}
/* allocate arrays to hold the re-calculated distances */
indexstate->iss_OrderByValues =
palloc(indexstate->iss_NumOrderByKeys * sizeof(Datum));
indexstate->iss_OrderByNulls =
palloc(indexstate->iss_NumOrderByKeys * sizeof(bool));
/* and initialize the reorder queue */
indexstate->iss_ReorderQueue = pairingheap_allocate(reorderqueue_cmp,
indexstate);
}
/*
* If we have runtime keys, we need an ExprContext to evaluate them. The
* node's standard context won't do because we want to reset that context