mirror of
https://github.com/postgres/postgres.git
synced 2025-07-11 10:01:57 +03:00
Add support for nearest-neighbor (KNN) searches to SP-GiST
Previously, KNN searches were supported only by GiST, although SP-GiST is also capable of supporting them. This commit implements that support. The SP-GiST scan stack is replaced with a queue, which serves as a stack if no ordering is specified. KNN support is provided for three SP-GiST opclasses: quad_point_ops, kd_point_ops and poly_ops (catversion is bumped). Some parts common to the GiST and SP-GiST KNN implementations are extracted into separate functions. Discussion: https://postgr.es/m/570825e8-47d0-4732-2bf6-88d67d2d51c8%40postgrespro.ru Author: Nikita Glukhov, Alexander Korotkov based on GSoC work by Vlad Sterzhanov Review: Andrey Borodin, Alexander Korotkov
This commit is contained in:
@ -14,6 +14,7 @@ include $(top_builddir)/src/Makefile.global
|
||||
|
||||
OBJS = spgutils.o spginsert.o spgscan.o spgvacuum.o spgvalidate.o \
|
||||
spgdoinsert.o spgxlog.o \
|
||||
spgtextproc.o spgquadtreeproc.o spgkdtreeproc.o
|
||||
spgtextproc.o spgquadtreeproc.o spgkdtreeproc.o \
|
||||
spgproc.o
|
||||
|
||||
include $(top_srcdir)/src/backend/common.mk
|
||||
|
@ -41,7 +41,11 @@ contain exactly one inner tuple.
|
||||
|
||||
When the search traversal algorithm reaches an inner tuple, it chooses a set
|
||||
of nodes to continue tree traverse in depth. If it reaches a leaf page it
|
||||
scans a list of leaf tuples to find the ones that match the query.
|
||||
scans a list of leaf tuples to find the ones that match the query. SP-GiST
|
||||
also supports ordered (nearest-neighbor) searches: during the scan, pending
|
||||
nodes are put into a priority queue, so the traversal proceeds in
|
||||
closest-first order.
|
||||
|
||||
|
||||
The insertion algorithm descends the tree similarly, except it must choose
|
||||
just one node to descend to from each inner tuple. Insertion might also have
|
||||
|
@ -16,9 +16,11 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/spgist.h"
|
||||
#include "access/spgist_private.h"
|
||||
#include "access/stratnum.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/float.h"
|
||||
#include "utils/geo_decls.h"
|
||||
|
||||
|
||||
@ -162,6 +164,7 @@ spg_kd_inner_consistent(PG_FUNCTION_ARGS)
|
||||
double coord;
|
||||
int which;
|
||||
int i;
|
||||
BOX bboxes[2];
|
||||
|
||||
Assert(in->hasPrefix);
|
||||
coord = DatumGetFloat8(in->prefixDatum);
|
||||
@ -248,12 +251,87 @@ spg_kd_inner_consistent(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
/* We must descend into the children identified by which */
|
||||
out->nodeNumbers = (int *) palloc(sizeof(int) * 2);
|
||||
out->nNodes = 0;
|
||||
|
||||
/* Fast-path for no matching children */
|
||||
if (!which)
|
||||
PG_RETURN_VOID();
|
||||
|
||||
out->nodeNumbers = (int *) palloc(sizeof(int) * 2);
|
||||
|
||||
/*
|
||||
* When ordering scan keys are specified, we've to calculate distance for
|
||||
* them. In order to do that, we need calculate bounding boxes for both
|
||||
* children nodes. Calculation of those bounding boxes on non-zero level
|
||||
* require knowledge of bounding box of upper node. So, we save bounding
|
||||
* boxes to traversalValues.
|
||||
*/
|
||||
if (in->norderbys > 0)
|
||||
{
|
||||
BOX infArea;
|
||||
BOX *area;
|
||||
|
||||
out->distances = (double **) palloc(sizeof(double *) * in->nNodes);
|
||||
out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes);
|
||||
|
||||
if (in->level == 0)
|
||||
{
|
||||
float8 inf = get_float8_infinity();
|
||||
|
||||
infArea.high.x = inf;
|
||||
infArea.high.y = inf;
|
||||
infArea.low.x = -inf;
|
||||
infArea.low.y = -inf;
|
||||
area = &infArea;
|
||||
}
|
||||
else
|
||||
{
|
||||
area = (BOX *) in->traversalValue;
|
||||
Assert(area);
|
||||
}
|
||||
|
||||
bboxes[0].low = area->low;
|
||||
bboxes[1].high = area->high;
|
||||
|
||||
if (in->level % 2)
|
||||
{
|
||||
/* split box by x */
|
||||
bboxes[0].high.x = bboxes[1].low.x = coord;
|
||||
bboxes[0].high.y = area->high.y;
|
||||
bboxes[1].low.y = area->low.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* split box by y */
|
||||
bboxes[0].high.y = bboxes[1].low.y = coord;
|
||||
bboxes[0].high.x = area->high.x;
|
||||
bboxes[1].low.x = area->low.x;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 1; i <= 2; i++)
|
||||
{
|
||||
if (which & (1 << i))
|
||||
out->nodeNumbers[out->nNodes++] = i - 1;
|
||||
{
|
||||
out->nodeNumbers[out->nNodes] = i - 1;
|
||||
|
||||
if (in->norderbys > 0)
|
||||
{
|
||||
MemoryContext oldCtx = MemoryContextSwitchTo(
|
||||
in->traversalMemoryContext);
|
||||
BOX *box = box_copy(&bboxes[i - 1]);
|
||||
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
out->traversalValues[out->nNodes] = box;
|
||||
|
||||
out->distances[out->nNodes] = spg_key_orderbys_distances(
|
||||
BoxPGetDatum(box), false,
|
||||
in->orderbys, in->norderbys);
|
||||
}
|
||||
|
||||
out->nNodes++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Set up level increments, too */
|
||||
|
88
src/backend/access/spgist/spgproc.c
Normal file
88
src/backend/access/spgist/spgproc.c
Normal file
@ -0,0 +1,88 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* spgproc.c
|
||||
* Common supporting procedures for SP-GiST opclasses.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/backend/access/spgist/spgproc.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "access/spgist_private.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/float.h"
|
||||
#include "utils/geo_decls.h"
|
||||
|
||||
/*
 * Distance between two points, obtained by invoking the SQL-callable
 * point_distance function directly and unwrapping its float8 result.
 * Both arguments are Point pointers.
 */
#define point_point_distance(p1, p2) \
	DatumGetFloat8( \
		DirectFunctionCall2(point_distance, \
							PointPGetDatum(p1), \
							PointPGetDatum(p2)))
|
||||
|
||||
/* Point-box distance in the assumption that box is aligned by axis */
|
||||
static double
|
||||
point_box_distance(Point *point, BOX *box)
|
||||
{
|
||||
double dx,
|
||||
dy;
|
||||
|
||||
if (isnan(point->x) || isnan(box->low.x) ||
|
||||
isnan(point->y) || isnan(box->low.y))
|
||||
return get_float8_nan();
|
||||
|
||||
if (point->x < box->low.x)
|
||||
dx = box->low.x - point->x;
|
||||
else if (point->x > box->high.x)
|
||||
dx = point->x - box->high.x;
|
||||
else
|
||||
dx = 0.0;
|
||||
|
||||
if (point->y < box->low.y)
|
||||
dy = box->low.y - point->y;
|
||||
else if (point->y > box->high.y)
|
||||
dy = point->y - box->high.y;
|
||||
else
|
||||
dy = 0.0;
|
||||
|
||||
return HYPOT(dx, dy);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns distances from given key to array of ordering scan keys. Leaf key
|
||||
* is expected to be point, non-leaf key is expected to be box. Scan key
|
||||
* arguments are expected to be points.
|
||||
*/
|
||||
double *
|
||||
spg_key_orderbys_distances(Datum key, bool isLeaf,
|
||||
ScanKey orderbys, int norderbys)
|
||||
{
|
||||
int sk_num;
|
||||
double *distances = (double *) palloc(norderbys * sizeof(double)),
|
||||
*distance = distances;
|
||||
|
||||
for (sk_num = 0; sk_num < norderbys; ++sk_num, ++orderbys, ++distance)
|
||||
{
|
||||
Point *point = DatumGetPointP(orderbys->sk_argument);
|
||||
|
||||
*distance = isLeaf ? point_point_distance(point, DatumGetPointP(key))
|
||||
: point_box_distance(point, DatumGetBoxP(key));
|
||||
}
|
||||
|
||||
return distances;
|
||||
}
|
||||
|
||||
/*
 * Return a palloc'd duplicate of the given box.  The copy is allocated in
 * whatever memory context is current at the time of the call.
 */
BOX *
box_copy(BOX *orig)
{
	BOX		   *copy = (BOX *) palloc(sizeof(BOX));

	memcpy(copy, orig, sizeof(BOX));

	return copy;
}
|
@ -17,8 +17,10 @@
|
||||
|
||||
#include "access/spgist.h"
|
||||
#include "access/stratnum.h"
|
||||
#include "access/spgist_private.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/float.h"
|
||||
#include "utils/geo_decls.h"
|
||||
|
||||
|
||||
@ -77,6 +79,38 @@ getQuadrant(Point *centroid, Point *tst)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Returns bounding box of a given quadrant inside given bounding box */
|
||||
static BOX *
|
||||
getQuadrantArea(BOX *bbox, Point *centroid, int quadrant)
|
||||
{
|
||||
BOX *result = (BOX *) palloc(sizeof(BOX));
|
||||
|
||||
switch (quadrant)
|
||||
{
|
||||
case 1:
|
||||
result->high = bbox->high;
|
||||
result->low = *centroid;
|
||||
break;
|
||||
case 2:
|
||||
result->high.x = bbox->high.x;
|
||||
result->high.y = centroid->y;
|
||||
result->low.x = centroid->x;
|
||||
result->low.y = bbox->low.y;
|
||||
break;
|
||||
case 3:
|
||||
result->high = *centroid;
|
||||
result->low = bbox->low;
|
||||
break;
|
||||
case 4:
|
||||
result->high.x = centroid->x;
|
||||
result->high.y = bbox->high.y;
|
||||
result->low.x = bbox->low.x;
|
||||
result->low.y = centroid->y;
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Datum
|
||||
spg_quad_choose(PG_FUNCTION_ARGS)
|
||||
@ -196,19 +230,68 @@ spg_quad_inner_consistent(PG_FUNCTION_ARGS)
|
||||
spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
|
||||
spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
|
||||
Point *centroid;
|
||||
BOX infbbox;
|
||||
BOX *bbox = NULL;
|
||||
int which;
|
||||
int i;
|
||||
|
||||
Assert(in->hasPrefix);
|
||||
centroid = DatumGetPointP(in->prefixDatum);
|
||||
|
||||
/*
|
||||
* When ordering scan keys are specified, we've to calculate distance for
|
||||
* them. In order to do that, we need calculate bounding boxes for all
|
||||
* children nodes. Calculation of those bounding boxes on non-zero level
|
||||
* require knowledge of bounding box of upper node. So, we save bounding
|
||||
* boxes to traversalValues.
|
||||
*/
|
||||
if (in->norderbys > 0)
|
||||
{
|
||||
out->distances = (double **) palloc(sizeof(double *) * in->nNodes);
|
||||
out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes);
|
||||
|
||||
if (in->level == 0)
|
||||
{
|
||||
double inf = get_float8_infinity();
|
||||
|
||||
infbbox.high.x = inf;
|
||||
infbbox.high.y = inf;
|
||||
infbbox.low.x = -inf;
|
||||
infbbox.low.y = -inf;
|
||||
bbox = &infbbox;
|
||||
}
|
||||
else
|
||||
{
|
||||
bbox = in->traversalValue;
|
||||
Assert(bbox);
|
||||
}
|
||||
}
|
||||
|
||||
if (in->allTheSame)
|
||||
{
|
||||
/* Report that all nodes should be visited */
|
||||
out->nNodes = in->nNodes;
|
||||
out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
|
||||
for (i = 0; i < in->nNodes; i++)
|
||||
{
|
||||
out->nodeNumbers[i] = i;
|
||||
|
||||
if (in->norderbys > 0)
|
||||
{
|
||||
MemoryContext oldCtx = MemoryContextSwitchTo(
|
||||
in->traversalMemoryContext);
|
||||
|
||||
/* Use parent quadrant box as traversalValue */
|
||||
BOX *quadrant = box_copy(bbox);
|
||||
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
out->traversalValues[i] = quadrant;
|
||||
out->distances[i] = spg_key_orderbys_distances(
|
||||
BoxPGetDatum(quadrant), false,
|
||||
in->orderbys, in->norderbys);
|
||||
}
|
||||
}
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
@ -286,13 +369,37 @@ spg_quad_inner_consistent(PG_FUNCTION_ARGS)
|
||||
break; /* no need to consider remaining conditions */
|
||||
}
|
||||
|
||||
out->levelAdds = palloc(sizeof(int) * 4);
|
||||
for (i = 0; i < 4; ++i)
|
||||
out->levelAdds[i] = 1;
|
||||
|
||||
/* We must descend into the quadrant(s) identified by which */
|
||||
out->nodeNumbers = (int *) palloc(sizeof(int) * 4);
|
||||
out->nNodes = 0;
|
||||
|
||||
for (i = 1; i <= 4; i++)
|
||||
{
|
||||
if (which & (1 << i))
|
||||
out->nodeNumbers[out->nNodes++] = i - 1;
|
||||
{
|
||||
out->nodeNumbers[out->nNodes] = i - 1;
|
||||
|
||||
if (in->norderbys > 0)
|
||||
{
|
||||
MemoryContext oldCtx = MemoryContextSwitchTo(
|
||||
in->traversalMemoryContext);
|
||||
BOX *quadrant = getQuadrantArea(bbox, centroid, i);
|
||||
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
out->traversalValues[out->nNodes] = quadrant;
|
||||
|
||||
out->distances[out->nNodes] = spg_key_orderbys_distances(
|
||||
BoxPGetDatum(quadrant), false,
|
||||
in->orderbys, in->norderbys);
|
||||
}
|
||||
|
||||
out->nNodes++;
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_VOID();
|
||||
@ -356,5 +463,11 @@ spg_quad_leaf_consistent(PG_FUNCTION_ARGS)
|
||||
break;
|
||||
}
|
||||
|
||||
if (res && in->norderbys > 0)
|
||||
/* ok, it passes -> let's compute the distances */
|
||||
out->distances = spg_key_orderbys_distances(
|
||||
BoxPGetDatum(in->leafDatum), true,
|
||||
in->orderbys, in->norderbys);
|
||||
|
||||
PG_RETURN_BOOL(res);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -15,17 +15,26 @@
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/amvalidate.h"
|
||||
#include "access/htup_details.h"
|
||||
#include "access/reloptions.h"
|
||||
#include "access/spgist_private.h"
|
||||
#include "access/transam.h"
|
||||
#include "access/xact.h"
|
||||
#include "catalog/pg_amop.h"
|
||||
#include "optimizer/paths.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "storage/indexfsm.h"
|
||||
#include "storage/lmgr.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/catcache.h"
|
||||
#include "utils/index_selfuncs.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/syscache.h"
|
||||
|
||||
extern Expr *spgcanorderbyop(IndexOptInfo *index,
|
||||
PathKey *pathkey, int pathkeyno,
|
||||
Expr *orderby_clause, int *indexcol_p);
|
||||
|
||||
/*
|
||||
* SP-GiST handler function: return IndexAmRoutine with access method parameters
|
||||
@ -39,7 +48,7 @@ spghandler(PG_FUNCTION_ARGS)
|
||||
amroutine->amstrategies = 0;
|
||||
amroutine->amsupport = SPGISTNProc;
|
||||
amroutine->amcanorder = false;
|
||||
amroutine->amcanorderbyop = false;
|
||||
amroutine->amcanorderbyop = true;
|
||||
amroutine->amcanbackward = false;
|
||||
amroutine->amcanunique = false;
|
||||
amroutine->amcanmulticol = false;
|
||||
@ -61,7 +70,7 @@ spghandler(PG_FUNCTION_ARGS)
|
||||
amroutine->amcanreturn = spgcanreturn;
|
||||
amroutine->amcostestimate = spgcostestimate;
|
||||
amroutine->amoptions = spgoptions;
|
||||
amroutine->amproperty = NULL;
|
||||
amroutine->amproperty = spgproperty;
|
||||
amroutine->amvalidate = spgvalidate;
|
||||
amroutine->ambeginscan = spgbeginscan;
|
||||
amroutine->amrescan = spgrescan;
|
||||
@ -949,3 +958,82 @@ SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size,
|
||||
|
||||
return offnum;
|
||||
}
|
||||
|
||||
/*
|
||||
* spgproperty() -- Check boolean properties of indexes.
|
||||
*
|
||||
* This is optional for most AMs, but is required for SP-GiST because the core
|
||||
* property code doesn't support AMPROP_DISTANCE_ORDERABLE.
|
||||
*/
|
||||
bool
|
||||
spgproperty(Oid index_oid, int attno,
|
||||
IndexAMProperty prop, const char *propname,
|
||||
bool *res, bool *isnull)
|
||||
{
|
||||
Oid opclass,
|
||||
opfamily,
|
||||
opcintype;
|
||||
CatCList *catlist;
|
||||
int i;
|
||||
|
||||
/* Only answer column-level inquiries */
|
||||
if (attno == 0)
|
||||
return false;
|
||||
|
||||
switch (prop)
|
||||
{
|
||||
case AMPROP_DISTANCE_ORDERABLE:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Currently, SP-GiST distance-ordered scans require that there be a
|
||||
* distance operator in the opclass with the default types. So we assume
|
||||
* that if such a operator exists, then there's a reason for it.
|
||||
*/
|
||||
|
||||
/* First we need to know the column's opclass. */
|
||||
opclass = get_index_column_opclass(index_oid, attno);
|
||||
if (!OidIsValid(opclass))
|
||||
{
|
||||
*isnull = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Now look up the opclass family and input datatype. */
|
||||
if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
|
||||
{
|
||||
*isnull = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* And now we can check whether the operator is provided. */
|
||||
catlist = SearchSysCacheList1(AMOPSTRATEGY,
|
||||
ObjectIdGetDatum(opfamily));
|
||||
|
||||
*res = false;
|
||||
|
||||
for (i = 0; i < catlist->n_members; i++)
|
||||
{
|
||||
HeapTuple amoptup = &catlist->members[i]->tuple;
|
||||
Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(amoptup);
|
||||
|
||||
if (amopform->amoppurpose == AMOP_ORDER &&
|
||||
(amopform->amoplefttype == opcintype ||
|
||||
amopform->amoprighttype == opcintype) &&
|
||||
opfamily_can_sort_type(amopform->amopsortfamily,
|
||||
get_op_rettype(amopform->amopopr)))
|
||||
{
|
||||
*res = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ReleaseSysCacheList(catlist);
|
||||
|
||||
*isnull = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -187,6 +187,7 @@ spgvalidate(Oid opclassoid)
|
||||
{
|
||||
HeapTuple oprtup = &oprlist->members[i]->tuple;
|
||||
Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
|
||||
Oid op_rettype;
|
||||
|
||||
/* TODO: Check that only allowed strategy numbers exist */
|
||||
if (oprform->amopstrategy < 1 || oprform->amopstrategy > 63)
|
||||
@ -200,20 +201,26 @@ spgvalidate(Oid opclassoid)
|
||||
result = false;
|
||||
}
|
||||
|
||||
/* spgist doesn't support ORDER BY operators */
|
||||
if (oprform->amoppurpose != AMOP_SEARCH ||
|
||||
OidIsValid(oprform->amopsortfamily))
|
||||
/* spgist supports ORDER BY operators */
|
||||
if (oprform->amoppurpose != AMOP_SEARCH)
|
||||
{
|
||||
ereport(INFO,
|
||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||
errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s",
|
||||
opfamilyname, "spgist",
|
||||
format_operator(oprform->amopopr))));
|
||||
result = false;
|
||||
/* ... and operator result must match the claimed btree opfamily */
|
||||
op_rettype = get_op_rettype(oprform->amopopr);
|
||||
if (!opfamily_can_sort_type(oprform->amopsortfamily, op_rettype))
|
||||
{
|
||||
ereport(INFO,
|
||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||
errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s",
|
||||
opfamilyname, "spgist",
|
||||
format_operator(oprform->amopopr))));
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
op_rettype = BOOLOID;
|
||||
|
||||
/* Check operator signature --- same for all spgist strategies */
|
||||
if (!check_amop_signature(oprform->amopopr, BOOLOID,
|
||||
if (!check_amop_signature(oprform->amopopr, op_rettype,
|
||||
oprform->amoplefttype,
|
||||
oprform->amoprighttype))
|
||||
{
|
||||
|
Reference in New Issue
Block a user