mirror of
https://github.com/postgres/postgres.git
synced 2025-07-15 19:21:59 +03:00
Add support for nearest-neighbor (KNN) searches to SP-GiST
Currently, KNN searches were supported only by GiST. SP-GiST also capable to support them. This commit implements that support. SP-GiST scan stack is replaced with queue, which serves as stack if no ordering is specified. KNN support is provided for three SP-GIST opclasses: quad_point_ops, kd_point_ops and poly_ops (catversion is bumped). Some common parts between GiST and SP-GiST KNNs are extracted into separate functions. Discussion: https://postgr.es/m/570825e8-47d0-4732-2bf6-88d67d2d51c8%40postgrespro.ru Author: Nikita Glukhov, Alexander Korotkov based on GSoC work by Vlad Sterzhanov Review: Andrey Borodin, Alexander Korotkov
This commit is contained in:
@ -64,12 +64,13 @@
|
||||
|
||||
<table id="spgist-builtin-opclasses-table">
|
||||
<title>Built-in <acronym>SP-GiST</acronym> Operator Classes</title>
|
||||
<tgroup cols="3">
|
||||
<tgroup cols="4">
|
||||
<thead>
|
||||
<row>
|
||||
<entry>Name</entry>
|
||||
<entry>Indexed Data Type</entry>
|
||||
<entry>Indexable Operators</entry>
|
||||
<entry>Ordering Operators</entry>
|
||||
</row>
|
||||
</thead>
|
||||
<tbody>
|
||||
@ -84,6 +85,9 @@
|
||||
<literal>>^</literal>
|
||||
<literal>~=</literal>
|
||||
</entry>
|
||||
<entry>
|
||||
<literal><-></literal>
|
||||
</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>quad_point_ops</literal></entry>
|
||||
@ -96,6 +100,9 @@
|
||||
<literal>>^</literal>
|
||||
<literal>~=</literal>
|
||||
</entry>
|
||||
<entry>
|
||||
<literal><-></literal>
|
||||
</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>range_ops</literal></entry>
|
||||
@ -111,6 +118,8 @@
|
||||
<literal>>></literal>
|
||||
<literal>@></literal>
|
||||
</entry>
|
||||
<entry>
|
||||
</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>box_ops</literal></entry>
|
||||
@ -129,6 +138,8 @@
|
||||
<literal>|>></literal>
|
||||
<literal>|&></literal>
|
||||
</entry>
|
||||
<entry>
|
||||
</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>poly_ops</literal></entry>
|
||||
@ -147,6 +158,9 @@
|
||||
<literal>|>></literal>
|
||||
<literal>|&></literal>
|
||||
</entry>
|
||||
<entry>
|
||||
<literal><-></literal>
|
||||
</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>text_ops</literal></entry>
|
||||
@ -163,6 +177,8 @@
|
||||
<literal>~>~</literal>
|
||||
<literal>^@</literal>
|
||||
</entry>
|
||||
<entry>
|
||||
</entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry><literal>inet_ops</literal></entry>
|
||||
@ -180,6 +196,8 @@
|
||||
<literal><=</literal>
|
||||
<literal>=</literal>
|
||||
</entry>
|
||||
<entry>
|
||||
</entry>
|
||||
</row>
|
||||
</tbody>
|
||||
</tgroup>
|
||||
@ -191,6 +209,12 @@
|
||||
supports the same operators but uses a different index data structure which
|
||||
may offer better performance in some applications.
|
||||
</para>
|
||||
<para>
|
||||
The <literal>quad_point_ops</literal>, <literal>kd_point_ops</literal> and
|
||||
<literal>poly_ops</literal> operator classes support the <literal><-></literal>
|
||||
ordering operator, which enables the k-nearest neighbor (<literal>k-NN</literal>)
|
||||
search over indexed point or polygon datasets.
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
@ -630,7 +654,10 @@ CREATE FUNCTION my_inner_consistent(internal, internal) RETURNS void ...
|
||||
typedef struct spgInnerConsistentIn
|
||||
{
|
||||
ScanKey scankeys; /* array of operators and comparison values */
|
||||
int nkeys; /* length of array */
|
||||
ScanKey orderbys; /* array of ordering operators and comparison
|
||||
* values */
|
||||
int nkeys; /* length of scankeys array */
|
||||
int norderbys; /* length of orderbys array */
|
||||
|
||||
Datum reconstructedValue; /* value reconstructed at parent */
|
||||
void *traversalValue; /* opclass-specific traverse value */
|
||||
@ -653,6 +680,7 @@ typedef struct spgInnerConsistentOut
|
||||
int *levelAdds; /* increment level by this much for each */
|
||||
Datum *reconstructedValues; /* associated reconstructed values */
|
||||
void **traversalValues; /* opclass-specific traverse values */
|
||||
double **distances; /* associated distances */
|
||||
} spgInnerConsistentOut;
|
||||
</programlisting>
|
||||
|
||||
@ -667,6 +695,8 @@ typedef struct spgInnerConsistentOut
|
||||
In particular it is not necessary to check <structfield>sk_flags</structfield> to
|
||||
see if the comparison value is NULL, because the SP-GiST core code
|
||||
will filter out such conditions.
|
||||
The array <structfield>orderbys</structfield>, of length <structfield>norderbys</structfield>,
|
||||
describes ordering operators (if any) in the same manner.
|
||||
<structfield>reconstructedValue</structfield> is the value reconstructed for the
|
||||
parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
|
||||
<function>inner_consistent</function> function did not provide a value at the
|
||||
@ -709,6 +739,10 @@ typedef struct spgInnerConsistentOut
|
||||
of <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type
|
||||
reconstructed for each child node to be visited; otherwise, leave
|
||||
<structfield>reconstructedValues</structfield> as NULL.
|
||||
If ordered search is performed, set <structfield>distances</structfield>
|
||||
to an array of distance values according to <structfield>orderbys</structfield>
|
||||
array (nodes with lowest distances will be processed first). Leave it
|
||||
NULL otherwise.
|
||||
If it is desired to pass down additional out-of-band information
|
||||
(<quote>traverse values</quote>) to lower levels of the tree search,
|
||||
set <structfield>traversalValues</structfield> to an array of the appropriate
|
||||
@ -717,6 +751,7 @@ typedef struct spgInnerConsistentOut
|
||||
Note that the <function>inner_consistent</function> function is
|
||||
responsible for palloc'ing the
|
||||
<structfield>nodeNumbers</structfield>, <structfield>levelAdds</structfield>,
|
||||
<structfield>distances</structfield>,
|
||||
<structfield>reconstructedValues</structfield>, and
|
||||
<structfield>traversalValues</structfield> arrays in the current memory context.
|
||||
However, any output traverse values pointed to by
|
||||
@ -747,7 +782,10 @@ CREATE FUNCTION my_leaf_consistent(internal, internal) RETURNS bool ...
|
||||
typedef struct spgLeafConsistentIn
|
||||
{
|
||||
ScanKey scankeys; /* array of operators and comparison values */
|
||||
int nkeys; /* length of array */
|
||||
ScanKey orderbys; /* array of ordering operators and comparison
|
||||
* values */
|
||||
int nkeys; /* length of scankeys array */
|
||||
int norderbys; /* length of orderbys array */
|
||||
|
||||
Datum reconstructedValue; /* value reconstructed at parent */
|
||||
void *traversalValue; /* opclass-specific traverse value */
|
||||
@ -759,8 +797,10 @@ typedef struct spgLeafConsistentIn
|
||||
|
||||
typedef struct spgLeafConsistentOut
|
||||
{
|
||||
Datum leafValue; /* reconstructed original data, if any */
|
||||
bool recheck; /* set true if operator must be rechecked */
|
||||
Datum leafValue; /* reconstructed original data, if any */
|
||||
bool recheck; /* set true if operator must be rechecked */
|
||||
bool recheckDistances; /* set true if distances must be rechecked */
|
||||
double *distances; /* associated distances */
|
||||
} spgLeafConsistentOut;
|
||||
</programlisting>
|
||||
|
||||
@ -775,6 +815,8 @@ typedef struct spgLeafConsistentOut
|
||||
In particular it is not necessary to check <structfield>sk_flags</structfield> to
|
||||
see if the comparison value is NULL, because the SP-GiST core code
|
||||
will filter out such conditions.
|
||||
The array <structfield>orderbys</structfield>, of length <structfield>norderbys</structfield>,
|
||||
describes the ordering operators in the same manner.
|
||||
<structfield>reconstructedValue</structfield> is the value reconstructed for the
|
||||
parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
|
||||
<function>inner_consistent</function> function did not provide a value at the
|
||||
@ -803,6 +845,12 @@ typedef struct spgLeafConsistentOut
|
||||
<structfield>recheck</structfield> may be set to <literal>true</literal> if the match
|
||||
is uncertain and so the operator(s) must be re-applied to the actual
|
||||
heap tuple to verify the match.
|
||||
If ordered search is performed, set <structfield>distances</structfield>
|
||||
to an array of distance values according to <structfield>orderbys</structfield>
|
||||
array. Leave it NULL otherwise. If at least one of returned distances
|
||||
is not exact, set <structfield>recheckDistances</structfield> to true.
|
||||
In this case, the executor will calculate the exact distances after
|
||||
fetching the tuple from the heap, and will reorder the tuples if needed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
Reference in New Issue
Block a user