1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-11 20:28:21 +03:00

Cube extension kNN support

Introduce distance operators over cubes:
<#> taxicab distance
<->  euclidean distance
<=> chebyshev distance

Also add kNN support of those distances in GiST opclass.

Author: Stas Kelvich
This commit is contained in:
Teodor Sigaev
2015-12-18 14:38:27 +03:00
parent 3d0c50ffa0
commit 33bd250f6c
12 changed files with 1684 additions and 4 deletions

View File

@ -40,6 +40,8 @@ PG_FUNCTION_INFO_V1(cube_c_f8_f8);
PG_FUNCTION_INFO_V1(cube_dim);
PG_FUNCTION_INFO_V1(cube_ll_coord);
PG_FUNCTION_INFO_V1(cube_ur_coord);
PG_FUNCTION_INFO_V1(cube_coord);
PG_FUNCTION_INFO_V1(cube_coord_llur);
PG_FUNCTION_INFO_V1(cube_subset);
/*
@ -53,6 +55,7 @@ PG_FUNCTION_INFO_V1(g_cube_penalty);
PG_FUNCTION_INFO_V1(g_cube_picksplit);
PG_FUNCTION_INFO_V1(g_cube_union);
PG_FUNCTION_INFO_V1(g_cube_same);
PG_FUNCTION_INFO_V1(g_cube_distance);
/*
** B-tree support functions
@ -79,7 +82,9 @@ PG_FUNCTION_INFO_V1(cube_size);
/*
** miscellaneous
*/
PG_FUNCTION_INFO_V1(distance_taxicab);
PG_FUNCTION_INFO_V1(cube_distance);
PG_FUNCTION_INFO_V1(distance_chebyshev);
PG_FUNCTION_INFO_V1(cube_is_point);
PG_FUNCTION_INFO_V1(cube_enlarge);
@ -1257,6 +1262,144 @@ cube_distance(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(sqrt(distance));
}
Datum
distance_taxicab(PG_FUNCTION_ARGS)
{
NDBOX *a = PG_GETARG_NDBOX(0),
*b = PG_GETARG_NDBOX(1);
bool swapped = false;
double distance;
int i;
/* swap the box pointers if needed */
if (DIM(a) < DIM(b))
{
NDBOX *tmp = b;
b = a;
a = tmp;
swapped = true;
}
distance = 0.0;
/* compute within the dimensions of (b) */
for (i = 0; i < DIM(b); i++)
distance += fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i)));
/* compute distance to zero for those dimensions in (a) absent in (b) */
for (i = DIM(b); i < DIM(a); i++)
distance += fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0));
if (swapped)
{
PG_FREE_IF_COPY(b, 0);
PG_FREE_IF_COPY(a, 1);
}
else
{
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
}
PG_RETURN_FLOAT8(distance);
}
Datum
distance_chebyshev(PG_FUNCTION_ARGS)
{
NDBOX *a = PG_GETARG_NDBOX(0),
*b = PG_GETARG_NDBOX(1);
bool swapped = false;
double d, distance;
int i;
/* swap the box pointers if needed */
if (DIM(a) < DIM(b))
{
NDBOX *tmp = b;
b = a;
a = tmp;
swapped = true;
}
distance = 0.0;
/* compute within the dimensions of (b) */
for (i = 0; i < DIM(b); i++)
{
d = fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i)));
if (d > distance)
distance = d;
}
/* compute distance to zero for those dimensions in (a) absent in (b) */
for (i = DIM(b); i < DIM(a); i++)
{
d = fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0));
if (d > distance)
distance = d;
}
if (swapped)
{
PG_FREE_IF_COPY(b, 0);
PG_FREE_IF_COPY(a, 1);
}
else
{
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
}
PG_RETURN_FLOAT8(distance);
}
Datum
g_cube_distance(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
NDBOX *cube = DatumGetNDBOX(entry->key);
double retval;
if (strategy == CubeKNNDistanceCoord)
{
int coord = PG_GETARG_INT32(1);
if IS_POINT(cube)
{
retval = (cube)->x[(coord-1)%DIM(cube)];
}
else
{
retval = Min(
(cube)->x[(coord-1)%DIM(cube)],
(cube)->x[(coord-1)%DIM(cube) + DIM(cube)]
);
}
}
else
{
NDBOX *query = PG_GETARG_NDBOX(1);
switch(strategy)
{
case CubeKNNDistanceTaxicab:
retval = DatumGetFloat8(DirectFunctionCall2(distance_taxicab,
PointerGetDatum(cube), PointerGetDatum(query)));
break;
case CubeKNNDistanceEuclid:
retval = DatumGetFloat8(DirectFunctionCall2(cube_distance,
PointerGetDatum(cube), PointerGetDatum(query)));
break;
case CubeKNNDistanceChebyshev:
retval = DatumGetFloat8(DirectFunctionCall2(distance_chebyshev,
PointerGetDatum(cube), PointerGetDatum(query)));
break;
default:
elog(ERROR, "Cube: unknown strategy number.");
}
}
PG_RETURN_FLOAT8(retval);
}
static double
distance_1D(double a1, double a2, double b1, double b2)
{
@ -1352,6 +1495,71 @@ cube_ur_coord(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(result);
}
/*
* Function returns cube coordinate.
* Numbers from 1 to DIM denotes first corner coordinates.
* Numbers from DIM+1 to 2*DIM denotes second corner coordinates.
*/
Datum
cube_coord(PG_FUNCTION_ARGS)
{
NDBOX *cube = PG_GETARG_NDBOX(0);
int coord = PG_GETARG_INT16(1);
if ((coord > 0) && (coord <= 2*DIM(cube)))
{
if IS_POINT(cube)
PG_RETURN_FLOAT8( (cube)->x[(coord-1)%DIM(cube)] );
else
PG_RETURN_FLOAT8( (cube)->x[coord-1] );
}
else
{
ereport(ERROR,
(errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
errmsg("Cube index out of bounds")));
}
}
/*
* This function works like cube_coord(),
* but rearranges coordinates of corners to get cube representation
* in the form of (lower left, upper right).
* For historical reasons that extension allows us to create cubes in form
* ((2,1),(1,2)) and instead of normalizing such cube to ((1,1),(2,2)) it
* stores cube in original way. But to get cubes ordered by one of dimensions
* directly from the index without extra sort step we need some
* representation-independent coordinate getter. This function implements it.
*/
Datum
cube_coord_llur(PG_FUNCTION_ARGS)
{
NDBOX *cube = PG_GETARG_NDBOX(0);
int coord = PG_GETARG_INT16(1);
if ((coord > 0) && (coord <= DIM(cube)))
{
if IS_POINT(cube)
PG_RETURN_FLOAT8( (cube)->x[coord-1] );
else
PG_RETURN_FLOAT8( Min((cube)->x[coord-1], (cube)->x[coord-1+DIM(cube)]) );
}
else if ((coord > DIM(cube)) && (coord <= 2*DIM(cube)))
{
if IS_POINT(cube)
PG_RETURN_FLOAT8( (cube)->x[(coord-1)%DIM(cube)] );
else
PG_RETURN_FLOAT8( Max((cube)->x[coord-1], (cube)->x[coord-1-DIM(cube)]) );
}
else
{
ereport(ERROR,
(errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
errmsg("Cube index out of bounds")));
}
}
/* Increase or decrease box size by a radius in at least n dimensions. */
Datum
cube_enlarge(PG_FUNCTION_ARGS)