mirror of
https://github.com/postgres/postgres.git
synced 2025-06-11 20:28:21 +03:00
Cube extension kNN support
Introduce distance operators over cubes: <#> taxicab distance <-> euclidean distance <=> chebyshev distance Also add kNN support of those distances in GiST opclass. Author: Stas Kelvich
This commit is contained in:
@ -40,6 +40,8 @@ PG_FUNCTION_INFO_V1(cube_c_f8_f8);
|
||||
PG_FUNCTION_INFO_V1(cube_dim);
|
||||
PG_FUNCTION_INFO_V1(cube_ll_coord);
|
||||
PG_FUNCTION_INFO_V1(cube_ur_coord);
|
||||
PG_FUNCTION_INFO_V1(cube_coord);
|
||||
PG_FUNCTION_INFO_V1(cube_coord_llur);
|
||||
PG_FUNCTION_INFO_V1(cube_subset);
|
||||
|
||||
/*
|
||||
@ -53,6 +55,7 @@ PG_FUNCTION_INFO_V1(g_cube_penalty);
|
||||
PG_FUNCTION_INFO_V1(g_cube_picksplit);
|
||||
PG_FUNCTION_INFO_V1(g_cube_union);
|
||||
PG_FUNCTION_INFO_V1(g_cube_same);
|
||||
PG_FUNCTION_INFO_V1(g_cube_distance);
|
||||
|
||||
/*
|
||||
** B-tree support functions
|
||||
@ -79,7 +82,9 @@ PG_FUNCTION_INFO_V1(cube_size);
|
||||
/*
|
||||
** miscellaneous
|
||||
*/
|
||||
PG_FUNCTION_INFO_V1(distance_taxicab);
|
||||
PG_FUNCTION_INFO_V1(cube_distance);
|
||||
PG_FUNCTION_INFO_V1(distance_chebyshev);
|
||||
PG_FUNCTION_INFO_V1(cube_is_point);
|
||||
PG_FUNCTION_INFO_V1(cube_enlarge);
|
||||
|
||||
@ -1257,6 +1262,144 @@ cube_distance(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_FLOAT8(sqrt(distance));
|
||||
}
|
||||
|
||||
Datum
|
||||
distance_taxicab(PG_FUNCTION_ARGS)
|
||||
{
|
||||
NDBOX *a = PG_GETARG_NDBOX(0),
|
||||
*b = PG_GETARG_NDBOX(1);
|
||||
bool swapped = false;
|
||||
double distance;
|
||||
int i;
|
||||
|
||||
/* swap the box pointers if needed */
|
||||
if (DIM(a) < DIM(b))
|
||||
{
|
||||
NDBOX *tmp = b;
|
||||
b = a;
|
||||
a = tmp;
|
||||
swapped = true;
|
||||
}
|
||||
|
||||
distance = 0.0;
|
||||
/* compute within the dimensions of (b) */
|
||||
for (i = 0; i < DIM(b); i++)
|
||||
distance += fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i)));
|
||||
|
||||
/* compute distance to zero for those dimensions in (a) absent in (b) */
|
||||
for (i = DIM(b); i < DIM(a); i++)
|
||||
distance += fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0));
|
||||
|
||||
if (swapped)
|
||||
{
|
||||
PG_FREE_IF_COPY(b, 0);
|
||||
PG_FREE_IF_COPY(a, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
}
|
||||
|
||||
PG_RETURN_FLOAT8(distance);
|
||||
}
|
||||
|
||||
Datum
|
||||
distance_chebyshev(PG_FUNCTION_ARGS)
|
||||
{
|
||||
NDBOX *a = PG_GETARG_NDBOX(0),
|
||||
*b = PG_GETARG_NDBOX(1);
|
||||
bool swapped = false;
|
||||
double d, distance;
|
||||
int i;
|
||||
|
||||
/* swap the box pointers if needed */
|
||||
if (DIM(a) < DIM(b))
|
||||
{
|
||||
NDBOX *tmp = b;
|
||||
b = a;
|
||||
a = tmp;
|
||||
swapped = true;
|
||||
}
|
||||
|
||||
distance = 0.0;
|
||||
/* compute within the dimensions of (b) */
|
||||
for (i = 0; i < DIM(b); i++)
|
||||
{
|
||||
d = fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), LL_COORD(b,i), UR_COORD(b,i)));
|
||||
if (d > distance)
|
||||
distance = d;
|
||||
}
|
||||
|
||||
/* compute distance to zero for those dimensions in (a) absent in (b) */
|
||||
for (i = DIM(b); i < DIM(a); i++)
|
||||
{
|
||||
d = fabs(distance_1D(LL_COORD(a,i), UR_COORD(a,i), 0.0, 0.0));
|
||||
if (d > distance)
|
||||
distance = d;
|
||||
}
|
||||
|
||||
if (swapped)
|
||||
{
|
||||
PG_FREE_IF_COPY(b, 0);
|
||||
PG_FREE_IF_COPY(a, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
}
|
||||
|
||||
PG_RETURN_FLOAT8(distance);
|
||||
}
|
||||
|
||||
Datum
|
||||
g_cube_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
||||
NDBOX *cube = DatumGetNDBOX(entry->key);
|
||||
double retval;
|
||||
|
||||
if (strategy == CubeKNNDistanceCoord)
|
||||
{
|
||||
int coord = PG_GETARG_INT32(1);
|
||||
|
||||
if IS_POINT(cube)
|
||||
{
|
||||
retval = (cube)->x[(coord-1)%DIM(cube)];
|
||||
}
|
||||
else
|
||||
{
|
||||
retval = Min(
|
||||
(cube)->x[(coord-1)%DIM(cube)],
|
||||
(cube)->x[(coord-1)%DIM(cube) + DIM(cube)]
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
NDBOX *query = PG_GETARG_NDBOX(1);
|
||||
switch(strategy)
|
||||
{
|
||||
case CubeKNNDistanceTaxicab:
|
||||
retval = DatumGetFloat8(DirectFunctionCall2(distance_taxicab,
|
||||
PointerGetDatum(cube), PointerGetDatum(query)));
|
||||
break;
|
||||
case CubeKNNDistanceEuclid:
|
||||
retval = DatumGetFloat8(DirectFunctionCall2(cube_distance,
|
||||
PointerGetDatum(cube), PointerGetDatum(query)));
|
||||
break;
|
||||
case CubeKNNDistanceChebyshev:
|
||||
retval = DatumGetFloat8(DirectFunctionCall2(distance_chebyshev,
|
||||
PointerGetDatum(cube), PointerGetDatum(query)));
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "Cube: unknown strategy number.");
|
||||
}
|
||||
}
|
||||
PG_RETURN_FLOAT8(retval);
|
||||
}
|
||||
|
||||
static double
|
||||
distance_1D(double a1, double a2, double b1, double b2)
|
||||
{
|
||||
@ -1352,6 +1495,71 @@ cube_ur_coord(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_FLOAT8(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Function returns cube coordinate.
|
||||
* Numbers from 1 to DIM denotes first corner coordinates.
|
||||
* Numbers from DIM+1 to 2*DIM denotes second corner coordinates.
|
||||
*/
|
||||
Datum
|
||||
cube_coord(PG_FUNCTION_ARGS)
|
||||
{
|
||||
NDBOX *cube = PG_GETARG_NDBOX(0);
|
||||
int coord = PG_GETARG_INT16(1);
|
||||
|
||||
if ((coord > 0) && (coord <= 2*DIM(cube)))
|
||||
{
|
||||
if IS_POINT(cube)
|
||||
PG_RETURN_FLOAT8( (cube)->x[(coord-1)%DIM(cube)] );
|
||||
else
|
||||
PG_RETURN_FLOAT8( (cube)->x[coord-1] );
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
|
||||
errmsg("Cube index out of bounds")));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This function works like cube_coord(),
|
||||
* but rearranges coordinates of corners to get cube representation
|
||||
* in the form of (lower left, upper right).
|
||||
* For historical reasons that extension allows us to create cubes in form
|
||||
* ((2,1),(1,2)) and instead of normalizing such cube to ((1,1),(2,2)) it
|
||||
* stores cube in original way. But to get cubes ordered by one of dimensions
|
||||
* directly from the index without extra sort step we need some
|
||||
* representation-independent coordinate getter. This function implements it.
|
||||
*/
|
||||
Datum
|
||||
cube_coord_llur(PG_FUNCTION_ARGS)
|
||||
{
|
||||
NDBOX *cube = PG_GETARG_NDBOX(0);
|
||||
int coord = PG_GETARG_INT16(1);
|
||||
|
||||
if ((coord > 0) && (coord <= DIM(cube)))
|
||||
{
|
||||
if IS_POINT(cube)
|
||||
PG_RETURN_FLOAT8( (cube)->x[coord-1] );
|
||||
else
|
||||
PG_RETURN_FLOAT8( Min((cube)->x[coord-1], (cube)->x[coord-1+DIM(cube)]) );
|
||||
}
|
||||
else if ((coord > DIM(cube)) && (coord <= 2*DIM(cube)))
|
||||
{
|
||||
if IS_POINT(cube)
|
||||
PG_RETURN_FLOAT8( (cube)->x[(coord-1)%DIM(cube)] );
|
||||
else
|
||||
PG_RETURN_FLOAT8( Max((cube)->x[coord-1], (cube)->x[coord-1-DIM(cube)]) );
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
|
||||
errmsg("Cube index out of bounds")));
|
||||
}
|
||||
}
|
||||
|
||||
/* Increase or decrease box size by a radius in at least n dimensions. */
|
||||
Datum
|
||||
cube_enlarge(PG_FUNCTION_ARGS)
|
||||
|
Reference in New Issue
Block a user