mirror of
https://github.com/postgres/postgres.git
synced 2025-05-06 19:59:18 +03:00
Improve handling of NULLs in KNN-GiST and KNN-SP-GiST
This commit improves the handling of NULLs in KNN-GiST and KNN-SP-GiST in two ways: * It removes the ugliness of 02f90879e7, which stores distance values and null flags in two separate arrays after the GISTSearchItem struct. Instead we pack both the distance value and the null flag into the IndexOrderByDistance struct. Alignment overhead should be negligible, because we typically deal with at most a few "col op const" expressions in the ORDER BY clause. * It fixes the handling of "col op NULL" expressions in KNN-SP-GiST. Now, these expressions are not passed to support functions, which can't deal with them. Instead, a NULL result is implicitly assumed. In the future we may decide to teach support functions to deal with NULL arguments, but the current solution is a bugfix suitable for backpatching. Reported-by: Nikita Glukhov Discussion: https://postgr.es/m/826f57ee-afc7-8977-c44c-6111d18b02ec%40postgrespro.ru Author: Nikita Glukhov Reviewed-by: Alexander Korotkov Backpatch-through: 9.4
This commit is contained in:
parent
3153328fa9
commit
d6a90aac56
@ -112,9 +112,8 @@ gistkillitems(IndexScanDesc scan)
|
|||||||
* Similarly, *recheck_distances_p is set to indicate whether the distances
|
* Similarly, *recheck_distances_p is set to indicate whether the distances
|
||||||
* need to be rechecked, and it is also ignored for non-leaf entries.
|
* need to be rechecked, and it is also ignored for non-leaf entries.
|
||||||
*
|
*
|
||||||
* If we are doing an ordered scan, so->distancesValues[] and
|
* If we are doing an ordered scan, so->distances[] is filled with distance
|
||||||
* so->distancesNulls[] is filled with distance data from the distance()
|
* data from the distance() functions before returning success.
|
||||||
* functions before returning success.
|
|
||||||
*
|
*
|
||||||
* We must decompress the key in the IndexTuple before passing it to the
|
* We must decompress the key in the IndexTuple before passing it to the
|
||||||
* sk_funcs (which actually are the opclass Consistent or Distance methods).
|
* sk_funcs (which actually are the opclass Consistent or Distance methods).
|
||||||
@ -135,8 +134,7 @@ gistindex_keytest(IndexScanDesc scan,
|
|||||||
GISTSTATE *giststate = so->giststate;
|
GISTSTATE *giststate = so->giststate;
|
||||||
ScanKey key = scan->keyData;
|
ScanKey key = scan->keyData;
|
||||||
int keySize = scan->numberOfKeys;
|
int keySize = scan->numberOfKeys;
|
||||||
double *distance_value_p;
|
IndexOrderByDistance *distance_p;
|
||||||
bool *distance_null_p;
|
|
||||||
Relation r = scan->indexRelation;
|
Relation r = scan->indexRelation;
|
||||||
|
|
||||||
*recheck_p = false;
|
*recheck_p = false;
|
||||||
@ -155,8 +153,8 @@ gistindex_keytest(IndexScanDesc scan,
|
|||||||
elog(ERROR, "invalid GiST tuple found on leaf page");
|
elog(ERROR, "invalid GiST tuple found on leaf page");
|
||||||
for (i = 0; i < scan->numberOfOrderBys; i++)
|
for (i = 0; i < scan->numberOfOrderBys; i++)
|
||||||
{
|
{
|
||||||
so->distanceValues[i] = -get_float8_infinity();
|
so->distances[i].value = -get_float8_infinity();
|
||||||
so->distanceNulls[i] = false;
|
so->distances[i].isnull = false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -240,8 +238,7 @@ gistindex_keytest(IndexScanDesc scan,
|
|||||||
|
|
||||||
/* OK, it passes --- now let's compute the distances */
|
/* OK, it passes --- now let's compute the distances */
|
||||||
key = scan->orderByData;
|
key = scan->orderByData;
|
||||||
distance_value_p = so->distanceValues;
|
distance_p = so->distances;
|
||||||
distance_null_p = so->distanceNulls;
|
|
||||||
keySize = scan->numberOfOrderBys;
|
keySize = scan->numberOfOrderBys;
|
||||||
while (keySize > 0)
|
while (keySize > 0)
|
||||||
{
|
{
|
||||||
@ -256,8 +253,8 @@ gistindex_keytest(IndexScanDesc scan,
|
|||||||
if ((key->sk_flags & SK_ISNULL) || isNull)
|
if ((key->sk_flags & SK_ISNULL) || isNull)
|
||||||
{
|
{
|
||||||
/* Assume distance computes as null */
|
/* Assume distance computes as null */
|
||||||
*distance_value_p = 0.0;
|
distance_p->value = 0.0;
|
||||||
*distance_null_p = true;
|
distance_p->isnull = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -294,13 +291,12 @@ gistindex_keytest(IndexScanDesc scan,
|
|||||||
ObjectIdGetDatum(key->sk_subtype),
|
ObjectIdGetDatum(key->sk_subtype),
|
||||||
PointerGetDatum(&recheck));
|
PointerGetDatum(&recheck));
|
||||||
*recheck_distances_p |= recheck;
|
*recheck_distances_p |= recheck;
|
||||||
*distance_value_p = DatumGetFloat8(dist);
|
distance_p->value = DatumGetFloat8(dist);
|
||||||
*distance_null_p = false;
|
distance_p->isnull = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
key++;
|
key++;
|
||||||
distance_value_p++;
|
distance_p++;
|
||||||
distance_null_p++;
|
|
||||||
keySize--;
|
keySize--;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -313,8 +309,7 @@ gistindex_keytest(IndexScanDesc scan,
|
|||||||
*
|
*
|
||||||
* scan: index scan we are executing
|
* scan: index scan we are executing
|
||||||
* pageItem: search queue item identifying an index page to scan
|
* pageItem: search queue item identifying an index page to scan
|
||||||
* myDistanceValues: distances array associated with pageItem, or NULL at the root
|
* myDistances: distances array associated with pageItem, or NULL at the root
|
||||||
* myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
|
|
||||||
* tbm: if not NULL, gistgetbitmap's output bitmap
|
* tbm: if not NULL, gistgetbitmap's output bitmap
|
||||||
* ntids: if not NULL, gistgetbitmap's output tuple counter
|
* ntids: if not NULL, gistgetbitmap's output tuple counter
|
||||||
*
|
*
|
||||||
@ -332,8 +327,7 @@ gistindex_keytest(IndexScanDesc scan,
|
|||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
|
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
|
||||||
double *myDistanceValues, bool *myDistanceNulls,
|
IndexOrderByDistance *myDistances, TIDBitmap *tbm, int64 *ntids)
|
||||||
TIDBitmap *tbm, int64 *ntids)
|
|
||||||
{
|
{
|
||||||
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
|
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
|
||||||
GISTSTATE *giststate = so->giststate;
|
GISTSTATE *giststate = so->giststate;
|
||||||
@ -370,7 +364,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
|
|||||||
GISTSearchItem *item;
|
GISTSearchItem *item;
|
||||||
|
|
||||||
/* This can't happen when starting at the root */
|
/* This can't happen when starting at the root */
|
||||||
Assert(myDistanceValues != NULL && myDistanceNulls != NULL);
|
Assert(myDistances != NULL);
|
||||||
|
|
||||||
oldcxt = MemoryContextSwitchTo(so->queueCxt);
|
oldcxt = MemoryContextSwitchTo(so->queueCxt);
|
||||||
|
|
||||||
@ -380,10 +374,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
|
|||||||
item->data.parentlsn = pageItem->data.parentlsn;
|
item->data.parentlsn = pageItem->data.parentlsn;
|
||||||
|
|
||||||
/* Insert it into the queue using same distances as for this page */
|
/* Insert it into the queue using same distances as for this page */
|
||||||
memcpy(GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
|
memcpy(item->distances, myDistances,
|
||||||
myDistanceValues, sizeof(double) * scan->numberOfOrderBys);
|
sizeof(item->distances[0]) * scan->numberOfOrderBys);
|
||||||
memcpy(GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
|
|
||||||
myDistanceNulls, sizeof(bool) * scan->numberOfOrderBys);
|
|
||||||
|
|
||||||
pairingheap_add(so->queue, &item->phNode);
|
pairingheap_add(so->queue, &item->phNode);
|
||||||
|
|
||||||
@ -513,10 +505,8 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Insert it into the queue using new distance data */
|
/* Insert it into the queue using new distance data */
|
||||||
memcpy(GISTSearchItemDistanceValues(item, nOrderBys),
|
memcpy(item->distances, so->distances,
|
||||||
so->distanceValues, sizeof(double) * nOrderBys);
|
sizeof(item->distances[0]) * nOrderBys);
|
||||||
memcpy(GISTSearchItemDistanceNulls(item, nOrderBys),
|
|
||||||
so->distanceNulls, sizeof(bool) * nOrderBys);
|
|
||||||
|
|
||||||
pairingheap_add(so->queue, &item->phNode);
|
pairingheap_add(so->queue, &item->phNode);
|
||||||
|
|
||||||
@ -571,8 +561,6 @@ getNextNearest(IndexScanDesc scan)
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
GISTSearchItem *item = getNextGISTSearchItem(so);
|
GISTSearchItem *item = getNextGISTSearchItem(so);
|
||||||
float8 *distanceValues = GISTSearchItemDistanceValues(item, scan->numberOfOrderBys);
|
|
||||||
bool *distanceNulls = GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys);
|
|
||||||
|
|
||||||
if (!item)
|
if (!item)
|
||||||
break;
|
break;
|
||||||
@ -592,8 +580,8 @@ getNextNearest(IndexScanDesc scan)
|
|||||||
if (!scan->xs_orderbynulls[i])
|
if (!scan->xs_orderbynulls[i])
|
||||||
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
|
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
|
||||||
#endif
|
#endif
|
||||||
scan->xs_orderbyvals[i] = Float8GetDatum(distanceValues[i]);
|
scan->xs_orderbyvals[i] = item->distances[i].value;
|
||||||
scan->xs_orderbynulls[i] = distanceNulls[i];
|
scan->xs_orderbynulls[i] = item->distances[i].isnull;
|
||||||
}
|
}
|
||||||
else if (so->orderByTypes[i] == FLOAT4OID)
|
else if (so->orderByTypes[i] == FLOAT4OID)
|
||||||
{
|
{
|
||||||
@ -603,8 +591,8 @@ getNextNearest(IndexScanDesc scan)
|
|||||||
if (!scan->xs_orderbynulls[i])
|
if (!scan->xs_orderbynulls[i])
|
||||||
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
|
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
|
||||||
#endif
|
#endif
|
||||||
scan->xs_orderbyvals[i] = Float4GetDatum(distanceValues[i]);
|
scan->xs_orderbyvals[i] = Float4GetDatum(item->distances[i].value);
|
||||||
scan->xs_orderbynulls[i] = distanceNulls[i];
|
scan->xs_orderbynulls[i] = item->distances[i].isnull;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -632,10 +620,7 @@ getNextNearest(IndexScanDesc scan)
|
|||||||
/* visit an index page, extract its items into queue */
|
/* visit an index page, extract its items into queue */
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
gistScanPage(scan, item,
|
gistScanPage(scan, item, item->distances, NULL, NULL);
|
||||||
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
|
|
||||||
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
|
|
||||||
NULL, NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pfree(item);
|
pfree(item);
|
||||||
@ -673,7 +658,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
|
|||||||
|
|
||||||
fakeItem.blkno = GIST_ROOT_BLKNO;
|
fakeItem.blkno = GIST_ROOT_BLKNO;
|
||||||
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
|
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
|
||||||
gistScanPage(scan, &fakeItem, NULL, NULL, NULL, NULL);
|
gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scan->numberOfOrderBys > 0)
|
if (scan->numberOfOrderBys > 0)
|
||||||
@ -767,10 +752,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
|
|||||||
* this page, we fall out of the inner "do" and loop around to
|
* this page, we fall out of the inner "do" and loop around to
|
||||||
* return them.
|
* return them.
|
||||||
*/
|
*/
|
||||||
gistScanPage(scan, item,
|
gistScanPage(scan, item, item->distances, NULL, NULL);
|
||||||
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
|
|
||||||
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
|
|
||||||
NULL, NULL);
|
|
||||||
|
|
||||||
pfree(item);
|
pfree(item);
|
||||||
} while (so->nPageData == 0);
|
} while (so->nPageData == 0);
|
||||||
@ -801,7 +783,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
|||||||
|
|
||||||
fakeItem.blkno = GIST_ROOT_BLKNO;
|
fakeItem.blkno = GIST_ROOT_BLKNO;
|
||||||
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
|
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
|
||||||
gistScanPage(scan, &fakeItem, NULL, NULL, tbm, &ntids);
|
gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* While scanning a leaf page, ItemPointers of matching heap tuples will
|
* While scanning a leaf page, ItemPointers of matching heap tuples will
|
||||||
@ -816,10 +798,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
|
|||||||
|
|
||||||
CHECK_FOR_INTERRUPTS();
|
CHECK_FOR_INTERRUPTS();
|
||||||
|
|
||||||
gistScanPage(scan, item,
|
gistScanPage(scan, item, item->distances, tbm, &ntids);
|
||||||
GISTSearchItemDistanceValues(item, scan->numberOfOrderBys),
|
|
||||||
GISTSearchItemDistanceNulls(item, scan->numberOfOrderBys),
|
|
||||||
tbm, &ntids);
|
|
||||||
|
|
||||||
pfree(item);
|
pfree(item);
|
||||||
}
|
}
|
||||||
|
@ -33,26 +33,23 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
|
|||||||
const GISTSearchItem *sb = (const GISTSearchItem *) b;
|
const GISTSearchItem *sb = (const GISTSearchItem *) b;
|
||||||
IndexScanDesc scan = (IndexScanDesc) arg;
|
IndexScanDesc scan = (IndexScanDesc) arg;
|
||||||
int i;
|
int i;
|
||||||
double *da = GISTSearchItemDistanceValues(sa, scan->numberOfOrderBys),
|
|
||||||
*db = GISTSearchItemDistanceValues(sb, scan->numberOfOrderBys);
|
|
||||||
bool *na = GISTSearchItemDistanceNulls(sa, scan->numberOfOrderBys),
|
|
||||||
*nb = GISTSearchItemDistanceNulls(sb, scan->numberOfOrderBys);
|
|
||||||
|
|
||||||
/* Order according to distance comparison */
|
/* Order according to distance comparison */
|
||||||
for (i = 0; i < scan->numberOfOrderBys; i++)
|
for (i = 0; i < scan->numberOfOrderBys; i++)
|
||||||
{
|
{
|
||||||
if (na[i])
|
if (sa->distances[i].isnull)
|
||||||
{
|
{
|
||||||
if (!nb[i])
|
if (!sb->distances[i].isnull)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
else if (nb[i])
|
else if (sb->distances[i].isnull)
|
||||||
{
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int cmp = -float8_cmp_internal(da[i], db[i]);
|
int cmp = -float8_cmp_internal(sa->distances[i].value,
|
||||||
|
sb->distances[i].value);
|
||||||
|
|
||||||
if (cmp != 0)
|
if (cmp != 0)
|
||||||
return cmp;
|
return cmp;
|
||||||
@ -100,8 +97,7 @@ gistbeginscan(Relation r, int nkeys, int norderbys)
|
|||||||
so->queueCxt = giststate->scanCxt; /* see gistrescan */
|
so->queueCxt = giststate->scanCxt; /* see gistrescan */
|
||||||
|
|
||||||
/* workspaces with size dependent on numberOfOrderBys: */
|
/* workspaces with size dependent on numberOfOrderBys: */
|
||||||
so->distanceValues = palloc(sizeof(double) * scan->numberOfOrderBys);
|
so->distances = palloc(sizeof(so->distances[0]) * scan->numberOfOrderBys);
|
||||||
so->distanceNulls = palloc(sizeof(bool) * scan->numberOfOrderBys);
|
|
||||||
so->qual_ok = true; /* in case there are zero keys */
|
so->qual_ok = true; /* in case there are zero keys */
|
||||||
if (scan->numberOfOrderBys > 0)
|
if (scan->numberOfOrderBys > 0)
|
||||||
{
|
{
|
||||||
|
@ -117,6 +117,13 @@ typedef enum IndexUniqueCheck
|
|||||||
} IndexUniqueCheck;
|
} IndexUniqueCheck;
|
||||||
|
|
||||||
|
|
||||||
|
/* Nullable "ORDER BY col op const" distance */
|
||||||
|
typedef struct IndexOrderByDistance
|
||||||
|
{
|
||||||
|
double value;
|
||||||
|
bool isnull;
|
||||||
|
} IndexOrderByDistance;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* generalized index_ interface routines (in indexam.c)
|
* generalized index_ interface routines (in indexam.c)
|
||||||
*/
|
*/
|
||||||
|
@ -137,29 +137,15 @@ typedef struct GISTSearchItem
|
|||||||
GISTSearchHeapItem heap; /* heap info, if heap tuple */
|
GISTSearchHeapItem heap; /* heap info, if heap tuple */
|
||||||
} data;
|
} data;
|
||||||
|
|
||||||
/*
|
/* numberOfOrderBys entries */
|
||||||
* This data structure is followed by arrays of distance values and
|
IndexOrderByDistance distances[FLEXIBLE_ARRAY_MEMBER];
|
||||||
* distance null flags. Size of both arrays is
|
|
||||||
* IndexScanDesc->numberOfOrderBys. See macros below for accessing those
|
|
||||||
* arrays.
|
|
||||||
*/
|
|
||||||
} GISTSearchItem;
|
} GISTSearchItem;
|
||||||
|
|
||||||
#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
|
#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
|
||||||
|
|
||||||
#define SizeOfGISTSearchItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchItem)) + \
|
#define SizeOfGISTSearchItem(n_distances) \
|
||||||
(sizeof(double) + sizeof(bool)) * (n_distances))
|
(offsetof(GISTSearchItem, distances) + \
|
||||||
|
sizeof(IndexOrderByDistance) * (n_distances))
|
||||||
/*
|
|
||||||
* We actually don't need n_distances compute pointer to distance values.
|
|
||||||
* Nevertheless take n_distances as argument to have same arguments list for
|
|
||||||
* GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls().
|
|
||||||
*/
|
|
||||||
#define GISTSearchItemDistanceValues(item, n_distances) \
|
|
||||||
((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem))))
|
|
||||||
|
|
||||||
#define GISTSearchItemDistanceNulls(item, n_distances) \
|
|
||||||
((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem)) + sizeof(double) * (n_distances)))
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* GISTScanOpaqueData: private state for a scan of a GiST index
|
* GISTScanOpaqueData: private state for a scan of a GiST index
|
||||||
@ -175,8 +161,7 @@ typedef struct GISTScanOpaqueData
|
|||||||
bool firstCall; /* true until first gistgettuple call */
|
bool firstCall; /* true until first gistgettuple call */
|
||||||
|
|
||||||
/* pre-allocated workspace arrays */
|
/* pre-allocated workspace arrays */
|
||||||
double *distanceValues; /* output area for gistindex_keytest */
|
IndexOrderByDistance *distances; /* output area for gistindex_keytest */
|
||||||
bool *distanceNulls;
|
|
||||||
|
|
||||||
/* info about killed items if any (killedItems is NULL if never used) */
|
/* info about killed items if any (killedItems is NULL if never used) */
|
||||||
OffsetNumber *killedItems; /* offset numbers of killed items */
|
OffsetNumber *killedItems; /* offset numbers of killed items */
|
||||||
|
@ -1022,6 +1022,7 @@ IndexList
|
|||||||
IndexOnlyScan
|
IndexOnlyScan
|
||||||
IndexOnlyScanState
|
IndexOnlyScanState
|
||||||
IndexOptInfo
|
IndexOptInfo
|
||||||
|
IndexOrderByDistance
|
||||||
IndexPath
|
IndexPath
|
||||||
IndexQualInfo
|
IndexQualInfo
|
||||||
IndexRuntimeKeyInfo
|
IndexRuntimeKeyInfo
|
||||||
|
Loading…
x
Reference in New Issue
Block a user