1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00

Fix handling of NULL distances in KNN-GiST

In order to implement NULL LAST semantic GiST previously assumed distance to
the NULL value to be Inf.  However, our distance functions can return Inf and
NaN for non-null values.  In such cases, NULL LAST semantic appears to be
broken.  This commit fixes that by introducing separate array of null flags for
distances.

Backpatch to all supported versions.

Discussion: https://postgr.es/m/CAPpHfdsNvNdA0DBS%2BwMpFrgwT6C3-q50sFVGLSiuWnV3FqOJuQ%40mail.gmail.com
Author: Alexander Korotkov
Backpatch-through: 9.4
This commit is contained in:
Alexander Korotkov
2019-09-08 21:13:40 +03:00
parent e5d8f35961
commit 02f90879e7
7 changed files with 106 additions and 36 deletions

View File

@@ -137,13 +137,30 @@ typedef struct GISTSearchItem
/* we must store parentlsn to detect whether a split occurred */
GISTSearchHeapItem heap; /* heap info, if heap tuple */
} data;
double distances[FLEXIBLE_ARRAY_MEMBER]; /* numberOfOrderBys
* entries */
/*
* This data structure is followed by arrays of distance values and
* distance null flags. Size of both arrays is
* IndexScanDesc->numberOfOrderBys. See macros below for accessing those
* arrays.
*/
} GISTSearchItem;
#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
#define SizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(double) * (n_distances))
#define SizeOfGISTSearchItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchItem)) + \
(sizeof(double) + sizeof(bool)) * (n_distances))
/*
* We actually don't need n_distances compute pointer to distance values.
* Nevertheless take n_distances as argument to have same arguments list for
* GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls().
*/
#define GISTSearchItemDistanceValues(item, n_distances) \
((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem))))
#define GISTSearchItemDistanceNulls(item, n_distances) \
((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem)) + sizeof(double) * (n_distances)))
/*
* GISTScanOpaqueData: private state for a scan of a GiST index
@@ -159,7 +176,8 @@ typedef struct GISTScanOpaqueData
bool firstCall; /* true until first gistgettuple call */
/* pre-allocated workspace arrays */
double *distances; /* output area for gistindex_keytest */
double *distanceValues; /* output area for gistindex_keytest */
bool *distanceNulls;
/* info about killed items if any (killedItems is NULL if never used) */
OffsetNumber *killedItems; /* offset numbers of killed items */