mirror of
https://github.com/postgres/postgres.git
synced 2025-05-12 16:21:30 +03:00
With Asserts off, these variables are set but never used, resulting in warnings from pickier compilers. Fix that with our standard solution. Per report from Jeff Janes.
967 lines
24 KiB
C
967 lines
24 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* gistutil.c
|
|
* utilities routines for the postgres GiST index access method.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/access/gist/gistutil.c
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include <math.h>
|
|
|
|
#include "access/gist_private.h"
|
|
#include "access/htup_details.h"
|
|
#include "access/reloptions.h"
|
|
#include "catalog/pg_opclass.h"
|
|
#include "storage/indexfsm.h"
|
|
#include "storage/lmgr.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/syscache.h"
|
|
|
|
|
|
/*
|
|
* Write itup vector to page, has no control of free space.
|
|
*/
|
|
void
|
|
gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
|
|
{
|
|
OffsetNumber l = InvalidOffsetNumber;
|
|
int i;
|
|
|
|
if (off == InvalidOffsetNumber)
|
|
off = (PageIsEmpty(page)) ? FirstOffsetNumber :
|
|
OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
|
|
|
for (i = 0; i < len; i++)
|
|
{
|
|
Size sz = IndexTupleSize(itup[i]);
|
|
|
|
l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
|
|
if (l == InvalidOffsetNumber)
|
|
elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
|
|
i, len, (int) sz);
|
|
off++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check space for itup vector on page
|
|
*/
|
|
bool
|
|
gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
|
|
{
|
|
unsigned int size = freespace,
|
|
deleted = 0;
|
|
int i;
|
|
|
|
for (i = 0; i < len; i++)
|
|
size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
|
|
|
|
if (todelete != InvalidOffsetNumber)
|
|
{
|
|
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
|
|
|
|
deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
|
|
}
|
|
|
|
return (PageGetFreeSpace(page) + deleted < size);
|
|
}
|
|
|
|
bool
|
|
gistfitpage(IndexTuple *itvec, int len)
|
|
{
|
|
int i;
|
|
Size size = 0;
|
|
|
|
for (i = 0; i < len; i++)
|
|
size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
|
|
|
|
/* TODO: Consider fillfactor */
|
|
return (size <= GiSTPageSize);
|
|
}
|
|
|
|
/*
|
|
* Read buffer into itup vector
|
|
*/
|
|
IndexTuple *
|
|
gistextractpage(Page page, int *len /* out */ )
|
|
{
|
|
OffsetNumber i,
|
|
maxoff;
|
|
IndexTuple *itvec;
|
|
|
|
maxoff = PageGetMaxOffsetNumber(page);
|
|
*len = maxoff;
|
|
itvec = palloc(sizeof(IndexTuple) * maxoff);
|
|
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
|
|
itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
|
|
|
|
return itvec;
|
|
}
|
|
|
|
/*
|
|
* join two vectors into one
|
|
*/
|
|
IndexTuple *
|
|
gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
|
|
{
|
|
itvec = (IndexTuple *) repalloc((void *) itvec, sizeof(IndexTuple) * ((*len) + addlen));
|
|
memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
|
|
*len += addlen;
|
|
return itvec;
|
|
}
|
|
|
|
/*
|
|
* make plain IndexTupleVector
|
|
*/
|
|
|
|
IndexTupleData *
|
|
gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
|
|
{
|
|
char *ptr,
|
|
*ret;
|
|
int i;
|
|
|
|
*memlen = 0;
|
|
|
|
for (i = 0; i < veclen; i++)
|
|
*memlen += IndexTupleSize(vec[i]);
|
|
|
|
ptr = ret = palloc(*memlen);
|
|
|
|
for (i = 0; i < veclen; i++)
|
|
{
|
|
memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
|
|
ptr += IndexTupleSize(vec[i]);
|
|
}
|
|
|
|
return (IndexTupleData *) ret;
|
|
}
|
|
|
|
/*
|
|
* Make unions of keys in IndexTuple vector (one union datum per index column).
|
|
* Union Datums are returned into the attr/isnull arrays.
|
|
* Resulting Datums aren't compressed.
|
|
*/
|
|
void
|
|
gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
|
|
Datum *attr, bool *isnull)
|
|
{
|
|
int i;
|
|
GistEntryVector *evec;
|
|
int attrsize;
|
|
|
|
evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
|
|
|
|
for (i = 0; i < giststate->tupdesc->natts; i++)
|
|
{
|
|
int j;
|
|
|
|
/* Collect non-null datums for this column */
|
|
evec->n = 0;
|
|
for (j = 0; j < len; j++)
|
|
{
|
|
Datum datum;
|
|
bool IsNull;
|
|
|
|
datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
|
|
if (IsNull)
|
|
continue;
|
|
|
|
gistdentryinit(giststate, i,
|
|
evec->vector + evec->n,
|
|
datum,
|
|
NULL, NULL, (OffsetNumber) 0,
|
|
FALSE, IsNull);
|
|
evec->n++;
|
|
}
|
|
|
|
/* If this column was all NULLs, the union is NULL */
|
|
if (evec->n == 0)
|
|
{
|
|
attr[i] = (Datum) 0;
|
|
isnull[i] = TRUE;
|
|
}
|
|
else
|
|
{
|
|
if (evec->n == 1)
|
|
{
|
|
/* unionFn may expect at least two inputs */
|
|
evec->n = 2;
|
|
evec->vector[1] = evec->vector[0];
|
|
}
|
|
|
|
/* Make union and store in attr array */
|
|
attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
|
|
giststate->supportCollation[i],
|
|
PointerGetDatum(evec),
|
|
PointerGetDatum(&attrsize));
|
|
|
|
isnull[i] = FALSE;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Return an IndexTuple containing the result of applying the "union"
|
|
* method to the specified IndexTuple vector.
|
|
*/
|
|
IndexTuple
|
|
gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
|
|
{
|
|
Datum attr[INDEX_MAX_KEYS];
|
|
bool isnull[INDEX_MAX_KEYS];
|
|
|
|
gistMakeUnionItVec(giststate, itvec, len, attr, isnull);
|
|
|
|
return gistFormTuple(giststate, r, attr, isnull, false);
|
|
}
|
|
|
|
/*
|
|
* makes union of two key
|
|
*/
|
|
void
|
|
gistMakeUnionKey(GISTSTATE *giststate, int attno,
|
|
GISTENTRY *entry1, bool isnull1,
|
|
GISTENTRY *entry2, bool isnull2,
|
|
Datum *dst, bool *dstisnull)
|
|
{
|
|
/* we need a GistEntryVector with room for exactly 2 elements */
|
|
union
|
|
{
|
|
GistEntryVector gev;
|
|
char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
|
|
} storage;
|
|
GistEntryVector *evec = &storage.gev;
|
|
int dstsize;
|
|
|
|
evec->n = 2;
|
|
|
|
if (isnull1 && isnull2)
|
|
{
|
|
*dstisnull = TRUE;
|
|
*dst = (Datum) 0;
|
|
}
|
|
else
|
|
{
|
|
if (isnull1 == FALSE && isnull2 == FALSE)
|
|
{
|
|
evec->vector[0] = *entry1;
|
|
evec->vector[1] = *entry2;
|
|
}
|
|
else if (isnull1 == FALSE)
|
|
{
|
|
evec->vector[0] = *entry1;
|
|
evec->vector[1] = *entry1;
|
|
}
|
|
else
|
|
{
|
|
evec->vector[0] = *entry2;
|
|
evec->vector[1] = *entry2;
|
|
}
|
|
|
|
*dstisnull = FALSE;
|
|
*dst = FunctionCall2Coll(&giststate->unionFn[attno],
|
|
giststate->supportCollation[attno],
|
|
PointerGetDatum(evec),
|
|
PointerGetDatum(&dstsize));
|
|
}
|
|
}
|
|
|
|
bool
|
|
gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
|
|
{
|
|
bool result;
|
|
|
|
FunctionCall3Coll(&giststate->equalFn[attno],
|
|
giststate->supportCollation[attno],
|
|
a, b,
|
|
PointerGetDatum(&result));
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Decompress all keys in tuple
|
|
*/
|
|
void
|
|
gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
|
|
OffsetNumber o, GISTENTRY *attdata, bool *isnull)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
Datum datum;
|
|
|
|
datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
|
|
gistdentryinit(giststate, i, &attdata[i],
|
|
datum, r, p, o,
|
|
FALSE, isnull[i]);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Forms union of oldtup and addtup, if union == oldtup then return NULL
|
|
*/
|
|
IndexTuple
|
|
gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
|
|
{
|
|
bool neednew = FALSE;
|
|
GISTENTRY oldentries[INDEX_MAX_KEYS],
|
|
addentries[INDEX_MAX_KEYS];
|
|
bool oldisnull[INDEX_MAX_KEYS],
|
|
addisnull[INDEX_MAX_KEYS];
|
|
Datum attr[INDEX_MAX_KEYS];
|
|
bool isnull[INDEX_MAX_KEYS];
|
|
IndexTuple newtup = NULL;
|
|
int i;
|
|
|
|
gistDeCompressAtt(giststate, r, oldtup, NULL,
|
|
(OffsetNumber) 0, oldentries, oldisnull);
|
|
|
|
gistDeCompressAtt(giststate, r, addtup, NULL,
|
|
(OffsetNumber) 0, addentries, addisnull);
|
|
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
gistMakeUnionKey(giststate, i,
|
|
oldentries + i, oldisnull[i],
|
|
addentries + i, addisnull[i],
|
|
attr + i, isnull + i);
|
|
|
|
if (neednew)
|
|
/* we already need new key, so we can skip check */
|
|
continue;
|
|
|
|
if (isnull[i])
|
|
/* union of key may be NULL if and only if both keys are NULL */
|
|
continue;
|
|
|
|
if (!addisnull[i])
|
|
{
|
|
if (oldisnull[i] ||
|
|
!gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
|
|
neednew = true;
|
|
}
|
|
}
|
|
|
|
if (neednew)
|
|
{
|
|
/* need to update key */
|
|
newtup = gistFormTuple(giststate, r, attr, isnull, false);
|
|
newtup->t_tid = oldtup->t_tid;
|
|
}
|
|
|
|
return newtup;
|
|
}
|
|
|
|
/*
|
|
* Search an upper index page for the entry with lowest penalty for insertion
|
|
* of the new index key contained in "it".
|
|
*
|
|
* Returns the index of the page entry to insert into.
|
|
*/
|
|
OffsetNumber
|
|
gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
|
|
GISTSTATE *giststate)
|
|
{
|
|
OffsetNumber result;
|
|
OffsetNumber maxoff;
|
|
OffsetNumber i;
|
|
float best_penalty[INDEX_MAX_KEYS];
|
|
GISTENTRY entry,
|
|
identry[INDEX_MAX_KEYS];
|
|
bool isnull[INDEX_MAX_KEYS];
|
|
int keep_current_best;
|
|
|
|
Assert(!GistPageIsLeaf(p));
|
|
|
|
gistDeCompressAtt(giststate, r,
|
|
it, NULL, (OffsetNumber) 0,
|
|
identry, isnull);
|
|
|
|
/* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
|
|
result = FirstOffsetNumber;
|
|
|
|
/*
|
|
* The index may have multiple columns, and there's a penalty value for
|
|
* each column. The penalty associated with a column that appears earlier
|
|
* in the index definition is strictly more important than the penalty of
|
|
* a column that appears later in the index definition.
|
|
*
|
|
* best_penalty[j] is the best penalty we have seen so far for column j,
|
|
* or -1 when we haven't yet examined column j. Array entries to the
|
|
* right of the first -1 are undefined.
|
|
*/
|
|
best_penalty[0] = -1;
|
|
|
|
/*
|
|
* If we find a tuple that's exactly as good as the currently best one, we
|
|
* could use either one. When inserting a lot of tuples with the same or
|
|
* similar keys, it's preferable to descend down the same path when
|
|
* possible, as that's more cache-friendly. On the other hand, if all
|
|
* inserts land on the same leaf page after a split, we're never going to
|
|
* insert anything to the other half of the split, and will end up using
|
|
* only 50% of the available space. Distributing the inserts evenly would
|
|
* lead to better space usage, but that hurts cache-locality during
|
|
* insertion. To get the best of both worlds, when we find a tuple that's
|
|
* exactly as good as the previous best, choose randomly whether to stick
|
|
* to the old best, or use the new one. Once we decide to stick to the
|
|
* old best, we keep sticking to it for any subsequent equally good tuples
|
|
* we might find. This favors tuples with low offsets, but still allows
|
|
* some inserts to go to other equally-good subtrees.
|
|
*
|
|
* keep_current_best is -1 if we haven't yet had to make a random choice
|
|
* whether to keep the current best tuple. If we have done so, and
|
|
* decided to keep it, keep_current_best is 1; if we've decided to
|
|
* replace, keep_current_best is 0. (This state will be reset to -1 as
|
|
* soon as we've made the replacement, but sometimes we make the choice in
|
|
* advance of actually finding a replacement best tuple.)
|
|
*/
|
|
keep_current_best = -1;
|
|
|
|
/*
|
|
* Loop over tuples on page.
|
|
*/
|
|
maxoff = PageGetMaxOffsetNumber(p);
|
|
Assert(maxoff >= FirstOffsetNumber);
|
|
|
|
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
|
|
{
|
|
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
|
|
bool zero_penalty;
|
|
int j;
|
|
|
|
zero_penalty = true;
|
|
|
|
/* Loop over index attributes. */
|
|
for (j = 0; j < r->rd_att->natts; j++)
|
|
{
|
|
Datum datum;
|
|
float usize;
|
|
bool IsNull;
|
|
|
|
/* Compute penalty for this column. */
|
|
datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
|
|
gistdentryinit(giststate, j, &entry, datum, r, p, i,
|
|
FALSE, IsNull);
|
|
usize = gistpenalty(giststate, j, &entry, IsNull,
|
|
&identry[j], isnull[j]);
|
|
if (usize > 0)
|
|
zero_penalty = false;
|
|
|
|
if (best_penalty[j] < 0 || usize < best_penalty[j])
|
|
{
|
|
/*
|
|
* New best penalty for column. Tentatively select this tuple
|
|
* as the target, and record the best penalty. Then reset the
|
|
* next column's penalty to "unknown" (and indirectly, the
|
|
* same for all the ones to its right). This will force us to
|
|
* adopt this tuple's penalty values as the best for all the
|
|
* remaining columns during subsequent loop iterations.
|
|
*/
|
|
result = i;
|
|
best_penalty[j] = usize;
|
|
|
|
if (j < r->rd_att->natts - 1)
|
|
best_penalty[j + 1] = -1;
|
|
|
|
/* we have new best, so reset keep-it decision */
|
|
keep_current_best = -1;
|
|
}
|
|
else if (best_penalty[j] == usize)
|
|
{
|
|
/*
|
|
* The current tuple is exactly as good for this column as the
|
|
* best tuple seen so far. The next iteration of this loop
|
|
* will compare the next column.
|
|
*/
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The current tuple is worse for this column than the best
|
|
* tuple seen so far. Skip the remaining columns and move on
|
|
* to the next tuple, if any.
|
|
*/
|
|
zero_penalty = false; /* so outer loop won't exit */
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If we looped past the last column, and did not update "result",
|
|
* then this tuple is exactly as good as the prior best tuple.
|
|
*/
|
|
if (j == r->rd_att->natts && result != i)
|
|
{
|
|
if (keep_current_best == -1)
|
|
{
|
|
/* we didn't make the random choice yet for this old best */
|
|
keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
|
|
}
|
|
if (keep_current_best == 0)
|
|
{
|
|
/* we choose to use the new tuple */
|
|
result = i;
|
|
/* choose again if there are even more exactly-as-good ones */
|
|
keep_current_best = -1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If we find a tuple with zero penalty for all columns, and we've
|
|
* decided we don't want to search for another tuple with equal
|
|
* penalty, there's no need to examine remaining tuples; just break
|
|
* out of the loop and return it.
|
|
*/
|
|
if (zero_penalty)
|
|
{
|
|
if (keep_current_best == -1)
|
|
{
|
|
/* we didn't make the random choice yet for this old best */
|
|
keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
|
|
}
|
|
if (keep_current_best == 1)
|
|
break;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* initialize a GiST entry with a decompressed version of key
|
|
*/
|
|
void
|
|
gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
|
|
Datum k, Relation r, Page pg, OffsetNumber o,
|
|
bool l, bool isNull)
|
|
{
|
|
if (!isNull)
|
|
{
|
|
GISTENTRY *dep;
|
|
|
|
gistentryinit(*e, k, r, pg, o, l);
|
|
dep = (GISTENTRY *)
|
|
DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey],
|
|
giststate->supportCollation[nkey],
|
|
PointerGetDatum(e)));
|
|
/* decompressFn may just return the given pointer */
|
|
if (dep != e)
|
|
gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
|
|
dep->leafkey);
|
|
}
|
|
else
|
|
gistentryinit(*e, (Datum) 0, r, pg, o, l);
|
|
}
|
|
|
|
IndexTuple
|
|
gistFormTuple(GISTSTATE *giststate, Relation r,
|
|
Datum attdata[], bool isnull[], bool isleaf)
|
|
{
|
|
Datum compatt[INDEX_MAX_KEYS];
|
|
int i;
|
|
IndexTuple res;
|
|
|
|
/*
|
|
* Call the compress method on each attribute.
|
|
*/
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
if (isnull[i])
|
|
compatt[i] = (Datum) 0;
|
|
else
|
|
{
|
|
GISTENTRY centry;
|
|
GISTENTRY *cep;
|
|
|
|
gistentryinit(centry, attdata[i], r, NULL, (OffsetNumber) 0,
|
|
isleaf);
|
|
cep = (GISTENTRY *)
|
|
DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[i],
|
|
giststate->supportCollation[i],
|
|
PointerGetDatum(¢ry)));
|
|
compatt[i] = cep->key;
|
|
}
|
|
}
|
|
|
|
res = index_form_tuple(giststate->tupdesc, compatt, isnull);
|
|
|
|
/*
|
|
* The offset number on tuples on internal pages is unused. For historical
|
|
* reasons, it is set to 0xffff.
|
|
*/
|
|
ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
|
|
return res;
|
|
}
|
|
|
|
/*
|
|
* initialize a GiST entry with fetched value in key field
|
|
*/
|
|
static Datum
|
|
gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
|
|
{
|
|
GISTENTRY fentry;
|
|
GISTENTRY *fep;
|
|
|
|
gistentryinit(fentry, k, r, NULL, (OffsetNumber) 0, false);
|
|
|
|
fep = (GISTENTRY *)
|
|
DatumGetPointer(FunctionCall1Coll(&giststate->fetchFn[nkey],
|
|
giststate->supportCollation[nkey],
|
|
PointerGetDatum(&fentry)));
|
|
|
|
/* fetchFn set 'key', return it to the caller */
|
|
return fep->key;
|
|
}
|
|
|
|
/*
|
|
* Fetch all keys in tuple.
|
|
* returns new IndexTuple that contains GISTENTRY with fetched data
|
|
*/
|
|
IndexTuple
|
|
gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple)
|
|
{
|
|
MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt);
|
|
Datum fetchatt[INDEX_MAX_KEYS];
|
|
bool isnull[INDEX_MAX_KEYS];
|
|
int i;
|
|
|
|
for (i = 0; i < r->rd_att->natts; i++)
|
|
{
|
|
Datum datum;
|
|
|
|
datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
|
|
|
|
if (giststate->fetchFn[i].fn_oid != InvalidOid)
|
|
{
|
|
if (!isnull[i])
|
|
fetchatt[i] = gistFetchAtt(giststate, i, datum, r);
|
|
else
|
|
fetchatt[i] = (Datum) 0;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Index-only scans not supported for this column. Since the
|
|
* planner chose an index-only scan anyway, it is not interested
|
|
* in this column, and we can replace it with a NULL.
|
|
*/
|
|
isnull[i] = true;
|
|
fetchatt[i] = (Datum) 0;
|
|
}
|
|
}
|
|
MemoryContextSwitchTo(oldcxt);
|
|
|
|
return index_form_tuple(giststate->fetchTupdesc, fetchatt, isnull);
|
|
}
|
|
|
|
float
|
|
gistpenalty(GISTSTATE *giststate, int attno,
|
|
GISTENTRY *orig, bool isNullOrig,
|
|
GISTENTRY *add, bool isNullAdd)
|
|
{
|
|
float penalty = 0.0;
|
|
|
|
if (giststate->penaltyFn[attno].fn_strict == FALSE ||
|
|
(isNullOrig == FALSE && isNullAdd == FALSE))
|
|
{
|
|
FunctionCall3Coll(&giststate->penaltyFn[attno],
|
|
giststate->supportCollation[attno],
|
|
PointerGetDatum(orig),
|
|
PointerGetDatum(add),
|
|
PointerGetDatum(&penalty));
|
|
/* disallow negative or NaN penalty */
|
|
if (isnan(penalty) || penalty < 0.0)
|
|
penalty = 0.0;
|
|
}
|
|
else if (isNullOrig && isNullAdd)
|
|
penalty = 0.0;
|
|
else
|
|
{
|
|
/* try to prevent mixing null and non-null values */
|
|
penalty = get_float4_infinity();
|
|
}
|
|
|
|
return penalty;
|
|
}
|
|
|
|
/*
|
|
* Initialize a new index page
|
|
*/
|
|
void
|
|
GISTInitBuffer(Buffer b, uint32 f)
|
|
{
|
|
GISTPageOpaque opaque;
|
|
Page page;
|
|
Size pageSize;
|
|
|
|
pageSize = BufferGetPageSize(b);
|
|
page = BufferGetPage(b);
|
|
PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
|
|
|
|
opaque = GistPageGetOpaque(page);
|
|
/* page was already zeroed by PageInit, so this is not needed: */
|
|
/* memset(&(opaque->nsn), 0, sizeof(GistNSN)); */
|
|
opaque->rightlink = InvalidBlockNumber;
|
|
opaque->flags = f;
|
|
opaque->gist_page_id = GIST_PAGE_ID;
|
|
}
|
|
|
|
/*
|
|
* Verify that a freshly-read page looks sane.
|
|
*/
|
|
void
|
|
gistcheckpage(Relation rel, Buffer buf)
|
|
{
|
|
Page page = BufferGetPage(buf);
|
|
|
|
/*
|
|
* ReadBuffer verifies that every newly-read page passes
|
|
* PageHeaderIsValid, which means it either contains a reasonably sane
|
|
* page header or is all-zero. We have to defend against the all-zero
|
|
* case, however.
|
|
*/
|
|
if (PageIsNew(page))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INDEX_CORRUPTED),
|
|
errmsg("index \"%s\" contains unexpected zero page at block %u",
|
|
RelationGetRelationName(rel),
|
|
BufferGetBlockNumber(buf)),
|
|
errhint("Please REINDEX it.")));
|
|
|
|
/*
|
|
* Additionally check that the special area looks sane.
|
|
*/
|
|
if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INDEX_CORRUPTED),
|
|
errmsg("index \"%s\" contains corrupted page at block %u",
|
|
RelationGetRelationName(rel),
|
|
BufferGetBlockNumber(buf)),
|
|
errhint("Please REINDEX it.")));
|
|
}
|
|
|
|
|
|
/*
|
|
* Allocate a new page (either by recycling, or by extending the index file)
|
|
*
|
|
* The returned buffer is already pinned and exclusive-locked
|
|
*
|
|
* Caller is responsible for initializing the page by calling GISTInitBuffer
|
|
*/
|
|
Buffer
|
|
gistNewBuffer(Relation r)
|
|
{
|
|
Buffer buffer;
|
|
bool needLock;
|
|
|
|
/* First, try to get a page from FSM */
|
|
for (;;)
|
|
{
|
|
BlockNumber blkno = GetFreeIndexPage(r);
|
|
|
|
if (blkno == InvalidBlockNumber)
|
|
break; /* nothing left in FSM */
|
|
|
|
buffer = ReadBuffer(r, blkno);
|
|
|
|
/*
|
|
* We have to guard against the possibility that someone else already
|
|
* recycled this page; the buffer may be locked if so.
|
|
*/
|
|
if (ConditionalLockBuffer(buffer))
|
|
{
|
|
Page page = BufferGetPage(buffer);
|
|
|
|
if (PageIsNew(page))
|
|
return buffer; /* OK to use, if never initialized */
|
|
|
|
gistcheckpage(r, buffer);
|
|
|
|
if (GistPageIsDeleted(page))
|
|
return buffer; /* OK to use */
|
|
|
|
LockBuffer(buffer, GIST_UNLOCK);
|
|
}
|
|
|
|
/* Can't use it, so release buffer and try again */
|
|
ReleaseBuffer(buffer);
|
|
}
|
|
|
|
/* Must extend the file */
|
|
needLock = !RELATION_IS_LOCAL(r);
|
|
|
|
if (needLock)
|
|
LockRelationForExtension(r, ExclusiveLock);
|
|
|
|
buffer = ReadBuffer(r, P_NEW);
|
|
LockBuffer(buffer, GIST_EXCLUSIVE);
|
|
|
|
if (needLock)
|
|
UnlockRelationForExtension(r, ExclusiveLock);
|
|
|
|
return buffer;
|
|
}
|
|
|
|
bytea *
|
|
gistoptions(Datum reloptions, bool validate)
|
|
{
|
|
relopt_value *options;
|
|
GiSTOptions *rdopts;
|
|
int numoptions;
|
|
static const relopt_parse_elt tab[] = {
|
|
{"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
|
|
{"buffering", RELOPT_TYPE_STRING, offsetof(GiSTOptions, bufferingModeOffset)}
|
|
};
|
|
|
|
options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIST,
|
|
&numoptions);
|
|
|
|
/* if none set, we're done */
|
|
if (numoptions == 0)
|
|
return NULL;
|
|
|
|
rdopts = allocateReloptStruct(sizeof(GiSTOptions), options, numoptions);
|
|
|
|
fillRelOptions((void *) rdopts, sizeof(GiSTOptions), options, numoptions,
|
|
validate, tab, lengthof(tab));
|
|
|
|
pfree(options);
|
|
|
|
return (bytea *) rdopts;
|
|
}
|
|
|
|
/*
|
|
* gistproperty() -- Check boolean properties of indexes.
|
|
*
|
|
* This is optional for most AMs, but is required for GiST because the core
|
|
* property code doesn't support AMPROP_DISTANCE_ORDERABLE. We also handle
|
|
* AMPROP_RETURNABLE here to save opening the rel to call gistcanreturn.
|
|
*/
|
|
bool
|
|
gistproperty(Oid index_oid, int attno,
|
|
IndexAMProperty prop, const char *propname,
|
|
bool *res, bool *isnull)
|
|
{
|
|
HeapTuple tuple;
|
|
Form_pg_index rd_index PG_USED_FOR_ASSERTS_ONLY;
|
|
Form_pg_opclass rd_opclass;
|
|
Datum datum;
|
|
bool disnull;
|
|
oidvector *indclass;
|
|
Oid opclass,
|
|
opfamily,
|
|
opcintype;
|
|
int16 procno;
|
|
|
|
/* Only answer column-level inquiries */
|
|
if (attno == 0)
|
|
return false;
|
|
|
|
/*
|
|
* Currently, GiST distance-ordered scans require that there be a distance
|
|
* function in the opclass with the default types (i.e. the one loaded
|
|
* into the relcache entry, see initGISTstate). So we assume that if such
|
|
* a function exists, then there's a reason for it (rather than grubbing
|
|
* through all the opfamily's operators to find an ordered one).
|
|
*
|
|
* Essentially the same code can test whether we support returning the
|
|
* column data, since that's true if the opclass provides a fetch proc.
|
|
*/
|
|
|
|
switch (prop)
|
|
{
|
|
case AMPROP_DISTANCE_ORDERABLE:
|
|
procno = GIST_DISTANCE_PROC;
|
|
break;
|
|
case AMPROP_RETURNABLE:
|
|
procno = GIST_FETCH_PROC;
|
|
break;
|
|
default:
|
|
return false;
|
|
}
|
|
|
|
/* First we need to know the column's opclass. */
|
|
|
|
tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
|
|
if (!HeapTupleIsValid(tuple))
|
|
{
|
|
*isnull = true;
|
|
return true;
|
|
}
|
|
rd_index = (Form_pg_index) GETSTRUCT(tuple);
|
|
|
|
/* caller is supposed to guarantee this */
|
|
Assert(attno > 0 && attno <= rd_index->indnatts);
|
|
|
|
datum = SysCacheGetAttr(INDEXRELID, tuple,
|
|
Anum_pg_index_indclass, &disnull);
|
|
Assert(!disnull);
|
|
|
|
indclass = ((oidvector *) DatumGetPointer(datum));
|
|
opclass = indclass->values[attno - 1];
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
/* Now look up the opclass family and input datatype. */
|
|
|
|
tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
|
|
if (!HeapTupleIsValid(tuple))
|
|
{
|
|
*isnull = true;
|
|
return true;
|
|
}
|
|
rd_opclass = (Form_pg_opclass) GETSTRUCT(tuple);
|
|
|
|
opfamily = rd_opclass->opcfamily;
|
|
opcintype = rd_opclass->opcintype;
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
/* And now we can check whether the function is provided. */
|
|
|
|
*res = SearchSysCacheExists4(AMPROCNUM,
|
|
ObjectIdGetDatum(opfamily),
|
|
ObjectIdGetDatum(opcintype),
|
|
ObjectIdGetDatum(opcintype),
|
|
Int16GetDatum(procno));
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
|
|
* to detect concurrent page splits anyway. This function provides a fake
|
|
* sequence of LSNs for that purpose.
|
|
*/
|
|
XLogRecPtr
|
|
gistGetFakeLSN(Relation rel)
|
|
{
|
|
static XLogRecPtr counter = 1;
|
|
|
|
if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
|
|
{
|
|
/*
|
|
* Temporary relations are only accessible in our session, so a simple
|
|
* backend-local counter will do.
|
|
*/
|
|
return counter++;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Unlogged relations are accessible from other backends, and survive
|
|
* (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
|
|
*/
|
|
Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
|
|
return GetFakeLSNForUnloggedRel();
|
|
}
|
|
}
|