mirror of
https://github.com/postgres/postgres.git
synced 2025-05-02 11:44:50 +03:00
Document and clean up gistsplit.c.
Improve comments, rename some variables and functions, slightly simplify a couple of APIs, in an attempt to make this code readable by people other than its original author. Even though this is essentially just cosmetic, back-patch to all active branches, because otherwise it's going to make back-patching future fixes in this file very painful.
This commit is contained in:
parent
3ea1ab283a
commit
2f0491dd1a
@ -1238,18 +1238,12 @@ gistSplit(Relation r,
|
||||
IndexTuple *lvectup,
|
||||
*rvectup;
|
||||
GistSplitVector v;
|
||||
GistEntryVector *entryvec;
|
||||
int i;
|
||||
SplitedPageLayout *res = NULL;
|
||||
|
||||
/* generate the item array */
|
||||
entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
|
||||
entryvec->n = len + 1;
|
||||
|
||||
memset(v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
|
||||
memset(v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
|
||||
gistSplitByKey(r, page, itup, len, giststate,
|
||||
&v, entryvec, 0);
|
||||
gistSplitByKey(r, page, itup, len, giststate, &v, 0);
|
||||
|
||||
/* form left and right vector */
|
||||
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
|
||||
|
@ -1,7 +1,18 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* gistsplit.c
|
||||
* Split page algorithm
|
||||
* Multi-column page splitting algorithm
|
||||
*
|
||||
* This file is concerned with making good page-split decisions in multi-column
|
||||
* GiST indexes. The opclass-specific picksplit functions can only be expected
|
||||
* to produce answers based on a single column. We first run the picksplit
|
||||
* function for column 1; then, if there are more columns, we check if any of
|
||||
* the tuples are "don't cares" so far as the column 1 split is concerned
|
||||
* (that is, they could go to either side for no additional penalty). If so,
|
||||
* we try to redistribute those tuples on the basis of the next column.
|
||||
* Repeat till we're out of columns.
|
||||
*
|
||||
* gistSplitByKey() is the entry point to this file.
|
||||
*
|
||||
*
|
||||
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
||||
@ -23,13 +34,14 @@ typedef struct
|
||||
int len;
|
||||
Datum *attr;
|
||||
bool *isnull;
|
||||
bool *equiv;
|
||||
bool *dontcare;
|
||||
} GistSplitUnion;
|
||||
|
||||
|
||||
/*
|
||||
* Form unions of subkeys after a page split, ignoring any tuples
|
||||
* that are marked in gsvp->equiv[]
|
||||
* Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
|
||||
* ignoring any tuples that are marked in gsvp->dontcare[]. Subroutine for
|
||||
* gistunionsubkey.
|
||||
*/
|
||||
static void
|
||||
gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
|
||||
@ -43,7 +55,7 @@ gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
|
||||
|
||||
for (i = 0; i < gsvp->len; i++)
|
||||
{
|
||||
if (gsvp->equiv && gsvp->equiv[gsvp->entries[i]])
|
||||
if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
|
||||
continue;
|
||||
|
||||
cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
|
||||
@ -56,15 +68,20 @@ gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
|
||||
}
|
||||
|
||||
/*
|
||||
* Recompute unions of subkeys after a page split, ignoring any tuples
|
||||
* that are marked in spl->spl_equiv[]
|
||||
* Recompute unions of left- and right-side subkeys after a page split,
|
||||
* ignoring any tuples that are marked in spl->spl_dontcare[].
|
||||
*
|
||||
* Note: we always recompute union keys for all index columns. In some cases
|
||||
* this might represent duplicate work for the leftmost column(s), but it's
|
||||
* not safe to assume that "zero penalty to move a tuple" means "the union
|
||||
* key doesn't change at all". Penalty functions aren't 100% accurate.
|
||||
*/
|
||||
static void
|
||||
gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
|
||||
{
|
||||
GistSplitUnion gsvp;
|
||||
|
||||
gsvp.equiv = spl->spl_equiv;
|
||||
gsvp.dontcare = spl->spl_dontcare;
|
||||
|
||||
gsvp.entries = spl->splitVector.spl_left;
|
||||
gsvp.len = spl->splitVector.spl_nleft;
|
||||
@ -82,90 +99,127 @@ gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
|
||||
}
|
||||
|
||||
/*
|
||||
* find group in vector with equivalent value
|
||||
* Find tuples that are "don't cares", that is could be moved to the other
|
||||
* side of the split with zero penalty, so far as the attno column is
|
||||
* concerned.
|
||||
*
|
||||
* Don't-care tuples are marked by setting the corresponding entry in
|
||||
* spl->spl_dontcare[] to "true". Caller must have initialized that array
|
||||
* to zeroes.
|
||||
*
|
||||
* Returns number of don't-cares found.
|
||||
*/
|
||||
static int
|
||||
gistfindgroup(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, GistSplitVector *spl, int attno)
|
||||
findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
|
||||
GistSplitVector *spl, int attno)
|
||||
{
|
||||
int i;
|
||||
GISTENTRY entry;
|
||||
int len = 0;
|
||||
int NumDontCare = 0;
|
||||
|
||||
/*
|
||||
* attno key is always not null (see gistSplitByKey), so we may not check
|
||||
* for nulls
|
||||
* First, search the left-side tuples to see if any have zero penalty to
|
||||
* be added to the right-side union key.
|
||||
*
|
||||
* attno column is known all-not-null (see gistSplitByKey), so we need not
|
||||
* check for nulls
|
||||
*/
|
||||
gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, (OffsetNumber) 0, FALSE);
|
||||
gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
|
||||
(OffsetNumber) 0, FALSE);
|
||||
for (i = 0; i < spl->splitVector.spl_nleft; i++)
|
||||
{
|
||||
int j = spl->splitVector.spl_left[i];
|
||||
float penalty = gistpenalty(giststate, attno, &entry, false,
|
||||
&valvec[spl->splitVector.spl_left[i]], false);
|
||||
&valvec[j], false);
|
||||
|
||||
if (penalty == 0.0)
|
||||
{
|
||||
spl->spl_equiv[spl->splitVector.spl_left[i]] = true;
|
||||
len++;
|
||||
spl->spl_dontcare[j] = true;
|
||||
NumDontCare++;
|
||||
}
|
||||
}
|
||||
|
||||
gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, (OffsetNumber) 0, FALSE);
|
||||
/* And conversely for the right-side tuples */
|
||||
gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
|
||||
(OffsetNumber) 0, FALSE);
|
||||
for (i = 0; i < spl->splitVector.spl_nright; i++)
|
||||
{
|
||||
int j = spl->splitVector.spl_right[i];
|
||||
float penalty = gistpenalty(giststate, attno, &entry, false,
|
||||
&valvec[spl->splitVector.spl_right[i]], false);
|
||||
&valvec[j], false);
|
||||
|
||||
if (penalty == 0.0)
|
||||
{
|
||||
spl->spl_equiv[spl->splitVector.spl_right[i]] = true;
|
||||
len++;
|
||||
spl->spl_dontcare[j] = true;
|
||||
NumDontCare++;
|
||||
}
|
||||
}
|
||||
|
||||
return len;
|
||||
return NumDontCare;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove tuples that are marked don't-cares from the tuple index array a[]
|
||||
* of length *len. This is applied separately to the spl_left and spl_right
|
||||
* arrays.
|
||||
*
|
||||
* Corner case: we do not wish to reduce the index array to zero length.
|
||||
* (If we did, then the union key for this side would be null, and having just
|
||||
* one of spl_ldatum_exists and spl_rdatum_exists be TRUE might confuse
|
||||
* user-defined PickSplit methods.) To avoid that, we'll forcibly redefine
|
||||
* one tuple as non-don't-care if necessary. Hence, we must be able to adjust
|
||||
* caller's NumDontCare count.
|
||||
*/
|
||||
static void
|
||||
cleanupOffsets(OffsetNumber *a, int *len, bool *equiv, int *LenEquiv)
|
||||
removeDontCares(OffsetNumber *a, int *len, bool *dontcare, int *NumDontCare)
|
||||
{
|
||||
int curlen,
|
||||
int origlen,
|
||||
curlen,
|
||||
i;
|
||||
OffsetNumber *curwpos;
|
||||
|
||||
curlen = *len;
|
||||
origlen = curlen = *len;
|
||||
curwpos = a;
|
||||
for (i = 0; i < *len; i++)
|
||||
for (i = 0; i < origlen; i++)
|
||||
{
|
||||
if (equiv[a[i]] == FALSE)
|
||||
OffsetNumber ai = a[i];
|
||||
|
||||
if (dontcare[ai] == FALSE)
|
||||
{
|
||||
*curwpos = a[i];
|
||||
/* re-emit item into a[] */
|
||||
*curwpos = ai;
|
||||
curwpos++;
|
||||
}
|
||||
else
|
||||
else if (curlen == 1)
|
||||
{
|
||||
/* corner case: we shouldn't make void array */
|
||||
if (curlen == 1)
|
||||
{
|
||||
equiv[a[i]] = FALSE; /* mark item as non-equivalent */
|
||||
i--; /* redo the same */
|
||||
*LenEquiv -= 1;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
curlen--;
|
||||
/* corner case: don't let array become empty */
|
||||
dontcare[ai] = FALSE; /* mark item as non-dont-care */
|
||||
*NumDontCare -= 1;
|
||||
i--; /* reprocess item on next iteration */
|
||||
}
|
||||
else
|
||||
curlen--;
|
||||
}
|
||||
|
||||
*len = curlen;
|
||||
}
|
||||
|
||||
/*
|
||||
* Place a single don't-care tuple into either the left or right side of the
|
||||
* split, according to which has least penalty for merging the tuple into
|
||||
* the previously-computed union keys. We need consider only columns starting
|
||||
* at attno.
|
||||
*/
|
||||
static void
|
||||
placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup, OffsetNumber off, int attno)
|
||||
placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
|
||||
IndexTuple itup, OffsetNumber off, int attno)
|
||||
{
|
||||
GISTENTRY identry[INDEX_MAX_KEYS];
|
||||
bool isnull[INDEX_MAX_KEYS];
|
||||
bool toLeft = true;
|
||||
|
||||
gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, identry, isnull);
|
||||
gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
|
||||
identry, isnull);
|
||||
|
||||
for (; attno < giststate->tupdesc->natts; attno++)
|
||||
{
|
||||
@ -174,9 +228,11 @@ placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup,
|
||||
GISTENTRY entry;
|
||||
|
||||
gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
|
||||
lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], identry + attno, isnull[attno]);
|
||||
lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
|
||||
identry + attno, isnull[attno]);
|
||||
gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
|
||||
rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], identry + attno, isnull[attno]);
|
||||
rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
|
||||
identry + attno, isnull[attno]);
|
||||
|
||||
if (lpenalty != rpenalty)
|
||||
{
|
||||
@ -200,13 +256,21 @@ do { \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
* adjust left and right unions according to splits by previous
|
||||
* split by first columns. This function is called only in case
|
||||
* when pickSplit doesn't support subsplit.
|
||||
* Clean up when we did a secondary split but the user-defined PickSplit
|
||||
* method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
|
||||
* true).
|
||||
*
|
||||
* We consider whether to swap the left and right outputs of the secondary
|
||||
* split; this can be worthwhile if the penalty for merging those tuples into
|
||||
* the previously chosen sets is less that way.
|
||||
*
|
||||
* In any case we must update the union datums for the current column by
|
||||
* adding in the previous union keys (oldL/oldR), since the user-defined
|
||||
* PickSplit method didn't do so.
|
||||
*/
|
||||
|
||||
static void
|
||||
supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
|
||||
supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
|
||||
GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
|
||||
{
|
||||
bool leaveOnLeft = true,
|
||||
tmpBool;
|
||||
@ -232,7 +296,6 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC
|
||||
|
||||
if (penalty1 > penalty2)
|
||||
leaveOnLeft = false;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -244,7 +307,6 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC
|
||||
* there is only one previously defined union, so we just choose swap
|
||||
* or not by lowest penalty
|
||||
*/
|
||||
|
||||
penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
|
||||
penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
|
||||
|
||||
@ -282,10 +344,9 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC
|
||||
}
|
||||
|
||||
/*
|
||||
* Trivial picksplit implementaion. Function called only
|
||||
* if user-defined picksplit puts all keys to the one page.
|
||||
* That is a bug of user-defined picksplit but we'd like
|
||||
* to "fix" that.
|
||||
* Trivial picksplit implementation. Function called only
|
||||
* if user-defined picksplit puts all keys on the same side of the split.
|
||||
* That is a bug of user-defined picksplit but we don't want to fail.
|
||||
*/
|
||||
static void
|
||||
genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
|
||||
@ -318,9 +379,8 @@ genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC
|
||||
}
|
||||
|
||||
/*
|
||||
* Form unions of each page
|
||||
* Form union datums for each side
|
||||
*/
|
||||
|
||||
evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
|
||||
|
||||
evec->n = v->spl_nleft;
|
||||
@ -341,12 +401,17 @@ genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC
|
||||
}
|
||||
|
||||
/*
|
||||
* Calls user picksplit method for attno columns to split vector to
|
||||
* two vectors. May use attno+n columns data to
|
||||
* get better split.
|
||||
* Returns TRUE and v->spl_equiv = NULL if left and right unions of attno columns are the same,
|
||||
* so caller may find better split
|
||||
* Returns TRUE and v->spl_equiv != NULL if there is tuples which may be freely moved
|
||||
* Calls user picksplit method for attno column to split tuples into
|
||||
* two vectors.
|
||||
*
|
||||
* Returns FALSE if split is complete (there are no more index columns, or
|
||||
* there is no need to consider them). Note that in this case the union
|
||||
* keys for all columns must be computed here.
|
||||
* Returns TRUE and v->spl_dontcare = NULL if left and right unions of attno
|
||||
* column are the same, so we should split on next column instead.
|
||||
* Returns TRUE and v->spl_dontcare != NULL if there are don't-care tuples
|
||||
* that could be relocated based on the next column(s). The don't-care
|
||||
* tuples have been removed from the split and must be reinserted by caller.
|
||||
*/
|
||||
static bool
|
||||
gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
|
||||
@ -355,15 +420,18 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
|
||||
GIST_SPLITVEC *sv = &v->splitVector;
|
||||
|
||||
/*
|
||||
* now let the user-defined picksplit function set up the split vector; in
|
||||
* entryvec there is no null value!!
|
||||
* Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
|
||||
* case we are doing a secondary split (see comments in gist.h).
|
||||
*/
|
||||
|
||||
sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
|
||||
sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
|
||||
sv->spl_ldatum = v->spl_lattr[attno];
|
||||
sv->spl_rdatum = v->spl_rattr[attno];
|
||||
|
||||
/*
|
||||
* Let the opclass-specific PickSplit method do its thing. Note that at
|
||||
* this point we know there are no null keys in the entryvec.
|
||||
*/
|
||||
FunctionCall2Coll(&giststate->picksplitFn[attno],
|
||||
giststate->supportCollation[attno],
|
||||
PointerGetDatum(entryvec),
|
||||
@ -371,6 +439,10 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
|
||||
|
||||
if (sv->spl_nleft == 0 || sv->spl_nright == 0)
|
||||
{
|
||||
/*
|
||||
* User-defined picksplit failed to create an actual split, ie it put
|
||||
* everything on the same side. Complain but cope.
|
||||
*/
|
||||
ereport(DEBUG1,
|
||||
(errcode(ERRCODE_INTERNAL_ERROR),
|
||||
errmsg("picksplit method for column %d of index \"%s\" failed",
|
||||
@ -378,107 +450,129 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
|
||||
errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
|
||||
|
||||
/*
|
||||
* Reinit GIST_SPLITVEC. Although that fields are not used by
|
||||
* genericPickSplit(), let us set up it for further processing
|
||||
* Reinit GIST_SPLITVEC. Although these fields are not used by
|
||||
* genericPickSplit(), set them up for further processing
|
||||
*/
|
||||
sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
|
||||
sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
|
||||
sv->spl_ldatum = v->spl_lattr[attno];
|
||||
sv->spl_rdatum = v->spl_rattr[attno];
|
||||
|
||||
/* Do a generic split */
|
||||
genericPickSplit(giststate, entryvec, sv, attno);
|
||||
|
||||
/* Clean up if we're in a secondary split */
|
||||
if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
|
||||
supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
|
||||
supportSecondarySplit(r, giststate, attno, sv,
|
||||
v->spl_lattr[attno], v->spl_rattr[attno]);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* compatibility with old code */
|
||||
/* hack for compatibility with old picksplit API */
|
||||
if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
|
||||
sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
|
||||
if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
|
||||
sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
|
||||
|
||||
/* Clean up if we're in a secondary split */
|
||||
if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
|
||||
{
|
||||
elog(LOG, "picksplit method for column %d of index \"%s\" doesn't support secondary split",
|
||||
attno + 1, RelationGetRelationName(r));
|
||||
|
||||
supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
|
||||
supportSecondarySplit(r, giststate, attno, sv,
|
||||
v->spl_lattr[attno], v->spl_rattr[attno]);
|
||||
}
|
||||
}
|
||||
|
||||
/* emit union datums computed by PickSplit back to v arrays */
|
||||
v->spl_lattr[attno] = sv->spl_ldatum;
|
||||
v->spl_rattr[attno] = sv->spl_rdatum;
|
||||
v->spl_lisnull[attno] = false;
|
||||
v->spl_risnull[attno] = false;
|
||||
|
||||
/*
|
||||
* if index is multikey, then we must to try get smaller bounding box for
|
||||
* subkey(s)
|
||||
* If index columns remain, then consider whether we can improve the split
|
||||
* by using them. Even if we can't, we must compute union keys for those
|
||||
* columns before we can return FALSE.
|
||||
*/
|
||||
v->spl_equiv = NULL;
|
||||
v->spl_dontcare = NULL;
|
||||
|
||||
if (giststate->tupdesc->natts > 1 && attno + 1 != giststate->tupdesc->natts)
|
||||
if (attno + 1 < giststate->tupdesc->natts)
|
||||
{
|
||||
int NumDontCare;
|
||||
|
||||
if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
|
||||
{
|
||||
/*
|
||||
* Left and right key's unions are equial, so we can get better
|
||||
* split by following columns. Note, unions for attno columns are
|
||||
* already done.
|
||||
* Left and right union keys are equal, so we can get better split
|
||||
* by considering next column.
|
||||
*/
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Locate don't-care tuples, if any
|
||||
*/
|
||||
v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
|
||||
|
||||
NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
|
||||
|
||||
if (NumDontCare == 0)
|
||||
{
|
||||
/*
|
||||
* There are no don't-cares, so just compute the union keys for
|
||||
* remaining columns and we're done.
|
||||
*/
|
||||
gistunionsubkey(giststate, itup, v);
|
||||
}
|
||||
else
|
||||
{
|
||||
int LenEquiv;
|
||||
|
||||
v->spl_equiv = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
|
||||
|
||||
LenEquiv = gistfindgroup(r, giststate, entryvec->vector, v, attno);
|
||||
/*
|
||||
* Remove don't-cares from spl_left[] and spl_right[]. NOTE: this
|
||||
* could reduce NumDontCare to zero.
|
||||
*/
|
||||
removeDontCares(sv->spl_left, &sv->spl_nleft,
|
||||
v->spl_dontcare, &NumDontCare);
|
||||
removeDontCares(sv->spl_right, &sv->spl_nright,
|
||||
v->spl_dontcare, &NumDontCare);
|
||||
|
||||
/*
|
||||
* if possible, we should distribute equivalent tuples
|
||||
* Recompute union keys, considering only non-don't-care tuples.
|
||||
* NOTE: this will set union keys for remaining index columns,
|
||||
* which will cause later calls of gistUserPicksplit to pass those
|
||||
* values down to user-defined PickSplit methods with
|
||||
* spl_ldatum_exists/spl_rdatum_exists set true.
|
||||
*/
|
||||
if (LenEquiv == 0)
|
||||
{
|
||||
gistunionsubkey(giststate, itup, v);
|
||||
}
|
||||
else
|
||||
{
|
||||
cleanupOffsets(sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv);
|
||||
cleanupOffsets(sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv);
|
||||
gistunionsubkey(giststate, itup, v);
|
||||
|
||||
gistunionsubkey(giststate, itup, v);
|
||||
if (LenEquiv == 1)
|
||||
if (NumDontCare == 1)
|
||||
{
|
||||
/*
|
||||
* If there's only one don't-care tuple then we can't do a
|
||||
* PickSplit on it, so just choose whether to send it left or
|
||||
* right by comparing penalties.
|
||||
*/
|
||||
OffsetNumber toMove;
|
||||
|
||||
/* find it ... */
|
||||
for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
|
||||
{
|
||||
/*
|
||||
* In case with one tuple we just choose left-right by
|
||||
* penalty. It's simplify user-defined pickSplit
|
||||
*/
|
||||
OffsetNumber toMove = InvalidOffsetNumber;
|
||||
|
||||
for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
|
||||
if (v->spl_equiv[toMove])
|
||||
break;
|
||||
Assert(toMove < entryvec->n);
|
||||
|
||||
placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
|
||||
|
||||
/*
|
||||
* redo gistunionsubkey(): it will not degradate
|
||||
* performance, because it's very rarely
|
||||
*/
|
||||
v->spl_equiv = NULL;
|
||||
gistunionsubkey(giststate, itup, v);
|
||||
|
||||
return false;
|
||||
if (v->spl_dontcare[toMove])
|
||||
break;
|
||||
}
|
||||
else if (LenEquiv > 1)
|
||||
return true;
|
||||
Assert(toMove < entryvec->n);
|
||||
|
||||
/* ... and assign it to cheaper side */
|
||||
placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
|
||||
|
||||
/* recompute the union keys including this tuple */
|
||||
v->spl_dontcare = NULL;
|
||||
gistunionsubkey(giststate, itup, v);
|
||||
}
|
||||
else if (NumDontCare > 1)
|
||||
return true;
|
||||
/* else NumDontCare is now zero; handle same as above */
|
||||
}
|
||||
}
|
||||
|
||||
@ -486,7 +580,7 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
|
||||
}
|
||||
|
||||
/*
|
||||
* simple split page
|
||||
* simply split page in half
|
||||
*/
|
||||
static void
|
||||
gistSplitHalf(GIST_SPLITVEC *v, int len)
|
||||
@ -501,26 +595,53 @@ gistSplitHalf(GIST_SPLITVEC *v, int len)
|
||||
v->spl_right[v->spl_nright++] = i;
|
||||
else
|
||||
v->spl_left[v->spl_nleft++] = i;
|
||||
|
||||
/* we need not compute union keys, caller took care of it */
|
||||
}
|
||||
|
||||
/*
|
||||
* tries to split page by attno key, in case of null
|
||||
* values move those to separate page.
|
||||
* gistSplitByKey: main entry point for page-splitting algorithm
|
||||
*
|
||||
* r: index relation
|
||||
* page: page being split
|
||||
* itup: array of IndexTuples to be processed
|
||||
* len: number of IndexTuples to be processed (must be at least 2)
|
||||
* giststate: additional info about index
|
||||
* v: working state and output area
|
||||
* attno: column we are working on (zero-based index)
|
||||
*
|
||||
* Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
|
||||
* to all-TRUE. On return, spl_left/spl_nleft contain indexes of tuples
|
||||
* to go left, spl_right/spl_nright contain indexes of tuples to go right,
|
||||
* spl_lattr/spl_lisnull contain left-side union key values, and
|
||||
* spl_rattr/spl_risnull contain right-side union key values. Other fields
|
||||
* in this struct are workspace for this file.
|
||||
*
|
||||
* Outside caller must pass zero for attno. The function may internally
|
||||
* recurse to the next column by passing attno+1.
|
||||
*/
|
||||
void
|
||||
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
|
||||
GistSplitVector *v, GistEntryVector *entryvec, int attno)
|
||||
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
|
||||
GISTSTATE *giststate, GistSplitVector *v, int attno)
|
||||
{
|
||||
int i;
|
||||
static OffsetNumber offNullTuples[MaxOffsetNumber];
|
||||
GistEntryVector *entryvec;
|
||||
OffsetNumber *offNullTuples;
|
||||
int nOffNullTuples = 0;
|
||||
int i;
|
||||
|
||||
/* generate the item array, and identify tuples with null keys */
|
||||
/* note that entryvec->vector[0] goes unused in this code */
|
||||
entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
|
||||
entryvec->n = len + 1;
|
||||
offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
|
||||
|
||||
for (i = 1; i <= len; i++)
|
||||
{
|
||||
Datum datum;
|
||||
bool IsNull;
|
||||
|
||||
datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc, &IsNull);
|
||||
datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc,
|
||||
&IsNull);
|
||||
gistdentryinit(giststate, attno, &(entryvec->vector[i]),
|
||||
datum, r, page, i,
|
||||
FALSE, IsNull);
|
||||
@ -531,24 +652,24 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
|
||||
if (nOffNullTuples == len)
|
||||
{
|
||||
/*
|
||||
* Corner case: All keys in attno column are null, we should try to
|
||||
* split by keys in next column. If all keys in all columns are NULL
|
||||
* just split page half by half
|
||||
* Corner case: All keys in attno column are null, so just transfer
|
||||
* our attention to the next column. If there's no next column, just
|
||||
* split page in half.
|
||||
*/
|
||||
v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
|
||||
|
||||
if (attno + 1 == r->rd_att->natts)
|
||||
gistSplitHalf(&v->splitVector, len);
|
||||
if (attno + 1 < r->rd_att->natts)
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
|
||||
else
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
|
||||
gistSplitHalf(&v->splitVector, len);
|
||||
}
|
||||
else if (nOffNullTuples > 0)
|
||||
{
|
||||
int j = 0;
|
||||
|
||||
/*
|
||||
* We don't want to mix NULLs and not-NULLs keys on one page, so move
|
||||
* nulls to right page
|
||||
* We don't want to mix NULL and not-NULL keys on one page, so split
|
||||
* nulls to right page and not-nulls to left.
|
||||
*/
|
||||
v->splitVector.spl_right = offNullTuples;
|
||||
v->splitVector.spl_nright = nOffNullTuples;
|
||||
@ -562,62 +683,76 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
|
||||
else
|
||||
v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
|
||||
|
||||
v->spl_equiv = NULL;
|
||||
/* Must compute union keys for this and any following columns */
|
||||
v->spl_dontcare = NULL;
|
||||
gistunionsubkey(giststate, itup, v);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* all keys are not-null
|
||||
* all keys are not-null, so apply user-defined PickSplit method
|
||||
*/
|
||||
entryvec->n = len + 1;
|
||||
|
||||
if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno + 1 != r->rd_att->natts)
|
||||
if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
|
||||
{
|
||||
/*
|
||||
* Splitting on attno column is not optimized: there is a tuples
|
||||
* which can be freely left or right page, we will try to split
|
||||
* page by following columns
|
||||
* Splitting on attno column is not optimal, so consider
|
||||
* redistributing don't-care tuples according to the next column
|
||||
*/
|
||||
if (v->spl_equiv == NULL)
|
||||
Assert(attno + 1 < r->rd_att->natts);
|
||||
|
||||
if (v->spl_dontcare == NULL)
|
||||
{
|
||||
/*
|
||||
* simple case: left and right keys for attno column are equal
|
||||
* Simple case: left and right keys for attno column are
|
||||
* equal, so just split according to the next column.
|
||||
*/
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
|
||||
gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we should clean up vector from already distributed tuples */
|
||||
IndexTuple *newitup = (IndexTuple *) palloc((len + 1) * sizeof(IndexTuple));
|
||||
OffsetNumber *map = (OffsetNumber *) palloc((len + 1) * sizeof(IndexTuple));
|
||||
/*
|
||||
* Form an array of just the don't-care tuples to pass to a
|
||||
* recursive invocation of this function for the next column.
|
||||
*/
|
||||
IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
|
||||
OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
|
||||
int newlen = 0;
|
||||
GIST_SPLITVEC backupSplit = v->splitVector;
|
||||
GIST_SPLITVEC backupSplit;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
if (v->spl_equiv[i + 1])
|
||||
{
|
||||
if (v->spl_dontcare[i + 1])
|
||||
{
|
||||
newitup[newlen] = itup[i];
|
||||
map[newlen] = i + 1;
|
||||
newitup[newlen++] = itup[i];
|
||||
newlen++;
|
||||
}
|
||||
}
|
||||
|
||||
Assert(newlen > 0);
|
||||
|
||||
/*
|
||||
* Make a backup copy of v->splitVector, since the recursive
|
||||
* call will overwrite that with its own result.
|
||||
*/
|
||||
backupSplit = v->splitVector;
|
||||
backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
|
||||
memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
|
||||
backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
|
||||
memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
|
||||
|
||||
gistSplitByKey(r, page, newitup, newlen, giststate, v, entryvec, attno + 1);
|
||||
/* Recursively decide how to split the don't-care tuples */
|
||||
gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
|
||||
|
||||
/* merge result of subsplit */
|
||||
/* Merge result of subsplit with non-don't-care tuples */
|
||||
for (i = 0; i < v->splitVector.spl_nleft; i++)
|
||||
backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
|
||||
for (i = 0; i < v->splitVector.spl_nright; i++)
|
||||
backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
|
||||
|
||||
v->splitVector = backupSplit;
|
||||
/* reunion left and right datums */
|
||||
|
||||
/* recompute left and right union datums */
|
||||
gistunionsubkey(giststate, itup, v);
|
||||
}
|
||||
}
|
||||
|
@ -85,11 +85,30 @@ typedef GISTPageOpaqueData *GISTPageOpaque;
|
||||
|
||||
/*
|
||||
* This is the Split Vector to be returned by the PickSplit method.
|
||||
* PickSplit should check spl_(r|l)datum_exists. If it is 'true',
|
||||
* that corresponding spl_(r|l)datum already defined and
|
||||
* PickSplit should use that value. PickSplit should always set
|
||||
* spl_(r|l)datum_exists to false: GiST will check value to
|
||||
* control supporting this feature by PickSplit...
|
||||
* PickSplit should fill the indexes of tuples to go to the left side into
|
||||
* spl_left[], and those to go to the right into spl_right[] (note the method
|
||||
* is responsible for palloc'ing both of these arrays!). The tuple counts
|
||||
* go into spl_nleft/spl_nright, and spl_ldatum/spl_rdatum must be set to
|
||||
* the union keys for each side.
|
||||
*
|
||||
* If spl_ldatum_exists and spl_rdatum_exists are true, then we are performing
|
||||
* a "secondary split" using a non-first index column. In this case some
|
||||
* decisions have already been made about a page split, and the set of tuples
|
||||
* being passed to PickSplit is just the tuples about which we are undecided.
|
||||
* spl_ldatum/spl_rdatum then contain the union keys for the tuples already
|
||||
* chosen to go left or right. Ideally the PickSplit method should take those
|
||||
* keys into account while deciding what to do with the remaining tuples, ie
|
||||
* it should try to "build out" from those unions so as to minimally expand
|
||||
* them. If it does so, it should union the given tuples' keys into the
|
||||
* existing spl_ldatum/spl_rdatum values rather than just setting those values
|
||||
* from scratch, and then set spl_ldatum_exists/spl_rdatum_exists to false to
|
||||
* show it has done this.
|
||||
*
|
||||
* If the PickSplit method fails to clear spl_ldatum_exists/spl_rdatum_exists,
|
||||
* the core GiST code will make its own decision about how to merge the
|
||||
* secondary-split results with the previously-chosen tuples, and will then
|
||||
* recompute the union keys from scratch. This is a workable though often not
|
||||
* optimal approach.
|
||||
*/
|
||||
typedef struct GIST_SPLITVEC
|
||||
{
|
||||
|
@ -254,20 +254,21 @@ typedef struct GISTInsertStack
|
||||
struct GISTInsertStack *parent;
|
||||
} GISTInsertStack;
|
||||
|
||||
/* Working state and results for multi-column split logic in gistsplit.c */
|
||||
typedef struct GistSplitVector
|
||||
{
|
||||
GIST_SPLITVEC splitVector; /* to/from PickSplit method */
|
||||
GIST_SPLITVEC splitVector; /* passed to/from user PickSplit method */
|
||||
|
||||
Datum spl_lattr[INDEX_MAX_KEYS]; /* Union of subkeys in
|
||||
* spl_left */
|
||||
* splitVector.spl_left */
|
||||
bool spl_lisnull[INDEX_MAX_KEYS];
|
||||
|
||||
Datum spl_rattr[INDEX_MAX_KEYS]; /* Union of subkeys in
|
||||
* spl_right */
|
||||
* splitVector.spl_right */
|
||||
bool spl_risnull[INDEX_MAX_KEYS];
|
||||
|
||||
bool *spl_equiv; /* equivalent tuples which can be freely
|
||||
* distributed between left and right pages */
|
||||
bool *spl_dontcare; /* flags tuples which could go to either side
|
||||
* of the split for zero penalty */
|
||||
} GistSplitVector;
|
||||
|
||||
typedef struct
|
||||
@ -526,7 +527,7 @@ extern Datum gistvacuumcleanup(PG_FUNCTION_ARGS);
|
||||
/* gistsplit.c */
|
||||
extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup,
|
||||
int len, GISTSTATE *giststate,
|
||||
GistSplitVector *v, GistEntryVector *entryvec,
|
||||
GistSplitVector *v,
|
||||
int attno);
|
||||
|
||||
/* gistbuild.c */
|
||||
|
Loading…
x
Reference in New Issue
Block a user