1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-03 22:24:49 +03:00

Back-patch recent fixes for gistchoose and gistRelocateBuildBuffersOnSplit.

This back-ports commits c8ba697a4bdb934f0c51424c654e8db6133ea255 and
e5db11c5582b469c04a11f217a0f32c827da5dd7, which fix one definite and one
speculative bug in gistchoose, and make the code a lot more intelligible as
well.  In 9.2 only, this also affects the largely-copied-and-pasted logic
in gistRelocateBuildBuffersOnSplit.

The impact of the bugs was that the functions might make poor decisions
as to which index tree branch to push a new entry down into, resulting in
GiST index bloat and poor performance.  The fixes rectify these decisions
for future insertions, but a REINDEX would be needed to clean up any
existing index bloat.

Alexander Korotkov, Robert Haas, Tom Lane
This commit is contained in:
Tom Lane 2012-08-30 23:47:46 -04:00
parent d561fc5396
commit d2528e5dbf
2 changed files with 141 additions and 46 deletions

View File

@ -625,60 +625,107 @@ gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
}
/*
* Loop through all index tuples on the buffer on the splitted page,
* moving them to buffers on the new pages.
* Loop through all index tuples in the buffer of the page being split,
* moving them to buffers for the new pages. We try to move each tuple to
* the page that will result in the lowest penalty for the leading column
* or, in the case of a tie, the lowest penalty for the earliest column
* that is not tied.
*
* The page searching logic is very similar to gistchoose().
*/
while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
{
float sum_grow,
which_grow[INDEX_MAX_KEYS];
float best_penalty[INDEX_MAX_KEYS];
int i,
which;
IndexTuple newtup;
RelocationBufferInfo *targetBufferInfo;
/*
* Choose which page this tuple should go to.
*/
gistDeCompressAtt(giststate, r,
itup, NULL, (OffsetNumber) 0, entry, isnull);
which = -1;
*which_grow = -1.0f;
sum_grow = 1.0f;
/* default to using first page (shouldn't matter) */
which = 0;
for (i = 0; i < splitPagesCount && sum_grow; i++)
/*
* best_penalty[j] is the best penalty we have seen so far for column
* j, or -1 when we haven't yet examined column j. Array entries to
* the right of the first -1 are undefined.
*/
best_penalty[0] = -1;
/*
* Loop over possible target pages, looking for one to move this tuple
* to.
*/
for (i = 0; i < splitPagesCount; i++)
{
int j;
RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
bool zero_penalty;
int j;
sum_grow = 0.0f;
zero_penalty = true;
/* Loop over index attributes. */
for (j = 0; j < r->rd_att->natts; j++)
{
float usize;
/* Compute penalty for this column. */
usize = gistpenalty(giststate, j,
&splitPageInfo->entry[j],
splitPageInfo->isnull[j],
&entry[j], isnull[j]);
if (usize > 0)
zero_penalty = false;
if (which_grow[j] < 0 || usize < which_grow[j])
if (best_penalty[j] < 0 || usize < best_penalty[j])
{
/*
* New best penalty for column. Tentatively select this
* page as the target, and record the best penalty. Then
* reset the next column's penalty to "unknown" (and
* indirectly, the same for all the ones to its right).
* This will force us to adopt this page's penalty values
* as the best for all the remaining columns during
* subsequent loop iterations.
*/
which = i;
which_grow[j] = usize;
if (j < r->rd_att->natts - 1 && i == 0)
which_grow[j + 1] = -1;
sum_grow += which_grow[j];
best_penalty[j] = usize;
if (j < r->rd_att->natts - 1)
best_penalty[j + 1] = -1;
}
else if (best_penalty[j] == usize)
{
/*
* The current page is exactly as good for this column as
* the best page seen so far. The next iteration of this
* loop will compare the next column.
*/
}
else if (which_grow[j] == usize)
sum_grow += usize;
else
{
sum_grow = 1;
/*
* The current page is worse for this column than the best
* page seen so far. Skip the remaining columns and move
* on to the next page, if any.
*/
zero_penalty = false; /* so outer loop won't exit */
break;
}
}
/*
* If we find a page with zero penalty for all columns, there's no
* need to examine remaining pages; just break out of the loop and
* return it.
*/
if (zero_penalty)
break;
}
/* OK, "which" is the page index to push the tuple to */
targetBufferInfo = &relocationBuffersInfos[which];
/* Push item to selected node buffer */

View File

@ -363,72 +363,120 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
}
/*
* find entry with lowest penalty
* Search an upper index page for the entry with lowest penalty for insertion
* of the new index key contained in "it".
*
* Returns the index of the page entry to insert into.
*/
OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
GISTSTATE *giststate)
{
OffsetNumber result;
OffsetNumber maxoff;
OffsetNumber i;
OffsetNumber which;
float sum_grow,
which_grow[INDEX_MAX_KEYS];
float best_penalty[INDEX_MAX_KEYS];
GISTENTRY entry,
identry[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
maxoff = PageGetMaxOffsetNumber(p);
*which_grow = -1.0;
which = InvalidOffsetNumber;
sum_grow = 1;
Assert(!GistPageIsLeaf(p));
gistDeCompressAtt(giststate, r,
it, NULL, (OffsetNumber) 0,
identry, isnull);
/* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
result = FirstOffsetNumber;
/*
* The index may have multiple columns, and there's a penalty value for
* each column. The penalty associated with a column that appears earlier
* in the index definition is strictly more important than the penalty of
* a column that appears later in the index definition.
*
* best_penalty[j] is the best penalty we have seen so far for column j,
* or -1 when we haven't yet examined column j. Array entries to the
* right of the first -1 are undefined.
*/
best_penalty[0] = -1;
/*
* Loop over tuples on page.
*/
maxoff = PageGetMaxOffsetNumber(p);
Assert(maxoff >= FirstOffsetNumber);
Assert(!GistPageIsLeaf(p));
for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
int j;
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
bool zero_penalty;
int j;
sum_grow = 0;
zero_penalty = true;
/* Loop over index attributes. */
for (j = 0; j < r->rd_att->natts; j++)
{
Datum datum;
float usize;
bool IsNull;
/* Compute penalty for this column. */
datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
gistdentryinit(giststate, j, &entry, datum, r, p, i,
FALSE, IsNull);
usize = gistpenalty(giststate, j, &entry, IsNull,
&identry[j], isnull[j]);
if (usize > 0)
zero_penalty = false;
if (which_grow[j] < 0 || usize < which_grow[j])
if (best_penalty[j] < 0 || usize < best_penalty[j])
{
which = i;
which_grow[j] = usize;
if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
which_grow[j + 1] = -1;
sum_grow += which_grow[j];
/*
* New best penalty for column. Tentatively select this tuple
* as the target, and record the best penalty. Then reset the
* next column's penalty to "unknown" (and indirectly, the
* same for all the ones to its right). This will force us to
* adopt this tuple's penalty values as the best for all the
* remaining columns during subsequent loop iterations.
*/
result = i;
best_penalty[j] = usize;
if (j < r->rd_att->natts - 1)
best_penalty[j + 1] = -1;
}
else if (best_penalty[j] == usize)
{
/*
* The current tuple is exactly as good for this column as the
* best tuple seen so far. The next iteration of this loop
* will compare the next column.
*/
}
else if (which_grow[j] == usize)
sum_grow += usize;
else
{
sum_grow = 1;
/*
* The current tuple is worse for this column than the best
* tuple seen so far. Skip the remaining columns and move on
* to the next tuple, if any.
*/
zero_penalty = false; /* so outer loop won't exit */
break;
}
}
/*
* If we find a tuple with zero penalty for all columns, there's no
* need to examine remaining tuples; just break out of the loop and
* return it.
*/
if (zero_penalty)
break;
}
if (which == InvalidOffsetNumber)
which = FirstOffsetNumber;
return which;
return result;
}
/*