1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Use abbreviated keys for faster sorting of text datums.

This commit extends the SortSupport infrastructure to allow operator
classes the option to provide abbreviated representations of Datums;
in the case of text, we abbreviate by taking the first few characters
of the strxfrm() blob.  If the abbreviated comparison is insufficient
to resolve the comparison, we fall back on the normal comparator.
This can be much faster than the old way of doing sorting if the
first few bytes of the string are usually sufficient to resolve the
comparison.

There is the potential for a performance regression if all of the
strings to be sorted are identical for the first 8+ characters and
differ only in later positions; therefore, the SortSupport machinery
now provides an infrastructure to abort the use of abbreviation if
it appears that abbreviation is producing comparatively few distinct
keys.  HyperLogLog, a streaming cardinality estimator, is included in
this commit and used to make that determination for text.

Peter Geoghegan, reviewed by me.
This commit is contained in:
Robert Haas
2015-01-19 15:20:31 -05:00
parent 1605291b6c
commit 4ea51cdfe8
13 changed files with 1149 additions and 76 deletions

View File

@@ -150,7 +150,10 @@ bool optimize_bounded_sort = true;
* When sorting single Datums, the data value is represented directly by
* datum1/isnull1. If the datatype is pass-by-reference and isnull1 is false,
* then datum1 points to a separately palloc'd data value that is also pointed
* to by the "tuple" pointer; otherwise "tuple" is NULL.
* to by the "tuple" pointer; otherwise "tuple" is NULL. There is one special
* case: when the sort support infrastructure provides an "abbreviated key"
* representation, where the key is (typically) a pass by value proxy for a
* pass by reference type.
*
* While building initial runs, tupindex holds the tuple's run number. During
* merge passes, we re-use it to hold the input tape number that each tuple in
@@ -346,6 +349,14 @@ struct Tuplesortstate
*/
SortSupport onlyKey;
/*
* Additional state for managing "abbreviated key" sortsupport routines
* (which currently may be used by all cases except the Datum sort case and
* hash index case). Tracks the intervals at which the optimization's
* effectiveness is tested.
*/
int64 abbrevNext; /* Tuple # at which to next check applicability */
/*
* These variables are specific to the CLUSTER case; they are set by
* tuplesort_begin_cluster.
@@ -442,6 +453,7 @@ struct Tuplesortstate
static Tuplesortstate *tuplesort_begin_common(int workMem, bool randomAccess);
static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
static bool consider_abort_common(Tuplesortstate *state);
static void inittapes(Tuplesortstate *state);
static void selectnewtape(Tuplesortstate *state);
static void mergeruns(Tuplesortstate *state);
@@ -619,6 +631,7 @@ tuplesort_begin_heap(TupleDesc tupDesc,
state->readtup = readtup_heap;
state->tupDesc = tupDesc; /* assume we need not copy tupDesc */
state->abbrevNext = 10;
/* Prepare SortSupport data for each column */
state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
@@ -634,11 +647,19 @@ tuplesort_begin_heap(TupleDesc tupDesc,
sortKey->ssup_collation = sortCollations[i];
sortKey->ssup_nulls_first = nullsFirstFlags[i];
sortKey->ssup_attno = attNums[i];
/* Convey if abbreviation optimization is applicable in principle */
sortKey->abbreviate = (i == 0);
PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
}
if (nkeys == 1)
/*
* The "onlyKey" optimization cannot be used with abbreviated keys, since
* tie-breaker comparisons may be required. Typically, the optimization is
* only of value to pass-by-value types anyway, whereas abbreviated keys
* are typically only of value to pass-by-reference types.
*/
if (nkeys == 1 && !state->sortKeys->abbrev_converter)
state->onlyKey = state->sortKeys;
MemoryContextSwitchTo(oldcontext);
@@ -680,6 +701,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
state->copytup = copytup_cluster;
state->writetup = writetup_cluster;
state->readtup = readtup_cluster;
state->abbrevNext = 10;
state->indexInfo = BuildIndexInfo(indexRel);
@@ -719,6 +741,8 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
sortKey->ssup_nulls_first =
(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
sortKey->ssup_attno = scanKey->sk_attno;
/* Convey if abbreviation optimization is applicable in principle */
sortKey->abbreviate = (i == 0);
AssertState(sortKey->ssup_attno != 0);
@@ -768,6 +792,7 @@ tuplesort_begin_index_btree(Relation heapRel,
state->copytup = copytup_index;
state->writetup = writetup_index;
state->readtup = readtup_index;
state->abbrevNext = 10;
state->heapRel = heapRel;
state->indexRel = indexRel;
@@ -791,6 +816,8 @@ tuplesort_begin_index_btree(Relation heapRel,
sortKey->ssup_nulls_first =
(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
sortKey->ssup_attno = scanKey->sk_attno;
/* Convey if abbreviation optimization is applicable in principle */
sortKey->abbreviate = (i == 0);
AssertState(sortKey->ssup_attno != 0);
@@ -883,6 +910,13 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
state->onlyKey->ssup_cxt = CurrentMemoryContext;
state->onlyKey->ssup_collation = sortCollation;
state->onlyKey->ssup_nulls_first = nullsFirstFlag;
/*
* Conversion to abbreviated representation infeasible in the Datum case.
* It must be possible to subsequently fetch original datum values within
* tuplesort_getdatum(), which would require special-case preservation of
* original values.
*/
state->onlyKey->abbreviate = false;
PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey);
@@ -928,6 +962,19 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound)
state->bounded = true;
state->bound = (int) bound;
/*
* Bounded sorts are not an effective target for abbreviated key
* optimization. Disable by setting state to be consistent with no
* abbreviation support.
*/
state->sortKeys->abbrev_converter = NULL;
if (state->sortKeys->abbrev_full_comparator)
state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator;
/* Not strictly necessary, but be tidy */
state->sortKeys->abbrev_abort = NULL;
state->sortKeys->abbrev_full_comparator = NULL;
}
/*
@@ -1186,15 +1233,63 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
{
MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
SortTuple stup;
Datum original;
IndexTuple tuple;
stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull);
((IndexTuple) stup.tuple)->t_tid = *self;
tuple = ((IndexTuple) stup.tuple);
tuple->t_tid = *self;
USEMEM(state, GetMemoryChunkSpace(stup.tuple));
/* set up first-column key value */
stup.datum1 = index_getattr((IndexTuple) stup.tuple,
1,
RelationGetDescr(state->indexRel),
&stup.isnull1);
original = index_getattr(tuple,
1,
RelationGetDescr(state->indexRel),
&stup.isnull1);
if (!state->sortKeys->abbrev_converter || stup.isnull1)
{
/*
* Store ordinary Datum representation, or NULL value. If there is a
* converter it won't expect NULL values, and cost model is not
* required to account for NULL, so in that case we avoid calling
* converter and just set datum1 to "void" representation (to be
* consistent).
*/
stup.datum1 = original;
}
else if (!consider_abort_common(state))
{
/* Store abbreviated key representation */
stup.datum1 = state->sortKeys->abbrev_converter(original,
state->sortKeys);
}
else
{
/* Abort abbreviation */
int i;
stup.datum1 = original;
/*
* Set state to be consistent with never trying abbreviation.
*
* Alter datum1 representation in already-copied tuples, so as to
* ensure a consistent representation (current tuple was just handled).
* Note that we rely on all tuples copied so far actually being
* contained within memtuples array.
*/
for (i = 0; i < state->memtupcount; i++)
{
	SortTuple *mtup = &state->memtuples[i];

	tuple = mtup->tuple;
	/*
	 * Re-fetch the unabbreviated leading attribute for this stored tuple.
	 * The null flag must go into the memtuple's own isnull1 field:
	 * writing it through &stup.isnull1 would overwrite the flag of the
	 * tuple currently being added with that of an unrelated tuple.
	 */
	mtup->datum1 = index_getattr(tuple,
								 1,
								 RelationGetDescr(state->indexRel),
								 &mtup->isnull1);
}
}
puttuple_common(state, &stup);
MemoryContextSwitchTo(oldcontext);
@@ -1359,6 +1454,47 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple)
}
}
/*
 * consider_abort_common
 *
 * Decide whether abbreviation of the leading sort key should be abandoned.
 *
 * The check is only made while state->status is TSS_INITIAL and once
 * memtupcount has reached abbrevNext; abbrevNext is doubled each time a
 * check is made.  The decision itself is delegated to the leading key's
 * abbrev_abort routine.
 *
 * Returns true if abbreviation was abandoned.  In that case the leading
 * key's comparator has been replaced by abbrev_full_comparator and the
 * abbreviation callbacks have been reset to NULL.  Returns false when no
 * check was due or when the abort routine chose to carry on.
 */
static bool
consider_abort_common(Tuplesortstate *state)
{
	Assert(state->sortKeys[0].abbrev_converter != NULL);
	Assert(state->sortKeys[0].abbrev_abort != NULL);
	Assert(state->sortKeys[0].abbrev_full_comparator != NULL);

	/* Only test effectiveness in the initial state, at the next checkpoint */
	if (state->status != TSS_INITIAL)
		return false;
	if (state->memtupcount < state->abbrevNext)
		return false;

	/* Schedule the following check at twice the current tuple count */
	state->abbrevNext *= 2;

	/* Ask the opclass-supplied routine whether abbreviation should stop */
	if (!state->sortKeys[0].abbrev_abort(state->memtupcount,
										 &state->sortKeys[0]))
		return false;

	/*
	 * Abbreviation is being abandoned: reinstate the authoritative
	 * comparator, and mark abbreviation as not in play by clearing
	 * abbrev_converter.
	 */
	state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator;
	state->sortKeys[0].abbrev_converter = NULL;

	/* Clearing the remaining callbacks is not required, merely tidy */
	state->sortKeys[0].abbrev_abort = NULL;
	state->sortKeys[0].abbrev_full_comparator = NULL;

	/* Tell the caller that abbreviation has been given up */
	return true;
}
/*
* All tuples have been provided; finish the sort.
*/
@@ -2853,6 +2989,12 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
TupleDesc tupDesc;
int nkey;
int32 compare;
AttrNumber attno;
Datum datum1,
datum2;
bool isnull1,
isnull2;
/* Compare the leading sort key */
compare = ApplySortComparator(a->datum1, a->isnull1,
@@ -2867,14 +3009,25 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
tupDesc = state->tupDesc;
if (sortKey->abbrev_converter)
{
attno = sortKey->ssup_attno;
datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
compare = ApplySortAbbrevFullComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
if (compare != 0)
return compare;
}
sortKey++;
for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++)
{
AttrNumber attno = sortKey->ssup_attno;
Datum datum1,
datum2;
bool isnull1,
isnull2;
attno = sortKey->ssup_attno;
datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
@@ -2897,6 +3050,7 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
* MinimalTuple using the exported interface for that.
*/
TupleTableSlot *slot = (TupleTableSlot *) tup;
Datum original;
MinimalTuple tuple;
HeapTupleData htup;
@@ -2907,10 +3061,58 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
/* set up first-column key value */
htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
stup->datum1 = heap_getattr(&htup,
state->sortKeys[0].ssup_attno,
state->tupDesc,
&stup->isnull1);
original = heap_getattr(&htup,
state->sortKeys[0].ssup_attno,
state->tupDesc,
&stup->isnull1);
if (!state->sortKeys->abbrev_converter || stup->isnull1)
{
/*
* Store ordinary Datum representation, or NULL value. If there is a
* converter it won't expect NULL values, and cost model is not
* required to account for NULL, so in that case we avoid calling
* converter and just set datum1 to "void" representation (to be
* consistent).
*/
stup->datum1 = original;
}
else if (!consider_abort_common(state))
{
/* Store abbreviated key representation */
stup->datum1 = state->sortKeys->abbrev_converter(original,
state->sortKeys);
}
else
{
/* Abort abbreviation */
int i;
stup->datum1 = original;
/*
* Set state to be consistent with never trying abbreviation.
*
* Alter datum1 representation in already-copied tuples, so as to
* ensure a consistent representation (current tuple was just handled).
* Note that we rely on all tuples copied so far actually being
* contained within memtuples array.
*/
for (i = 0; i < state->memtupcount; i++)
{
SortTuple *mtup = &state->memtuples[i];
htup.t_len = ((MinimalTuple) mtup->tuple)->t_len +
MINIMAL_TUPLE_OFFSET;
htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple -
MINIMAL_TUPLE_OFFSET);
mtup->datum1 = heap_getattr(&htup,
state->sortKeys[0].ssup_attno,
state->tupDesc,
&mtup->isnull1);
}
}
}
static void
@@ -2980,13 +3182,35 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
TupleDesc tupDesc;
int nkey;
int32 compare;
Datum datum1,
datum2;
bool isnull1,
isnull2;
AttrNumber leading = state->indexInfo->ii_KeyAttrNumbers[0];
/* Be prepared to compare additional sort keys */
ltup = (HeapTuple) a->tuple;
rtup = (HeapTuple) b->tuple;
tupDesc = state->tupDesc;
/* Compare the leading sort key, if it's simple */
if (state->indexInfo->ii_KeyAttrNumbers[0] != 0)
if (leading != 0)
{
compare = ApplySortComparator(a->datum1, a->isnull1,
b->datum1, b->isnull1,
sortKey);
if (compare != 0)
return compare;
if (sortKey->abbrev_converter)
{
datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1);
datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2);
compare = ApplySortAbbrevFullComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
}
if (compare != 0 || state->nKeys == 1)
return compare;
/* Compare additional columns the hard way */
@@ -2999,22 +3223,13 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
nkey = 0;
}
/* Compare additional sort keys */
ltup = (HeapTuple) a->tuple;
rtup = (HeapTuple) b->tuple;
if (state->indexInfo->ii_Expressions == NULL)
{
/* If not expression index, just compare the proper heap attrs */
tupDesc = state->tupDesc;
for (; nkey < state->nKeys; nkey++, sortKey++)
{
AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[nkey];
Datum datum1,
datum2;
bool isnull1,
isnull2;
datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
@@ -3072,17 +3287,67 @@ static void
copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
{
HeapTuple tuple = (HeapTuple) tup;
Datum original;
/* copy the tuple into sort storage */
tuple = heap_copytuple(tuple);
stup->tuple = (void *) tuple;
USEMEM(state, GetMemoryChunkSpace(tuple));
/* set up first-column key value, if it's a simple column */
if (state->indexInfo->ii_KeyAttrNumbers[0] != 0)
stup->datum1 = heap_getattr(tuple,
state->indexInfo->ii_KeyAttrNumbers[0],
state->tupDesc,
&stup->isnull1);
/*
* set up first-column key value, and potentially abbreviate, if it's a
* simple column
*/
if (state->indexInfo->ii_KeyAttrNumbers[0] == 0)
return;
original = heap_getattr(tuple,
state->indexInfo->ii_KeyAttrNumbers[0],
state->tupDesc,
&stup->isnull1);
if (!state->sortKeys->abbrev_converter || stup->isnull1)
{
/*
* Store ordinary Datum representation, or NULL value. If there is a
* converter it won't expect NULL values, and cost model is not
* required to account for NULL, so in that case we avoid calling
* converter and just set datum1 to "void" representation (to be
* consistent).
*/
stup->datum1 = original;
}
else if (!consider_abort_common(state))
{
/* Store abbreviated key representation */
stup->datum1 = state->sortKeys->abbrev_converter(original,
state->sortKeys);
}
else
{
/* Abort abbreviation */
int i;
stup->datum1 = original;
/*
* Set state to be consistent with never trying abbreviation.
*
* Alter datum1 representation in already-copied tuples, so as to
* ensure a consistent representation (current tuple was just handled).
* Note that we rely on all tuples copied so far actually being
* contained within memtuples array.
*/
for (i = 0; i < state->memtupcount; i++)
{
	SortTuple *mtup = &state->memtuples[i];

	tuple = (HeapTuple) mtup->tuple;
	/*
	 * Re-fetch the unabbreviated leading attribute for this stored tuple.
	 * The null flag must go into the memtuple's own isnull1 field:
	 * writing it through &stup->isnull1 would overwrite the flag of the
	 * tuple currently being copied with that of an unrelated tuple.
	 */
	mtup->datum1 = heap_getattr(tuple,
								state->indexInfo->ii_KeyAttrNumbers[0],
								state->tupDesc,
								&mtup->isnull1);
}
}
}
static void
@@ -3162,6 +3427,11 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b,
bool equal_hasnull = false;
int nkey;
int32 compare;
Datum datum1,
datum2;
bool isnull1,
isnull2;
/* Compare the leading sort key */
compare = ApplySortComparator(a->datum1, a->isnull1,
@@ -3170,23 +3440,31 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b,
if (compare != 0)
return compare;
/* they are equal, so we only need to examine one null flag */
if (a->isnull1)
equal_hasnull = true;
/* Compare additional sort keys */
tuple1 = (IndexTuple) a->tuple;
tuple2 = (IndexTuple) b->tuple;
keysz = state->nKeys;
tupDes = RelationGetDescr(state->indexRel);
if (sortKey->abbrev_converter)
{
datum1 = index_getattr(tuple1, 1, tupDes, &isnull1);
datum2 = index_getattr(tuple2, 1, tupDes, &isnull2);
compare = ApplySortAbbrevFullComparator(datum1, isnull1,
datum2, isnull2,
sortKey);
if (compare != 0)
return compare;
}
/* they are equal, so we only need to examine one null flag */
if (a->isnull1)
equal_hasnull = true;
sortKey++;
for (nkey = 2; nkey <= keysz; nkey++, sortKey++)
{
Datum datum1,
datum2;
bool isnull1,
isnull2;
datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1);
datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2);
@@ -3313,6 +3591,7 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
IndexTuple tuple = (IndexTuple) tup;
unsigned int tuplen = IndexTupleSize(tuple);
IndexTuple newtuple;
Datum original;
/* copy the tuple into sort storage */
newtuple = (IndexTuple) palloc(tuplen);
@@ -3320,10 +3599,54 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
USEMEM(state, GetMemoryChunkSpace(newtuple));
stup->tuple = (void *) newtuple;
/* set up first-column key value */
stup->datum1 = index_getattr(newtuple,
1,
RelationGetDescr(state->indexRel),
&stup->isnull1);
original = index_getattr(newtuple,
1,
RelationGetDescr(state->indexRel),
&stup->isnull1);
if (!state->sortKeys->abbrev_converter || stup->isnull1)
{
/*
* Store ordinary Datum representation, or NULL value. If there is a
* converter it won't expect NULL values, and cost model is not
* required to account for NULL, so in that case we avoid calling
* converter and just set datum1 to "void" representation (to be
* consistent).
*/
stup->datum1 = original;
}
else if (!consider_abort_common(state))
{
/* Store abbreviated key representation */
stup->datum1 = state->sortKeys->abbrev_converter(original,
state->sortKeys);
}
else
{
/* Abort abbreviation */
int i;
stup->datum1 = original;
/*
* Set state to be consistent with never trying abbreviation.
*
* Alter datum1 representation in already-copied tuples, so as to
* ensure a consistent representation (current tuple was just handled).
* Note that we rely on all tuples copied so far actually being
* contained within memtuples array.
*/
for (i = 0; i < state->memtupcount; i++)
{
	SortTuple *mtup = &state->memtuples[i];

	tuple = (IndexTuple) mtup->tuple;
	/*
	 * Re-fetch the unabbreviated leading attribute for this stored tuple.
	 * The null flag must go into the memtuple's own isnull1 field:
	 * writing it through &stup->isnull1 would overwrite the flag of the
	 * tuple currently being copied with that of an unrelated tuple.
	 */
	mtup->datum1 = index_getattr(tuple,
								 1,
								 RelationGetDescr(state->indexRel),
								 &mtup->isnull1);
}
}
}
static void