1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

Support ORDER BY ... NULLS FIRST/LAST, and add ASC/DESC/NULLS FIRST/NULLS LAST

per-column options for btree indexes.  The planner's support for this is still
pretty rudimentary; it does not yet know how to plan mergejoins with
nondefault ordering options.  The documentation is pretty rudimentary, too.
I'll work on improving that stuff later.

Note incompatible change from prior behavior: ORDER BY ... USING will now be
rejected if the operator is not a less-than or greater-than member of some
btree opclass.  This prevents less-than-sane behavior if an operator that
doesn't actually define a proper sort ordering is selected.
This commit is contained in:
Tom Lane
2007-01-09 02:14:16 +00:00
parent 3a32ba2f3f
commit 4431758229
65 changed files with 1476 additions and 593 deletions

View File

@@ -1,4 +1,4 @@
$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.15 2006/12/28 23:16:39 tgl Exp $
$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.16 2007/01/09 02:14:10 tgl Exp $
This directory contains a correct implementation of Lehman and Yao's
high-concurrency B-tree management algorithm (P. Lehman and S. Yao,
@@ -483,5 +483,6 @@ Notes to operator class implementors
With this implementation, we require each supported combination of
datatypes to supply us with a comparison procedure via pg_amproc.
This procedure must take two nonnull values A and B and return an int32 < 0,
0, or > 0 if A < B, A = B, or A > B, respectively. See nbtcompare.c for
examples.
0, or > 0 if A < B, A = B, or A > B, respectively. The procedure must
not return INT_MIN for "A < B", since the value may be negated before
being tested for sign. See nbtcompare.c for examples.

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.53 2007/01/05 22:19:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.54 2007/01/09 02:14:10 tgl Exp $
*
* NOTES
*
@@ -22,10 +22,10 @@
*
* The result is always an int32 regardless of the input datatype.
*
* NOTE: although any negative int32 is acceptable for reporting "<",
* and any positive int32 is acceptable for reporting ">", routines
* Although any negative int32 (except INT_MIN) is acceptable for reporting
* "<", and any positive int32 is acceptable for reporting ">", routines
* that work on 32-bit or wider datatypes can't just return "a - b".
* That could overflow and give the wrong answer. Also, one should not
* That could overflow and give the wrong answer. Also, one must not
* return INT_MIN to report "<", since some callers will negate the result.
*
* NOTE: it is critical that the comparison function impose a total order

View File

@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.110 2007/01/05 22:19:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.111 2007/01/09 02:14:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -376,12 +376,17 @@ _bt_compare(Relation rel,
{
if (isNull)
result = 0; /* NULL "=" NULL */
else if (scankey->sk_flags & SK_BT_NULLS_FIRST)
result = -1; /* NULL "<" NOT_NULL */
else
result = 1; /* NULL ">" NOT_NULL */
}
else if (isNull) /* key is NOT_NULL and item is NULL */
{
result = -1; /* NOT_NULL "<" NULL */
if (scankey->sk_flags & SK_BT_NULLS_FIRST)
result = 1; /* NOT_NULL ">" NULL */
else
result = -1; /* NOT_NULL "<" NULL */
}
else
{
@@ -390,16 +395,15 @@ _bt_compare(Relation rel,
* the sk_argument as right arg (they might be of different
* types). Since it is convenient for callers to think of
* _bt_compare as comparing the scankey to the index item, we have
* to flip the sign of the comparison result.
*
* Note: curious-looking coding is to avoid overflow if comparison
* function returns INT_MIN. There is no risk of overflow for
* positive results.
* to flip the sign of the comparison result. (Unless it's a DESC
* column, in which case we *don't* flip the sign.)
*/
result = DatumGetInt32(FunctionCall2(&scankey->sk_func,
datum,
scankey->sk_argument));
result = (result < 0) ? 1 : -result;
if (!(scankey->sk_flags & SK_BT_DESC))
result = -result;
}
/* if the keys are unequal, return the difference */
@@ -617,11 +621,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* in the first row member makes the condition unmatchable, just
* like qual_ok = false.
*/
cur = (ScanKey) DatumGetPointer(cur->sk_argument);
Assert(cur->sk_flags & SK_ROW_MEMBER);
if (cur->sk_flags & SK_ISNULL)
ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
Assert(subkey->sk_flags & SK_ROW_MEMBER);
if (subkey->sk_flags & SK_ISNULL)
return false;
memcpy(scankeys + i, cur, sizeof(ScanKeyData));
memcpy(scankeys + i, subkey, sizeof(ScanKeyData));
/*
* If the row comparison is the last positioning key we accepted,
@@ -632,21 +637,46 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* even if the row comparison is of ">" or "<" type, because the
* condition applied to all but the last row member is effectively
* ">=" or "<=", and so the extra keys don't break the positioning
* scheme.
* scheme. But, by the same token, if we aren't able to use all
* the row members, then the part of the row comparison that we
* did use has to be treated as just a ">=" or "<=" condition,
* and so we'd better adjust strat_total accordingly.
*/
if (i == keysCount - 1)
{
while (!(cur->sk_flags & SK_ROW_END))
bool used_all_subkeys = false;
Assert(!(subkey->sk_flags & SK_ROW_END));
for(;;)
{
cur++;
Assert(cur->sk_flags & SK_ROW_MEMBER);
if (cur->sk_attno != keysCount + 1)
subkey++;
Assert(subkey->sk_flags & SK_ROW_MEMBER);
if (subkey->sk_attno != keysCount + 1)
break; /* out-of-sequence, can't use it */
if (cur->sk_flags & SK_ISNULL)
if (subkey->sk_strategy != cur->sk_strategy)
break; /* wrong direction, can't use it */
if (subkey->sk_flags & SK_ISNULL)
break; /* can't use null keys */
Assert(keysCount < INDEX_MAX_KEYS);
memcpy(scankeys + keysCount, cur, sizeof(ScanKeyData));
memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData));
keysCount++;
if (subkey->sk_flags & SK_ROW_END)
{
used_all_subkeys = true;
break;
}
}
if (!used_all_subkeys)
{
switch (strat_total)
{
case BTLessStrategyNumber:
strat_total = BTLessEqualStrategyNumber;
break;
case BTGreaterStrategyNumber:
strat_total = BTGreaterEqualStrategyNumber;
break;
}
}
break; /* done with outer loop */
}

View File

@@ -57,7 +57,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.109 2007/01/05 22:19:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.110 2007/01/09 02:14:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -728,39 +728,45 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
ScanKey entry;
Datum attrDatum1,
attrDatum2;
bool isFirstNull,
isSecondNull;
bool isNull1,
isNull2;
int32 compare;
entry = indexScanKey + i - 1;
attrDatum1 = index_getattr(itup, i, tupdes,
&isFirstNull);
attrDatum2 = index_getattr(itup2, i, tupdes,
&isSecondNull);
if (isFirstNull)
attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
if (isNull1)
{
if (!isSecondNull)
{
load1 = false;
break;
}
if (isNull2)
compare = 0; /* NULL "=" NULL */
else if (entry->sk_flags & SK_BT_NULLS_FIRST)
compare = -1; /* NULL "<" NOT_NULL */
else
compare = 1; /* NULL ">" NOT_NULL */
}
else if (isNull2)
{
if (entry->sk_flags & SK_BT_NULLS_FIRST)
compare = 1; /* NOT_NULL ">" NULL */
else
compare = -1; /* NOT_NULL "<" NULL */
}
else if (isSecondNull)
break;
else
{
int32 compare;
compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
attrDatum1,
attrDatum2));
if (compare > 0)
{
load1 = false;
break;
}
else if (compare < 0)
break;
if (entry->sk_flags & SK_BT_DESC)
compare = -compare;
}
if (compare > 0)
{
load1 = false;
break;
}
else if (compare < 0)
break;
}
}
else

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.81 2007/01/05 22:19:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.82 2007/01/09 02:14:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -30,6 +30,7 @@
static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
ScanKey leftarg, ScanKey rightarg,
bool *result);
static void _bt_mark_scankey_with_indoption(ScanKey skey, int16 *indoption);
static void _bt_mark_scankey_required(ScanKey skey);
static bool _bt_check_rowcompare(ScanKey skey,
IndexTuple tuple, TupleDesc tupdesc,
@@ -49,10 +50,12 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
ScanKey skey;
TupleDesc itupdesc;
int natts;
int16 *indoption;
int i;
itupdesc = RelationGetDescr(rel);
natts = RelationGetNumberOfAttributes(rel);
indoption = rel->rd_indoption;
skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
@@ -61,6 +64,7 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
FmgrInfo *procinfo;
Datum arg;
bool null;
int flags;
/*
* We can use the cached (default) support procs since no cross-type
@@ -68,8 +72,9 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
*/
procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
arg = index_getattr(itup, i + 1, itupdesc, &null);
flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
ScanKeyEntryInitializeWithInfo(&skey[i],
null ? SK_ISNULL : 0,
flags,
(AttrNumber) (i + 1),
InvalidStrategy,
InvalidOid,
@@ -96,23 +101,27 @@ _bt_mkscankey_nodata(Relation rel)
{
ScanKey skey;
int natts;
int16 *indoption;
int i;
natts = RelationGetNumberOfAttributes(rel);
indoption = rel->rd_indoption;
skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
for (i = 0; i < natts; i++)
{
FmgrInfo *procinfo;
int flags;
/*
* We can use the cached (default) support procs since no cross-type
* comparison can be needed.
*/
procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
flags = SK_ISNULL | (indoption[i] << SK_BT_INDOPTION_SHIFT);
ScanKeyEntryInitializeWithInfo(&skey[i],
SK_ISNULL,
flags,
(AttrNumber) (i + 1),
InvalidStrategy,
InvalidOid,
@@ -157,7 +166,13 @@ _bt_freestack(BTStack stack)
* the number of input keys, so->numberOfKeys gets the number of output
* keys (possibly less, never greater).
*
* The primary purpose of this routine is to discover how many scan keys
* The output keys are marked with additional sk_flag bits beyond the
* system-standard bits supplied by the caller. The DESC and NULLS_FIRST
* indoption bits for the relevant index attribute are copied into the flags.
* Also, for a DESC column, we commute (flip) all the sk_strategy numbers
* so that the index sorts in the desired direction.
*
* One key purpose of this routine is to discover how many scan keys
* must be satisfied to continue the scan. It also attempts to eliminate
* redundant keys and detect contradictory keys. (If the index opfamily
* provides incomplete sets of cross-type operators, we may fail to detect
@@ -219,6 +234,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
int numberOfKeys = scan->numberOfKeys;
int16 *indoption = scan->indexRelation->rd_indoption;
int new_numberOfKeys;
int numberOfEqualCols;
ScanKey inkeys;
@@ -254,7 +270,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
*/
if (cur->sk_flags & SK_ISNULL)
so->qual_ok = false;
memcpy(outkeys, inkeys, sizeof(ScanKeyData));
_bt_mark_scankey_with_indoption(cur, indoption);
memcpy(outkeys, cur, sizeof(ScanKeyData));
so->numberOfKeys = 1;
/* We can mark the qual as required if it's for first index col */
if (cur->sk_attno == 1)
@@ -407,6 +424,9 @@ _bt_preprocess_keys(IndexScanDesc scan)
memset(xform, 0, sizeof(xform));
}
/* apply indoption to scankey (might change sk_strategy!) */
_bt_mark_scankey_with_indoption(cur, indoption);
/* check strategy this key's operator corresponds to */
j = cur->sk_strategy - 1;
@@ -486,6 +506,11 @@ _bt_preprocess_keys(IndexScanDesc scan)
* Note: op always points at the same ScanKey as either leftarg or rightarg.
* Since we don't scribble on the scankeys, this aliasing should cause no
* trouble.
*
* Note: this routine needs to be insensitive to any DESC option applied
* to the index column. For example, "x < 4" is a tighter constraint than
* "x < 5" regardless of which way the index is sorted. We don't worry about
* NULLS FIRST/LAST either, since the given values are never nulls.
*/
static bool
_bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
@@ -498,6 +523,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
optype,
opcintype,
cmp_op;
StrategyNumber strat;
/*
* The opfamily we need to worry about is identified by the index column.
@@ -538,11 +564,18 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
* operator. (This cannot result in infinite recursion, since no
* indexscan initiated by syscache lookup will use cross-data-type
* operators.)
*
* If the sk_strategy was flipped by _bt_mark_scankey_with_indoption,
* we have to un-flip it to get the correct opfamily member.
*/
strat = op->sk_strategy;
if (op->sk_flags & SK_BT_DESC)
strat = BTCommuteStrategyNumber(strat);
cmp_op = get_opfamily_member(rel->rd_opfamily[leftarg->sk_attno - 1],
lefttype,
righttype,
op->sk_strategy);
strat);
if (OidIsValid(cmp_op))
{
RegProcedure cmp_proc = get_opcode(cmp_op);
@@ -561,6 +594,48 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
return false;
}
/*
* Mark a scankey with info from the index's indoption array.
*
* We copy the appropriate indoption value into the scankey sk_flags
* (shifting to avoid clobbering system-defined flag bits). Also, if
* the DESC option is set, commute (flip) the operator strategy number.
*
* This function is applied to the *input* scankey structure; therefore
* on a rescan we will be looking at already-processed scankeys. Hence
* we have to be careful not to re-commute the strategy if we already did it.
* It's a bit ugly to modify the caller's copy of the scankey but in practice
* there shouldn't be any problem, since the index's indoptions are certainly
* not going to change while the scankey survives.
*/
static void
_bt_mark_scankey_with_indoption(ScanKey skey, int16 *indoption)
{
int addflags;
addflags = indoption[skey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
if ((addflags & SK_BT_DESC) && !(skey->sk_flags & SK_BT_DESC))
skey->sk_strategy = BTCommuteStrategyNumber(skey->sk_strategy);
skey->sk_flags |= addflags;
if (skey->sk_flags & SK_ROW_HEADER)
{
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
for (;;)
{
Assert(subkey->sk_flags & SK_ROW_MEMBER);
addflags = indoption[subkey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
if ((addflags & SK_BT_DESC) && !(subkey->sk_flags & SK_BT_DESC))
subkey->sk_strategy = BTCommuteStrategyNumber(subkey->sk_strategy);
subkey->sk_flags |= addflags;
if (subkey->sk_flags & SK_ROW_END)
break;
subkey++;
}
}
}
/*
* Mark a scankey as "required to continue the scan".
*
@@ -568,7 +643,8 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
* directions or just one. Also, if the key is a row comparison header,
* we have to mark the appropriate subsidiary ScanKeys as required. In
* such cases, the first subsidiary key is required, but subsequent ones
* are required only as long as they correspond to successive index columns.
* are required only as long as they correspond to successive index columns
* and match the leading column as to sort direction.
* Otherwise the row comparison ordering is different from the index ordering
* and so we can't stop the scan on the basis of those lower-order columns.
*
@@ -616,9 +692,10 @@ _bt_mark_scankey_required(ScanKey skey)
for (;;)
{
Assert(subkey->sk_flags & SK_ROW_MEMBER);
Assert(subkey->sk_strategy == skey->sk_strategy);
if (subkey->sk_attno != attno)
break; /* non-adjacent key, so not required */
if (subkey->sk_strategy != skey->sk_strategy)
break; /* wrong direction, so not required */
subkey->sk_flags |= addflags;
if (subkey->sk_flags & SK_ROW_END)
break;
@@ -731,15 +808,32 @@ _bt_checkkeys(IndexScanDesc scan,
if (isNull)
{
/*
* Since NULLs are sorted after non-NULLs, we know we have reached
* the upper limit of the range of values for this index attr. On
* a forward scan, we can stop if this qual is one of the "must
* match" subset. On a backward scan, however, we should keep
* going.
*/
if ((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir))
*continuescan = false;
if (key->sk_flags & SK_BT_NULLS_FIRST)
{
/*
* Since NULLs are sorted before non-NULLs, we know we have
* reached the lower limit of the range of values for this
* index attr. On a backward scan, we can stop if this qual is
* one of the "must match" subset. On a forward scan,
* however, we should keep going.
*/
if ((key->sk_flags & SK_BT_REQBKWD) &&
ScanDirectionIsBackward(dir))
*continuescan = false;
}
else
{
/*
* Since NULLs are sorted after non-NULLs, we know we have
* reached the upper limit of the range of values for this
* index attr. On a forward scan, we can stop if this qual is
* one of the "must match" subset. On a backward scan,
* however, we should keep going.
*/
if ((key->sk_flags & SK_BT_REQFWD) &&
ScanDirectionIsForward(dir))
*continuescan = false;
}
/*
* In any case, this indextuple doesn't match the qual.
@@ -809,7 +903,6 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
bool isNull;
Assert(subkey->sk_flags & SK_ROW_MEMBER);
Assert(subkey->sk_strategy == skey->sk_strategy);
datum = index_getattr(tuple,
subkey->sk_attno,
@@ -818,16 +911,32 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
if (isNull)
{
/*
* Since NULLs are sorted after non-NULLs, we know we have reached
* the upper limit of the range of values for this index attr. On
* a forward scan, we can stop if this qual is one of the "must
* match" subset. On a backward scan, however, we should keep
* going.
*/
if ((subkey->sk_flags & SK_BT_REQFWD) &&
ScanDirectionIsForward(dir))
*continuescan = false;
if (subkey->sk_flags & SK_BT_NULLS_FIRST)
{
/*
* Since NULLs are sorted before non-NULLs, we know we have
* reached the lower limit of the range of values for this
* index attr. On a backward scan, we can stop if this qual is
* one of the "must match" subset. On a forward scan,
* however, we should keep going.
*/
if ((subkey->sk_flags & SK_BT_REQBKWD) &&
ScanDirectionIsBackward(dir))
*continuescan = false;
}
else
{
/*
* Since NULLs are sorted after non-NULLs, we know we have
* reached the upper limit of the range of values for this
* index attr. On a forward scan, we can stop if this qual is
* one of the "must match" subset. On a backward scan,
* however, we should keep going.
*/
if ((subkey->sk_flags & SK_BT_REQFWD) &&
ScanDirectionIsForward(dir))
*continuescan = false;
}
/*
* In any case, this indextuple doesn't match the qual.
@@ -859,6 +968,9 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
datum,
subkey->sk_argument));
if (subkey->sk_flags & SK_BT_DESC)
cmpresult = -cmpresult;
/* Done comparing if unequal, else advance to next column */
if (cmpresult != 0)
break;