1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-20 00:42:27 +03:00

Avoid extra index searches through preprocessing.

Transform low_compare and high_compare nbtree skip array inequalities
(with opclasses that offer skip support) in such a way as to allow
_bt_first to consistently apply later keys when it descends the tree.
This can lower the number of index searches for multi-column scans that
use a ">" key on one of the index's prefix columns (or use a "<" key,
when scanning backwards) when it precedes some later lower-order key.

For example, an index qual "WHERE a > 5 AND b = 2" will now be converted
to "WHERE a >= 6 AND b = 2" by a new preprocessing step that takes place
after low_compare and high_compare have been finalized.  That way, the
initial call to _bt_first can use "WHERE a >= 6 AND b = 2" to find an
initial position, rather than just using "WHERE a > 5" -- "b = 2" can be
applied during every _bt_first call.  There's a decent chance that this
will allow such a scan to avoid the extra search that might otherwise be
needed to determine the lowest "a" value still satisfying "WHERE a > 5".

The transformation process can only lower the total number of index
pages read when the use of a more restrictive set of initial positioning
keys in _bt_first actually allows the scan to land on some later leaf
page directly, relative to the unoptimized case (or on an earlier leaf
page directly, when scanning backwards).  But the savings can really add
up in cases where an affected skip array comes after some other array.
For example, a scan indexqual "WHERE x IN (1, 2, 3) AND y > 5 AND z = 2"
can save as many as 3 _bt_first calls by applying the new transformation
to its "y" array (up to 1 extra search can be avoided per "x" element).

Follow-up to commit 92fe23d9, which added nbtree skip scan.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/CAH2-Wz=FJ78K3WsF3iWNxWnUCY9f=Jdg3QPxaXE=uYUbmuRz5Q@mail.gmail.com
This commit is contained in:
Peter Geoghegan 2025-04-04 14:14:08 -04:00
parent 21a152b37f
commit b3f1a13f22
3 changed files with 211 additions and 0 deletions

View File

@ -50,6 +50,12 @@ static bool _bt_saoparray_shrink(IndexScanDesc scan, ScanKey arraysk,
BTArrayKeyInfo *array, bool *qual_ok);
static bool _bt_skiparray_shrink(IndexScanDesc scan, ScanKey skey,
BTArrayKeyInfo *array, bool *qual_ok);
/*
 * Skip array strategy adjustment: _bt_skiparray_strat_adjust dispatches to
 * the decrement routine for a < high_compare key and to the increment
 * routine for a > low_compare key
 */
static void _bt_skiparray_strat_adjust(IndexScanDesc scan, ScanKey arraysk,
BTArrayKeyInfo *array);
static void _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk,
BTArrayKeyInfo *array);
static void _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
BTArrayKeyInfo *array);
static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys);
static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap);
static int _bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out,
@ -1296,6 +1302,171 @@ _bt_skiparray_shrink(IndexScanDesc scan, ScanKey skey, BTArrayKeyInfo *array,
return true;
}
/*
 * _bt_skiparray_strat_adjust() -- make a skip array's bounds inclusive
 *
 * Uses the opfamily's skip support routine to rewrite the skip array's
 * exclusive inequalities as inclusive ones: a > low_compare key (if any)
 * becomes a >= key whose sk_argument has been incremented, and a
 * < high_compare key (if any) becomes a <= key whose sk_argument has been
 * decremented.  The operator strategy and comparison function of each
 * affected key are replaced to match.
 *
 * The point of this optional optimization is to save descents of the index
 * within _bt_first.  When _bt_first is called while the skip array's current
 * element is the MINVAL sentinel, a transformed >= key (unlike the original
 * > key) makes it safe to add lower-order scan keys to the insertion scan
 * key (there must be lower-order scan keys after the skip array).  That
 * avoids an extra _bt_first call whose only purpose would be to locate the
 * first value in the index > sk_argument -- at least when the first real
 * matching value in the index is an exact match for the incremented
 * sk_argument produced here.  (Backwards scans benefit in the same way when
 * they encounter the MAXVAL sentinel, through the < to <= conversion of the
 * high_compare key.)
 *
 * Note: the transformation is only correct if it can never cause the scan
 * to overlook matching tuples.  With cross-type operators we lack the
 * semantic information needed to guarantee that, so cross-type keys are
 * never transformed.  For example, turning "sales_ts > '2024-01-01'::date"
 * into "sales_ts >= '2024-01-02'::date" for a "sales_ts" attribute whose
 * input opclass is timestamp_ops would make the scan overlook almost all
 * (or all) tuples for sales that fell on '2024-01-01'.
 *
 * Note: modifying array->low_compare/array->high_compare in place is safe,
 * since they only point to copies of the scan->keyData[] input scan keys
 * (the copies that _bt_preprocess_array_keys returns for use as input to
 * the standard preprocessing steps in _bt_preprocess_keys).  Everything
 * will be reset if there's a rescan.
 */
static void
_bt_skiparray_strat_adjust(IndexScanDesc scan, ScanKey arraysk,
						   BTArrayKeyInfo *array)
{
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	ScanKey		upper = array->high_compare;
	ScanKey		lower = array->low_compare;
	MemoryContext callerContext;

	/*
	 * This is the last preprocessing step of all: by now the skip array's
	 * final low_compare and high_compare have both been settled
	 */
	Assert(arraysk->sk_flags & SK_BT_SKIP);
	Assert(array->num_elems == -1 && !array->null_elem && array->sksup);

	/* The helpers run with the scan's array context as the current context */
	callerContext = MemoryContextSwitchTo(so->arrayContext);

	/* An exclusive upper bound (<) is a candidate for conversion to <= */
	if (upper)
	{
		if (upper->sk_strategy == BTLessStrategyNumber)
			_bt_skiparray_strat_decrement(scan, arraysk, array);
	}

	/* An exclusive lower bound (>) is a candidate for conversion to >= */
	if (lower)
	{
		if (lower->sk_strategy == BTGreaterStrategyNumber)
			_bt_skiparray_strat_increment(scan, arraysk, array);
	}

	MemoryContextSwitchTo(callerContext);
}
/*
 * Convert skip array's < high_compare key into a <= key
 *
 * Decrements the high_compare key's sk_argument using the skip support
 * decrement callback, then switches the key over to the opfamily's <=
 * operator.  The key is left untouched when it is a cross-type key, or when
 * no usable <= operator can be found.  If the decrement underflows, the
 * whole qual is marked unsatisfiable (so->qual_ok is set to false).
 */
static void
_bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk,
							  BTArrayKeyInfo *array)
{
	Relation	rel = scan->indexRelation;
	int			attoff = arraysk->sk_attno - 1;
	Oid			opfamily = rel->rd_opfamily[attoff];
	Oid			opcintype = rel->rd_opcintype[attoff];
	ScanKey		high_compare = array->high_compare;
	Oid			leop;
	RegProcedure cmp_proc;
	Datum		decremented;
	bool		underflowed;

	Assert(high_compare->sk_strategy == BTLessStrategyNumber);

	/*
	 * The transformation is restricted to keys whose operator type is the
	 * index attribute's input opclass type (an InvalidOid subtype also
	 * qualifies); anything else is a cross-type key, which we leave alone
	 */
	if (!(high_compare->sk_subtype == opcintype ||
		  high_compare->sk_subtype == InvalidOid))
		return;

	/* Decrement; on underflow the qual as a whole is unsatisfiable */
	decremented = array->sksup->decrement(rel, high_compare->sk_argument,
										  &underflowed);
	if (underflowed)
	{
		((BTScanOpaque) scan->opaque)->qual_ok = false;
		return;
	}

	/* The opfamily might not supply a same-type <= operator */
	leop = get_opfamily_member(opfamily, opcintype, opcintype,
							   BTLessEqualStrategyNumber);
	if (!OidIsValid(leop))
		return;

	/* Nor is the operator guaranteed to have a usable underlying function */
	cmp_proc = get_opcode(leop);
	if (!RegProcedureIsValid(cmp_proc))
		return;

	/* Transform < high_compare key into <= key */
	fmgr_info(cmp_proc, &high_compare->sk_func);
	high_compare->sk_argument = decremented;
	high_compare->sk_strategy = BTLessEqualStrategyNumber;
}
/*
 * Convert skip array's > low_compare key into a >= key
 *
 * Increments the low_compare key's sk_argument using the skip support
 * increment callback, then switches the key over to the opfamily's >=
 * operator.  The key is left untouched when it is a cross-type key, or when
 * no usable >= operator can be found.  If the increment overflows, the
 * whole qual is marked unsatisfiable (so->qual_ok is set to false).
 */
static void
_bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
							  BTArrayKeyInfo *array)
{
	Relation	rel = scan->indexRelation;
	int			attoff = arraysk->sk_attno - 1;
	Oid			opfamily = rel->rd_opfamily[attoff];
	Oid			opcintype = rel->rd_opcintype[attoff];
	ScanKey		low_compare = array->low_compare;
	Oid			geop;
	RegProcedure cmp_proc;
	Datum		incremented;
	bool		overflowed;

	Assert(low_compare->sk_strategy == BTGreaterStrategyNumber);

	/*
	 * The transformation is restricted to keys whose operator type is the
	 * index attribute's input opclass type (an InvalidOid subtype also
	 * qualifies); anything else is a cross-type key, which we leave alone
	 */
	if (!(low_compare->sk_subtype == opcintype ||
		  low_compare->sk_subtype == InvalidOid))
		return;

	/* Increment; on overflow the qual as a whole is unsatisfiable */
	incremented = array->sksup->increment(rel, low_compare->sk_argument,
										  &overflowed);
	if (overflowed)
	{
		((BTScanOpaque) scan->opaque)->qual_ok = false;
		return;
	}

	/* The opfamily might not supply a same-type >= operator */
	geop = get_opfamily_member(opfamily, opcintype, opcintype,
							   BTGreaterEqualStrategyNumber);
	if (!OidIsValid(geop))
		return;

	/* Nor is the operator guaranteed to have a usable underlying function */
	cmp_proc = get_opcode(geop);
	if (!RegProcedureIsValid(cmp_proc))
		return;

	/* Transform > low_compare key into >= key */
	fmgr_info(cmp_proc, &low_compare->sk_func);
	low_compare->sk_argument = incremented;
	low_compare->sk_strategy = BTGreaterEqualStrategyNumber;
}
/*
* _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys
*
@ -1839,6 +2010,15 @@ _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap)
}
else
{
/*
* Any skip array low_compare and high_compare scan keys
* are now final. Transform the array's > low_compare key
* into a >= key (and < high_compare keys into a <= key).
*/
if (array->num_elems == -1 && array->sksup &&
!array->null_elem)
_bt_skiparray_strat_adjust(scan, outkey, array);
/* Match found, so done with this array */
arrayidx++;
}

View File

@ -2589,6 +2589,27 @@ ORDER BY thousand;
1 | 1001
(1 row)
-- Skip array preprocessing increments "thousand > -1" to "thousand >= 0"
explain (costs off)
SELECT thousand, tenthous FROM tenk1
WHERE thousand > -1 AND tenthous IN (1001,3000)
ORDER BY thousand limit 2;
QUERY PLAN
--------------------------------------------------------------------------------------------------
Limit
-> Index Only Scan using tenk1_thous_tenthous on tenk1
Index Cond: ((thousand > '-1'::integer) AND (tenthous = ANY ('{1001,3000}'::integer[])))
(3 rows)
SELECT thousand, tenthous FROM tenk1
WHERE thousand > -1 AND tenthous IN (1001,3000)
ORDER BY thousand limit 2;
thousand | tenthous
----------+----------
0 | 3000
1 | 1001
(2 rows)
--
-- Check elimination of constant-NULL subexpressions
--

View File

@ -993,6 +993,16 @@ SELECT thousand, tenthous FROM tenk1
WHERE thousand < 3 and thousand <= 2 AND tenthous = 1001
ORDER BY thousand;
-- Skip array preprocessing increments "thousand > -1" to "thousand >= 0"
explain (costs off)
SELECT thousand, tenthous FROM tenk1
WHERE thousand > -1 AND tenthous IN (1001,3000)
ORDER BY thousand limit 2;
SELECT thousand, tenthous FROM tenk1
WHERE thousand > -1 AND tenthous IN (1001,3000)
ORDER BY thousand limit 2;
--
-- Check elimination of constant-NULL subexpressions
--