mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Allow usage of match_orclause_to_indexcol() for joins
This commit allows transformation of OR-clauses into SAOP's for index scans within nested loop joins. That required the following changes. 1. Make match_orclause_to_indexcol() and group_similar_or_args() understand const-ness in the same way as match_opclause_to_indexcol(). This generally makes our approach more uniform. 2. Make match_join_clauses_to_index() pass OR-clauses to match_clause_to_index(). 3. Also switch match_join_clauses_to_index() to use list_append_unique_ptr() for adding clauses to *joinorclauses. That avoids possible duplicates when processing the same clauses with different indexes. Previously such duplicates were elimited in match_clause_to_index(), but now group_similar_or_args() each time generates distinct copies of grouped OR clauses. Discussion: https://postgr.es/m/CAPpHfdv%2BjtNwofg-p5z86jLYZUTt6tR17Wy00ta0dL%3DwHQN3ZA%40mail.gmail.com Reviewed-by: Andrei Lepikhov <lepihov@gmail.com> Reviewed-by: Alena Rybakina <a.rybakina@postgrespro.ru> Reviewed-by: Pavel Borisov <pashkin.elfe@gmail.com>
This commit is contained in:
@ -1255,6 +1255,7 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
|
||||
ListCell *lc2;
|
||||
List *orargs;
|
||||
List *result = NIL;
|
||||
Index relid = rel->relid;
|
||||
|
||||
Assert(IsA(rinfo->orclause, BoolExpr));
|
||||
orargs = ((BoolExpr *) rinfo->orclause)->args;
|
||||
@ -1319,10 +1320,13 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
|
||||
/*
|
||||
* Check for clauses of the form: (indexkey operator constant) or
|
||||
* (constant operator indexkey). But we don't know a particular index
|
||||
* yet. First check for a constant, which must be Const or Param.
|
||||
* That's cheaper than search for an index key among all indexes.
|
||||
* yet. Therefore, we try to distinguish the potential index key and
|
||||
* constant first, then search for a matching index key among all
|
||||
* indexes.
|
||||
*/
|
||||
if (IsA(leftop, Const) || IsA(leftop, Param))
|
||||
if (bms_is_member(relid, argrinfo->right_relids) &&
|
||||
!bms_is_member(relid, argrinfo->left_relids) &&
|
||||
!contain_volatile_functions(leftop))
|
||||
{
|
||||
opno = get_commutator(opno);
|
||||
|
||||
@ -1333,7 +1337,9 @@ group_similar_or_args(PlannerInfo *root, RelOptInfo *rel, RestrictInfo *rinfo)
|
||||
}
|
||||
nonConstExpr = rightop;
|
||||
}
|
||||
else if (IsA(rightop, Const) || IsA(rightop, Param))
|
||||
else if (bms_is_member(relid, argrinfo->left_relids) &&
|
||||
!bms_is_member(relid, argrinfo->right_relids) &&
|
||||
!contain_volatile_functions(rightop))
|
||||
{
|
||||
nonConstExpr = leftop;
|
||||
}
|
||||
@ -2414,6 +2420,7 @@ match_restriction_clauses_to_index(PlannerInfo *root,
|
||||
* Identify join clauses for the rel that match the index.
|
||||
* Matching clauses are added to *clauseset.
|
||||
* Also, add any potentially usable join OR clauses to *joinorclauses.
|
||||
* They also might be processed by match_clause_to_index() as a whole.
|
||||
*/
|
||||
static void
|
||||
match_join_clauses_to_index(PlannerInfo *root,
|
||||
@ -2432,11 +2439,15 @@ match_join_clauses_to_index(PlannerInfo *root,
|
||||
if (!join_clause_is_movable_to(rinfo, rel))
|
||||
continue;
|
||||
|
||||
/* Potentially usable, so see if it matches the index or is an OR */
|
||||
/*
|
||||
* Potentially usable, so see if it matches the index or is an OR. Use
|
||||
* list_append_unique_ptr() here to avoid possible duplicates when
|
||||
* processing the same clauses with different indexes.
|
||||
*/
|
||||
if (restriction_is_or_clause(rinfo))
|
||||
*joinorclauses = lappend(*joinorclauses, rinfo);
|
||||
else
|
||||
match_clause_to_index(root, rinfo, index, clauseset);
|
||||
*joinorclauses = list_append_unique_ptr(*joinorclauses, rinfo);
|
||||
|
||||
match_clause_to_index(root, rinfo, index, clauseset);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2585,10 +2596,7 @@ match_clause_to_index(PlannerInfo *root,
|
||||
* (3) must match the collation of the index, if collation is relevant.
|
||||
*
|
||||
* Our definition of "const" is exceedingly liberal: we allow anything that
|
||||
* doesn't involve a volatile function or a Var of the index's relation
|
||||
* except for a boolean OR expression input: due to a trade-off between the
|
||||
* expected execution speedup and planning complexity, we limit or->saop
|
||||
* transformation by obvious cases when an index scan can get a profit.
|
||||
* doesn't involve a volatile function or a Var of the index's relation.
|
||||
* In particular, Vars belonging to other relations of the query are
|
||||
* accepted here, since a clause of that form can be used in a
|
||||
* parameterized indexscan. It's the responsibility of higher code levels
|
||||
@ -3247,7 +3255,8 @@ match_orclause_to_indexcol(PlannerInfo *root,
|
||||
Oid arraytype = InvalidOid;
|
||||
Oid inputcollid = InvalidOid;
|
||||
bool firstTime = true;
|
||||
bool haveParam = false;
|
||||
bool haveNonConst = false;
|
||||
Index indexRelid = index->rel->relid;
|
||||
|
||||
Assert(IsA(orclause, BoolExpr));
|
||||
Assert(orclause->boolop == OR_EXPR);
|
||||
@ -3259,10 +3268,9 @@ match_orclause_to_indexcol(PlannerInfo *root,
|
||||
/*
|
||||
* Try to convert a list of OR-clauses to a single SAOP expression. Each
|
||||
* OR entry must be in the form: (indexkey operator constant) or (constant
|
||||
* operator indexkey). Operators of all the entries must match. Constant
|
||||
* might be either Const or Param. To be effective, give up on the first
|
||||
* non-matching entry. Exit is implemented as a break from the loop,
|
||||
* which is catched afterwards.
|
||||
* operator indexkey). Operators of all the entries must match. To be
|
||||
* effective, give up on the first non-matching entry. Exit is
|
||||
* implemented as a break from the loop, which is catched afterwards.
|
||||
*/
|
||||
foreach(lc, orclause->args)
|
||||
{
|
||||
@ -3313,17 +3321,21 @@ match_orclause_to_indexcol(PlannerInfo *root,
|
||||
|
||||
/*
|
||||
* Check for clauses of the form: (indexkey operator constant) or
|
||||
* (constant operator indexkey). Determine indexkey side first, check
|
||||
* the constant later.
|
||||
* (constant operator indexkey). See match_clause_to_indexcol's notes
|
||||
* about const-ness.
|
||||
*/
|
||||
leftop = (Node *) linitial(subClause->args);
|
||||
rightop = (Node *) lsecond(subClause->args);
|
||||
if (match_index_to_operand(leftop, indexcol, index))
|
||||
if (match_index_to_operand(leftop, indexcol, index) &&
|
||||
!bms_is_member(indexRelid, subRinfo->right_relids) &&
|
||||
!contain_volatile_functions(rightop))
|
||||
{
|
||||
indexExpr = leftop;
|
||||
constExpr = rightop;
|
||||
}
|
||||
else if (match_index_to_operand(rightop, indexcol, index))
|
||||
else if (match_index_to_operand(rightop, indexcol, index) &&
|
||||
!bms_is_member(indexRelid, subRinfo->left_relids) &&
|
||||
!contain_volatile_functions(leftop))
|
||||
{
|
||||
opno = get_commutator(opno);
|
||||
if (!OidIsValid(opno))
|
||||
@ -3350,10 +3362,6 @@ match_orclause_to_indexcol(PlannerInfo *root,
|
||||
if (IsA(indexExpr, RelabelType))
|
||||
indexExpr = (Node *) ((RelabelType *) indexExpr)->arg;
|
||||
|
||||
/* We allow constant to be Const or Param */
|
||||
if (!IsA(constExpr, Const) && !IsA(constExpr, Param))
|
||||
break;
|
||||
|
||||
/* Forbid transformation for composite types, records. */
|
||||
if (type_is_rowtype(exprType(constExpr)) ||
|
||||
type_is_rowtype(exprType(indexExpr)))
|
||||
@ -3390,8 +3398,12 @@ match_orclause_to_indexcol(PlannerInfo *root,
|
||||
break;
|
||||
}
|
||||
|
||||
if (IsA(constExpr, Param))
|
||||
haveParam = true;
|
||||
/*
|
||||
* Check if our list of constants in match_clause_to_indexcol's
|
||||
* understanding of const-ness have something other than Const.
|
||||
*/
|
||||
if (!IsA(constExpr, Const))
|
||||
haveNonConst = true;
|
||||
consts = lappend(consts, constExpr);
|
||||
}
|
||||
|
||||
@ -3408,10 +3420,10 @@ match_orclause_to_indexcol(PlannerInfo *root,
|
||||
|
||||
/*
|
||||
* Assemble an array from the list of constants. It seems more profitable
|
||||
* to build a const array. But in the presence of parameters, we don't
|
||||
* to build a const array. But in the presence of other nodes, we don't
|
||||
* have a specific value here and must employ an ArrayExpr instead.
|
||||
*/
|
||||
if (haveParam)
|
||||
if (haveNonConst)
|
||||
{
|
||||
ArrayExpr *arrayExpr = makeNode(ArrayExpr);
|
||||
|
||||
|
@ -2006,6 +2006,27 @@ SELECT * FROM tenk1
|
||||
Filter: (((tenthous)::numeric = '1'::numeric) OR (tenthous = 3) OR ((tenthous)::numeric = '42'::numeric))
|
||||
(2 rows)
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT count(*) FROM tenk1 t1
|
||||
WHERE t1.thousand = 42 OR t1.thousand = (SELECT t2.tenthous FROM tenk1 t2 WHERE t2.thousand = t1.tenthous + 1 LIMIT 1);
|
||||
QUERY PLAN
|
||||
----------------------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t1
|
||||
Filter: ((thousand = 42) OR (thousand = (SubPlan 1)))
|
||||
SubPlan 1
|
||||
-> Limit
|
||||
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
|
||||
Index Cond: (thousand = (t1.tenthous + 1))
|
||||
(7 rows)
|
||||
|
||||
SELECT count(*) FROM tenk1 t1
|
||||
WHERE t1.thousand = 42 OR t1.thousand = (SELECT t2.tenthous FROM tenk1 t2 WHERE t2.thousand = t1.tenthous + 1 LIMIT 1);
|
||||
count
|
||||
-------
|
||||
10
|
||||
(1 row)
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT count(*) FROM tenk1
|
||||
WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
|
||||
@ -3255,6 +3276,17 @@ CREATE INDEX t_b_c_idx ON bitmap_split_or (b, c);
|
||||
CREATE STATISTICS t_a_b_stat (mcv) ON a, b FROM bitmap_split_or;
|
||||
CREATE STATISTICS t_b_c_stat (mcv) ON b, c FROM bitmap_split_or;
|
||||
ANALYZE bitmap_split_or;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM bitmap_split_or t1, bitmap_split_or t2
|
||||
WHERE t1.a = t2.b OR t1.a = 2;
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------
|
||||
Nested Loop
|
||||
-> Seq Scan on bitmap_split_or t2
|
||||
-> Index Scan using t_a_b_idx on bitmap_split_or t1
|
||||
Index Cond: (a = ANY (ARRAY[t2.b, 2]))
|
||||
(4 rows)
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM bitmap_split_or WHERE a = 1 AND (b = 1 OR b = 2) AND c = 2;
|
||||
QUERY PLAN
|
||||
|
@ -3849,14 +3849,11 @@ where q1 = thousand or q2 = thousand;
|
||||
-> Seq Scan on q2
|
||||
-> Bitmap Heap Scan on tenk1
|
||||
Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand))
|
||||
-> BitmapOr
|
||||
-> Bitmap Index Scan on tenk1_thous_tenthous
|
||||
Index Cond: (thousand = q1.q1)
|
||||
-> Bitmap Index Scan on tenk1_thous_tenthous
|
||||
Index Cond: (thousand = q2.q2)
|
||||
-> Bitmap Index Scan on tenk1_thous_tenthous
|
||||
Index Cond: (thousand = ANY (ARRAY[q1.q1, q2.q2]))
|
||||
-> Hash
|
||||
-> Seq Scan on int4_tbl
|
||||
(15 rows)
|
||||
(12 rows)
|
||||
|
||||
explain (costs off)
|
||||
select * from
|
||||
@ -8239,3 +8236,45 @@ GROUP BY s.c1, s.c2;
|
||||
(7 rows)
|
||||
|
||||
DROP TABLE group_tbl;
|
||||
--
|
||||
-- Test for a nested loop join involving index scan, transforming OR-clauses
|
||||
-- to SAOP.
|
||||
--
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT COUNT(*) FROM tenk1 t1, tenk1 t2
|
||||
WHERE t2.thousand = t1.tenthous OR t2.thousand = t1.unique1 OR t2.thousand = t1.unique2;
|
||||
QUERY PLAN
|
||||
-----------------------------------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Nested Loop
|
||||
-> Seq Scan on tenk1 t1
|
||||
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
|
||||
Index Cond: (thousand = ANY (ARRAY[t1.tenthous, t1.unique1, t1.unique2]))
|
||||
(5 rows)
|
||||
|
||||
SELECT COUNT(*) FROM tenk1 t1, tenk1 t2
|
||||
WHERE t2.thousand = t1.tenthous OR t2.thousand = t1.unique1 OR t2.thousand = t1.unique2;
|
||||
count
|
||||
-------
|
||||
20000
|
||||
(1 row)
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
|
||||
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
Aggregate
|
||||
-> Nested Loop Left Join
|
||||
-> Seq Scan on onek t1
|
||||
-> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
|
||||
Index Cond: (thousand = ANY (ARRAY[t1.tenthous, t1.thousand]))
|
||||
(5 rows)
|
||||
|
||||
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
|
||||
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
|
||||
count
|
||||
-------
|
||||
19000
|
||||
(1 row)
|
||||
|
||||
|
@ -2533,24 +2533,24 @@ where not exists (select 1 from prtx2
|
||||
-> Seq Scan on prtx1_1
|
||||
Filter: ((a < 20) AND (c = 91))
|
||||
-> Bitmap Heap Scan on prtx2_1
|
||||
Recheck Cond: ((b = (prtx1_1.b + 1)) OR (c = 99))
|
||||
Recheck Cond: ((c = 99) OR (b = (prtx1_1.b + 1)))
|
||||
Filter: (a = prtx1_1.a)
|
||||
-> BitmapOr
|
||||
-> Bitmap Index Scan on prtx2_1_b_idx
|
||||
Index Cond: (b = (prtx1_1.b + 1))
|
||||
-> Bitmap Index Scan on prtx2_1_c_idx
|
||||
Index Cond: (c = 99)
|
||||
-> Bitmap Index Scan on prtx2_1_b_idx
|
||||
Index Cond: (b = (prtx1_1.b + 1))
|
||||
-> Nested Loop Anti Join
|
||||
-> Seq Scan on prtx1_2
|
||||
Filter: ((a < 20) AND (c = 91))
|
||||
-> Bitmap Heap Scan on prtx2_2
|
||||
Recheck Cond: ((b = (prtx1_2.b + 1)) OR (c = 99))
|
||||
Recheck Cond: ((c = 99) OR (b = (prtx1_2.b + 1)))
|
||||
Filter: (a = prtx1_2.a)
|
||||
-> BitmapOr
|
||||
-> Bitmap Index Scan on prtx2_2_b_idx
|
||||
Index Cond: (b = (prtx1_2.b + 1))
|
||||
-> Bitmap Index Scan on prtx2_2_c_idx
|
||||
Index Cond: (c = 99)
|
||||
-> Bitmap Index Scan on prtx2_2_b_idx
|
||||
Index Cond: (b = (prtx1_2.b + 1))
|
||||
(23 rows)
|
||||
|
||||
select * from prtx1
|
||||
|
@ -781,6 +781,12 @@ EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1
|
||||
WHERE tenthous = 1::numeric OR tenthous = 3::int4 OR tenthous = 42::numeric;
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT count(*) FROM tenk1 t1
|
||||
WHERE t1.thousand = 42 OR t1.thousand = (SELECT t2.tenthous FROM tenk1 t2 WHERE t2.thousand = t1.tenthous + 1 LIMIT 1);
|
||||
SELECT count(*) FROM tenk1 t1
|
||||
WHERE t1.thousand = 42 OR t1.thousand = (SELECT t2.tenthous FROM tenk1 t2 WHERE t2.thousand = t1.tenthous + 1 LIMIT 1);
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT count(*) FROM tenk1
|
||||
WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
|
||||
@ -1367,6 +1373,9 @@ CREATE STATISTICS t_a_b_stat (mcv) ON a, b FROM bitmap_split_or;
|
||||
CREATE STATISTICS t_b_c_stat (mcv) ON b, c FROM bitmap_split_or;
|
||||
ANALYZE bitmap_split_or;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM bitmap_split_or t1, bitmap_split_or t2
|
||||
WHERE t1.a = t2.b OR t1.a = 2;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM bitmap_split_or WHERE a = 1 AND (b = 1 OR b = 2) AND c = 2;
|
||||
DROP TABLE bitmap_split_or;
|
||||
|
||||
|
@ -3016,7 +3016,6 @@ SELECT t1.a FROM skip_fetch t1 LEFT JOIN skip_fetch t2 ON t2.a = 1 WHERE t2.a IS
|
||||
|
||||
RESET enable_indexonlyscan;
|
||||
RESET enable_seqscan;
|
||||
|
||||
-- Test BitmapHeapScan with a rescan releases resources correctly
|
||||
SET enable_seqscan = off;
|
||||
SET enable_indexscan = off;
|
||||
@ -3046,3 +3045,20 @@ SELECT 1 FROM group_tbl t1
|
||||
GROUP BY s.c1, s.c2;
|
||||
|
||||
DROP TABLE group_tbl;
|
||||
|
||||
--
|
||||
-- Test for a nested loop join involving index scan, transforming OR-clauses
|
||||
-- to SAOP.
|
||||
--
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT COUNT(*) FROM tenk1 t1, tenk1 t2
|
||||
WHERE t2.thousand = t1.tenthous OR t2.thousand = t1.unique1 OR t2.thousand = t1.unique2;
|
||||
SELECT COUNT(*) FROM tenk1 t1, tenk1 t2
|
||||
WHERE t2.thousand = t1.tenthous OR t2.thousand = t1.unique1 OR t2.thousand = t1.unique2;
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
|
||||
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
|
||||
SELECT COUNT(*) FROM onek t1 LEFT JOIN tenk1 t2
|
||||
ON (t2.thousand = t1.tenthous OR t2.thousand = t1.thousand);
|
||||
|
Reference in New Issue
Block a user