1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-05 07:21:24 +03:00

Transform OR-clauses to SAOP's during index matching

This commit makes match_clause_to_indexcol() match
"(indexkey op C1) OR (indexkey op C2) ... (indexkey op CN)" expression
to the index while transforming it into "indexkey op ANY(ARRAY[C1, C2, ...])"
(ScalarArrayOpExpr node).

This transformation allows handling long OR-clauses with single IndexScan
avoiding diving them into a slower BitmapOr.

We currently restrict Ci to be either Const or Param to apply this
transformation only when it's clearly beneficial.  However, in the future,
we might switch to a liberal understanding of constants, as it is in other
cases.

Discussion: https://postgr.es/m/567ED6CA.2040504%40sigaev.ru
Author: Alena Rybakina, Andrey Lepikhov, Alexander Korotkov
Reviewed-by: Peter Geoghegan, Ranier Vilela, Alexander Korotkov, Robert Haas
Reviewed-by: Jian He, Tom Lane, Nikolay Shaplov
This commit is contained in:
Alexander Korotkov
2024-11-24 01:40:20 +02:00
parent 869ee4f10e
commit d4378c0005
11 changed files with 735 additions and 26 deletions

View File

@ -20,6 +20,7 @@
#include "access/stratnum.h"
#include "access/sysattr.h"
#include "catalog/pg_am.h"
#include "catalog/pg_amop.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
@ -32,8 +33,10 @@
#include "optimizer/paths.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
/* XXX see PartCollMatchesExprColl */
@ -177,6 +180,10 @@ static IndexClause *match_rowcompare_to_indexcol(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
IndexOptInfo *index);
static IndexClause *match_orclause_to_indexcol(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
IndexOptInfo *index);
static IndexClause *expand_indexqual_rowcompare(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
@ -2149,7 +2156,10 @@ match_clause_to_index(PlannerInfo *root,
* (3) must match the collation of the index, if collation is relevant.
*
* Our definition of "const" is exceedingly liberal: we allow anything that
* doesn't involve a volatile function or a Var of the index's relation.
* doesn't involve a volatile function or a Var of the index's relation
* except for a boolean OR expression input: due to a trade-off between the
* expected execution speedup and planning complexity, we limit or->saop
* transformation by obvious cases when an index scan can get a profit.
* In particular, Vars belonging to other relations of the query are
* accepted here, since a clause of that form can be used in a
* parameterized indexscan. It's the responsibility of higher code levels
@ -2179,6 +2189,10 @@ match_clause_to_index(PlannerInfo *root,
* It is also possible to match ScalarArrayOpExpr clauses to indexes, when
* the clause is of the form "indexkey op ANY (arrayconst)".
*
* It is also possible to match a list of OR clauses if it might be
* transformed into a single ScalarArrayOpExpr clause. On success,
* the returning index clause will contain a trasformed clause.
*
* For boolean indexes, it is also possible to match the clause directly
* to the indexkey; or perhaps the clause is (NOT indexkey).
*
@ -2228,9 +2242,9 @@ match_clause_to_indexcol(PlannerInfo *root,
}
/*
* Clause must be an opclause, funcclause, ScalarArrayOpExpr, or
* RowCompareExpr. Or, if the index supports it, we can handle IS
* NULL/NOT NULL clauses.
* Clause must be an opclause, funcclause, ScalarArrayOpExpr,
* RowCompareExpr, or OR-clause that could be converted to SAOP. Or, if
* the index supports it, we can handle IS NULL/NOT NULL clauses.
*/
if (IsA(clause, OpExpr))
{
@ -2248,6 +2262,10 @@ match_clause_to_indexcol(PlannerInfo *root,
{
return match_rowcompare_to_indexcol(root, rinfo, indexcol, index);
}
else if (restriction_is_or_clause(rinfo))
{
return match_orclause_to_indexcol(root, rinfo, indexcol, index);
}
else if (index->amsearchnulls && IsA(clause, NullTest))
{
NullTest *nt = (NullTest *) clause;
@ -2771,6 +2789,264 @@ match_rowcompare_to_indexcol(PlannerInfo *root,
return NULL;
}
/*
* match_orclause_to_indexcol()
* Handles the OR-expr case for match_clause_to_indexcol() in the case
* when it could be transformed to ScalarArrayOpExpr.
*
* In this routine, we attempt to transform a list of OR-clause args into a
* single SAOP expression matching the target index column. On success,
* return an IndexClause, containing the transformed expression or NULL,
* if failed.
*/
static IndexClause *
match_orclause_to_indexcol(PlannerInfo *root,
RestrictInfo *rinfo,
int indexcol,
IndexOptInfo *index)
{
ListCell *lc;
BoolExpr *orclause = (BoolExpr *) rinfo->orclause;
Node *indexExpr = NULL;
List *consts = NIL;
Node *arrayNode = NULL;
ScalarArrayOpExpr *saopexpr = NULL;
Oid matchOpno = InvalidOid;
IndexClause *iclause;
Oid consttype = InvalidOid;
Oid arraytype = InvalidOid;
Oid inputcollid = InvalidOid;
bool firstTime = true;
bool haveParam = false;
Assert(IsA(orclause, BoolExpr));
Assert(orclause->boolop == OR_EXPR);
/*
* Try to convert a list of OR-clauses to a single SAOP expression. Each
* OR entry must be in the form: (indexkey operator constant) or (constant
* operator indexkey). Operators of all the entries must match. Constant
* might be either Const or Param. To be effective, give up on the first
* non-matching entry. Exit is implemented as a break from the loop,
* which is catched afterwards.
*/
foreach(lc, orclause->args)
{
RestrictInfo *subRinfo;
OpExpr *subClause;
Oid opno;
Node *leftop,
*rightop;
Node *constExpr;
if (!IsA(lfirst(lc), RestrictInfo))
break;
subRinfo = (RestrictInfo *) lfirst(lc);
/* Only operator clauses can match */
if (!IsA(subRinfo->clause, OpExpr))
break;
subClause = (OpExpr *) subRinfo->clause;
opno = subClause->opno;
/* Only binary operators can match */
if (list_length(subClause->args) != 2)
break;
/*
* The parameters below must match between sub-rinfo and its parent as
* make_restrictinfo() fills them with the same values, and further
* modifications are also the same for the whole subtree. However,
* still make a sanity check.
*/
Assert(subRinfo->is_pushed_down == rinfo->is_pushed_down);
Assert(subRinfo->is_clone == rinfo->is_clone);
Assert(subRinfo->security_level == rinfo->security_level);
Assert(bms_equal(subRinfo->incompatible_relids, rinfo->incompatible_relids));
Assert(bms_equal(subRinfo->outer_relids, rinfo->outer_relids));
/*
* Also, check that required_relids in sub-rinfo is subset of parent's
* required_relids.
*/
Assert(bms_is_subset(subRinfo->required_relids, rinfo->required_relids));
/* Only the operator returning a boolean suit the transformation. */
if (get_op_rettype(opno) != BOOLOID)
break;
/*
* Check for clauses of the form: (indexkey operator constant) or
* (constant operator indexkey). Determine indexkey side first, check
* the constant later.
*/
leftop = (Node *) linitial(subClause->args);
rightop = (Node *) lsecond(subClause->args);
if (match_index_to_operand(leftop, indexcol, index))
{
indexExpr = leftop;
constExpr = rightop;
}
else if (match_index_to_operand(rightop, indexcol, index))
{
opno = get_commutator(opno);
if (!OidIsValid(opno))
{
/* commutator doesn't exist, we can't reverse the order */
break;
}
indexExpr = rightop;
constExpr = leftop;
}
else
{
break;
}
/*
* Ignore any RelabelType node above the operands. This is needed to
* be able to apply indexscanning in binary-compatible-operator cases.
* Note: we can assume there is at most one RelabelType node;
* eval_const_expressions() will have simplified if more than one.
*/
if (IsA(constExpr, RelabelType))
constExpr = (Node *) ((RelabelType *) constExpr)->arg;
if (IsA(indexExpr, RelabelType))
indexExpr = (Node *) ((RelabelType *) indexExpr)->arg;
/* We allow constant to be Const or Param */
if (!IsA(constExpr, Const) && !IsA(constExpr, Param))
break;
/* Forbid transformation for composite types, records. */
if (type_is_rowtype(exprType(constExpr)) ||
type_is_rowtype(exprType(indexExpr)))
break;
/*
* Save information about the operator, type, and collation for the
* first matching qual. Then, check that subsequent quals match the
* first.
*/
if (firstTime)
{
matchOpno = opno;
consttype = exprType(constExpr);
arraytype = get_array_type(consttype);
inputcollid = subClause->inputcollid;
/*
* Check that the operator is presented in the opfamily and that
* the expression collation matches the index collation. Also,
* there must be an array type to construct an array later.
*/
if (!IndexCollMatchesExprColl(index->indexcollations[indexcol], inputcollid) ||
!op_in_opfamily(matchOpno, index->opfamily[indexcol]) ||
!OidIsValid(arraytype))
break;
firstTime = false;
}
else
{
if (opno != matchOpno ||
inputcollid != subClause->inputcollid ||
consttype != exprType(constExpr))
break;
}
if (IsA(constExpr, Param))
haveParam = true;
consts = lappend(consts, constExpr);
}
/*
* Catch the break from the loop above. Normally, a foreach() loop ends
* up with a NULL list cell. A non-NULL list cell indicates a break from
* the foreach() loop. Free the consts list and return NULL then.
*/
if (lc != NULL)
{
list_free(consts);
return NULL;
}
/*
* Assemble an array from the list of constants. It seems more profitable
* to build a const array. But in the presence of parameters, we don't
* have a specific value here and must employ an ArrayExpr instead.
*/
if (haveParam)
{
ArrayExpr *arrayExpr = makeNode(ArrayExpr);
/* array_collid will be set by parse_collate.c */
arrayExpr->element_typeid = consttype;
arrayExpr->array_typeid = arraytype;
arrayExpr->multidims = false;
arrayExpr->elements = consts;
arrayExpr->location = -1;
arrayNode = (Node *) arrayExpr;
}
else
{
int16 typlen;
bool typbyval;
char typalign;
Datum *elems;
int i = 0;
ArrayType *arrayConst;
get_typlenbyvalalign(consttype, &typlen, &typbyval, &typalign);
elems = (Datum *) palloc(sizeof(Datum) * list_length(consts));
foreach_node(Const, value, consts)
{
Assert(!value->constisnull && value->constvalue);
elems[i++] = value->constvalue;
}
arrayConst = construct_array(elems, i, consttype,
typlen, typbyval, typalign);
arrayNode = (Node *) makeConst(arraytype, -1, inputcollid,
-1, PointerGetDatum(arrayConst),
false, false);
pfree(elems);
list_free(consts);
}
/* Build the SAOP expression node */
saopexpr = makeNode(ScalarArrayOpExpr);
saopexpr->opno = matchOpno;
saopexpr->opfuncid = get_opcode(matchOpno);
saopexpr->hashfuncid = InvalidOid;
saopexpr->negfuncid = InvalidOid;
saopexpr->useOr = true;
saopexpr->inputcollid = inputcollid;
saopexpr->args = list_make2(indexExpr, arrayNode);
saopexpr->location = -1;
/*
* Finally, build an IndexClause based on the SAOP node. Use
* make_simple_restrictinfo() to get RestrictInfo with clean selectivity
* estimations, because they may differ from the estimation made for an OR
* clause. Although it is not a lossy expression, keep the original rinfo
* in iclause->rinfo as prescribed.
*/
iclause = makeNode(IndexClause);
iclause->rinfo = rinfo;
iclause->indexquals = list_make1(make_simple_restrictinfo(root,
&saopexpr->xpr));
iclause->lossy = false;
iclause->indexcol = indexcol;
iclause->indexcols = NIL;
return iclause;
}
/*
* expand_indexqual_rowcompare --- expand a single indexqual condition
* that is a RowCompareExpr