1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-24 14:22:24 +03:00

Convert 'x IN (VALUES ...)' to 'x = ANY ...' when appropriate

This commit implements the automatic conversion of 'x IN (VALUES ...)' into
ScalarArrayOpExpr.  That simplifies the query tree, eliminating the appearance
of an unnecessary join.

Since VALUES describes a relational table, and the value of such a list is
a table row, the optimizer will likely face an underestimation problem due to
the inability to estimate cardinality through MCV statistics.  The cardinality
evaluation mechanism can work with the array inclusion check operation.
If the array is small enough (< 100 elements), it will perform a statistical
evaluation element by element.

We perform the transformation in convert_ANY_sublink_to_join() if the VALUES
RTE is suitable and the expression is convertible.  The conversion is only
possible for operations on scalar values, not rows.  Also, we currently
support the transformation only when it ends up with a constant array.
Otherwise, the evaluation of non-hashed SAOP might be slower than the
corresponding Hash Join with VALUES.

Discussion: https://postgr.es/m/0184212d-1248-4f1f-a42d-f5cb1c1976d2%40tantorlabs.com
Author: Alena Rybakina <a.rybakina@postgrespro.ru>
Author: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: Ivan Kush <ivan.kush@tantorlabs.com>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
This commit is contained in:
Alexander Korotkov
2025-04-04 16:01:50 +03:00
parent d48d2e2dc8
commit c0962a113d
6 changed files with 512 additions and 5 deletions

View File

@ -1214,6 +1214,86 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context)
return expression_tree_walker(node, inline_cte_walker, context);
}
/*
 * convert_VALUES_to_ANY
 *	  Try to rewrite 'testexpr', applied over the VALUES subquery 'values',
 *	  as a ScalarArrayOpExpr.
 *
 * The rewrite is performed only when every VALUES item reduces to a Const,
 * i.e. when the resulting array is constant.  Otherwise, the evaluation of
 * non-hashed SAOP might be slower than the corresponding Hash Join with
 * VALUES.
 *
 * Returns the ScalarArrayOpExpr built by make_SAOP_expr(), or NULL if the
 * transformation isn't allowed.
 */
ScalarArrayOpExpr *
convert_VALUES_to_ANY(PlannerInfo *root, Node *testexpr, Query *values)
{
	OpExpr	   *opexpr;
	RangeTblEntry *values_rte;
	Node	   *outer_arg;
	Node	   *inner_arg;
	List	   *consts = NIL;
	ListCell   *cell;

	/* The test expression has to be an operator with exactly two inputs. */
	if (!IsA(testexpr, OpExpr))
		return NULL;

	opexpr = (OpExpr *) testexpr;
	if (list_length(opexpr->args) != 2)
		return NULL;

	/*
	 * The subquery must not produce more than one column and must reference
	 * exactly one range-table entry.
	 */
	if (list_length(values->targetList) > 1 ||
		list_length(values->rtable) != 1)
		return NULL;

	/* LIMIT, OFFSET and ORDER BY clauses are not supported. */
	if (values->limitCount != NULL ||
		values->limitOffset != NULL ||
		values->sortClause != NIL)
		return NULL;

	/* The only range-table entry has to be a VALUES list ... */
	values_rte = linitial_node(RangeTblEntry, values->rtable);
	if (values_rte->rtekind != RTE_VALUES)
		return NULL;

	/* ... holding at least two items ... */
	if (list_length(values_rte->values_lists) < 2)
		return NULL;

	/* ... none of which involves a volatile function. */
	if (contain_volatile_functions((Node *) values_rte->values_lists))
		return NULL;

	outer_arg = linitial(opexpr->args);
	inner_arg = lsecond(opexpr->args);

	foreach(cell, values_rte->values_lists)
	{
		List	   *row = lfirst(cell);
		Node	   *item;

		/*
		 * Substitute the first entry of this VALUES row into the right-hand
		 * side of the operator, then try to fold the result to a constant.
		 */
		item = convert_testexpr(root, inner_arg, list_make1(linitial(row)));
		item = eval_const_expressions(root, item);

		/* Only constant arrays are supported, so every item must be Const. */
		if (!IsA(item, Const))
			return NULL;

		consts = lappend(consts, item);
	}

	/* All items are constant: build the ScalarArrayOpExpr over them. */
	return make_SAOP_expr(opexpr->opno, outer_arg, exprType(inner_arg),
						  linitial_oid(values_rte->colcollations),
						  opexpr->inputcollid, consts, false);
}
/*
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join

View File

@ -664,6 +664,18 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Is it a convertible ANY or EXISTS clause? */
if (sublink->subLinkType == ANY_SUBLINK)
{
ScalarArrayOpExpr *saop;
if ((saop = convert_VALUES_to_ANY(root,
sublink->testexpr,
(Query *) sublink->subselect)) != NULL)
/*
* The VALUES sequence was simplified. Nothing more to do
* here.
*/
return (Node *) saop;
if ((j = convert_ANY_sublink_to_join(root, sublink,
available_rels1)) != NULL)
{

View File

@ -5484,26 +5484,30 @@ make_SAOP_expr(Oid oper, Node *leftexpr, Oid coltype, Oid arraycollid,
bool typbyval;
char typalign;
Datum *elems;
bool *nulls;
int i = 0;
ArrayType *arrayConst;
int dims[1] = {list_length(exprs)};
int lbs[1] = {1};
get_typlenbyvalalign(coltype, &typlen, &typbyval, &typalign);
elems = (Datum *) palloc(sizeof(Datum) * list_length(exprs));
nulls = (bool *) palloc(sizeof(bool) * list_length(exprs));
foreach_node(Const, value, exprs)
{
Assert(!value->constisnull);
elems[i++] = value->constvalue;
elems[i] = value->constvalue;
nulls[i++] = value->constisnull;
}
arrayConst = construct_array(elems, i, coltype,
typlen, typbyval, typalign);
arrayConst = construct_md_array(elems, nulls, 1, dims, lbs,
coltype, typlen, typbyval, typalign);
arrayNode = (Node *) makeConst(arraytype, -1, arraycollid,
-1, PointerGetDatum(arrayConst),
false, false);
pfree(elems);
pfree(nulls);
list_free(exprs);
}