mirror of
https://github.com/postgres/postgres.git
synced 2025-07-27 12:41:57 +03:00
Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace
the old JOIN_IN code, but antijoins are new functionality.) Teach the planner to convert appropriate EXISTS and NOT EXISTS subqueries into semi and anti joins respectively. Also, LEFT JOINs with suitable upper-level IS NULL filters are recognized as being anti joins. Unify the InClauseInfo and OuterJoinInfo infrastructure into "SpecialJoinInfo". With that change, it becomes possible to associate a SpecialJoinInfo with every join attempt, which permits some cleanup of join selectivity estimation. That needs to be taken much further than this patch does, but the next step is to change the API for oprjoin selectivity functions, which seems like material for a separate patch. So for the moment the output size estimates for semi and especially anti joins are quite bogus.
This commit is contained in:
@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.132 2008/07/10 02:14:03 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.133 2008/08/14 18:47:59 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -21,6 +21,7 @@
|
||||
#include "optimizer/cost.h"
|
||||
#include "optimizer/planmain.h"
|
||||
#include "optimizer/planner.h"
|
||||
#include "optimizer/prep.h"
|
||||
#include "optimizer/subselect.h"
|
||||
#include "optimizer/var.h"
|
||||
#include "parser/parse_expr.h"
|
||||
@ -62,6 +63,7 @@ static Node *convert_testexpr_mutator(Node *node,
|
||||
convert_testexpr_context *context);
|
||||
static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan);
|
||||
static bool hash_ok_operator(OpExpr *expr);
|
||||
static bool simplify_EXISTS_query(Query *query);
|
||||
static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
|
||||
static Node *process_sublinks_mutator(Node *node,
|
||||
process_sublinks_context *context);
|
||||
@ -217,11 +219,16 @@ generate_new_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod)
|
||||
static Oid
|
||||
get_first_col_type(Plan *plan)
|
||||
{
|
||||
TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
|
||||
/* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */
|
||||
if (plan->targetlist)
|
||||
{
|
||||
TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
|
||||
|
||||
Assert(IsA(tent, TargetEntry));
|
||||
Assert(!tent->resjunk);
|
||||
return exprType((Node *) tent->expr);
|
||||
Assert(IsA(tent, TargetEntry));
|
||||
if (!tent->resjunk)
|
||||
return exprType((Node *) tent->expr);
|
||||
}
|
||||
return VOIDOID;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -258,6 +265,12 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
|
||||
*/
|
||||
subquery = (Query *) copyObject(subquery);
|
||||
|
||||
/*
|
||||
* If it's an EXISTS subplan, we might be able to simplify it.
|
||||
*/
|
||||
if (slink->subLinkType == EXISTS_SUBLINK)
|
||||
(void) simplify_EXISTS_query(subquery);
|
||||
|
||||
/*
|
||||
* For an EXISTS subplan, tell lower-level planner to expect that only the
|
||||
* first tuple will be retrieved. For ALL and ANY subplans, we will be
|
||||
@ -710,80 +723,32 @@ hash_ok_operator(OpExpr *expr)
|
||||
}
|
||||
|
||||
/*
|
||||
* convert_IN_to_join: can we convert an IN SubLink to join style?
|
||||
* convert_ANY_sublink_to_join: can we convert an ANY SubLink to a join?
|
||||
*
|
||||
* The caller has found a SubLink at the top level of WHERE, but has not
|
||||
* checked the properties of the SubLink at all. Decide whether it is
|
||||
* The caller has found an ANY SubLink at the top level of WHERE, but has not
|
||||
* checked the properties of the SubLink further. Decide whether it is
|
||||
* appropriate to process this SubLink in join style. If not, return NULL.
|
||||
* If so, build the qual clause(s) to replace the SubLink, and return them.
|
||||
* The qual clauses are wrapped in a FlattenedSubLink node to help later
|
||||
* processing place them properly.
|
||||
*
|
||||
* Side effects of a successful conversion include adding the SubLink's
|
||||
* subselect to the query's rangetable and adding an InClauseInfo node to
|
||||
* its in_info_list.
|
||||
* subselect to the query's rangetable.
|
||||
*/
|
||||
Node *
|
||||
convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
|
||||
convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink)
|
||||
{
|
||||
Query *parse = root->parse;
|
||||
Query *subselect = (Query *) sublink->subselect;
|
||||
List *in_operators;
|
||||
List *left_exprs;
|
||||
List *right_exprs;
|
||||
Relids left_varnos;
|
||||
int rtindex;
|
||||
RangeTblEntry *rte;
|
||||
RangeTblRef *rtr;
|
||||
List *subquery_vars;
|
||||
InClauseInfo *ininfo;
|
||||
Node *result;
|
||||
Expr *quals;
|
||||
FlattenedSubLink *fslink;
|
||||
|
||||
/*
|
||||
* The sublink type must be "= ANY" --- that is, an IN operator. We
|
||||
* expect that the test expression will be either a single OpExpr, or an
|
||||
* AND-clause containing OpExprs. (If it's anything else then the parser
|
||||
* must have determined that the operators have non-equality-like
|
||||
* semantics. In the OpExpr case we can't be sure what the operator's
|
||||
* semantics are like, and must check for ourselves.)
|
||||
*/
|
||||
if (sublink->subLinkType != ANY_SUBLINK)
|
||||
return NULL;
|
||||
if (sublink->testexpr && IsA(sublink->testexpr, OpExpr))
|
||||
{
|
||||
OpExpr *op = (OpExpr *) sublink->testexpr;
|
||||
Oid opno = op->opno;
|
||||
List *opfamilies;
|
||||
List *opstrats;
|
||||
|
||||
if (list_length(op->args) != 2)
|
||||
return NULL; /* not binary operator? */
|
||||
get_op_btree_interpretation(opno, &opfamilies, &opstrats);
|
||||
if (!list_member_int(opstrats, ROWCOMPARE_EQ))
|
||||
return NULL;
|
||||
in_operators = list_make1_oid(opno);
|
||||
left_exprs = list_make1(linitial(op->args));
|
||||
right_exprs = list_make1(lsecond(op->args));
|
||||
}
|
||||
else if (and_clause(sublink->testexpr))
|
||||
{
|
||||
ListCell *lc;
|
||||
|
||||
/* OK, but we need to extract the per-column info */
|
||||
in_operators = left_exprs = right_exprs = NIL;
|
||||
foreach(lc, ((BoolExpr *) sublink->testexpr)->args)
|
||||
{
|
||||
OpExpr *op = (OpExpr *) lfirst(lc);
|
||||
|
||||
if (!IsA(op, OpExpr)) /* probably shouldn't happen */
|
||||
return NULL;
|
||||
if (list_length(op->args) != 2)
|
||||
return NULL; /* not binary operator? */
|
||||
in_operators = lappend_oid(in_operators, op->opno);
|
||||
left_exprs = lappend(left_exprs, linitial(op->args));
|
||||
right_exprs = lappend(right_exprs, lsecond(op->args));
|
||||
}
|
||||
}
|
||||
else
|
||||
return NULL;
|
||||
Assert(sublink->subLinkType == ANY_SUBLINK);
|
||||
|
||||
/*
|
||||
* The sub-select must not refer to any Vars of the parent query. (Vars of
|
||||
@ -793,16 +758,14 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* The left-hand expressions must contain some Vars of the current query,
|
||||
* else it's not gonna be a join.
|
||||
* The test expression must contain some Vars of the current query,
|
||||
* else it's not gonna be a join. (Note that it won't have Vars
|
||||
* referring to the subquery, rather Params.)
|
||||
*/
|
||||
left_varnos = pull_varnos((Node *) left_exprs);
|
||||
left_varnos = pull_varnos(sublink->testexpr);
|
||||
if (bms_is_empty(left_varnos))
|
||||
return NULL;
|
||||
|
||||
/* ... and the right-hand expressions better not contain Vars at all */
|
||||
Assert(!contain_var_clause((Node *) right_exprs));
|
||||
|
||||
/*
|
||||
* The combining operators and left-hand expressions mustn't be volatile.
|
||||
*/
|
||||
@ -819,12 +782,19 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
|
||||
*/
|
||||
rte = addRangeTableEntryForSubquery(NULL,
|
||||
subselect,
|
||||
makeAlias("IN_subquery", NIL),
|
||||
makeAlias("ANY_subquery", NIL),
|
||||
false);
|
||||
parse->rtable = lappend(parse->rtable, rte);
|
||||
rtindex = list_length(parse->rtable);
|
||||
rtr = makeNode(RangeTblRef);
|
||||
rtr->rtindex = rtindex;
|
||||
|
||||
/*
|
||||
* We assume it's okay to add the pulled-up subquery to the topmost FROM
|
||||
* list. This should be all right for ANY clauses appearing in WHERE
|
||||
* or in upper-level plain JOIN/ON clauses. ANYs appearing below any
|
||||
* outer joins couldn't be placed there, however.
|
||||
*/
|
||||
parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
|
||||
|
||||
/*
|
||||
@ -837,34 +807,232 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
|
||||
/*
|
||||
* Build the result qual expression, replacing Params with these Vars.
|
||||
*/
|
||||
result = convert_testexpr(root,
|
||||
sublink->testexpr,
|
||||
subquery_vars);
|
||||
quals = (Expr *) convert_testexpr(root,
|
||||
sublink->testexpr,
|
||||
subquery_vars);
|
||||
|
||||
/*
|
||||
* Now build the InClauseInfo node.
|
||||
* Now build the FlattenedSubLink node.
|
||||
*/
|
||||
ininfo = makeNode(InClauseInfo);
|
||||
ininfo->lefthand = left_varnos;
|
||||
ininfo->righthand = bms_make_singleton(rtindex);
|
||||
ininfo->in_operators = in_operators;
|
||||
fslink = makeNode(FlattenedSubLink);
|
||||
fslink->jointype = JOIN_SEMI;
|
||||
fslink->lefthand = left_varnos;
|
||||
fslink->righthand = bms_make_singleton(rtindex);
|
||||
fslink->quals = quals;
|
||||
|
||||
return (Node *) fslink;
|
||||
}
|
||||
|
||||
/*
|
||||
* simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
|
||||
*
|
||||
* The only thing that matters about an EXISTS query is whether it returns
|
||||
* zero or more than zero rows. Therefore, we can remove certain SQL features
|
||||
* that won't affect that. The only part that is really likely to matter in
|
||||
* typical usage is simplifying the targetlist: it's a common habit to write
|
||||
* "SELECT * FROM" even though there is no need to evaluate any columns.
|
||||
*
|
||||
* Note: by suppressing the targetlist we could cause an observable behavioral
|
||||
* change, namely that any errors that might occur in evaluating the tlist
|
||||
* won't occur, nor will other side-effects of volatile functions. This seems
|
||||
* unlikely to bother anyone in practice.
|
||||
*
|
||||
* Returns TRUE if was able to discard the targetlist, else FALSE.
|
||||
*/
|
||||
static bool
|
||||
simplify_EXISTS_query(Query *query)
|
||||
{
|
||||
/*
|
||||
* We don't try to simplify at all if the query uses set operations,
|
||||
* aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these
|
||||
* seem likely in normal usage and their possible effects are complex.
|
||||
*/
|
||||
if (query->commandType != CMD_SELECT ||
|
||||
query->intoClause ||
|
||||
query->setOperations ||
|
||||
query->hasAggs ||
|
||||
query->havingQual ||
|
||||
query->limitOffset ||
|
||||
query->limitCount ||
|
||||
query->rowMarks)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* ininfo->sub_targetlist must be filled with a list of expressions that
|
||||
* would need to be unique-ified if we try to implement the IN using a
|
||||
* regular join to unique-ified subquery output. This is most easily done
|
||||
* by applying convert_testexpr to just the RHS inputs of the testexpr
|
||||
* operators. That handles cases like type coercions of the subquery
|
||||
* outputs, clauses dropped due to const-simplification, etc.
|
||||
* Mustn't throw away the targetlist if it contains set-returning
|
||||
* functions; those could affect whether zero rows are returned!
|
||||
*/
|
||||
ininfo->sub_targetlist = (List *) convert_testexpr(root,
|
||||
(Node *) right_exprs,
|
||||
subquery_vars);
|
||||
if (expression_returns_set((Node *) query->targetList))
|
||||
return false;
|
||||
|
||||
/* Add the completed node to the query's list */
|
||||
root->in_info_list = lappend(root->in_info_list, ininfo);
|
||||
/*
|
||||
* Otherwise, we can throw away the targetlist, as well as any GROUP,
|
||||
* DISTINCT, and ORDER BY clauses; none of those clauses will change
|
||||
* a nonzero-rows result to zero rows or vice versa. (Furthermore,
|
||||
* since our parsetree representation of these clauses depends on the
|
||||
* targetlist, we'd better throw them away if we drop the targetlist.)
|
||||
*/
|
||||
query->targetList = NIL;
|
||||
query->groupClause = NIL;
|
||||
query->distinctClause = NIL;
|
||||
query->sortClause = NIL;
|
||||
query->hasDistinctOn = false;
|
||||
|
||||
return result;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join?
|
||||
*
|
||||
* The caller has found an EXISTS SubLink at the top level of WHERE, or just
|
||||
* underneath a NOT, but has not checked the properties of the SubLink
|
||||
* further. Decide whether it is appropriate to process this SubLink in join
|
||||
* style. If not, return NULL. If so, build the qual clause(s) to replace
|
||||
* the SubLink, and return them. (In the NOT case, the returned clauses are
|
||||
* intended to replace the NOT as well.) The qual clauses are wrapped in a
|
||||
* FlattenedSubLink node to help later processing place them properly.
|
||||
*
|
||||
* Side effects of a successful conversion include adding the SubLink's
|
||||
* subselect to the query's rangetable.
|
||||
*/
|
||||
Node *
|
||||
convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
|
||||
bool under_not)
|
||||
{
|
||||
Query *parse = root->parse;
|
||||
Query *subselect = (Query *) sublink->subselect;
|
||||
Node *whereClause;
|
||||
int rtoffset;
|
||||
int varno;
|
||||
Relids clause_varnos;
|
||||
Relids left_varnos;
|
||||
Relids right_varnos;
|
||||
Relids subselect_varnos;
|
||||
FlattenedSubLink *fslink;
|
||||
|
||||
Assert(sublink->subLinkType == EXISTS_SUBLINK);
|
||||
|
||||
/*
|
||||
* Copy the subquery so we can modify it safely (see comments in
|
||||
* make_subplan).
|
||||
*/
|
||||
subselect = (Query *) copyObject(subselect);
|
||||
|
||||
/*
|
||||
* See if the subquery can be simplified based on the knowledge that
|
||||
* it's being used in EXISTS(). If we aren't able to get rid of its
|
||||
* targetlist, we have to fail, because the pullup operation leaves
|
||||
* us with noplace to evaluate the targetlist.
|
||||
*/
|
||||
if (!simplify_EXISTS_query(subselect))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Separate out the WHERE clause. (We could theoretically also remove
|
||||
* top-level plain JOIN/ON clauses, but it's probably not worth the
|
||||
* trouble.)
|
||||
*/
|
||||
whereClause = subselect->jointree->quals;
|
||||
subselect->jointree->quals = NULL;
|
||||
|
||||
/*
|
||||
* The rest of the sub-select must not refer to any Vars of the parent
|
||||
* query. (Vars of higher levels should be okay, though.)
|
||||
*/
|
||||
if (contain_vars_of_level((Node *) subselect, 1))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* On the other hand, the WHERE clause must contain some Vars of the
|
||||
* parent query, else it's not gonna be a join.
|
||||
*/
|
||||
if (!contain_vars_of_level(whereClause, 1))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* We don't risk optimizing if the WHERE clause is volatile, either.
|
||||
*/
|
||||
if (contain_volatile_functions(whereClause))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Also disallow SubLinks within the WHERE clause. (XXX this could
|
||||
* probably be supported, but it would complicate the transformation
|
||||
* below, and it doesn't seem worth worrying about in a first pass.)
|
||||
*/
|
||||
if (contain_subplans(whereClause))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* Okay, pull up the sub-select into top range table and jointree.
|
||||
*
|
||||
* We rely here on the assumption that the outer query has no references
|
||||
* to the inner (necessarily true). Therefore this is a lot easier than
|
||||
* what pull_up_subqueries has to go through.
|
||||
*
|
||||
* In fact, it's even easier than what convert_ANY_sublink_to_join has
|
||||
* to do. The machinations of simplify_EXISTS_query ensured that there
|
||||
* is nothing interesting in the subquery except an rtable and jointree,
|
||||
* and even the jointree FromExpr no longer has quals. So we can just
|
||||
* append the rtable to our own and append the fromlist to our own.
|
||||
* But first, adjust all level-zero varnos in the subquery to account
|
||||
* for the rtable merger.
|
||||
*/
|
||||
rtoffset = list_length(parse->rtable);
|
||||
OffsetVarNodes((Node *) subselect, rtoffset, 0);
|
||||
OffsetVarNodes(whereClause, rtoffset, 0);
|
||||
|
||||
/*
|
||||
* Upper-level vars in subquery will now be one level closer to their
|
||||
* parent than before; in particular, anything that had been level 1
|
||||
* becomes level zero.
|
||||
*/
|
||||
IncrementVarSublevelsUp((Node *) subselect, -1, 1);
|
||||
IncrementVarSublevelsUp(whereClause, -1, 1);
|
||||
|
||||
/*
|
||||
* Now that the WHERE clause is adjusted to match the parent query
|
||||
* environment, we can easily identify all the level-zero rels it uses.
|
||||
* The ones <= rtoffset are "left rels" of the join we're forming,
|
||||
* and the ones > rtoffset are "right rels".
|
||||
*/
|
||||
clause_varnos = pull_varnos(whereClause);
|
||||
left_varnos = right_varnos = NULL;
|
||||
while ((varno = bms_first_member(clause_varnos)) >= 0)
|
||||
{
|
||||
if (varno <= rtoffset)
|
||||
left_varnos = bms_add_member(left_varnos, varno);
|
||||
else
|
||||
right_varnos = bms_add_member(right_varnos, varno);
|
||||
}
|
||||
bms_free(clause_varnos);
|
||||
Assert(!bms_is_empty(left_varnos));
|
||||
|
||||
/* Also identify all the rels syntactically within the subselect */
|
||||
subselect_varnos = get_relids_in_jointree((Node *) subselect->jointree);
|
||||
Assert(bms_is_subset(right_varnos, subselect_varnos));
|
||||
|
||||
/* Now we can attach the modified subquery rtable to the parent */
|
||||
parse->rtable = list_concat(parse->rtable, subselect->rtable);
|
||||
|
||||
/*
|
||||
* We assume it's okay to add the pulled-up subquery to the topmost FROM
|
||||
* list. This should be all right for EXISTS clauses appearing in WHERE
|
||||
* or in upper-level plain JOIN/ON clauses. EXISTS appearing below any
|
||||
* outer joins couldn't be placed there, however.
|
||||
*/
|
||||
parse->jointree->fromlist = list_concat(parse->jointree->fromlist,
|
||||
subselect->jointree->fromlist);
|
||||
|
||||
/*
|
||||
* Now build the FlattenedSubLink node.
|
||||
*/
|
||||
fslink = makeNode(FlattenedSubLink);
|
||||
fslink->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
|
||||
fslink->lefthand = left_varnos;
|
||||
fslink->righthand = subselect_varnos;
|
||||
fslink->quals = (Expr *) whereClause;
|
||||
|
||||
return (Node *) fslink;
|
||||
}
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user