1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-15 19:21:59 +03:00

Restore foreign-key-aware estimation of join relation sizes.

This patch provides a new implementation of the logic added by commit
137805f89 and later removed by 77ba61080.  It differs from the original
primarily in expending much less effort per joinrel in large queries,
which it accomplishes by doing most of the matching work once per query not
once per joinrel.  Hopefully, it's also less buggy and better commented.
The never-documented enable_fkey_estimates GUC remains gone.

There remains work to be done to make the selectivity estimates account
for nulls in FK referencing columns; but that was true of the original
patch as well.  We may be able to address this point later in beta.
In the meantime, any error should be in the direction of overestimating
rather than underestimating joinrel sizes, which seems like the direction
we want to err in.

Tomas Vondra and Tom Lane

Discussion: <31041.1465069446@sss.pgh.pa.us>
This commit is contained in:
Tom Lane
2016-06-18 15:22:34 -04:00
parent 598aa194af
commit 100340e2dc
17 changed files with 921 additions and 18 deletions

View File

@ -2306,6 +2306,159 @@ build_implied_join_equality(Oid opno,
}
/*
* match_foreign_keys_to_quals
* Match foreign-key constraints to equivalence classes and join quals
*
* The idea here is to see which query join conditions match equality
* constraints of a foreign-key relationship. For such join conditions,
* we can use the FK semantics to make selectivity estimates that are more
* reliable than estimating from statistics, especially for multiple-column
* FKs, where the normal assumption of independent conditions tends to fail.
*
* In this function we annotate the ForeignKeyOptInfos in root->fkey_list
* with info about which eclasses and join qual clauses they match, and
* discard any ForeignKeyOptInfos that are irrelevant for the query.
*/
void
match_foreign_keys_to_quals(PlannerInfo *root)
{
List *newlist = NIL;
ListCell *lc;
foreach(lc, root->fkey_list)
{
ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
RelOptInfo *con_rel = find_base_rel(root, fkinfo->con_relid);
RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
int colno;
/*
* Ignore FK unless both rels are baserels. This gets rid of FKs that
* link to inheritance child rels (otherrels) and those that link to
* rels removed by join removal (dead rels).
*/
if (con_rel->reloptkind != RELOPT_BASEREL ||
ref_rel->reloptkind != RELOPT_BASEREL)
continue;
/*
* Scan the columns and try to match them to eclasses and quals.
*
* Note: for simple inner joins, any match should be in an eclass.
* "Loose" quals that syntactically match an FK equality must have
* been rejected for EC status because they are outer-join quals or
* similar. We can still consider them to match the FK if they are
* not outerjoin_delayed.
*/
for (colno = 0; colno < fkinfo->nkeys; colno++)
{
AttrNumber con_attno,
ref_attno;
Oid fpeqop;
ListCell *lc2;
fkinfo->eclass[colno] = match_eclasses_to_foreign_key_col(root,
fkinfo,
colno);
/* Don't bother looking for loose quals if we got an EC match */
if (fkinfo->eclass[colno] != NULL)
{
fkinfo->nmatched_ec++;
continue;
}
/*
* Scan joininfo list for relevant clauses. Either rel's joininfo
* list would do equally well; we use con_rel's.
*/
con_attno = fkinfo->conkey[colno];
ref_attno = fkinfo->confkey[colno];
fpeqop = InvalidOid; /* we'll look this up only if needed */
foreach(lc2, con_rel->joininfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
OpExpr *clause = (OpExpr *) rinfo->clause;
Var *leftvar;
Var *rightvar;
/* Ignore outerjoin-delayed clauses */
if (rinfo->outerjoin_delayed)
continue;
/* Only binary OpExprs are useful for consideration */
if (!IsA(clause, OpExpr) ||
list_length(clause->args) != 2)
continue;
leftvar = (Var *) get_leftop((Expr *) clause);
rightvar = (Var *) get_rightop((Expr *) clause);
/* Operands must be Vars, possibly with RelabelType */
while (leftvar && IsA(leftvar, RelabelType))
leftvar = (Var *) ((RelabelType *) leftvar)->arg;
if (!(leftvar && IsA(leftvar, Var)))
continue;
while (rightvar && IsA(rightvar, RelabelType))
rightvar = (Var *) ((RelabelType *) rightvar)->arg;
if (!(rightvar && IsA(rightvar, Var)))
continue;
/* Now try to match the vars to the current foreign key cols */
if (fkinfo->ref_relid == leftvar->varno &&
ref_attno == leftvar->varattno &&
fkinfo->con_relid == rightvar->varno &&
con_attno == rightvar->varattno)
{
/* Vars match, but is it the right operator? */
if (clause->opno == fkinfo->conpfeqop[colno])
{
fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno],
rinfo);
fkinfo->nmatched_ri++;
}
}
else if (fkinfo->ref_relid == rightvar->varno &&
ref_attno == rightvar->varattno &&
fkinfo->con_relid == leftvar->varno &&
con_attno == leftvar->varattno)
{
/*
* Reverse match, must check commutator operator. Look it
* up if we didn't already. (In the worst case we might
* do multiple lookups here, but that would require an FK
* equality operator without commutator, which is
* unlikely.)
*/
if (!OidIsValid(fpeqop))
fpeqop = get_commutator(fkinfo->conpfeqop[colno]);
if (clause->opno == fpeqop)
{
fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno],
rinfo);
fkinfo->nmatched_ri++;
}
}
}
/* If we found any matching loose quals, count col as matched */
if (fkinfo->rinfos[colno])
fkinfo->nmatched_rcols++;
}
/*
* Currently, we drop multicolumn FKs that aren't fully matched to the
* query. Later we might figure out how to derive some sort of
* estimate from them, in which case this test should be weakened to
* "if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) > 0)".
*/
if ((fkinfo->nmatched_ec + fkinfo->nmatched_rcols) == fkinfo->nkeys)
newlist = lappend(newlist, fkinfo);
}
/* Replace fkey_list, thereby discarding any useless entries */
root->fkey_list = newlist;
}
/*****************************************************************************
*
* CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES