1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-08 11:42:09 +03:00

Revert "Use Foreign Key relationships to infer multi-column join selectivity".

This commit reverts 137805f89 as well as the associated commits 015e88942,
5306df283, and 68d704edb.  We found multiple bugs in this feature, and
there was concern about possible planner slowdown (though to be fair,
exhibiting a very large slowdown proved difficult).  The way forward
requires a considerable rewrite, which may or may not be possible to
accomplish in time for beta2.  In my judgment reviewing the rewrite will
be easier to accomplish starting from a clean slate, so let's temporarily
revert what's there now.  This also leaves us in a safe state if it turns
out to be necessary to postpone the rewrite to the next development cycle.

Discussion: <20160429102531.GA13701@huehner.biz>
This commit is contained in:
Tom Lane
2016-06-07 17:21:17 -04:00
parent 5c6d2a5e7c
commit 77ba610805
12 changed files with 25 additions and 591 deletions

View File

@ -126,7 +126,6 @@ bool enable_nestloop = true;
bool enable_material = true;
bool enable_mergejoin = true;
bool enable_hashjoin = true;
bool enable_fkey_estimates = true;
typedef struct
{
@ -3888,361 +3887,6 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
return nrows;
}
/*
* quals_match_foreign_key
* Determines if the foreign key is matched by joinquals.
*
* Checks that there are conditions on all columns of the foreign key, matching
* the operator used by the foreign key etc. If such complete match is found,
* the function returns bitmap identifying the matching quals (0-based).
*
* Otherwise (no match at all or incomplete match), NULL is returned.
*
* XXX It seems possible in the future to do something useful when a
* partial match occurs between join and FK, but that is less common
* and that part isn't worked out yet.
*/
static Bitmapset *
quals_match_foreign_key(PlannerInfo *root, ForeignKeyOptInfo *fkinfo,
RelOptInfo *fkrel, RelOptInfo *foreignrel,
List *joinquals)
{
int i;
int nkeys = fkinfo->nkeys;
Bitmapset *qualmatches = NULL;
Bitmapset *fkmatches = NULL;
/*
* Loop over each column of the foreign key and build a bitmapset
* of each joinqual which matches. Note that we don't stop when we find
* the first match, as the expression could be duplicated in the
* joinquals, and we want to generate a bitmapset which has bits set for
* every matching join qual.
*/
for (i = 0; i < nkeys; i++)
{
ListCell *lc;
int quallstidx = -1;
foreach(lc, joinquals)
{
RestrictInfo *rinfo;
OpExpr *clause;
Var *leftvar;
Var *rightvar;
quallstidx++;
/*
* Technically we don't need to, but here we skip this qual if
* we've matched it to part of the foreign key already. This
* should prove to be a useful optimization when the quals appear
* in the same order as the foreign key's keys. We need only bother
* doing this when the foreign key is made up of more than 1 set
* of columns, and we're not testing the first column.
*/
if (i > 0 && bms_is_member(quallstidx, qualmatches))
continue;
rinfo = (RestrictInfo *) lfirst(lc);
clause = (OpExpr *) rinfo->clause;
/* only OpExprs are useful for consideration */
if (!IsA(clause, OpExpr))
continue;
/*
* If the operator does not match then there's little point in
* checking the operands.
*/
if (clause->opno != fkinfo->conpfeqop[i])
continue;
leftvar = (Var *) get_leftop((Expr *) clause);
rightvar = (Var *) get_rightop((Expr *) clause);
/* Foreign keys only support Vars, so ignore anything more complex */
if (!IsA(leftvar, Var) || !IsA(rightvar, Var))
continue;
/*
* For RestrictInfos built from an eclass we must consider each
* member of the eclass as rinfo's operands may not belong to the
* foreign key. For efficient tracking of which Vars we've found,
* since we're only tracking 2 Vars, we use a bitmask. We can
* safely finish searching when both of the least significant bits
* are set.
*/
if (rinfo->parent_ec)
{
EquivalenceClass *ec = rinfo->parent_ec;
ListCell *lc2;
int foundvarmask = 0;
foreach(lc2, ec->ec_members)
{
EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
Var *var = (Var *) em->em_expr;
if (!IsA(var, Var))
continue;
if (foreignrel->relid == var->varno &&
fkinfo->confkeys[i] == var->varattno)
foundvarmask |= 1;
else if (fkrel->relid == var->varno &&
fkinfo->conkeys[i] == var->varattno)
foundvarmask |= 2;
/*
* Check if we've found both matches. If found we add
* this qual to the matched list and mark this key as
* matched too.
*/
if (foundvarmask == 3)
{
qualmatches = bms_add_member(qualmatches, quallstidx);
fkmatches = bms_add_member(fkmatches, i);
break;
}
}
}
else
{
/*
* In this non eclass RestrictInfo case we'll check if the left
* and right Vars match to this part of the foreign key.
* Remember that this could be written with the Vars in either
* order, so we test both permutations of the expression.
*/
if ((foreignrel->relid == leftvar->varno) &&
(fkrel->relid == rightvar->varno) &&
(fkinfo->confkeys[i] == leftvar->varattno) &&
(fkinfo->conkeys[i] == rightvar->varattno))
{
qualmatches = bms_add_member(qualmatches, quallstidx);
fkmatches = bms_add_member(fkmatches, i);
}
else if ((foreignrel->relid == rightvar->varno) &&
(fkrel->relid == leftvar->varno) &&
(fkinfo->confkeys[i] == rightvar->varattno) &&
(fkinfo->conkeys[i] == leftvar->varattno))
{
qualmatches = bms_add_member(qualmatches, quallstidx);
fkmatches = bms_add_member(fkmatches, i);
}
}
}
}
/* can't find more matches than columns in the foreign key */
Assert(bms_num_members(fkmatches) <= nkeys);
/* Only return the matches if the foreign key is matched fully. */
if (bms_num_members(fkmatches) == nkeys)
{
bms_free(fkmatches);
return qualmatches;
}
bms_free(fkmatches);
bms_free(qualmatches);
return NULL;
}
/*
* find_best_foreign_key_quals
* Finds the foreign key best matching the joinquals.
*
* Analyzes joinquals to determine if any quals match foreign keys defined the
* two relations (fkrel referencing foreignrel). When multiple foreign keys
* match, we choose the one with the most keys as the best one because of the
* way estimation occurs in clauselist_join_selectivity(). We could choose
* the FK matching the most quals, however we assume the quals may be duplicated.
*
* We also track which joinquals match the current foreign key, so that we can
* easily skip then when computing the selectivity.
*
* When no matching foreign key is found we return 0, otherwise we return the
* number of keys in the foreign key.
*
* Foreign keys matched only partially are currently ignored.
*/
static int
find_best_foreign_key_quals(PlannerInfo *root, RelOptInfo *fkrel,
RelOptInfo *foreignrel, List *joinquals,
Bitmapset **joinqualsbitmap)
{
Bitmapset *qualbestmatch;
ListCell *lc;
int bestmatchnkeys;
/*
* fast path out when there's no foreign keys on fkrel, or when use of
* foreign keys for estimation is disabled by GUC
*/
if ((fkrel->fkeylist == NIL) || (!enable_fkey_estimates))
{
*joinqualsbitmap = NULL;
return 0;
}
qualbestmatch = NULL;
bestmatchnkeys = 0;
/* now check the matches for each foreign key defined on the fkrel */
foreach(lc, fkrel->fkeylist)
{
ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
Bitmapset *qualsmatched;
/*
* We make no attempt in checking that this foreign key actually
* references 'foreignrel', the reasoning here is that we may be able
* to match the foreign key to an eclass member Var of a RestrictInfo
* that's in qualslist, this Var may belong to some other relation.
*
* XXX Is this assumption safe in all cases? Maybe not, but does
* it lead to a worse estimate than the previous approach? Doubt it.
*/
qualsmatched = quals_match_foreign_key(root, fkinfo, fkrel, foreignrel,
joinquals);
/* Did we get a match? And is that match better than a previous one? */
if (qualsmatched != NULL && fkinfo->nkeys > bestmatchnkeys)
{
/* save the new best match */
bms_free(qualbestmatch);
qualbestmatch = qualsmatched;
bestmatchnkeys = fkinfo->nkeys;
}
}
*joinqualsbitmap = qualbestmatch;
return bestmatchnkeys;
}
/*
* clauselist_join_selectivity
* Estimate selectivity of join clauses either by using foreign key info
* or by using the regular clauselist_selectivity().
*
* Since selectivity estimates for each joinqual are multiplied together, this
* can cause significant underestimates on the number of join tuples in cases
* where there's more than 1 clause in the join condition. To help ease the
* pain here we make use of foreign keys, and we assume that 1 row will match
* when *all* of the foreign key columns are present in the join condition, any
* additional clauses are estimated using clauselist_selectivity().
*
* Note this ignores whether the FK is invalid or currently deferred; we don't
* rely on this assumption for correctness of the query, so it is a reasonable
* and safe assumption for planning purposes.
*/
static Selectivity
clauselist_join_selectivity(PlannerInfo *root, List *joinquals,
JoinType jointype, SpecialJoinInfo *sjinfo)
{
int outerid;
int innerid;
Selectivity sel = 1.0;
Bitmapset *foundfkquals = NULL;
innerid = -1;
while ((innerid = bms_next_member(sjinfo->min_righthand, innerid)) >= 0)
{
RelOptInfo *innerrel = find_base_rel(root, innerid);
outerid = -1;
while ((outerid = bms_next_member(sjinfo->min_lefthand, outerid)) >= 0)
{
RelOptInfo *outerrel = find_base_rel(root, outerid);
Bitmapset *outer2inner;
Bitmapset *inner2outer;
int innermatches;
int outermatches;
/*
* check which quals are matched by a foreign key referencing the
* innerrel.
*/
outermatches = find_best_foreign_key_quals(root, outerrel,
innerrel, joinquals, &outer2inner);
/* do the same, but with relations swapped */
innermatches = find_best_foreign_key_quals(root, innerrel,
outerrel, joinquals, &inner2outer);
/*
* did we find any matches at all? If so we need to see which one is
* the best/longest match
*/
if (outermatches != 0 || innermatches != 0)
{
double referenced_tuples;
bool overlap;
/* either could be zero, but not both. */
if (outermatches < innermatches)
{
overlap = bms_overlap(foundfkquals, inner2outer);
foundfkquals = bms_add_members(foundfkquals, inner2outer);
referenced_tuples = Max(outerrel->tuples, 1.0);
}
else
{
overlap = bms_overlap(foundfkquals, outer2inner);
foundfkquals = bms_add_members(foundfkquals, outer2inner);
referenced_tuples = Max(innerrel->tuples, 1.0);
}
/*
* XXX should we ignore these overlapping matches?
* Or perhaps take the Max() or Min()?
*/
if (overlap)
{
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
sel = Min(sel,Min(1.0 / (outerrel->tuples / Max(innerrel->tuples, 1.0)), 1.0));
else
sel = Min(sel, 1.0 / referenced_tuples);
}
else
{
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
sel *= Min(1.0 / (outerrel->tuples / Max(innerrel->tuples, 1.0)), 1.0);
else
sel *= 1.0 / referenced_tuples;
}
}
}
}
/*
* If any non matched quals exist then we build a list of the non-matches
* and use clauselist_selectivity() to estimate the selectivity of these.
*/
if (bms_num_members(foundfkquals) < list_length(joinquals))
{
ListCell *lc;
int lstidx = 0;
List *nonfkeyclauses = NIL;
foreach (lc, joinquals)
{
if (!bms_is_member(lstidx, foundfkquals))
nonfkeyclauses = lappend(nonfkeyclauses, lfirst(lc));
lstidx++;
}
sel *= clauselist_selectivity(root, nonfkeyclauses, 0, jointype, sjinfo);
}
return sel;
}
/*
* calc_joinrel_size_estimate
* Workhorse for set_joinrel_size_estimates and
@ -4289,11 +3933,11 @@ calc_joinrel_size_estimate(PlannerInfo *root,
}
/* Get the separate selectivities */
jselec = clauselist_join_selectivity(root,
joinquals,
jointype,
sjinfo);
jselec = clauselist_selectivity(root,
joinquals,
0,
jointype,
sjinfo);
pselec = clauselist_selectivity(root,
pushedquals,
0,
@ -4306,10 +3950,11 @@ calc_joinrel_size_estimate(PlannerInfo *root,
}
else
{
jselec = clauselist_join_selectivity(root,
restrictlist,
jointype,
sjinfo);
jselec = clauselist_selectivity(root,
restrictlist,
0,
jointype,
sjinfo);
pselec = 0.0; /* not used, keep compiler quiet */
}

View File

@ -28,7 +28,6 @@
#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "catalog/pg_am.h"
#include "catalog/pg_constraint.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@ -42,7 +41,6 @@
#include "rewrite/rewriteManip.h"
#include "storage/bufmgr.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
@ -96,9 +94,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
Relation relation;
bool hasindex;
List *indexinfos = NIL;
List *fkinfos = NIL;
List *fkoidlist;
ListCell *l;
/*
* We need not lock the relation since it was already locked, either by
@ -149,6 +144,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
if (hasindex)
{
List *indexoidlist;
ListCell *l;
LOCKMODE lmode;
indexoidlist = RelationGetIndexList(relation);
@ -395,85 +391,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
rel->indexlist = indexinfos;
/*
* Load foreign key data. Note this is the definitional data from the
* catalog only and does not lock the referenced tables here. The
* precise definition of the FK is important and may affect the usage
* elsewhere in the planner, e.g. if the constraint is deferred or
* if the constraint is not valid then relying upon this in the executor
* may not be accurate, though might be considered a useful estimate for
* planning purposes.
*/
fkoidlist = RelationGetFKeyList(relation);
foreach(l, fkoidlist)
{
Oid fkoid = lfirst_oid(l);
HeapTuple htup;
Form_pg_constraint constraint;
ForeignKeyOptInfo *info;
Datum adatum;
bool isnull;
ArrayType *arr;
int numkeys;
int i;
htup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(fkoid));
if (!HeapTupleIsValid(htup)) /* should not happen */
elog(ERROR, "cache lookup failed for constraint %u", fkoid);
constraint = (Form_pg_constraint) GETSTRUCT(htup);
Assert(constraint->contype == CONSTRAINT_FOREIGN);
info = makeNode(ForeignKeyOptInfo);
info->conrelid = constraint->conrelid;
info->confrelid = constraint->confrelid;
/* conkey */
adatum = SysCacheGetAttr(CONSTROID, htup,
Anum_pg_constraint_conkey, &isnull);
Assert(!isnull);
arr = DatumGetArrayTypeP(adatum);
numkeys = ARR_DIMS(arr)[0];
info->conkeys = (int*)palloc(numkeys * sizeof(int));
for (i = 0; i < numkeys; i++)
info->conkeys[i] = ((int16 *) ARR_DATA_PTR(arr))[i];
/* confkey */
adatum = SysCacheGetAttr(CONSTROID, htup,
Anum_pg_constraint_confkey, &isnull);
Assert(!isnull);
arr = DatumGetArrayTypeP(adatum);
Assert(numkeys == ARR_DIMS(arr)[0]);
info->confkeys = (int*)palloc(numkeys * sizeof(int));
for (i = 0; i < numkeys; i++)
info->confkeys[i] = ((int16 *) ARR_DATA_PTR(arr))[i];
/* conpfeqop */
adatum = SysCacheGetAttr(CONSTROID, htup,
Anum_pg_constraint_conpfeqop, &isnull);
Assert(!isnull);
arr = DatumGetArrayTypeP(adatum);
Assert(numkeys == ARR_DIMS(arr)[0]);
info->conpfeqop = (Oid*)palloc(numkeys * sizeof(Oid));
for (i = 0; i < numkeys; i++)
info->conpfeqop[i] = ((Oid *) ARR_DATA_PTR(arr))[i];
info->nkeys = numkeys;
ReleaseSysCache(htup);
fkinfos = lappend(fkinfos, info);
}
list_free(fkoidlist);
rel->fkeylist = fkinfos;
/* Grab foreign-table info using the relcache, while we have it */
if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{