mirror of
https://github.com/postgres/postgres.git
synced 2025-07-23 03:21:12 +03:00
Refactor join_is_removable() to separate out distinctness-proving logic.
Extracted from pending unique-join patch, since this is a rather large delta but it's simply moving code out into separately-accessible subroutines. I (tgl) did choose to add a bit more logic to rel_supports_distinctness, so that it verifies that there's at least one potentially usable unique index rather than just checking indexlist != NIL. Otherwise there's no functional change here. David Rowley
This commit is contained in:
@ -37,6 +37,9 @@ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
|
|||||||
static void remove_rel_from_query(PlannerInfo *root, int relid,
|
static void remove_rel_from_query(PlannerInfo *root, int relid,
|
||||||
Relids joinrelids);
|
Relids joinrelids);
|
||||||
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
|
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
|
||||||
|
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
|
||||||
|
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
|
||||||
|
List *clause_list);
|
||||||
static Oid distinct_col_search(int colno, List *colnos, List *opids);
|
static Oid distinct_col_search(int colno, List *colnos, List *opids);
|
||||||
|
|
||||||
|
|
||||||
@ -152,7 +155,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
{
|
{
|
||||||
int innerrelid;
|
int innerrelid;
|
||||||
RelOptInfo *innerrel;
|
RelOptInfo *innerrel;
|
||||||
Query *subquery = NULL;
|
|
||||||
Relids joinrelids;
|
Relids joinrelids;
|
||||||
List *clause_list = NIL;
|
List *clause_list = NIL;
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
@ -171,38 +173,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
|
|
||||||
innerrel = find_base_rel(root, innerrelid);
|
innerrel = find_base_rel(root, innerrelid);
|
||||||
|
|
||||||
if (innerrel->reloptkind != RELOPT_BASEREL)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before we go to the effort of checking whether any innerrel variables
|
* Before we go to the effort of checking whether any innerrel variables
|
||||||
* are needed above the join, make a quick check to eliminate cases in
|
* are needed above the join, make a quick check to eliminate cases in
|
||||||
* which we will surely be unable to prove uniqueness of the innerrel.
|
* which we will surely be unable to prove uniqueness of the innerrel.
|
||||||
*/
|
*/
|
||||||
if (innerrel->rtekind == RTE_RELATION)
|
if (!rel_supports_distinctness(root, innerrel))
|
||||||
{
|
return false;
|
||||||
/*
|
|
||||||
* For a plain-relation innerrel, we only know how to prove uniqueness
|
|
||||||
* by reference to unique indexes. If there are no indexes then
|
|
||||||
* there's certainly no unique indexes so there's no point in going
|
|
||||||
* further.
|
|
||||||
*/
|
|
||||||
if (innerrel->indexlist == NIL)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
else if (innerrel->rtekind == RTE_SUBQUERY)
|
|
||||||
{
|
|
||||||
subquery = root->simple_rte_array[innerrelid]->subquery;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If the subquery has no qualities that support distinctness proofs
|
|
||||||
* then there's no point in going further.
|
|
||||||
*/
|
|
||||||
if (!query_supports_distinctness(subquery))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return false; /* unsupported rtekind */
|
|
||||||
|
|
||||||
/* Compute the relid set for the join we are considering */
|
/* Compute the relid set for the join we are considering */
|
||||||
joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
|
joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
|
||||||
@ -291,7 +268,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
continue; /* not mergejoinable */
|
continue; /* not mergejoinable */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if clause has the form "outer op inner" or "inner op outer".
|
* Check if clause has the form "outer op inner" or "inner op outer",
|
||||||
|
* and if so mark which side is inner.
|
||||||
*/
|
*/
|
||||||
if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
|
if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
|
||||||
innerrel->relids))
|
innerrel->relids))
|
||||||
@ -302,65 +280,11 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* relation_has_unique_index_for automatically adds any usable restriction
|
* Now that we have the relevant equality join clauses, try to prove the
|
||||||
* clauses for the innerrel, so we needn't do that here. (XXX we are not
|
* innerrel distinct.
|
||||||
* considering restriction clauses for subqueries; is that worth doing?)
|
|
||||||
*/
|
*/
|
||||||
|
if (rel_is_distinct_for(root, innerrel, clause_list))
|
||||||
if (innerrel->rtekind == RTE_RELATION)
|
return true;
|
||||||
{
|
|
||||||
/* Now examine the indexes to see if we have a matching unique index */
|
|
||||||
if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else /* innerrel->rtekind == RTE_SUBQUERY */
|
|
||||||
{
|
|
||||||
List *colnos = NIL;
|
|
||||||
List *opids = NIL;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Build the argument lists for query_is_distinct_for: a list of
|
|
||||||
* output column numbers that the query needs to be distinct over, and
|
|
||||||
* a list of equality operators that the output columns need to be
|
|
||||||
* distinct according to.
|
|
||||||
*/
|
|
||||||
foreach(l, clause_list)
|
|
||||||
{
|
|
||||||
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
|
|
||||||
Oid op;
|
|
||||||
Var *var;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Get the equality operator we need uniqueness according to.
|
|
||||||
* (This might be a cross-type operator and thus not exactly the
|
|
||||||
* same operator the subquery would consider; that's all right
|
|
||||||
* since query_is_distinct_for can resolve such cases.) The
|
|
||||||
* mergejoinability test above should have selected only OpExprs.
|
|
||||||
*/
|
|
||||||
Assert(IsA(rinfo->clause, OpExpr));
|
|
||||||
op = ((OpExpr *) rinfo->clause)->opno;
|
|
||||||
|
|
||||||
/* clause_sides_match_join identified the inner side for us */
|
|
||||||
if (rinfo->outer_is_left)
|
|
||||||
var = (Var *) get_rightop(rinfo->clause);
|
|
||||||
else
|
|
||||||
var = (Var *) get_leftop(rinfo->clause);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If inner side isn't a Var referencing a subquery output column,
|
|
||||||
* this clause doesn't help us.
|
|
||||||
*/
|
|
||||||
if (!var || !IsA(var, Var) ||
|
|
||||||
var->varno != innerrelid || var->varlevelsup != 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
colnos = lappend_int(colnos, var->varattno);
|
|
||||||
opids = lappend_oid(opids, op);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (query_is_distinct_for(subquery, colnos, opids))
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Some day it would be nice to check for other methods of establishing
|
* Some day it would be nice to check for other methods of establishing
|
||||||
@ -561,6 +485,152 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* rel_supports_distinctness
|
||||||
|
* Could the relation possibly be proven distinct on some set of columns?
|
||||||
|
*
|
||||||
|
* This is effectively a pre-checking function for rel_is_distinct_for().
|
||||||
|
* It must return TRUE if rel_is_distinct_for() could possibly return TRUE
|
||||||
|
* with this rel, but it should not expend a lot of cycles. The idea is
|
||||||
|
* that callers can avoid doing possibly-expensive processing to compute
|
||||||
|
* rel_is_distinct_for()'s argument lists if the call could not possibly
|
||||||
|
* succeed.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
|
||||||
|
{
|
||||||
|
/* We only know about baserels ... */
|
||||||
|
if (rel->reloptkind != RELOPT_BASEREL)
|
||||||
|
return false;
|
||||||
|
if (rel->rtekind == RTE_RELATION)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* For a plain relation, we only know how to prove uniqueness by
|
||||||
|
* reference to unique indexes. Make sure there's at least one
|
||||||
|
* suitable unique index. It must be immediately enforced, and if
|
||||||
|
* it's a partial index, it must match the query. (Keep these
|
||||||
|
* conditions in sync with relation_has_unique_index_for!)
|
||||||
|
*/
|
||||||
|
ListCell *lc;
|
||||||
|
|
||||||
|
foreach(lc, rel->indexlist)
|
||||||
|
{
|
||||||
|
IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
|
||||||
|
|
||||||
|
if (ind->unique && ind->immediate &&
|
||||||
|
(ind->indpred == NIL || ind->predOK))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (rel->rtekind == RTE_SUBQUERY)
|
||||||
|
{
|
||||||
|
Query *subquery = root->simple_rte_array[rel->relid]->subquery;
|
||||||
|
|
||||||
|
/* Check if the subquery has any qualities that support distinctness */
|
||||||
|
if (query_supports_distinctness(subquery))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
/* We have no proof rules for any other rtekinds. */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* rel_is_distinct_for
|
||||||
|
* Does the relation return only distinct rows according to clause_list?
|
||||||
|
*
|
||||||
|
* clause_list is a list of join restriction clauses involving this rel and
|
||||||
|
* some other one. Return true if no two rows emitted by this rel could
|
||||||
|
* possibly join to the same row of the other rel.
|
||||||
|
*
|
||||||
|
* The caller must have already determined that each condition is a
|
||||||
|
* mergejoinable equality with an expression in this relation on one side, and
|
||||||
|
* an expression not involving this relation on the other. The transient
|
||||||
|
* outer_is_left flag is used to identify which side references this relation:
|
||||||
|
* left side if outer_is_left is false, right side if it is true.
|
||||||
|
*
|
||||||
|
* Note that the passed-in clause_list may be destructively modified! This
|
||||||
|
* is OK for current uses, because the clause_list is built by the caller for
|
||||||
|
* the sole purpose of passing to this function.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We could skip a couple of tests here if we assume all callers checked
|
||||||
|
* rel_supports_distinctness first, but it doesn't seem worth taking any
|
||||||
|
* risk for.
|
||||||
|
*/
|
||||||
|
if (rel->reloptkind != RELOPT_BASEREL)
|
||||||
|
return false;
|
||||||
|
if (rel->rtekind == RTE_RELATION)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Examine the indexes to see if we have a matching unique index.
|
||||||
|
* relation_has_unique_index_for automatically adds any usable
|
||||||
|
* restriction clauses for the rel, so we needn't do that here.
|
||||||
|
*/
|
||||||
|
if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (rel->rtekind == RTE_SUBQUERY)
|
||||||
|
{
|
||||||
|
Index relid = rel->relid;
|
||||||
|
Query *subquery = root->simple_rte_array[relid]->subquery;
|
||||||
|
List *colnos = NIL;
|
||||||
|
List *opids = NIL;
|
||||||
|
ListCell *l;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Build the argument lists for query_is_distinct_for: a list of
|
||||||
|
* output column numbers that the query needs to be distinct over, and
|
||||||
|
* a list of equality operators that the output columns need to be
|
||||||
|
* distinct according to.
|
||||||
|
*
|
||||||
|
* (XXX we are not considering restriction clauses attached to the
|
||||||
|
* subquery; is that worth doing?)
|
||||||
|
*/
|
||||||
|
foreach(l, clause_list)
|
||||||
|
{
|
||||||
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
|
||||||
|
Oid op;
|
||||||
|
Var *var;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the equality operator we need uniqueness according to.
|
||||||
|
* (This might be a cross-type operator and thus not exactly the
|
||||||
|
* same operator the subquery would consider; that's all right
|
||||||
|
* since query_is_distinct_for can resolve such cases.) The
|
||||||
|
* caller's mergejoinability test should have selected only
|
||||||
|
* OpExprs.
|
||||||
|
*/
|
||||||
|
Assert(IsA(rinfo->clause, OpExpr));
|
||||||
|
op = ((OpExpr *) rinfo->clause)->opno;
|
||||||
|
|
||||||
|
/* caller identified the inner side for us */
|
||||||
|
if (rinfo->outer_is_left)
|
||||||
|
var = (Var *) get_rightop(rinfo->clause);
|
||||||
|
else
|
||||||
|
var = (Var *) get_leftop(rinfo->clause);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If inner side isn't a Var referencing a subquery output column,
|
||||||
|
* this clause doesn't help us.
|
||||||
|
*/
|
||||||
|
if (!var || !IsA(var, Var) ||
|
||||||
|
var->varno != relid || var->varlevelsup != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
colnos = lappend_int(colnos, var->varattno);
|
||||||
|
opids = lappend_oid(opids, op);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query_is_distinct_for(subquery, colnos, opids))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* query_supports_distinctness - could the query possibly be proven distinct
|
* query_supports_distinctness - could the query possibly be proven distinct
|
||||||
* on some set of output columns?
|
* on some set of output columns?
|
||||||
|
Reference in New Issue
Block a user