mirror of
https://github.com/postgres/postgres.git
synced 2025-11-19 13:42:17 +03:00
Traditionally we used the same Var struct to represent the value of a table column everywhere in parse and plan trees. This choice predates our support for SQL outer joins, and it's really a pretty bad idea with outer joins, because the Var's value can depend on where it is in the tree: it might go to NULL above an outer join. So expression nodes that are equal() per equalfuncs.c might not represent the same value, which is a huge correctness hazard for the planner. To improve this, decorate Var nodes with a bitmapset showing which outer joins (identified by RTE indexes) may have nulled them at the point in the parse tree where the Var appears. This allows us to trust that equal() Vars represent the same value. A certain amount of klugery is still needed to cope with cases where we re-order two outer joins, but it's possible to make it work without sacrificing that core principle. PlaceHolderVars receive similar decoration for the same reason. In the planner, we include these outer join bitmapsets into the relids that an expression is considered to depend on, and in consequence also add outer-join relids to the relids of join RelOptInfos. This allows us to correctly perceive whether an expression can be calculated above or below a particular outer join. This change affects FDWs that want to plan foreign joins. They *must* follow suit when labeling foreign joins in order to match with the core planner, but for many purposes (if postgres_fdw is any guide) they'd prefer to consider only base relations within the join. To support both requirements, redefine ForeignScan.fs_relids as base+OJ relids, and add a new field fs_base_relids that's set up by the core planner. Large though it is, this commit just does the minimum necessary to install the new mechanisms and get check-world passing again. Follow-up patches will perform some cleanup. (The README additions and comments mention some stuff that will appear in the follow-up.) 
Patch by me; thanks to Richard Guo for review. Discussion: https://postgr.es/m/830269.1656693747@sss.pgh.pa.us
148 lines
3.9 KiB
C
148 lines
3.9 KiB
C
/*-------------------------------------------------------------------------
 *
 * joininfo.c
 *	  joininfo list manipulation routines
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/util/joininfo.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include "optimizer/joininfo.h"
|
|
#include "optimizer/pathnode.h"
|
|
#include "optimizer/paths.h"
|
|
|
|
|
|
/*
|
|
* have_relevant_joinclause
|
|
* Detect whether there is a joinclause that involves
|
|
* the two given relations.
|
|
*
|
|
* Note: the joinclause does not have to be evaluable with only these two
|
|
* relations. This is intentional. For example consider
|
|
* SELECT * FROM a, b, c WHERE a.x = (b.y + c.z)
|
|
* If a is much larger than the other tables, it may be worthwhile to
|
|
* cross-join b and c and then use an inner indexscan on a.x. Therefore
|
|
* we should consider this joinclause as reason to join b to c, even though
|
|
* it can't be applied at that join step.
|
|
*/
|
|
bool
|
|
have_relevant_joinclause(PlannerInfo *root,
|
|
RelOptInfo *rel1, RelOptInfo *rel2)
|
|
{
|
|
bool result = false;
|
|
List *joininfo;
|
|
Relids other_relids;
|
|
ListCell *l;
|
|
|
|
/*
|
|
* We could scan either relation's joininfo list; may as well use the
|
|
* shorter one.
|
|
*/
|
|
if (list_length(rel1->joininfo) <= list_length(rel2->joininfo))
|
|
{
|
|
joininfo = rel1->joininfo;
|
|
other_relids = rel2->relids;
|
|
}
|
|
else
|
|
{
|
|
joininfo = rel2->joininfo;
|
|
other_relids = rel1->relids;
|
|
}
|
|
|
|
foreach(l, joininfo)
|
|
{
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
|
|
|
|
if (bms_overlap(other_relids, rinfo->required_relids))
|
|
{
|
|
result = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* We also need to check the EquivalenceClass data structure, which might
|
|
* contain relationships not emitted into the joininfo lists.
|
|
*/
|
|
if (!result && rel1->has_eclass_joins && rel2->has_eclass_joins)
|
|
result = have_relevant_eclass_joinclause(root, rel1, rel2);
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
* add_join_clause_to_rels
|
|
* Add 'restrictinfo' to the joininfo list of each relation it requires.
|
|
*
|
|
* Note that the same copy of the restrictinfo node is linked to by all the
|
|
* lists it is in. This allows us to exploit caching of information about
|
|
* the restriction clause (but we must be careful that the information does
|
|
* not depend on context).
|
|
*
|
|
* 'restrictinfo' describes the join clause
|
|
* 'join_relids' is the set of relations participating in the join clause
|
|
* (some of these could be outer joins)
|
|
*/
|
|
void
|
|
add_join_clause_to_rels(PlannerInfo *root,
|
|
RestrictInfo *restrictinfo,
|
|
Relids join_relids)
|
|
{
|
|
int cur_relid;
|
|
|
|
cur_relid = -1;
|
|
while ((cur_relid = bms_next_member(join_relids, cur_relid)) >= 0)
|
|
{
|
|
RelOptInfo *rel = find_base_rel_ignore_join(root, cur_relid);
|
|
|
|
/* We only need to add the clause to baserels */
|
|
if (rel == NULL)
|
|
continue;
|
|
rel->joininfo = lappend(rel->joininfo, restrictinfo);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* remove_join_clause_from_rels
|
|
* Delete 'restrictinfo' from all the joininfo lists it is in
|
|
*
|
|
* This reverses the effect of add_join_clause_to_rels. It's used when we
|
|
* discover that a relation need not be joined at all.
|
|
*
|
|
* 'restrictinfo' describes the join clause
|
|
* 'join_relids' is the set of relations participating in the join clause
|
|
* (some of these could be outer joins)
|
|
*/
|
|
void
|
|
remove_join_clause_from_rels(PlannerInfo *root,
|
|
RestrictInfo *restrictinfo,
|
|
Relids join_relids)
|
|
{
|
|
int cur_relid;
|
|
|
|
cur_relid = -1;
|
|
while ((cur_relid = bms_next_member(join_relids, cur_relid)) >= 0)
|
|
{
|
|
RelOptInfo *rel = find_base_rel_ignore_join(root, cur_relid);
|
|
|
|
/* We would only have added the clause to baserels */
|
|
if (rel == NULL)
|
|
continue;
|
|
|
|
/*
|
|
* Remove the restrictinfo from the list. Pointer comparison is
|
|
* sufficient.
|
|
*/
|
|
Assert(list_member_ptr(rel->joininfo, restrictinfo));
|
|
rel->joininfo = list_delete_ptr(rel->joininfo, restrictinfo);
|
|
}
|
|
}
|