1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-19 13:42:17 +03:00

Pathify RHS unique-ification for semijoin planning

There are two implementation techniques for semijoins: one uses the
JOIN_SEMI jointype, where the executor emits at most one matching row
per left-hand side (LHS) row; the other unique-ifies the right-hand
side (RHS) and then performs a plain inner join.

The latter technique currently has some drawbacks related to the
unique-ification step.

* Only the cheapest-total path of the RHS is considered during
unique-ification.  This may cause us to miss some optimization
opportunities; for example, a path with a better sort order might be
overlooked simply because it is not the cheapest in total cost.  Such
a path could help avoid a sort at a higher level, potentially
resulting in a cheaper overall plan.

* We currently rely on heuristics to choose between hash-based and
sort-based unique-ification.  A better approach would be to generate
paths for both methods and allow add_path() to decide which one is
preferable, consistent with how path selection is handled elsewhere in
the planner.

* In the sort-based implementation, we currently pay no attention to
the pathkeys of the input subpath or the resulting output.  This can
result in redundant sort nodes being added to the final plan.

This patch improves semijoin planning by creating a new RelOptInfo for
the RHS rel to represent its unique-ified version.  It then generates
multiple paths that represent elimination of duplicate rows from the
RHS, considering both a hash-based implementation using the cheapest
total path of the original RHS rel, and sort-based implementations
that either exploit presorted input paths or explicitly sort the
cheapest total path.  All resulting paths compete in add_path(), and
those deemed worthy of consideration are added to the new RelOptInfo.
Finally, the unique-ified rel is joined with the other side of the
semijoin using a plain inner join.

As a side effect, most of the code related to the JOIN_UNIQUE_OUTER
and JOIN_UNIQUE_INNER jointypes -- used to indicate that the LHS or
RHS path should be made unique -- has been removed.  In addition, the
T_Unique path now has the same meaning for both semijoins and upper
DISTINCT clauses: it represents adjacent-duplicate removal on
presorted input.  This patch unifies their handling by sharing the
same data structures and functions.

This patch also removes the UNIQUE_PATH_NOOP related code along the
way, as it is dead code -- if the RHS rel is provably unique, the
semijoin should have already been simplified to a plain inner join by
analyzejoins.c.

Author: Richard Guo <guofenglinux@gmail.com>
Reviewed-by: Alexandra Wang <alexandra.wang.oss@gmail.com>
Reviewed-by: wenhui qiu <qiuwenhuifx@gmail.com>
Discussion: https://postgr.es/m/CAMbWs4-EBnaRvEs7frTLbsXiweSTUXifsteF-d3rvv01FKO86w@mail.gmail.com
This commit is contained in:
Richard Guo
2025-08-19 09:35:40 +09:00
parent 3c07944d04
commit 24225ad9aa
18 changed files with 1074 additions and 971 deletions

View File

@@ -46,7 +46,6 @@ typedef enum
*/
#define STD_FUZZ_FACTOR 1.01
static List *translate_sub_tlist(List *tlist, int relid);
static int append_total_cost_compare(const ListCell *a, const ListCell *b);
static int append_startup_cost_compare(const ListCell *a, const ListCell *b);
static List *reparameterize_pathlist_by_child(PlannerInfo *root,
@@ -381,7 +380,6 @@ set_cheapest(RelOptInfo *parent_rel)
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
parent_rel->cheapest_parameterized_paths = parameterized_paths;
}
@@ -1740,246 +1738,6 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
return pathnode;
}
/*
* create_unique_path
* Creates a path representing elimination of duplicate rows from the
* input data. Distinct-ness is defined according to the needs of the
* semijoin represented by sjinfo. If it is not possible to identify
* how to make the data unique, NULL is returned.
*
* If used at all, this is likely to be called repeatedly on the same rel;
* and the input subpath should always be the same (the cheapest_total path
* for the rel). So we cache the result.
*
* 'root' is the planner context, passed through to the costing and
* uniqueness-proof helpers
* 'rel' is the relation to unique-ify; its relids must equal
* sjinfo->syn_righthand
* 'subpath' is the input path; it must be rel->cheapest_total_path
* 'sjinfo' is the JOIN_SEMI SpecialJoinInfo supplying the operators and
* RHS expressions that define uniqueness
*
* Returns the UniquePath (freshly built, or the one previously cached in
* rel->cheapest_unique_path), or NULL if the data cannot be unique-ified.
* A NULL result is not cached.
*
* Side effects: on success the result is stored in
* rel->cheapest_unique_path; sjinfo->semi_can_hash may be reset to false
* if hashing is judged to need too much memory.
*/
UniquePath *
create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
SpecialJoinInfo *sjinfo)
{
UniquePath *pathnode;
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
MemoryContext oldcontext;
int numCols;
/* Caller made a mistake if subpath isn't cheapest_total ... */
Assert(subpath == rel->cheapest_total_path);
Assert(subpath->parent == rel);
/* ... or if SpecialJoinInfo is the wrong one */
Assert(sjinfo->jointype == JOIN_SEMI);
Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
/* If result already cached, return it */
if (rel->cheapest_unique_path)
return (UniquePath *) rel->cheapest_unique_path;
/* If it's not possible to unique-ify, return NULL */
if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
return NULL;
/*
* When called during GEQO join planning, we are in a short-lived memory
* context. We must make sure that the path and any subsidiary data
* structures created for a baserel survive the GEQO cycle, else the
* baserel is trashed for future GEQO cycles. On the other hand, when we
* are creating those for a joinrel during GEQO, we don't want them to
* clutter the main planning context. Upshot is that the best solution is
* to explicitly allocate memory in the same context the given RelOptInfo
* is in.
*
* Every exit path from here on must switch back to oldcontext.
*/
oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
/*
* Build the path node. Parameterization and parallel-worker count are
* taken straight from the subpath; the node is parallel-safe only if both
* the rel and the subpath allow it.
*/
pathnode = makeNode(UniquePath);
pathnode->path.pathtype = T_Unique;
pathnode->path.parent = rel;
pathnode->path.pathtarget = rel->reltarget;
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
pathnode->path.parallel_workers = subpath->parallel_workers;
/*
* Assume the output is unsorted, since we don't necessarily have pathkeys
* to represent it. (This might get overridden below.)
*/
pathnode->path.pathkeys = NIL;
pathnode->subpath = subpath;
/*
* Under GEQO and when planning child joins, the sjinfo might be
* short-lived, so we'd better make copies of data structures we extract
* from it.
*/
pathnode->in_operators = copyObject(sjinfo->semi_operators);
pathnode->uniq_exprs = copyObject(sjinfo->semi_rhs_exprs);
/*
* If the input is a relation and it has a unique index that proves the
* semi_rhs_exprs are unique, then we don't need to do anything. Note
* that relation_has_unique_index_for automatically considers restriction
* clauses for the rel, as well.
*/
if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
relation_has_unique_index_for(root, rel, NIL,
sjinfo->semi_rhs_exprs,
sjinfo->semi_operators))
{
/*
* No-op case: row estimate comes from the rel, while costs and sort
* order are copied unchanged from the subpath.
*/
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->path.rows = rel->rows;
pathnode->path.disabled_nodes = subpath->disabled_nodes;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
}
/*
* If the input is a subquery whose output must be unique already, then we
* don't need to do anything. The test for uniqueness has to consider
* exactly which columns we are extracting; for example "SELECT DISTINCT
* x,y" doesn't guarantee that x alone is distinct. So we cannot check for
* this optimization unless semi_rhs_exprs consists only of simple Vars
* referencing subquery outputs. (Possibly we could do something with
* expressions in the subquery outputs, too, but for now keep it simple.)
*/
if (rel->rtekind == RTE_SUBQUERY)
{
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
if (query_supports_distinctness(rte->subquery))
{
List *sub_tlist_colnos;
/* NIL here means some RHS expr was not a plain Var of this rel */
sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
rel->relid);
if (sub_tlist_colnos &&
query_is_distinct_for(rte->subquery,
sub_tlist_colnos,
sjinfo->semi_operators))
{
/* Same no-op treatment as the unique-index case above */
pathnode->umethod = UNIQUE_PATH_NOOP;
pathnode->path.rows = rel->rows;
pathnode->path.disabled_nodes = subpath->disabled_nodes;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
}
}
}
/* Estimate number of output rows */
pathnode->path.rows = estimate_num_groups(root,
sjinfo->semi_rhs_exprs,
rel->rows,
NULL,
NULL);
numCols = list_length(sjinfo->semi_rhs_exprs);
/*
* Cost whichever implementations are allowed. sort_path is filled in
* only when semi_can_btree is set, and agg_path only when semi_can_hash
* is still set after the memory check; the selection logic below reads
* each dummy only under the same condition.
*/
if (sjinfo->semi_can_btree)
{
/*
* Estimate cost for sort+unique implementation
*
* NOTE(review): the trailing 0.0 / work_mem / -1.0 arguments are
* presumably comparison cost, sort memory, and "no LIMIT" -- confirm
* against cost_sort's declaration.
*/
cost_sort(&sort_path, root, NIL,
subpath->disabled_nodes,
subpath->total_cost,
rel->rows,
subpath->pathtarget->width,
0.0,
work_mem,
-1.0);
/*
* Charge one cpu_operator_cost per comparison per input tuple. We
* assume all columns get compared for most of the tuples. (XXX
* probably this is an overestimate.) This should agree with
* create_upper_unique_path.
*/
sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
}
if (sjinfo->semi_can_hash)
{
/*
* Estimate the overhead per hashtable entry at 64 bytes (same as in
* planner.c).
*/
int hashentrysize = subpath->pathtarget->width + 64;
if (hashentrysize * pathnode->path.rows > get_hash_memory_limit())
{
/*
* We should not try to hash. Hack the SpecialJoinInfo to
* remember this, in case we come through here again.
*/
sjinfo->semi_can_hash = false;
}
else
cost_agg(&agg_path, root,
AGG_HASHED, NULL,
numCols, pathnode->path.rows,
NIL,
subpath->disabled_nodes,
subpath->startup_cost,
subpath->total_cost,
rel->rows,
subpath->pathtarget->width);
}
/*
* Pick the method. When both are available, hashing wins only if it has
* fewer disabled nodes, or the same number and a strictly lower total
* cost; ties go to sorting.
*/
if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
{
if (agg_path.disabled_nodes < sort_path.disabled_nodes ||
(agg_path.disabled_nodes == sort_path.disabled_nodes &&
agg_path.total_cost < sort_path.total_cost))
pathnode->umethod = UNIQUE_PATH_HASH;
else
pathnode->umethod = UNIQUE_PATH_SORT;
}
else if (sjinfo->semi_can_btree)
pathnode->umethod = UNIQUE_PATH_SORT;
else if (sjinfo->semi_can_hash)
pathnode->umethod = UNIQUE_PATH_HASH;
else
{
/*
* we can get here only if we abandoned hashing above
*
* NOTE(review): the pathnode built above is not released on this
* path and the NULL result is not cached.
*/
MemoryContextSwitchTo(oldcontext);
return NULL;
}
/* Copy the chosen implementation's cost estimates into the path */
if (pathnode->umethod == UNIQUE_PATH_HASH)
{
pathnode->path.disabled_nodes = agg_path.disabled_nodes;
pathnode->path.startup_cost = agg_path.startup_cost;
pathnode->path.total_cost = agg_path.total_cost;
}
else
{
pathnode->path.disabled_nodes = sort_path.disabled_nodes;
pathnode->path.startup_cost = sort_path.startup_cost;
pathnode->path.total_cost = sort_path.total_cost;
}
/* Cache the result for subsequent calls on this rel */
rel->cheapest_unique_path = (Path *) pathnode;
MemoryContextSwitchTo(oldcontext);
return pathnode;
}
/*
* create_gather_merge_path
*
@@ -2031,36 +1789,6 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
return pathnode;
}
/*
 * translate_sub_tlist - map a list of Vars to subquery column numbers
 *
 * Walks 'tlist' and, for each entry that is a plain Var whose varno equals
 * 'relid', collects its varattno (ie, the column number of the subquery)
 * into an integer List, preserving the input order.
 *
 * As soon as an entry is found that is NULL, is not a Var, or references
 * some other relation, we cannot tell whether the subquery's uniqueness
 * condition (if any) matches ours, so we give up and return NIL.  An empty
 * input list likewise yields NIL.
 */
static List *
translate_sub_tlist(List *tlist, int relid)
{
	List	   *colnos = NIL;
	ListCell   *lc;

	foreach(lc, tlist)
	{
		Var		   *v = (Var *) lfirst(lc);

		/* Anything other than a simple Var defeats the translation */
		if (v == NULL || !IsA(v, Var))
			return NIL;

		/* So does a Var belonging to a different relation */
		if (v->varno != relid)
			return NIL;

		colnos = lappend_int(colnos, v->varattno);
	}

	return colnos;
}
/*
* create_gather_path
* Creates a path corresponding to a gather scan, returning the
@@ -2818,8 +2546,7 @@ create_projection_path(PlannerInfo *root,
pathnode->path.pathtype = T_Result;
pathnode->path.parent = rel;
pathnode->path.pathtarget = target;
/* For now, assume we are above any joins, so no parameterization */
pathnode->path.param_info = NULL;
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe &&
@@ -3074,8 +2801,7 @@ create_incremental_sort_path(PlannerInfo *root,
pathnode->path.parent = rel;
/* Sort doesn't project, so use source path's pathtarget */
pathnode->path.pathtarget = subpath->pathtarget;
/* For now, assume we are above any joins, so no parameterization */
pathnode->path.param_info = NULL;
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
@@ -3122,8 +2848,7 @@ create_sort_path(PlannerInfo *root,
pathnode->path.parent = rel;
/* Sort doesn't project, so use source path's pathtarget */
pathnode->path.pathtarget = subpath->pathtarget;
/* For now, assume we are above any joins, so no parameterization */
pathnode->path.param_info = NULL;
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
@@ -3199,13 +2924,10 @@ create_group_path(PlannerInfo *root,
}
/*
* create_upper_unique_path
* create_unique_path
* Creates a pathnode that represents performing an explicit Unique step
* on presorted input.
*
* This produces a Unique plan node, but the use-case is so different from
* create_unique_path that it doesn't seem worth trying to merge the two.
*
* 'rel' is the parent relation associated with the result
* 'subpath' is the path representing the source of data
* 'numCols' is the number of grouping columns
@@ -3214,21 +2936,20 @@ create_group_path(PlannerInfo *root,
* The input path must be sorted on the grouping columns, plus possibly
* additional columns; so the first numCols pathkeys are the grouping columns
*/
UpperUniquePath *
create_upper_unique_path(PlannerInfo *root,
RelOptInfo *rel,
Path *subpath,
int numCols,
double numGroups)
UniquePath *
create_unique_path(PlannerInfo *root,
RelOptInfo *rel,
Path *subpath,
int numCols,
double numGroups)
{
UpperUniquePath *pathnode = makeNode(UpperUniquePath);
UniquePath *pathnode = makeNode(UniquePath);
pathnode->path.pathtype = T_Unique;
pathnode->path.parent = rel;
/* Unique doesn't project, so use source path's pathtarget */
pathnode->path.pathtarget = subpath->pathtarget;
/* For now, assume we are above any joins, so no parameterization */
pathnode->path.param_info = NULL;
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
@@ -3284,8 +3005,7 @@ create_agg_path(PlannerInfo *root,
pathnode->path.pathtype = T_Agg;
pathnode->path.parent = rel;
pathnode->path.pathtarget = target;
/* For now, assume we are above any joins, so no parameterization */
pathnode->path.param_info = NULL;
pathnode->path.param_info = subpath->param_info;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;

View File

@@ -217,7 +217,6 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->partial_pathlist = NIL;
rel->cheapest_startup_path = NULL;
rel->cheapest_total_path = NULL;
rel->cheapest_unique_path = NULL;
rel->cheapest_parameterized_paths = NIL;
rel->relid = relid;
rel->rtekind = rte->rtekind;
@@ -269,6 +268,9 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->fdw_private = NULL;
rel->unique_for_rels = NIL;
rel->non_unique_for_rels = NIL;
rel->unique_rel = NULL;
rel->unique_pathkeys = NIL;
rel->unique_groupclause = NIL;
rel->baserestrictinfo = NIL;
rel->baserestrictcost.startup = 0;
rel->baserestrictcost.per_tuple = 0;
@@ -713,7 +715,6 @@ build_join_rel(PlannerInfo *root,
joinrel->partial_pathlist = NIL;
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
joinrel->cheapest_unique_path = NULL;
joinrel->cheapest_parameterized_paths = NIL;
/* init direct_lateral_relids from children; we'll finish it up below */
joinrel->direct_lateral_relids =
@@ -748,6 +749,9 @@ build_join_rel(PlannerInfo *root,
joinrel->fdw_private = NULL;
joinrel->unique_for_rels = NIL;
joinrel->non_unique_for_rels = NIL;
joinrel->unique_rel = NULL;
joinrel->unique_pathkeys = NIL;
joinrel->unique_groupclause = NIL;
joinrel->baserestrictinfo = NIL;
joinrel->baserestrictcost.startup = 0;
joinrel->baserestrictcost.per_tuple = 0;
@@ -906,7 +910,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->partial_pathlist = NIL;
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
joinrel->cheapest_unique_path = NULL;
joinrel->cheapest_parameterized_paths = NIL;
joinrel->direct_lateral_relids = NULL;
joinrel->lateral_relids = NULL;
@@ -933,6 +936,9 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->useridiscurrent = false;
joinrel->fdwroutine = NULL;
joinrel->fdw_private = NULL;
joinrel->unique_rel = NULL;
joinrel->unique_pathkeys = NIL;
joinrel->unique_groupclause = NIL;
joinrel->baserestrictinfo = NIL;
joinrel->baserestrictcost.startup = 0;
joinrel->baserestrictcost.per_tuple = 0;
@@ -1488,7 +1494,6 @@ fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
upperrel->pathlist = NIL;
upperrel->cheapest_startup_path = NULL;
upperrel->cheapest_total_path = NULL;
upperrel->cheapest_unique_path = NULL;
upperrel->cheapest_parameterized_paths = NIL;
root->upper_rels[kind] = lappend(root->upper_rels[kind], upperrel);