mirror of
https://github.com/postgres/postgres.git
synced 2025-11-22 12:22:45 +03:00
Re-allow planner to use Merge Append to efficiently implement UNION.
This reverts commit7204f35919, thus restoring66c0185a3(Allow planner to use Merge Append to efficiently implement UNION) as well as the follow-on commitsd5d2205c8,3b1a7eb28,7487044d6. Per further discussion on pgsql-release, we wish to ship beta1 with this feature, and patch the bug that was found just before wrap, rather than shipping beta1 with the feature reverted.
This commit is contained in:
@@ -43,11 +43,15 @@ static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
bool junkOK,
|
||||
int flag, List *refnames_tlist,
|
||||
List **pTargetList,
|
||||
double *pNumGroups);
|
||||
bool *istrivial_tlist);
|
||||
static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
|
||||
PlannerInfo *root,
|
||||
List *refnames_tlist,
|
||||
List **pTargetList);
|
||||
static void build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
|
||||
bool trivial_tlist, List *child_tlist,
|
||||
List *interesting_pathkeys,
|
||||
double *pNumGroups);
|
||||
static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
|
||||
List *refnames_tlist,
|
||||
List **pTargetList);
|
||||
@@ -57,9 +61,8 @@ static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *ro
|
||||
static List *plan_union_children(PlannerInfo *root,
|
||||
SetOperationStmt *top_union,
|
||||
List *refnames_tlist,
|
||||
List **tlist_list);
|
||||
static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
|
||||
PlannerInfo *root);
|
||||
List **tlist_list,
|
||||
List **istrivial_tlist);
|
||||
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
|
||||
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
|
||||
Path *input_path,
|
||||
@@ -114,10 +117,10 @@ plan_set_operations(PlannerInfo *root)
|
||||
Assert(parse->distinctClause == NIL);
|
||||
|
||||
/*
|
||||
* In the outer query level, we won't have any true equivalences to deal
|
||||
* with; but we do want to be able to make pathkeys, which will require
|
||||
* single-member EquivalenceClasses. Indicate that EC merging is complete
|
||||
* so that pathkeys.c won't complain.
|
||||
* In the outer query level, equivalence classes are limited to classes
|
||||
* which define that the top-level target entry is equivalent to the
|
||||
* corresponding child target entry. There won't be any equivalence class
|
||||
* merging. Mark that merging is complete to allow us to make pathkeys.
|
||||
*/
|
||||
Assert(root->eq_classes == NIL);
|
||||
root->ec_merging_done = true;
|
||||
@@ -152,6 +155,8 @@ plan_set_operations(PlannerInfo *root)
|
||||
}
|
||||
else
|
||||
{
|
||||
bool trivial_tlist;
|
||||
|
||||
/*
|
||||
* Recurse on setOperations tree to generate paths for set ops. The
|
||||
* final output paths should have just the column types shown as the
|
||||
@@ -163,7 +168,7 @@ plan_set_operations(PlannerInfo *root)
|
||||
true, -1,
|
||||
leftmostQuery->targetList,
|
||||
&top_tlist,
|
||||
NULL);
|
||||
&trivial_tlist);
|
||||
}
|
||||
|
||||
/* Must return the built tlist into root->processed_tlist. */
|
||||
@@ -172,6 +177,31 @@ plan_set_operations(PlannerInfo *root)
|
||||
return setop_rel;
|
||||
}
|
||||
|
||||
/*
|
||||
* set_operation_ordered_results_useful
|
||||
* Return true if the given SetOperationStmt can be executed by utilizing
|
||||
* paths that provide sorted input according to the setop's targetlist.
|
||||
* Returns false when sorted paths are not any more useful then unsorted
|
||||
* ones.
|
||||
*/
|
||||
bool
|
||||
set_operation_ordered_results_useful(SetOperationStmt *setop)
|
||||
{
|
||||
/*
|
||||
* Paths sorted by the targetlist are useful for UNION as we can opt to
|
||||
* MergeAppend the sorted paths then Unique them. Ordered paths are no
|
||||
* more useful than unordered ones for UNION ALL.
|
||||
*/
|
||||
if (!setop->all && setop->op == SETOP_UNION)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* EXCEPT / EXCEPT ALL / INTERSECT / INTERSECT ALL cannot yet utilize
|
||||
* correctly sorted input paths.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* recurse_set_operations
|
||||
* Recursively handle one step in a tree of set operations
|
||||
@@ -184,8 +214,8 @@ plan_set_operations(PlannerInfo *root)
|
||||
*
|
||||
* Returns a RelOptInfo for the subtree, as well as these output parameters:
|
||||
* *pTargetList: receives the fully-fledged tlist for the subtree's top plan
|
||||
* *pNumGroups: if not NULL, we estimate the number of distinct groups
|
||||
* in the result, and store it there
|
||||
* *istrivial_tlist: true if, and only if, datatypes between parent and child
|
||||
* match.
|
||||
*
|
||||
* The pTargetList output parameter is mostly redundant with the pathtarget
|
||||
* of the returned RelOptInfo, but for the moment we need it because much of
|
||||
@@ -202,9 +232,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
bool junkOK,
|
||||
int flag, List *refnames_tlist,
|
||||
List **pTargetList,
|
||||
double *pNumGroups)
|
||||
bool *istrivial_tlist)
|
||||
{
|
||||
RelOptInfo *rel = NULL; /* keep compiler quiet */
|
||||
RelOptInfo *rel;
|
||||
|
||||
*istrivial_tlist = true; /* for now */
|
||||
|
||||
/* Guard against stack overflow due to overly complex setop nests */
|
||||
check_stack_depth();
|
||||
@@ -213,11 +245,9 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
{
|
||||
RangeTblRef *rtr = (RangeTblRef *) setOp;
|
||||
RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
|
||||
SetOperationStmt *setops;
|
||||
Query *subquery = rte->subquery;
|
||||
PlannerInfo *subroot;
|
||||
RelOptInfo *final_rel;
|
||||
Path *subpath;
|
||||
Path *path;
|
||||
List *tlist;
|
||||
bool trivial_tlist;
|
||||
|
||||
@@ -229,11 +259,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
/* plan_params should not be in use in current query level */
|
||||
Assert(root->plan_params == NIL);
|
||||
|
||||
/*
|
||||
* Pass the set operation details to the subquery_planner to have it
|
||||
* consider generating Paths correctly ordered for the set operation.
|
||||
*/
|
||||
setops = castNode(SetOperationStmt, root->parse->setOperations);
|
||||
|
||||
/* Generate a subroot and Paths for the subquery */
|
||||
subroot = rel->subroot = subquery_planner(root->glob, subquery,
|
||||
root,
|
||||
false,
|
||||
root->tuple_fraction);
|
||||
subroot = rel->subroot = subquery_planner(root->glob, subquery, root,
|
||||
false, root->tuple_fraction,
|
||||
setops);
|
||||
|
||||
/*
|
||||
* It should not be possible for the primitive query to contain any
|
||||
@@ -254,90 +289,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
|
||||
/* Return the fully-fledged tlist to caller, too */
|
||||
*pTargetList = tlist;
|
||||
|
||||
/*
|
||||
* Mark rel with estimated output rows, width, etc. Note that we have
|
||||
* to do this before generating outer-query paths, else
|
||||
* cost_subqueryscan is not happy.
|
||||
*/
|
||||
set_subquery_size_estimates(root, rel);
|
||||
|
||||
/*
|
||||
* Since we may want to add a partial path to this relation, we must
|
||||
* set its consider_parallel flag correctly.
|
||||
*/
|
||||
final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
|
||||
rel->consider_parallel = final_rel->consider_parallel;
|
||||
|
||||
/*
|
||||
* For the moment, we consider only a single Path for the subquery.
|
||||
* This should change soon (make it look more like
|
||||
* set_subquery_pathlist).
|
||||
*/
|
||||
subpath = get_cheapest_fractional_path(final_rel,
|
||||
root->tuple_fraction);
|
||||
|
||||
/*
|
||||
* Stick a SubqueryScanPath atop that.
|
||||
*
|
||||
* We don't bother to determine the subquery's output ordering since
|
||||
* it won't be reflected in the set-op result anyhow; so just label
|
||||
* the SubqueryScanPath with nil pathkeys. (XXX that should change
|
||||
* soon too, likely.)
|
||||
*/
|
||||
path = (Path *) create_subqueryscan_path(root, rel, subpath,
|
||||
trivial_tlist,
|
||||
NIL, NULL);
|
||||
|
||||
add_path(rel, path);
|
||||
|
||||
/*
|
||||
* If we have a partial path for the child relation, we can use that
|
||||
* to build a partial path for this relation. But there's no point in
|
||||
* considering any path but the cheapest.
|
||||
*/
|
||||
if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
|
||||
final_rel->partial_pathlist != NIL)
|
||||
{
|
||||
Path *partial_subpath;
|
||||
Path *partial_path;
|
||||
|
||||
partial_subpath = linitial(final_rel->partial_pathlist);
|
||||
partial_path = (Path *)
|
||||
create_subqueryscan_path(root, rel, partial_subpath,
|
||||
trivial_tlist,
|
||||
NIL, NULL);
|
||||
add_partial_path(rel, partial_path);
|
||||
}
|
||||
|
||||
/*
|
||||
* Estimate number of groups if caller wants it. If the subquery used
|
||||
* grouping or aggregation, its output is probably mostly unique
|
||||
* anyway; otherwise do statistical estimation.
|
||||
*
|
||||
* XXX you don't really want to know about this: we do the estimation
|
||||
* using the subquery's original targetlist expressions, not the
|
||||
* subroot->processed_tlist which might seem more appropriate. The
|
||||
* reason is that if the subquery is itself a setop, it may return a
|
||||
* processed_tlist containing "varno 0" Vars generated by
|
||||
* generate_append_tlist, and those would confuse estimate_num_groups
|
||||
* mightily. We ought to get rid of the "varno 0" hack, but that
|
||||
* requires a redesign of the parsetree representation of setops, so
|
||||
* that there can be an RTE corresponding to each setop's output.
|
||||
*/
|
||||
if (pNumGroups)
|
||||
{
|
||||
if (subquery->groupClause || subquery->groupingSets ||
|
||||
subquery->distinctClause ||
|
||||
subroot->hasHavingQual || subquery->hasAggs)
|
||||
*pNumGroups = subpath->rows;
|
||||
else
|
||||
*pNumGroups = estimate_num_groups(subroot,
|
||||
get_tlist_exprs(subquery->targetList, false),
|
||||
subpath->rows,
|
||||
NULL,
|
||||
NULL);
|
||||
}
|
||||
*istrivial_tlist = trivial_tlist;
|
||||
}
|
||||
else if (IsA(setOp, SetOperationStmt))
|
||||
{
|
||||
@@ -352,8 +304,6 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
rel = generate_nonunion_paths(op, root,
|
||||
refnames_tlist,
|
||||
pTargetList);
|
||||
if (pNumGroups)
|
||||
*pNumGroups = rel->rows;
|
||||
|
||||
/*
|
||||
* If necessary, add a Result node to project the caller-requested
|
||||
@@ -383,6 +333,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
*pTargetList,
|
||||
refnames_tlist,
|
||||
&trivial_tlist);
|
||||
*istrivial_tlist = trivial_tlist;
|
||||
target = create_pathtarget(root, *pTargetList);
|
||||
|
||||
/* Apply projection to each path */
|
||||
@@ -413,16 +364,16 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
|
||||
lfirst(lc) = path;
|
||||
}
|
||||
}
|
||||
postprocess_setop_rel(root, rel);
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(ERROR, "unrecognized node type: %d",
|
||||
(int) nodeTag(setOp));
|
||||
*pTargetList = NIL;
|
||||
rel = NULL; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
postprocess_setop_rel(root, rel);
|
||||
|
||||
return rel;
|
||||
}
|
||||
|
||||
@@ -441,7 +392,9 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
|
||||
Path *lpath;
|
||||
Path *rpath;
|
||||
List *lpath_tlist;
|
||||
bool lpath_trivial_tlist;
|
||||
List *rpath_tlist;
|
||||
bool rpath_trivial_tlist;
|
||||
List *tlist;
|
||||
List *groupList;
|
||||
double dNumGroups;
|
||||
@@ -461,7 +414,10 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
|
||||
false, -1,
|
||||
refnames_tlist,
|
||||
&lpath_tlist,
|
||||
NULL);
|
||||
&lpath_trivial_tlist);
|
||||
if (lrel->rtekind == RTE_SUBQUERY)
|
||||
build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
|
||||
NIL, NULL);
|
||||
lpath = lrel->cheapest_total_path;
|
||||
/* The right path will want to look at the left one ... */
|
||||
root->non_recursive_path = lpath;
|
||||
@@ -470,7 +426,10 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
|
||||
false, -1,
|
||||
refnames_tlist,
|
||||
&rpath_tlist,
|
||||
NULL);
|
||||
&rpath_trivial_tlist);
|
||||
if (rrel->rtekind == RTE_SUBQUERY)
|
||||
build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
|
||||
NIL, NULL);
|
||||
rpath = rrel->cheapest_total_path;
|
||||
root->non_recursive_path = NULL;
|
||||
|
||||
@@ -532,6 +491,204 @@ generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
|
||||
return result_rel;
|
||||
}
|
||||
|
||||
/*
|
||||
* build_setop_child_paths
|
||||
* Build paths for the set op child relation denoted by 'rel'.
|
||||
*
|
||||
* interesting_pathkeys: if not NIL, also include paths that suit these
|
||||
* pathkeys, sorting any unsorted paths as required.
|
||||
* *pNumGroups: if not NULL, we estimate the number of distinct groups
|
||||
* in the result, and store it there
|
||||
*/
|
||||
static void
|
||||
build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
|
||||
bool trivial_tlist, List *child_tlist,
|
||||
List *interesting_pathkeys, double *pNumGroups)
|
||||
{
|
||||
RelOptInfo *final_rel;
|
||||
List *setop_pathkeys = rel->subroot->setop_pathkeys;
|
||||
ListCell *lc;
|
||||
|
||||
/* it can't be a set op child rel if it's not a subquery */
|
||||
Assert(rel->rtekind == RTE_SUBQUERY);
|
||||
|
||||
/* when sorting is needed, add child rel equivalences */
|
||||
if (interesting_pathkeys != NIL)
|
||||
add_setop_child_rel_equivalences(root,
|
||||
rel,
|
||||
child_tlist,
|
||||
interesting_pathkeys);
|
||||
|
||||
/*
|
||||
* Mark rel with estimated output rows, width, etc. Note that we have to
|
||||
* do this before generating outer-query paths, else cost_subqueryscan is
|
||||
* not happy.
|
||||
*/
|
||||
set_subquery_size_estimates(root, rel);
|
||||
|
||||
/*
|
||||
* Since we may want to add a partial path to this relation, we must set
|
||||
* its consider_parallel flag correctly.
|
||||
*/
|
||||
final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
|
||||
rel->consider_parallel = final_rel->consider_parallel;
|
||||
|
||||
/* Generate subquery scan paths for any interesting path in final_rel */
|
||||
foreach(lc, final_rel->pathlist)
|
||||
{
|
||||
Path *subpath = (Path *) lfirst(lc);
|
||||
List *pathkeys;
|
||||
Path *cheapest_input_path = final_rel->cheapest_total_path;
|
||||
bool is_sorted;
|
||||
int presorted_keys;
|
||||
|
||||
/*
|
||||
* Include the cheapest path as-is so that the set operation can be
|
||||
* cheaply implemented using a method which does not require the input
|
||||
* to be sorted.
|
||||
*/
|
||||
if (subpath == cheapest_input_path)
|
||||
{
|
||||
/* Convert subpath's pathkeys to outer representation */
|
||||
pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
|
||||
make_tlist_from_pathtarget(subpath->pathtarget));
|
||||
|
||||
/* Generate outer path using this subpath */
|
||||
add_path(rel, (Path *) create_subqueryscan_path(root,
|
||||
rel,
|
||||
subpath,
|
||||
trivial_tlist,
|
||||
pathkeys,
|
||||
NULL));
|
||||
}
|
||||
|
||||
/* skip dealing with sorted paths if the setop doesn't need them */
|
||||
if (interesting_pathkeys == NIL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Create paths to suit final sort order required for setop_pathkeys.
|
||||
* Here we'll sort the cheapest input path (if not sorted already) and
|
||||
* incremental sort any paths which are partially sorted.
|
||||
*/
|
||||
is_sorted = pathkeys_count_contained_in(setop_pathkeys,
|
||||
subpath->pathkeys,
|
||||
&presorted_keys);
|
||||
|
||||
if (!is_sorted)
|
||||
{
|
||||
double limittuples = rel->subroot->limit_tuples;
|
||||
|
||||
/*
|
||||
* Try at least sorting the cheapest path and also try
|
||||
* incrementally sorting any path which is partially sorted
|
||||
* already (no need to deal with paths which have presorted keys
|
||||
* when incremental sort is disabled unless it's the cheapest
|
||||
* input path).
|
||||
*/
|
||||
if (subpath != cheapest_input_path &&
|
||||
(presorted_keys == 0 || !enable_incremental_sort))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We've no need to consider both a sort and incremental sort.
|
||||
* We'll just do a sort if there are no presorted keys and an
|
||||
* incremental sort when there are presorted keys.
|
||||
*/
|
||||
if (presorted_keys == 0 || !enable_incremental_sort)
|
||||
subpath = (Path *) create_sort_path(rel->subroot,
|
||||
final_rel,
|
||||
subpath,
|
||||
setop_pathkeys,
|
||||
limittuples);
|
||||
else
|
||||
subpath = (Path *) create_incremental_sort_path(rel->subroot,
|
||||
final_rel,
|
||||
subpath,
|
||||
setop_pathkeys,
|
||||
presorted_keys,
|
||||
limittuples);
|
||||
}
|
||||
|
||||
/*
|
||||
* subpath is now sorted, so add it to the pathlist. We already added
|
||||
* the cheapest_input_path above, so don't add it again unless we just
|
||||
* sorted it.
|
||||
*/
|
||||
if (subpath != cheapest_input_path)
|
||||
{
|
||||
/* Convert subpath's pathkeys to outer representation */
|
||||
pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys,
|
||||
make_tlist_from_pathtarget(subpath->pathtarget));
|
||||
|
||||
/* Generate outer path using this subpath */
|
||||
add_path(rel, (Path *) create_subqueryscan_path(root,
|
||||
rel,
|
||||
subpath,
|
||||
trivial_tlist,
|
||||
pathkeys,
|
||||
NULL));
|
||||
}
|
||||
}
|
||||
|
||||
/* if consider_parallel is false, there should be no partial paths */
|
||||
Assert(final_rel->consider_parallel ||
|
||||
final_rel->partial_pathlist == NIL);
|
||||
|
||||
/*
|
||||
* If we have a partial path for the child relation, we can use that to
|
||||
* build a partial path for this relation. But there's no point in
|
||||
* considering any path but the cheapest.
|
||||
*/
|
||||
if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
|
||||
final_rel->partial_pathlist != NIL)
|
||||
{
|
||||
Path *partial_subpath;
|
||||
Path *partial_path;
|
||||
|
||||
partial_subpath = linitial(final_rel->partial_pathlist);
|
||||
partial_path = (Path *)
|
||||
create_subqueryscan_path(root, rel, partial_subpath,
|
||||
trivial_tlist,
|
||||
NIL, NULL);
|
||||
add_partial_path(rel, partial_path);
|
||||
}
|
||||
|
||||
postprocess_setop_rel(root, rel);
|
||||
|
||||
/*
|
||||
* Estimate number of groups if caller wants it. If the subquery used
|
||||
* grouping or aggregation, its output is probably mostly unique anyway;
|
||||
* otherwise do statistical estimation.
|
||||
*
|
||||
* XXX you don't really want to know about this: we do the estimation
|
||||
* using the subquery's original targetlist expressions, not the
|
||||
* subroot->processed_tlist which might seem more appropriate. The reason
|
||||
* is that if the subquery is itself a setop, it may return a
|
||||
* processed_tlist containing "varno 0" Vars generated by
|
||||
* generate_append_tlist, and those would confuse estimate_num_groups
|
||||
* mightily. We ought to get rid of the "varno 0" hack, but that requires
|
||||
* a redesign of the parsetree representation of setops, so that there can
|
||||
* be an RTE corresponding to each setop's output.
|
||||
*/
|
||||
if (pNumGroups)
|
||||
{
|
||||
PlannerInfo *subroot = rel->subroot;
|
||||
Query *subquery = subroot->parse;
|
||||
|
||||
if (subquery->groupClause || subquery->groupingSets ||
|
||||
subquery->distinctClause || subroot->hasHavingQual ||
|
||||
subquery->hasAggs)
|
||||
*pNumGroups = rel->cheapest_total_path->rows;
|
||||
else
|
||||
*pNumGroups = estimate_num_groups(subroot,
|
||||
get_tlist_exprs(subquery->targetList, false),
|
||||
rel->cheapest_total_path->rows,
|
||||
NULL,
|
||||
NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate paths for a UNION or UNION ALL node
|
||||
*/
|
||||
@@ -542,41 +699,38 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
|
||||
{
|
||||
Relids relids = NULL;
|
||||
RelOptInfo *result_rel;
|
||||
double save_fraction = root->tuple_fraction;
|
||||
ListCell *lc;
|
||||
List *pathlist = NIL;
|
||||
ListCell *lc2;
|
||||
ListCell *lc3;
|
||||
List *cheapest_pathlist = NIL;
|
||||
List *ordered_pathlist = NIL;
|
||||
List *partial_pathlist = NIL;
|
||||
bool partial_paths_valid = true;
|
||||
bool consider_parallel = true;
|
||||
List *rellist;
|
||||
List *tlist_list;
|
||||
List *trivial_tlist_list;
|
||||
List *tlist;
|
||||
Path *path;
|
||||
|
||||
/*
|
||||
* If plain UNION, tell children to fetch all tuples.
|
||||
*
|
||||
* Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
|
||||
* each arm of the UNION ALL. One could make a case for reducing the
|
||||
* tuple fraction for later arms (discounting by the expected size of the
|
||||
* earlier arms' results) but it seems not worth the trouble. The normal
|
||||
* case where tuple_fraction isn't already zero is a LIMIT at top level,
|
||||
* and passing it down as-is is usually enough to get the desired result
|
||||
* of preferring fast-start plans.
|
||||
*/
|
||||
if (!op->all)
|
||||
root->tuple_fraction = 0.0;
|
||||
List *groupList = NIL;
|
||||
Path *apath;
|
||||
Path *gpath = NULL;
|
||||
bool try_sorted;
|
||||
List *union_pathkeys = NIL;
|
||||
|
||||
/*
|
||||
* If any of my children are identical UNION nodes (same op, all-flag, and
|
||||
* colTypes) then they can be merged into this node so that we generate
|
||||
* only one Append and unique-ification for the lot. Recurse to find such
|
||||
* nodes and compute their children's paths.
|
||||
* only one Append/MergeAppend and unique-ification for the lot. Recurse
|
||||
* to find such nodes.
|
||||
*/
|
||||
rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);
|
||||
rellist = plan_union_children(root,
|
||||
op,
|
||||
refnames_tlist,
|
||||
&tlist_list,
|
||||
&trivial_tlist_list);
|
||||
|
||||
/*
|
||||
* Generate tlist for Append plan node.
|
||||
* Generate tlist for Append/MergeAppend plan node.
|
||||
*
|
||||
* The tlist for an Append plan isn't important as far as the Append is
|
||||
* concerned, but we must make it look real anyway for the benefit of the
|
||||
@@ -584,15 +738,68 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
|
||||
*/
|
||||
tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
|
||||
tlist_list, refnames_tlist);
|
||||
|
||||
*pTargetList = tlist;
|
||||
|
||||
/* For for UNIONs (not UNION ALL), try sorting, if sorting is possible */
|
||||
try_sorted = !op->all && grouping_is_sortable(op->groupClauses);
|
||||
|
||||
if (try_sorted)
|
||||
{
|
||||
/* Identify the grouping semantics */
|
||||
groupList = generate_setop_grouplist(op, tlist);
|
||||
|
||||
/* Determine the pathkeys for sorting by the whole target list */
|
||||
union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist);
|
||||
|
||||
root->query_pathkeys = union_pathkeys;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that we've got the append target list, we can build the union child
|
||||
* paths.
|
||||
*/
|
||||
forthree(lc, rellist, lc2, trivial_tlist_list, lc3, tlist_list)
|
||||
{
|
||||
RelOptInfo *rel = lfirst(lc);
|
||||
bool trivial_tlist = lfirst_int(lc2);
|
||||
List *child_tlist = lfirst_node(List, lc3);
|
||||
|
||||
/* only build paths for the union children */
|
||||
if (rel->rtekind == RTE_SUBQUERY)
|
||||
build_setop_child_paths(root, rel, trivial_tlist, child_tlist,
|
||||
union_pathkeys, NULL);
|
||||
}
|
||||
|
||||
/* Build path lists and relid set. */
|
||||
foreach(lc, rellist)
|
||||
{
|
||||
RelOptInfo *rel = lfirst(lc);
|
||||
Path *ordered_path;
|
||||
|
||||
pathlist = lappend(pathlist, rel->cheapest_total_path);
|
||||
cheapest_pathlist = lappend(cheapest_pathlist,
|
||||
rel->cheapest_total_path);
|
||||
|
||||
if (try_sorted)
|
||||
{
|
||||
ordered_path = get_cheapest_path_for_pathkeys(rel->pathlist,
|
||||
union_pathkeys,
|
||||
NULL,
|
||||
TOTAL_COST,
|
||||
false);
|
||||
|
||||
if (ordered_path != NULL)
|
||||
ordered_pathlist = lappend(ordered_pathlist, ordered_path);
|
||||
else
|
||||
{
|
||||
/*
|
||||
* If we can't find a sorted path, just give up trying to
|
||||
* generate a list of correctly sorted child paths. This can
|
||||
* happen when type coercion was added to the targetlist due
|
||||
* to mismatching types from the union children.
|
||||
*/
|
||||
try_sorted = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (consider_parallel)
|
||||
{
|
||||
@@ -615,28 +822,21 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
|
||||
result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
|
||||
result_rel->reltarget = create_pathtarget(root, tlist);
|
||||
result_rel->consider_parallel = consider_parallel;
|
||||
result_rel->consider_startup = (root->tuple_fraction > 0);
|
||||
|
||||
/*
|
||||
* Append the child results together.
|
||||
* Append the child results together using the cheapest paths from each
|
||||
* union child.
|
||||
*/
|
||||
path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
|
||||
NIL, NULL, 0, false, -1);
|
||||
|
||||
/*
|
||||
* For UNION ALL, we just need the Append path. For UNION, need to add
|
||||
* node(s) to remove duplicates.
|
||||
*/
|
||||
if (!op->all)
|
||||
path = make_union_unique(op, path, tlist, root);
|
||||
|
||||
add_path(result_rel, path);
|
||||
apath = (Path *) create_append_path(root, result_rel, cheapest_pathlist,
|
||||
NIL, NIL, NULL, 0, false, -1);
|
||||
|
||||
/*
|
||||
* Estimate number of groups. For now we just assume the output is unique
|
||||
* --- this is certainly true for the UNION case, and we want worst-case
|
||||
* estimates anyway.
|
||||
*/
|
||||
result_rel->rows = path->rows;
|
||||
result_rel->rows = apath->rows;
|
||||
|
||||
/*
|
||||
* Now consider doing the same thing using the partial paths plus Append
|
||||
@@ -644,7 +844,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
|
||||
*/
|
||||
if (partial_paths_valid)
|
||||
{
|
||||
Path *ppath;
|
||||
Path *papath;
|
||||
int parallel_workers = 0;
|
||||
|
||||
/* Find the highest number of workers requested for any subpath. */
|
||||
@@ -673,21 +873,137 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
|
||||
}
|
||||
Assert(parallel_workers > 0);
|
||||
|
||||
ppath = (Path *)
|
||||
papath = (Path *)
|
||||
create_append_path(root, result_rel, NIL, partial_pathlist,
|
||||
NIL, NULL,
|
||||
parallel_workers, enable_parallel_append,
|
||||
-1);
|
||||
ppath = (Path *)
|
||||
create_gather_path(root, result_rel, ppath,
|
||||
NIL, NULL, parallel_workers,
|
||||
enable_parallel_append, -1);
|
||||
gpath = (Path *)
|
||||
create_gather_path(root, result_rel, papath,
|
||||
result_rel->reltarget, NULL, NULL);
|
||||
if (!op->all)
|
||||
ppath = make_union_unique(op, ppath, tlist, root);
|
||||
add_path(result_rel, ppath);
|
||||
}
|
||||
|
||||
/* Undo effects of possibly forcing tuple_fraction to 0 */
|
||||
root->tuple_fraction = save_fraction;
|
||||
if (!op->all)
|
||||
{
|
||||
double dNumGroups;
|
||||
bool can_sort = grouping_is_sortable(groupList);
|
||||
bool can_hash = grouping_is_hashable(groupList);
|
||||
|
||||
/*
|
||||
* XXX for the moment, take the number of distinct groups as equal to
|
||||
* the total input size, i.e., the worst case. This is too
|
||||
* conservative, but it's not clear how to get a decent estimate of
|
||||
* the true size. One should note as well the propensity of novices
|
||||
* to write UNION rather than UNION ALL even when they don't expect
|
||||
* any duplicates...
|
||||
*/
|
||||
dNumGroups = apath->rows;
|
||||
|
||||
if (can_hash)
|
||||
{
|
||||
Path *path;
|
||||
|
||||
/*
|
||||
* Try a hash aggregate plan on 'apath'. This is the cheapest
|
||||
* available path containing each append child.
|
||||
*/
|
||||
path = (Path *) create_agg_path(root,
|
||||
result_rel,
|
||||
apath,
|
||||
create_pathtarget(root, tlist),
|
||||
AGG_HASHED,
|
||||
AGGSPLIT_SIMPLE,
|
||||
groupList,
|
||||
NIL,
|
||||
NULL,
|
||||
dNumGroups);
|
||||
add_path(result_rel, path);
|
||||
|
||||
/* Try hash aggregate on the Gather path, if valid */
|
||||
if (gpath != NULL)
|
||||
{
|
||||
/* Hashed aggregate plan --- no sort needed */
|
||||
path = (Path *) create_agg_path(root,
|
||||
result_rel,
|
||||
gpath,
|
||||
create_pathtarget(root, tlist),
|
||||
AGG_HASHED,
|
||||
AGGSPLIT_SIMPLE,
|
||||
groupList,
|
||||
NIL,
|
||||
NULL,
|
||||
dNumGroups);
|
||||
add_path(result_rel, path);
|
||||
}
|
||||
}
|
||||
|
||||
if (can_sort)
|
||||
{
|
||||
Path *path = apath;
|
||||
|
||||
/* Try Sort -> Unique on the Append path */
|
||||
if (groupList != NIL)
|
||||
path = (Path *) create_sort_path(root, result_rel, path,
|
||||
make_pathkeys_for_sortclauses(root, groupList, tlist),
|
||||
-1.0);
|
||||
|
||||
path = (Path *) create_upper_unique_path(root,
|
||||
result_rel,
|
||||
path,
|
||||
list_length(path->pathkeys),
|
||||
dNumGroups);
|
||||
|
||||
add_path(result_rel, path);
|
||||
|
||||
/* Try Sort -> Unique on the Gather path, if set */
|
||||
if (gpath != NULL)
|
||||
{
|
||||
path = gpath;
|
||||
|
||||
path = (Path *) create_sort_path(root, result_rel, path,
|
||||
make_pathkeys_for_sortclauses(root, groupList, tlist),
|
||||
-1.0);
|
||||
|
||||
path = (Path *) create_upper_unique_path(root,
|
||||
result_rel,
|
||||
path,
|
||||
list_length(path->pathkeys),
|
||||
dNumGroups);
|
||||
add_path(result_rel, path);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Try making a MergeAppend path if we managed to find a path with the
|
||||
* correct pathkeys in each union child query.
|
||||
*/
|
||||
if (try_sorted && groupList != NIL)
|
||||
{
|
||||
Path *path;
|
||||
|
||||
path = (Path *) create_merge_append_path(root,
|
||||
result_rel,
|
||||
ordered_pathlist,
|
||||
union_pathkeys,
|
||||
NULL);
|
||||
|
||||
/* and make the MergeAppend unique */
|
||||
path = (Path *) create_upper_unique_path(root,
|
||||
result_rel,
|
||||
path,
|
||||
list_length(tlist),
|
||||
dNumGroups);
|
||||
|
||||
add_path(result_rel, path);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* UNION ALL */
|
||||
add_path(result_rel, apath);
|
||||
|
||||
if (gpath != NULL)
|
||||
add_path(result_rel, gpath);
|
||||
}
|
||||
|
||||
return result_rel;
|
||||
}
|
||||
@@ -713,6 +1029,8 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
|
||||
*tlist,
|
||||
*groupList,
|
||||
*pathlist;
|
||||
bool lpath_trivial_tlist,
|
||||
rpath_trivial_tlist;
|
||||
double dLeftGroups,
|
||||
dRightGroups,
|
||||
dNumGroups,
|
||||
@@ -732,14 +1050,26 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
|
||||
false, 0,
|
||||
refnames_tlist,
|
||||
&lpath_tlist,
|
||||
&dLeftGroups);
|
||||
&lpath_trivial_tlist);
|
||||
if (lrel->rtekind == RTE_SUBQUERY)
|
||||
build_setop_child_paths(root, lrel, lpath_trivial_tlist, lpath_tlist,
|
||||
NIL, &dLeftGroups);
|
||||
else
|
||||
dLeftGroups = lrel->rows;
|
||||
|
||||
lpath = lrel->cheapest_total_path;
|
||||
rrel = recurse_set_operations(op->rarg, root,
|
||||
op->colTypes, op->colCollations,
|
||||
false, 1,
|
||||
refnames_tlist,
|
||||
&rpath_tlist,
|
||||
&dRightGroups);
|
||||
&rpath_trivial_tlist);
|
||||
if (rrel->rtekind == RTE_SUBQUERY)
|
||||
build_setop_child_paths(root, rrel, rpath_trivial_tlist, rpath_tlist,
|
||||
NIL, &dRightGroups);
|
||||
else
|
||||
dRightGroups = rrel->rows;
|
||||
|
||||
rpath = rrel->cheapest_total_path;
|
||||
|
||||
/* Undo effects of forcing tuple_fraction to 0 */
|
||||
@@ -876,13 +1206,16 @@ static List *
|
||||
plan_union_children(PlannerInfo *root,
|
||||
SetOperationStmt *top_union,
|
||||
List *refnames_tlist,
|
||||
List **tlist_list)
|
||||
List **tlist_list,
|
||||
List **istrivial_tlist)
|
||||
{
|
||||
List *pending_rels = list_make1(top_union);
|
||||
List *result = NIL;
|
||||
List *child_tlist;
|
||||
bool trivial_tlist;
|
||||
|
||||
*tlist_list = NIL;
|
||||
*istrivial_tlist = NIL;
|
||||
|
||||
while (pending_rels != NIL)
|
||||
{
|
||||
@@ -921,75 +1254,14 @@ plan_union_children(PlannerInfo *root,
|
||||
false, -1,
|
||||
refnames_tlist,
|
||||
&child_tlist,
|
||||
NULL));
|
||||
&trivial_tlist));
|
||||
*tlist_list = lappend(*tlist_list, child_tlist);
|
||||
*istrivial_tlist = lappend_int(*istrivial_tlist, trivial_tlist);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add nodes to the given path tree to unique-ify the result of a UNION.
|
||||
*/
|
||||
static Path *
|
||||
make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
|
||||
PlannerInfo *root)
|
||||
{
|
||||
RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
|
||||
List *groupList;
|
||||
double dNumGroups;
|
||||
|
||||
/* Identify the grouping semantics */
|
||||
groupList = generate_setop_grouplist(op, tlist);
|
||||
|
||||
/*
|
||||
* XXX for the moment, take the number of distinct groups as equal to the
|
||||
* total input size, ie, the worst case. This is too conservative, but
|
||||
* it's not clear how to get a decent estimate of the true size. One
|
||||
* should note as well the propensity of novices to write UNION rather
|
||||
* than UNION ALL even when they don't expect any duplicates...
|
||||
*/
|
||||
dNumGroups = path->rows;
|
||||
|
||||
/* Decide whether to hash or sort */
|
||||
if (choose_hashed_setop(root, groupList, path,
|
||||
dNumGroups, dNumGroups,
|
||||
"UNION"))
|
||||
{
|
||||
/* Hashed aggregate plan --- no sort needed */
|
||||
path = (Path *) create_agg_path(root,
|
||||
result_rel,
|
||||
path,
|
||||
create_pathtarget(root, tlist),
|
||||
AGG_HASHED,
|
||||
AGGSPLIT_SIMPLE,
|
||||
groupList,
|
||||
NIL,
|
||||
NULL,
|
||||
dNumGroups);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Sort and Unique */
|
||||
if (groupList)
|
||||
path = (Path *)
|
||||
create_sort_path(root,
|
||||
result_rel,
|
||||
path,
|
||||
make_pathkeys_for_sortclauses(root,
|
||||
groupList,
|
||||
tlist),
|
||||
-1.0);
|
||||
path = (Path *) create_upper_unique_path(root,
|
||||
result_rel,
|
||||
path,
|
||||
list_length(path->pathkeys),
|
||||
dNumGroups);
|
||||
}
|
||||
|
||||
return path;
|
||||
}
|
||||
|
||||
/*
|
||||
* postprocess_setop_rel - perform steps required after adding paths
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user