diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 2421b0a6df4..f0ebe1fc0aa 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -12,11 +12,16 @@ * relation. Then it is a simple matter to emit the output demanded by the * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL. * - * In SETOP_HASHED mode, the input is delivered in no particular order. - * We build a hash table in memory with one entry for each group of - * identical tuples, and count the number of tuples in the group from - * each relation. After seeing all the input, we scan the hashtable and - * generate the correct output using those counts. + * In SETOP_HASHED mode, the input is delivered in no particular order, + * except that we know all the tuples from one input relation will come before + * all the tuples of the other. The planner guarantees that the first input + * relation is the left-hand one for EXCEPT, and tries to make the smaller + * input relation come first for INTERSECT. We build a hash table in memory + * with one entry for each group of identical tuples, and count the number of + * tuples in the group from each relation. After seeing all the input, we + * scan the hashtable and generate the correct output using those counts. + * We can avoid making hashtable entries for any tuples appearing only in the + * second input relation, since they cannot result in any output. * * This node type is not used for UNION or UNION ALL, since those can be * implemented more cheaply (there's no need for the junk attribute to @@ -32,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.26 2008/08/07 03:04:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.27 2008/08/07 19:35:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -82,8 +87,8 @@ static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate); /* * Initialize state for a new group of input values. */ -static void -initialize_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup) +static inline void +initialize_counts(SetOpStatePerGroup pergroup) { pergroup->numLeft = pergroup->numRight = 0; } @@ -91,9 +96,21 @@ initialize_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup) /* * Advance the appropriate counter for one input tuple. */ -static void -advance_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup, - TupleTableSlot *inputslot) +static inline void +advance_counts(SetOpStatePerGroup pergroup, int flag) +{ + if (flag) + pergroup->numRight++; + else + pergroup->numLeft++; +} + +/* + * Fetch the "flag" column from an input tuple. + * This is an integer column with value 0 for left side, 1 for right side. + */ +static int +fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot) { SetOp *node = (SetOp *) setopstate->ps.plan; int flag; @@ -103,10 +120,8 @@ advance_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup, node->flagColIdx, &isNull)); Assert(!isNull); - if (flag) - pergroup->numRight++; - else - pergroup->numLeft++; + Assert(flag == 0 || flag == 1); + return flag; } /* @@ -130,33 +145,6 @@ build_hash_table(SetOpState *setopstate) setopstate->tempContext); } -/* - * Find or create a hashtable entry for the tuple group containing the - * given tuple. - */ -static SetOpHashEntry -lookup_hash_entry(SetOpState *setopstate, TupleTableSlot *inputslot) -{ - SetOpHashEntry entry; - bool isnew; - - /* find or create the hashtable entry */ - entry = (SetOpHashEntry) LookupTupleHashEntry(setopstate->hashtable, - inputslot, - &isnew); - - if (isnew) - { - /* initialize counts for new tuple group */ - initialize_counts(setopstate, &entry->pergroup); - } - - /* Must reset temp context after each hashtable lookup */ - MemoryContextReset(setopstate->tempContext); - - return entry; -} - /* * We've completed processing a tuple group. Decide how many copies (if any) * of its representative row to emit, and store the count into numOutput. @@ -289,10 +277,11 @@ setop_retrieve_direct(SetOpState *setopstate) setopstate->grp_firstTuple = NULL; /* don't keep two pointers */ /* Initialize working state for a new input tuple group */ - initialize_counts(setopstate, pergroup); + initialize_counts(pergroup); /* Count the first input tuple */ - advance_counts(setopstate, pergroup, resultTupleSlot); + advance_counts(pergroup, + fetch_tuple_flag(setopstate, resultTupleSlot)); /* * Scan the outer plan until we exhaust it or cross a group boundary. @@ -324,7 +313,8 @@ setop_retrieve_direct(SetOpState *setopstate) } /* Still in same group, so count this tuple */ - advance_counts(setopstate, pergroup, outerslot); + advance_counts(pergroup, + fetch_tuple_flag(setopstate, outerslot)); } /* @@ -351,30 +341,73 @@ setop_retrieve_direct(SetOpState *setopstate) static void setop_fill_hash_table(SetOpState *setopstate) { + SetOp *node = (SetOp *) setopstate->ps.plan; PlanState *outerPlan; - SetOpHashEntry entry; - TupleTableSlot *outerslot; + int firstFlag; + bool in_first_rel; /* * get state info from node */ outerPlan = outerPlanState(setopstate); + firstFlag = node->firstFlag; + /* verify planner didn't mess up */ + Assert(firstFlag == 0 || + (firstFlag == 1 && + (node->cmd == SETOPCMD_INTERSECT || + node->cmd == SETOPCMD_INTERSECT_ALL))); /* * Process each outer-plan tuple, and then fetch the next one, until we * exhaust the outer plan. */ + in_first_rel = true; for (;;) { + TupleTableSlot *outerslot; + int flag; + SetOpHashEntry entry; + bool isnew; + outerslot = ExecProcNode(outerPlan); if (TupIsNull(outerslot)) break; - /* Find or build hashtable entry for this tuple's group */ - entry = lookup_hash_entry(setopstate, outerslot); + /* Identify whether it's left or right input */ + flag = fetch_tuple_flag(setopstate, outerslot); - /* Advance the counts */ - advance_counts(setopstate, &entry->pergroup, outerslot); + if (flag == firstFlag) + { + /* (still) in first input relation */ + Assert(in_first_rel); + + /* Find or build hashtable entry for this tuple's group */ + entry = (SetOpHashEntry) + LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew); + + /* If new tuple group, initialize counts */ + if (isnew) + initialize_counts(&entry->pergroup); + + /* Advance the counts */ + advance_counts(&entry->pergroup, flag); + } + else + { + /* reached second relation */ + in_first_rel = false; + + /* For tuples not seen previously, do not make hashtable entry */ + entry = (SetOpHashEntry) + LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL); + + /* Advance the counts if entry is already present */ + if (entry) + advance_counts(&entry->pergroup, flag); + } + + /* Must reset temp context after each hashtable lookup */ + MemoryContextReset(setopstate->tempContext); } setopstate->table_filled = true; diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 4767b924e9e..90ebd7819a7 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.398 2008/08/07 03:04:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.399 2008/08/07 19:35:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -657,6 +657,7 @@ _copySetOp(SetOp *from) COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber)); COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid)); COPY_SCALAR_FIELD(flagColIdx); + COPY_SCALAR_FIELD(firstFlag); COPY_SCALAR_FIELD(numGroups); return newnode; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 8440864b0e6..4728117be24 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.332 2008/08/07 03:04:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.333 2008/08/07 19:35:02 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -611,6 +611,7 @@ _outSetOp(StringInfo str, SetOp *node) appendStringInfo(str, " %u", node->dupOperators[i]); WRITE_INT_FIELD(flagColIdx); + WRITE_INT_FIELD(firstFlag); WRITE_LONG_FIELD(numGroups); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 4cf2a5208b1..0c200e05795 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.243 2008/08/07 03:04:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.244 2008/08/07 19:35:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3109,8 +3109,8 @@ make_unique(Plan *lefttree, List *distinctList) */ SetOp * make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, - List *distinctList, AttrNumber flagColIdx, long numGroups, - double outputRows) + List *distinctList, AttrNumber flagColIdx, int firstFlag, + long numGroups, double outputRows) { SetOp *node = makeNode(SetOp); Plan *plan = &node->plan; @@ -3159,6 +3159,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, node->dupColIdx = dupColIdx; node->dupOperators = dupOperators; node->flagColIdx = flagColIdx; + node->firstFlag = firstFlag; node->numGroups = numGroups; return node; diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 3a155c0d0ad..dda41770c8d 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -22,7 +22,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.151 2008/08/07 03:04:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.152 2008/08/07 19:35:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -49,19 +49,22 @@ #include "parser/parsetree.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/selfuncs.h" static Plan *recurse_set_operations(Node *setOp, PlannerInfo *root, double tuple_fraction, List *colTypes, bool junkOK, int flag, List *refnames_tlist, - List **sortClauses); + List **sortClauses, double *pNumGroups); static Plan *generate_union_plan(SetOperationStmt *op, PlannerInfo *root, double tuple_fraction, - List *refnames_tlist, List **sortClauses); + List *refnames_tlist, + List **sortClauses, double *pNumGroups); static Plan *generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, double tuple_fraction, - List *refnames_tlist, List **sortClauses); + List *refnames_tlist, + List **sortClauses, double *pNumGroups); static List *recurse_union_children(Node *setOp, PlannerInfo *root, double tuple_fraction, SetOperationStmt *top_union, @@ -71,7 +74,8 @@ static Plan *make_union_unique(SetOperationStmt *op, Plan *plan, List **sortClauses); static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, Plan *input_plan, - double tuple_fraction, double dNumDistinctRows, + double dNumGroups, double dNumOutputRows, + double tuple_fraction, const char *construct); static List *generate_setop_tlist(List *colTypes, int flag, Index varno, @@ -153,7 +157,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction, return recurse_set_operations((Node *) topop, root, tuple_fraction, topop->colTypes, true, -1, leftmostQuery->targetList, - sortClauses); + sortClauses, NULL); } /* @@ -165,7 +169,11 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction, * junkOK: if true, child resjunk columns may be left in the result * flag: if >= 0, add a resjunk output column indicating value of flag * refnames_tlist: targetlist to take column names from + * + * Returns a plan for the subtree, as well as these output parameters: * *sortClauses: receives list of SortGroupClauses for result plan, if any + * *pNumGroups: if not NULL, we estimate the number of distinct groups + * in the result, and store it there * * We don't have to care about typmods here: the only allowed difference * between set-op input and output typmods is input is a specific typmod @@ -176,7 +184,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, double tuple_fraction, List *colTypes, bool junkOK, int flag, List *refnames_tlist, - List **sortClauses) + List **sortClauses, double *pNumGroups) { if (IsA(setOp, RangeTblRef)) { @@ -197,6 +205,22 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, tuple_fraction, &subroot); + /* + * Estimate number of groups if caller wants it. If the subquery + * used grouping or aggregation, its output is probably mostly + * unique anyway; otherwise do statistical estimation. + */ + if (pNumGroups) + { + if (subquery->groupClause || subquery->distinctClause || + subroot->hasHavingQual || subquery->hasAggs) + *pNumGroups = subplan->plan_rows; + else + *pNumGroups = estimate_num_groups(subroot, + get_tlist_exprs(subquery->targetList, false), + subplan->plan_rows); + } + /* * Add a SubqueryScan with the caller-requested targetlist */ @@ -228,11 +252,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root, if (op->op == SETOP_UNION) plan = generate_union_plan(op, root, tuple_fraction, refnames_tlist, - sortClauses); + sortClauses, pNumGroups); else plan = generate_nonunion_plan(op, root, tuple_fraction, refnames_tlist, - sortClauses); + sortClauses, pNumGroups); /* * If necessary, add a Result node to project the caller-requested @@ -277,7 +301,7 @@ static Plan * generate_union_plan(SetOperationStmt *op, PlannerInfo *root, double tuple_fraction, List *refnames_tlist, - List **sortClauses) + List **sortClauses, double *pNumGroups) { List *planlist; List *tlist; @@ -334,6 +358,14 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root, else plan = make_union_unique(op, plan, root, tuple_fraction, sortClauses); + /* + * Estimate number of groups if caller wants it. For now we just + * assume the output is unique --- this is certainly true for the + * UNION case, and we want worst-case estimates anyway. + */ + if (pNumGroups) + *pNumGroups = plan->plan_rows; + return plan; } @@ -344,7 +376,7 @@ static Plan * generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, double tuple_fraction, List *refnames_tlist, - List **sortClauses) + List **sortClauses, double *pNumGroups) { Plan *lplan, *rplan, @@ -353,24 +385,43 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, *groupList, *planlist, *child_sortclauses; - double dNumDistinctRows; - double dNumOutputRows; - long numDistinctRows; + double dLeftGroups, + dRightGroups, + dNumGroups, + dNumOutputRows; + long numGroups; bool use_hash; SetOpCmd cmd; + int firstFlag; /* Recurse on children, ensuring their outputs are marked */ lplan = recurse_set_operations(op->larg, root, 0.0 /* all tuples needed */ , op->colTypes, false, 0, refnames_tlist, - &child_sortclauses); + &child_sortclauses, &dLeftGroups); rplan = recurse_set_operations(op->rarg, root, 0.0 /* all tuples needed */ , op->colTypes, false, 1, refnames_tlist, - &child_sortclauses); - planlist = list_make2(lplan, rplan); + &child_sortclauses, &dRightGroups); + + /* + * For EXCEPT, we must put the left input first. For INTERSECT, either + * order should give the same results, and we prefer to put the smaller + * input first in order to minimize the size of the hash table in the + * hashing case. "Smaller" means the one with the fewer groups. + */ + if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups) + { + planlist = list_make2(lplan, rplan); + firstFlag = 0; + } + else + { + planlist = list_make2(rplan, lplan); + firstFlag = 1; + } /* * Generate tlist for Append plan node. @@ -400,27 +451,32 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, } /* - * XXX for the moment, take the number of distinct groups as being the - * total input size, ie, the worst case. This is too conservative, but - * we don't want to risk having the hashtable overrun memory; also, - * it's not clear how to get a decent estimate of the true size. + * Estimate number of distinct groups that we'll need hashtable entries + * for; this is the size of the left-hand input for EXCEPT, or the smaller + * input for INTERSECT. Also estimate the number of eventual output rows. + * In non-ALL cases, we estimate each group produces one output row; + * in ALL cases use the relevant relation size. These are worst-case + * estimates, of course, but we need to be conservative. */ - dNumDistinctRows = plan->plan_rows; + if (op->op == SETOP_EXCEPT) + { + dNumGroups = dLeftGroups; + dNumOutputRows = op->all ? lplan->plan_rows : dNumGroups; + } + else + { + dNumGroups = Min(dLeftGroups, dRightGroups); + dNumOutputRows = op->all ? Min(lplan->plan_rows, rplan->plan_rows) : dNumGroups; + } /* Also convert to long int --- but 'ware overflow! */ - numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); - - /* - * The output size is taken as 10% of that, which is a completely bogus - * guess, but it's what we've used historically. - */ - dNumOutputRows = ceil(dNumDistinctRows * 0.1); + numGroups = (long) Min(dNumGroups, (double) LONG_MAX); /* * Decide whether to hash or sort, and add a sort node if needed. */ use_hash = choose_hashed_setop(root, groupList, plan, - tuple_fraction, dNumDistinctRows, + dNumGroups, dNumOutputRows, tuple_fraction, (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT"); if (!use_hash) @@ -443,12 +499,17 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root, break; } plan = (Plan *) make_setop(cmd, use_hash ? SETOP_HASHED : SETOP_SORTED, - plan, groupList, list_length(op->colTypes) + 1, - numDistinctRows, dNumOutputRows); + plan, groupList, + list_length(op->colTypes) + 1, + use_hash ? firstFlag : -1, + numGroups, dNumOutputRows); /* Result is sorted only if we're not hashing */ *sortClauses = use_hash ? NIL : groupList; + if (pNumGroups) + *pNumGroups = dNumGroups; + return plan; } @@ -499,7 +560,7 @@ recurse_union_children(Node *setOp, PlannerInfo *root, tuple_fraction, top_union->colTypes, false, -1, refnames_tlist, - &child_sortclauses)); + &child_sortclauses, NULL)); } /* @@ -511,8 +572,8 @@ make_union_unique(SetOperationStmt *op, Plan *plan, List **sortClauses) { List *groupList; - double dNumDistinctRows; - long numDistinctRows; + double dNumGroups; + long numGroups; /* Identify the grouping semantics */ groupList = generate_setop_grouplist(op, plan->targetlist); @@ -525,21 +586,21 @@ make_union_unique(SetOperationStmt *op, Plan *plan, } /* - * XXX for the moment, take the number of distinct groups as being the - * total input size, ie, the worst case. This is too conservative, but - * we don't want to risk having the hashtable overrun memory; also, + * XXX for the moment, take the number of distinct groups as equal to + * the total input size, ie, the worst case. This is too conservative, + * but we don't want to risk having the hashtable overrun memory; also, * it's not clear how to get a decent estimate of the true size. One * should note as well the propensity of novices to write UNION rather * than UNION ALL even when they don't expect any duplicates... */ - dNumDistinctRows = plan->plan_rows; + dNumGroups = plan->plan_rows; /* Also convert to long int --- but 'ware overflow! */ - numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); + numGroups = (long) Min(dNumGroups, (double) LONG_MAX); /* Decide whether to hash or sort */ if (choose_hashed_setop(root, groupList, plan, - tuple_fraction, dNumDistinctRows, + dNumGroups, dNumGroups, tuple_fraction, "UNION")) { /* Hashed aggregate plan --- no sort needed */ @@ -551,7 +612,7 @@ make_union_unique(SetOperationStmt *op, Plan *plan, extract_grouping_cols(groupList, plan->targetlist), extract_grouping_ops(groupList), - numDistinctRows, + numGroups, 0, plan); /* Hashed aggregation produces randomly-ordered results */ @@ -562,7 +623,7 @@ make_union_unique(SetOperationStmt *op, Plan *plan, /* Sort and Unique */ plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan); plan = (Plan *) make_unique(plan, groupList); - plan->plan_rows = dNumDistinctRows; + plan->plan_rows = dNumGroups; /* We know the sort order of the result */ *sortClauses = groupList; } @@ -576,14 +637,14 @@ make_union_unique(SetOperationStmt *op, Plan *plan, static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses, Plan *input_plan, - double tuple_fraction, double dNumDistinctRows, + double dNumGroups, double dNumOutputRows, + double tuple_fraction, const char *construct) { - int numDistinctCols = list_length(groupClauses); + int numGroupCols = list_length(groupClauses); bool can_sort; bool can_hash; Size hashentrysize; - List *needed_pathkeys; Path hashed_p; Path sorted_p; @@ -615,7 +676,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, */ hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData)); - if (hashentrysize * dNumDistinctRows > work_mem * 1024L) + if (hashentrysize * dNumGroups > work_mem * 1024L) return false; /* @@ -630,7 +691,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, * make actual Paths for these steps. */ cost_agg(&hashed_p, root, AGG_HASHED, 0, - numDistinctCols, dNumDistinctRows, + numGroupCols, dNumGroups, input_plan->startup_cost, input_plan->total_cost, input_plan->plan_rows); @@ -640,14 +701,10 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, */ sorted_p.startup_cost = input_plan->startup_cost; sorted_p.total_cost = input_plan->total_cost; - /* XXX this is more expensive than cost_sort really needs: */ - needed_pathkeys = make_pathkeys_for_sortclauses(root, - groupClauses, - input_plan->targetlist, - true); - cost_sort(&sorted_p, root, needed_pathkeys, sorted_p.total_cost, + /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */ + cost_sort(&sorted_p, root, NIL, sorted_p.total_cost, input_plan->plan_rows, input_plan->plan_width, -1.0); - cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows, + cost_group(&sorted_p, root, numGroupCols, dNumGroups, sorted_p.startup_cost, sorted_p.total_cost, input_plan->plan_rows); @@ -656,7 +713,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses, * have to convert an absolute count (LIMIT) into fractional form. */ if (tuple_fraction >= 1.0) - tuple_fraction /= dNumDistinctRows; + tuple_fraction /= dNumOutputRows; if (compare_fractional_path_costs(&hashed_p, &sorted_p, tuple_fraction) < 0) diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index a2c627fd4d5..b2fb112ebef 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.80 2008/08/07 01:11:50 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.81 2008/08/07 19:35:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -133,6 +133,69 @@ add_to_flat_tlist(List *tlist, List *vars) } +/* + * get_tlist_exprs + * Get just the expression subtrees of a tlist + * + * Resjunk columns are ignored unless includeJunk is true + */ +List * +get_tlist_exprs(List *tlist, bool includeJunk) +{ + List *result = NIL; + ListCell *l; + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk && !includeJunk) + continue; + + result = lappend(result, tle->expr); + } + return result; +} + + +/* + * Does tlist have same output datatypes as listed in colTypes? + * + * Resjunk columns are ignored if junkOK is true; otherwise presence of + * a resjunk column will always cause a 'false' result. + * + * Note: currently no callers care about comparing typmods. + */ +bool +tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK) +{ + ListCell *l; + ListCell *curColType = list_head(colTypes); + + foreach(l, tlist) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + if (tle->resjunk) + { + if (!junkOK) + return false; + } + else + { + if (curColType == NULL) + return false; /* tlist longer than colTypes */ + if (exprType((Node *) tle->expr) != lfirst_oid(curColType)) + return false; + curColType = lnext(curColType); + } + } + if (curColType != NULL) + return false; /* tlist shorter than colTypes */ + return true; +} + + /* * get_sortgroupref_tle * Find the targetlist entry matching the given SortGroupRef index, @@ -303,42 +366,3 @@ grouping_is_hashable(List *groupClause) } return true; } - - - -/* - * Does tlist have same output datatypes as listed in colTypes? - * - * Resjunk columns are ignored if junkOK is true; otherwise presence of - * a resjunk column will always cause a 'false' result. - * - * Note: currently no callers care about comparing typmods. - */ -bool -tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK) -{ - ListCell *l; - ListCell *curColType = list_head(colTypes); - - foreach(l, tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (tle->resjunk) - { - if (!junkOK) - return false; - } - else - { - if (curColType == NULL) - return false; /* tlist longer than colTypes */ - if (exprType((Node *) tle->expr) != lfirst_oid(curColType)) - return false; - curColType = lnext(curColType); - } - } - if (curColType != NULL) - return false; /* tlist shorter than colTypes */ - return true; -} diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index e2eaadd29ff..f0e0d08e03a 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.101 2008/08/07 03:04:04 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.102 2008/08/07 19:35:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -541,6 +541,7 @@ typedef struct SetOp AttrNumber *dupColIdx; /* their indexes in the target list */ Oid *dupOperators; /* equality operators to compare with */ AttrNumber flagColIdx; /* where is the flag column, if any */ + int firstFlag; /* flag value for first input relation */ long numGroups; /* estimated number of groups in input */ } SetOp; diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 616f0ea2ad0..0aa1d32f947 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.109 2008/08/07 03:04:04 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.110 2008/08/07 19:35:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,8 +62,8 @@ extern Unique *make_unique(Plan *lefttree, List *distinctList); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount, int64 offset_est, int64 count_est); extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree, - List *distinctList, AttrNumber flagColIdx, long numGroups, - double outputRows); + List *distinctList, AttrNumber flagColIdx, int firstFlag, + long numGroups, double outputRows); extern Result *make_result(PlannerInfo *root, List *tlist, Node *resconstantqual, Plan *subplan); extern bool is_projection_capable_plan(Plan *plan); diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 9899b14c82f..d2c7f42e05f 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.51 2008/08/07 01:11:52 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.52 2008/08/07 19:35:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,9 @@ extern TargetEntry *tlist_member_ignore_relabel(Node *node, List *targetlist); extern List *flatten_tlist(List *tlist); extern List *add_to_flat_tlist(List *tlist, List *vars); +extern List *get_tlist_exprs(List *tlist, bool includeJunk); +extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); + extern TargetEntry *get_sortgroupref_tle(Index sortref, List *targetList); extern TargetEntry *get_sortgroupclause_tle(SortGroupClause *sgClause, @@ -37,6 +40,4 @@ extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist); extern bool grouping_is_sortable(List *groupClause); extern bool grouping_is_hashable(List *groupClause); -extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK); - #endif /* TLIST_H */