diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index e1a3a813dd9..8b64a625ca5 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -14,6 +14,8 @@ */ #include "postgres.h" +#include <math.h> + #include "access/htup_details.h" #include "access/parallel.h" #include "common/hashfn.h" @@ -144,7 +146,7 @@ execTuplesHashPrepare(int numCols, * eqfuncoids: OIDs of equality comparison functions to use * hashfunctions: FmgrInfos of datatype-specific hashing functions to use * collations: collations to use in comparisons - * nbuckets: initial estimate of hashtable size + * nelements: initial estimate of hashtable size * additionalsize: size of data that may be stored along with the hash entry * metacxt: memory context for long-lived data and the simplehash table * tuplescxt: memory context in which to store the hashed tuples themselves @@ -187,7 +189,7 @@ BuildTupleHashTable(PlanState *parent, const Oid *eqfuncoids, FmgrInfo *hashfunctions, Oid *collations, - long nbuckets, + double nelements, Size additionalsize, MemoryContext metacxt, MemoryContext tuplescxt, @@ -195,12 +197,24 @@ BuildTupleHashTable(PlanState *parent, bool use_variable_hash_iv) { TupleHashTable hashtable; - Size entrysize; - Size hash_mem_limit; + uint32 nbuckets; MemoryContext oldcontext; uint32 hash_iv = 0; - Assert(nbuckets > 0); + /* + * tuplehash_create requires a uint32 element count, so we had better + * clamp the given nelements to fit in that. As long as we have to do + * that, we might as well protect against completely insane input like + * zero or NaN. But it is not our job here to enforce issues like staying + * within hash_mem: the caller should have done that, and we don't have + * enough info to second-guess. 
+ */ + if (isnan(nelements) || nelements <= 0) + nbuckets = 1; + else if (nelements >= PG_UINT32_MAX) + nbuckets = PG_UINT32_MAX; + else + nbuckets = (uint32) nelements; /* tuplescxt must be separate, else ResetTupleHashTable breaks things */ Assert(metacxt != tuplescxt); @@ -208,18 +222,6 @@ BuildTupleHashTable(PlanState *parent, /* ensure additionalsize is maxalign'ed */ additionalsize = MAXALIGN(additionalsize); - /* - * Limit initial table size request to not more than hash_mem. - * - * XXX this calculation seems pretty misguided, as it counts only overhead - * and not the tuples themselves. But we have no knowledge of the - * expected tuple width here. - */ - entrysize = sizeof(TupleHashEntryData) + additionalsize; - hash_mem_limit = get_hash_memory_limit() / entrysize; - if (nbuckets > hash_mem_limit) - nbuckets = hash_mem_limit; - oldcontext = MemoryContextSwitchTo(metacxt); hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData)); diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 759ffeed2e6..0b02fd32107 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -402,12 +402,12 @@ static void find_cols(AggState *aggstate, Bitmapset **aggregated, Bitmapset **unaggregated); static bool find_cols_walker(Node *node, FindColsContext *context); static void build_hash_tables(AggState *aggstate); -static void build_hash_table(AggState *aggstate, int setno, long nbuckets); +static void build_hash_table(AggState *aggstate, int setno, double nbuckets); static void hashagg_recompile_expressions(AggState *aggstate, bool minslot, bool nullcheck); static void hash_create_memory(AggState *aggstate); -static long hash_choose_num_buckets(double hashentrysize, - long ngroups, Size memory); +static double hash_choose_num_buckets(double hashentrysize, + double ngroups, Size memory); static int hash_choose_num_partitions(double input_groups, double hashentrysize, int used_bits, @@ -1469,7 +1469,7 @@ 
build_hash_tables(AggState *aggstate) for (setno = 0; setno < aggstate->num_hashes; ++setno) { AggStatePerHash perhash = &aggstate->perhash[setno]; - long nbuckets; + double nbuckets; Size memory; if (perhash->hashtable != NULL) @@ -1478,8 +1478,6 @@ build_hash_tables(AggState *aggstate) continue; } - Assert(perhash->aggnode->numGroups > 0); - memory = aggstate->hash_mem_limit / aggstate->num_hashes; /* choose reasonable number of buckets per hashtable */ @@ -1505,7 +1503,7 @@ build_hash_tables(AggState *aggstate) * Build a single hashtable for this grouping set. */ static void -build_hash_table(AggState *aggstate, int setno, long nbuckets) +build_hash_table(AggState *aggstate, int setno, double nbuckets) { AggStatePerHash perhash = &aggstate->perhash[setno]; MemoryContext metacxt = aggstate->hash_metacxt; @@ -2053,11 +2051,11 @@ hash_create_memory(AggState *aggstate) /* * Choose a reasonable number of buckets for the initial hash table size. */ -static long -hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory) +static double +hash_choose_num_buckets(double hashentrysize, double ngroups, Size memory) { - long max_nbuckets; - long nbuckets = ngroups; + double max_nbuckets; + double nbuckets = ngroups; max_nbuckets = memory / hashentrysize; @@ -2065,12 +2063,16 @@ hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory) * Underestimating is better than overestimating. Too many buckets crowd * out space for group keys and transition state values. */ - max_nbuckets >>= 1; + max_nbuckets /= 2; if (nbuckets > max_nbuckets) nbuckets = max_nbuckets; - return Max(nbuckets, 1); + /* + * BuildTupleHashTable will clamp any obviously-insane result, so we don't + * need to be too careful here. 
+ */ + return nbuckets; } /* @@ -3686,7 +3688,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) if (use_hashing) { Plan *outerplan = outerPlan(node); - uint64 totalGroups = 0; + double totalGroups = 0; aggstate->hash_spill_rslot = ExecInitExtraTupleSlot(estate, scanDesc, &TTSOpsMinimalTuple); diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c index ebb7919b49b..cd0ad51dcd2 100644 --- a/src/backend/executor/nodeRecursiveunion.c +++ b/src/backend/executor/nodeRecursiveunion.c @@ -35,7 +35,6 @@ build_hash_table(RecursiveUnionState *rustate) TupleDesc desc = ExecGetResultType(outerPlanState(rustate)); Assert(node->numCols > 0); - Assert(node->numGroups > 0); /* * If both child plans deliver the same fixed tuple slot type, we can tell diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 5aabed18a09..9e0f9274fb1 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -88,7 +88,6 @@ build_hash_table(SetOpState *setopstate) TupleDesc desc = ExecGetResultType(outerPlanState(setopstate)); Assert(node->strategy == SETOP_HASHED); - Assert(node->numGroups > 0); /* * If both child plans deliver the same fixed tuple slot type, we can tell diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 1cd0988bb49..c8b7bd9eb66 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -34,7 +34,6 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" -#include "optimizer/optimizer.h" #include "utils/array.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -481,7 +480,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) int ncols = node->numCols; ExprContext *innerecontext = node->innerecontext; MemoryContext oldcontext; - long nbuckets; + double nentries; TupleTableSlot *slot; Assert(subplan->subLinkType == ANY_SUBLINK); @@ -509,9 +508,7 @@ 
buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->havehashrows = false; node->havenullrows = false; - nbuckets = clamp_cardinality_to_long(planstate->plan->plan_rows); - if (nbuckets < 1) - nbuckets = 1; + nentries = planstate->plan->plan_rows; if (node->hashtable) ResetTupleHashTable(node->hashtable); @@ -524,7 +521,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->tab_eq_funcoids, node->tab_hash_funcs, node->tab_collations, - nbuckets, + nentries, 0, /* no additional data */ node->planstate->state->es_query_cxt, node->tuplesContext, @@ -534,12 +531,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) if (!subplan->unknownEqFalse) { if (ncols == 1) - nbuckets = 1; /* there can only be one entry */ + nentries = 1; /* there can only be one entry */ else { - nbuckets /= 16; - if (nbuckets < 1) - nbuckets = 1; + nentries /= 16; + if (nentries < 1) + nentries = 1; } if (node->hashnulls) @@ -553,7 +550,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) node->tab_eq_funcoids, node->tab_hash_funcs, node->tab_collations, - nbuckets, + nentries, 0, /* no additional data */ node->planstate->state->es_query_cxt, node->tuplesContext, diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 94077e6a006..8335cf5b5c5 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -257,32 +257,6 @@ clamp_width_est(int64 tuple_width) return (int32) tuple_width; } -/* - * clamp_cardinality_to_long - * Cast a Cardinality value to a sane long value. - */ -long -clamp_cardinality_to_long(Cardinality x) -{ - /* - * Just for paranoia's sake, ensure we do something sane with negative or - * NaN values. - */ - if (isnan(x)) - return LONG_MAX; - if (x <= 0) - return 0; - - /* - * If "long" is 64 bits, then LONG_MAX cannot be represented exactly as a - * double. 
Casting it to double and back may well result in overflow due - * to rounding, so avoid doing that. We trust that any double value that - * compares strictly less than "(double) LONG_MAX" will cast to a - * representable "long" value. - */ - return (x < (double) LONG_MAX) ? (long) x : LONG_MAX; -} - /* * cost_seqscan diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 63fe6637155..8af091ba647 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -226,7 +226,7 @@ static RecursiveUnion *make_recursive_union(List *tlist, Plan *righttree, int wtParam, List *distinctList, - long numGroups); + Cardinality numGroups); static BitmapAnd *make_bitmap_and(List *bitmapplans); static BitmapOr *make_bitmap_or(List *bitmapplans); static NestLoop *make_nestloop(List *tlist, @@ -301,7 +301,7 @@ static Gather *make_gather(List *qptlist, List *qpqual, int nworkers, int rescan_param, bool single_copy, Plan *subplan); static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, List *tlist, Plan *lefttree, Plan *righttree, - List *groupList, long numGroups); + List *groupList, Cardinality numGroups); static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam); static Result *make_gating_result(List *tlist, Node *resconstantqual, Plan *subplan); @@ -2564,7 +2564,6 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) List *tlist = build_path_tlist(root, &best_path->path); Plan *leftplan; Plan *rightplan; - long numGroups; /* * SetOp doesn't project, so tlist requirements pass through; moreover we @@ -2575,16 +2574,13 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags) rightplan = create_plan_recurse(root, best_path->rightpath, flags | CP_LABEL_TLIST); - /* Convert numGroups to long int --- but 'ware overflow! 
*/ - numGroups = clamp_cardinality_to_long(best_path->numGroups); - plan = make_setop(best_path->cmd, best_path->strategy, tlist, leftplan, rightplan, best_path->groupList, - numGroups); + best_path->numGroups); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -2604,7 +2600,6 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) Plan *leftplan; Plan *rightplan; List *tlist; - long numGroups; /* Need both children to produce same tlist, so force it */ leftplan = create_plan_recurse(root, best_path->leftpath, CP_EXACT_TLIST); @@ -2612,15 +2607,12 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path) tlist = build_path_tlist(root, &best_path->path); - /* Convert numGroups to long int --- but 'ware overflow! */ - numGroups = clamp_cardinality_to_long(best_path->numGroups); - plan = make_recursive_union(tlist, leftplan, rightplan, best_path->wtParam, best_path->distinctList, - numGroups); + best_path->numGroups); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -5845,7 +5837,7 @@ make_recursive_union(List *tlist, Plan *righttree, int wtParam, List *distinctList, - long numGroups) + Cardinality numGroups) { RecursiveUnion *node = makeNode(RecursiveUnion); Plan *plan = &node->plan; @@ -6582,15 +6574,11 @@ Agg * make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, - List *groupingSets, List *chain, double dNumGroups, + List *groupingSets, List *chain, Cardinality numGroups, Size transitionSpace, Plan *lefttree) { Agg *node = makeNode(Agg); Plan *plan = &node->plan; - long numGroups; - - /* Reduce to long, but 'ware overflow! 
*/ - numGroups = clamp_cardinality_to_long(dNumGroups); node->aggstrategy = aggstrategy; node->aggsplit = aggsplit; @@ -6822,7 +6810,7 @@ make_gather(List *qptlist, static SetOp * make_setop(SetOpCmd cmd, SetOpStrategy strategy, List *tlist, Plan *lefttree, Plan *righttree, - List *groupList, long numGroups) + List *groupList, Cardinality numGroups) { SetOp *node = makeNode(SetOp); Plan *plan = &node->plan; diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 086f52cff3d..fa2b657fb2f 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -138,7 +138,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent, const Oid *eqfuncoids, FmgrInfo *hashfunctions, Oid *collations, - long nbuckets, + double nelements, Size additionalsize, MemoryContext metacxt, MemoryContext tuplescxt, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 7cdd2b51c94..c4393a94321 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -475,7 +475,7 @@ typedef struct RecursiveUnion Oid *dupCollations pg_node_attr(array_size(numCols)); /* estimated number of groups in input */ - long numGroups; + Cardinality numGroups; } RecursiveUnion; /* ---------------- @@ -1206,7 +1206,7 @@ typedef struct Agg Oid *grpCollations pg_node_attr(array_size(numCols)); /* estimated number of groups in input */ - long numGroups; + Cardinality numGroups; /* for pass-by-ref transition data */ uint64 transitionSpace; @@ -1446,7 +1446,7 @@ typedef struct SetOp bool *cmpNullsFirst pg_node_attr(array_size(numCols)); /* estimated number of groups in left input */ - long numGroups; + Cardinality numGroups; } SetOp; /* ---------------- diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index a34113903c0..d0aa8ab0c1c 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -82,7 +82,6 @@ extern PGDLLIMPORT int effective_cache_size; extern 
double clamp_row_est(double nrows); extern int32 clamp_width_est(int64 tuple_width); -extern long clamp_cardinality_to_long(Cardinality x); /* in path/indxpath.c: */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 09b48b26f8f..00addf15992 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -55,7 +55,7 @@ extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree); extern Agg *make_agg(List *tlist, List *qual, AggStrategy aggstrategy, AggSplit aggsplit, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations, - List *groupingSets, List *chain, double dNumGroups, + List *groupingSets, List *chain, Cardinality numGroups, Size transitionSpace, Plan *lefttree); extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount, LimitOption limitOption, int uniqNumCols,