mirror of
https://github.com/postgres/postgres.git
synced 2025-11-15 03:41:20 +03:00
Phase 2 of hashed-aggregation project. nodeAgg.c now knows how to do
hashed aggregation, but there's not yet planner support for it.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
* locate group boundaries.
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.48 2002/11/06 00:00:43 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.49 2002/11/06 22:31:23 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -151,9 +151,8 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent)
|
||||
*/
|
||||
grpstate = makeNode(GroupState);
|
||||
node->grpstate = grpstate;
|
||||
grpstate->grp_useFirstTuple = FALSE;
|
||||
grpstate->grp_done = FALSE;
|
||||
grpstate->grp_firstTuple = NULL;
|
||||
grpstate->grp_done = FALSE;
|
||||
|
||||
/*
|
||||
* create expression context
|
||||
@@ -236,7 +235,6 @@ ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent)
|
||||
{
|
||||
GroupState *grpstate = node->grpstate;
|
||||
|
||||
grpstate->grp_useFirstTuple = FALSE;
|
||||
grpstate->grp_done = FALSE;
|
||||
if (grpstate->grp_firstTuple != NULL)
|
||||
{
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* $Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.67 2002/11/06 22:31:23 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -31,8 +32,6 @@
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
|
||||
static uint32 hashFunc(Datum key, int typLen, bool byVal);
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* ExecHash
|
||||
*
|
||||
@@ -532,7 +531,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
|
||||
|
||||
/*
|
||||
* We reset the eval context each time to reclaim any memory leaked in
|
||||
* the hashkey expression or hashFunc itself.
|
||||
* the hashkey expression or ComputeHashFunc itself.
|
||||
*/
|
||||
ResetExprContext(econtext);
|
||||
|
||||
@@ -550,9 +549,9 @@ ExecHashGetBucket(HashJoinTable hashtable,
|
||||
bucketno = 0;
|
||||
else
|
||||
{
|
||||
bucketno = hashFunc(keyval,
|
||||
(int) hashtable->typLen,
|
||||
hashtable->typByVal)
|
||||
bucketno = ComputeHashFunc(keyval,
|
||||
(int) hashtable->typLen,
|
||||
hashtable->typByVal)
|
||||
% (uint32) hashtable->totalbuckets;
|
||||
}
|
||||
|
||||
@@ -622,16 +621,16 @@ ExecScanHashBucket(HashJoinState *hjstate,
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------
|
||||
* hashFunc
|
||||
* ComputeHashFunc
|
||||
*
|
||||
* the hash function for hash joins
|
||||
* the hash function for hash joins (also used for hash aggregation)
|
||||
*
|
||||
* XXX this probably ought to be replaced with datatype-specific
|
||||
* hash functions, such as those already implemented for hash indexes.
|
||||
* ----------------------------------------------------------------
|
||||
*/
|
||||
static uint32
|
||||
hashFunc(Datum key, int typLen, bool byVal)
|
||||
uint32
|
||||
ComputeHashFunc(Datum key, int typLen, bool byVal)
|
||||
{
|
||||
unsigned char *k;
|
||||
|
||||
@@ -681,7 +680,7 @@ hashFunc(Datum key, int typLen, bool byVal)
|
||||
}
|
||||
else
|
||||
{
|
||||
elog(ERROR, "hashFunc: Invalid typLen %d", typLen);
|
||||
elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
|
||||
k = NULL; /* keep compiler quiet */
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.215 2002/11/06 00:00:43 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.216 2002/11/06 22:31:23 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -524,6 +524,7 @@ _copyAgg(Agg *from)
|
||||
memcpy(newnode->grpColIdx, from->grpColIdx,
|
||||
from->numCols * sizeof(AttrNumber));
|
||||
}
|
||||
newnode->numGroups = from->numGroups;
|
||||
|
||||
return newnode;
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.177 2002/11/06 00:00:44 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.178 2002/11/06 22:31:24 tgl Exp $
|
||||
*
|
||||
* NOTES
|
||||
* Every (plan) node in POSTGRES has an associated "out" routine which
|
||||
@@ -597,8 +597,8 @@ _outAgg(StringInfo str, Agg *node)
|
||||
{
|
||||
appendStringInfo(str, " AGG ");
|
||||
_outPlanInfo(str, (Plan *) node);
|
||||
appendStringInfo(str, " :aggstrategy %d :numCols %d ",
|
||||
(int) node->aggstrategy, node->numCols);
|
||||
appendStringInfo(str, " :aggstrategy %d :numCols %d :numGroups %ld ",
|
||||
(int) node->aggstrategy, node->numCols, node->numGroups);
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.120 2002/11/06 00:00:44 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.121 2002/11/06 22:31:24 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1675,6 +1675,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
|
||||
plan->plan_rows *= 0.1;
|
||||
if (plan->plan_rows < 1)
|
||||
plan->plan_rows = 1;
|
||||
node->numGroups = (long) plan->plan_rows;
|
||||
}
|
||||
|
||||
plan->state = (EState *) NULL;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.126 2002/11/06 00:00:44 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.127 2002/11/06 22:31:24 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -931,6 +931,7 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
AttrNumber *groupColIdx = NULL;
|
||||
Path *cheapest_path;
|
||||
Path *sorted_path;
|
||||
bool use_hashed_grouping = false;
|
||||
|
||||
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
|
||||
tlist = preprocess_targetlist(tlist,
|
||||
@@ -1209,6 +1210,29 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys);
|
||||
sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
|
||||
|
||||
/*
|
||||
* Consider whether we might want to use hashed grouping.
|
||||
*/
|
||||
if (parse->groupClause)
|
||||
{
|
||||
/*
|
||||
* Executor doesn't support hashed aggregation with DISTINCT
|
||||
* aggregates. (Doing so would imply storing *all* the input
|
||||
* values in the hash table, which seems like a certain loser.)
|
||||
*/
|
||||
if (parse->hasAggs &&
|
||||
(contain_distinct_agg_clause((Node *) tlist) ||
|
||||
contain_distinct_agg_clause(parse->havingQual)))
|
||||
use_hashed_grouping = false;
|
||||
else
|
||||
{
|
||||
#if 0 /* much more to do here */
|
||||
/* TEMPORARY HOTWIRE FOR TESTING */
|
||||
use_hashed_grouping = true;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Select the best path and create a plan to execute it.
|
||||
*
|
||||
@@ -1279,22 +1303,30 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
}
|
||||
|
||||
/*
|
||||
* If any aggregate is present, insert the Agg node, plus an explicit
|
||||
* sort if necessary.
|
||||
* Insert AGG or GROUP node if needed, plus an explicit sort step
|
||||
* if necessary.
|
||||
*
|
||||
* HAVING clause, if any, becomes qual of the Agg node
|
||||
*/
|
||||
if (parse->hasAggs)
|
||||
if (use_hashed_grouping)
|
||||
{
|
||||
/* Hashed aggregate plan --- no sort needed */
|
||||
result_plan = (Plan *) make_agg(tlist,
|
||||
(List *) parse->havingQual,
|
||||
AGG_HASHED,
|
||||
length(parse->groupClause),
|
||||
groupColIdx,
|
||||
result_plan);
|
||||
/* Hashed aggregation produces randomly-ordered results */
|
||||
current_pathkeys = NIL;
|
||||
}
|
||||
else if (parse->hasAggs)
|
||||
{
|
||||
/* Plain aggregate plan --- sort if needed */
|
||||
AggStrategy aggstrategy;
|
||||
|
||||
if (parse->groupClause)
|
||||
{
|
||||
aggstrategy = AGG_SORTED;
|
||||
/*
|
||||
* Add an explicit sort if we couldn't make the path come out
|
||||
* the way the AGG node needs it.
|
||||
*/
|
||||
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
|
||||
{
|
||||
result_plan = make_groupsortplan(parse,
|
||||
@@ -1303,9 +1335,18 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
result_plan);
|
||||
current_pathkeys = group_pathkeys;
|
||||
}
|
||||
aggstrategy = AGG_SORTED;
|
||||
/*
|
||||
* The AGG node will not change the sort ordering of its
|
||||
* groups, so current_pathkeys describes the result too.
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
aggstrategy = AGG_PLAIN;
|
||||
/* Result will be only one row anyway; no sort order */
|
||||
current_pathkeys = NIL;
|
||||
}
|
||||
|
||||
result_plan = (Plan *) make_agg(tlist,
|
||||
(List *) parse->havingQual,
|
||||
@@ -1313,10 +1354,6 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
length(parse->groupClause),
|
||||
groupColIdx,
|
||||
result_plan);
|
||||
/*
|
||||
* Note: plain or grouped Agg does not affect any existing
|
||||
* sort order of the tuples
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.109 2002/09/11 14:48:54 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.110 2002/11/06 22:31:24 tgl Exp $
|
||||
*
|
||||
* HISTORY
|
||||
* AUTHOR DATE MAJOR EVENT
|
||||
@@ -46,6 +46,7 @@ typedef struct
|
||||
} check_subplans_for_ungrouped_vars_context;
|
||||
|
||||
static bool contain_agg_clause_walker(Node *node, void *context);
|
||||
static bool contain_distinct_agg_clause_walker(Node *node, void *context);
|
||||
static bool pull_agg_clause_walker(Node *node, List **listptr);
|
||||
static bool expression_returns_set_walker(Node *node, void *context);
|
||||
static bool contain_subplans_walker(Node *node, void *context);
|
||||
@@ -410,6 +411,32 @@ contain_agg_clause_walker(Node *node, void *context)
|
||||
return expression_tree_walker(node, contain_agg_clause_walker, context);
|
||||
}
|
||||
|
||||
/*
|
||||
* contain_distinct_agg_clause
|
||||
* Recursively search for DISTINCT Aggref nodes within a clause.
|
||||
*
|
||||
* Returns true if any DISTINCT aggregate found.
|
||||
*/
|
||||
bool
|
||||
contain_distinct_agg_clause(Node *clause)
|
||||
{
|
||||
return contain_distinct_agg_clause_walker(clause, NULL);
|
||||
}
|
||||
|
||||
static bool
|
||||
contain_distinct_agg_clause_walker(Node *node, void *context)
|
||||
{
|
||||
if (node == NULL)
|
||||
return false;
|
||||
if (IsA(node, Aggref))
|
||||
{
|
||||
if (((Aggref *) node)->aggdistinct)
|
||||
return true; /* abort the tree traversal and return
|
||||
* true */
|
||||
}
|
||||
return expression_tree_walker(node, contain_distinct_agg_clause_walker, context);
|
||||
}
|
||||
|
||||
/*
|
||||
* pull_agg_clause
|
||||
* Recursively pulls all Aggref nodes from an expression tree.
|
||||
|
||||
Reference in New Issue
Block a user