mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Phase 2 of hashed-aggregation project. nodeAgg.c now knows how to do
hashed aggregation, but there's not yet planner support for it.
This commit is contained in:
		
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -15,7 +15,7 @@ | ||||
|  *	  locate group boundaries. | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.48 2002/11/06 00:00:43 tgl Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.49 2002/11/06 22:31:23 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -151,9 +151,8 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent) | ||||
| 	 */ | ||||
| 	grpstate = makeNode(GroupState); | ||||
| 	node->grpstate = grpstate; | ||||
| 	grpstate->grp_useFirstTuple = FALSE; | ||||
| 	grpstate->grp_done = FALSE; | ||||
| 	grpstate->grp_firstTuple = NULL; | ||||
| 	grpstate->grp_done = FALSE; | ||||
|  | ||||
| 	/* | ||||
| 	 * create expression context | ||||
| @@ -236,7 +235,6 @@ ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent) | ||||
| { | ||||
| 	GroupState *grpstate = node->grpstate; | ||||
|  | ||||
| 	grpstate->grp_useFirstTuple = FALSE; | ||||
| 	grpstate->grp_done = FALSE; | ||||
| 	if (grpstate->grp_firstTuple != NULL) | ||||
| 	{ | ||||
|   | ||||
| @@ -7,7 +7,8 @@ | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * | ||||
|  *	$Id: nodeHash.c,v 1.66 2002/09/04 20:31:18 momjian Exp $ | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.67 2002/11/06 22:31:23 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -31,8 +32,6 @@ | ||||
| #include "utils/lsyscache.h" | ||||
|  | ||||
|  | ||||
| static uint32 hashFunc(Datum key, int typLen, bool byVal); | ||||
|  | ||||
| /* ---------------------------------------------------------------- | ||||
|  *		ExecHash | ||||
|  * | ||||
| @@ -532,7 +531,7 @@ ExecHashGetBucket(HashJoinTable hashtable, | ||||
|  | ||||
| 	/* | ||||
| 	 * We reset the eval context each time to reclaim any memory leaked in | ||||
| 	 * the hashkey expression or hashFunc itself. | ||||
| 	 * the hashkey expression or ComputeHashFunc itself. | ||||
| 	 */ | ||||
| 	ResetExprContext(econtext); | ||||
|  | ||||
| @@ -550,9 +549,9 @@ ExecHashGetBucket(HashJoinTable hashtable, | ||||
| 		bucketno = 0; | ||||
| 	else | ||||
| 	{ | ||||
| 		bucketno = hashFunc(keyval, | ||||
| 							(int) hashtable->typLen, | ||||
| 							hashtable->typByVal) | ||||
| 		bucketno = ComputeHashFunc(keyval, | ||||
| 								   (int) hashtable->typLen, | ||||
| 								   hashtable->typByVal) | ||||
| 			% (uint32) hashtable->totalbuckets; | ||||
| 	} | ||||
|  | ||||
| @@ -622,16 +621,16 @@ ExecScanHashBucket(HashJoinState *hjstate, | ||||
| } | ||||
|  | ||||
| /* ---------------------------------------------------------------- | ||||
|  *		hashFunc | ||||
|  *		ComputeHashFunc | ||||
|  * | ||||
|  *		the hash function for hash joins | ||||
|  *		the hash function for hash joins (also used for hash aggregation) | ||||
|  * | ||||
|  *		XXX this probably ought to be replaced with datatype-specific | ||||
|  *		hash functions, such as those already implemented for hash indexes. | ||||
|  * ---------------------------------------------------------------- | ||||
|  */ | ||||
| static uint32 | ||||
| hashFunc(Datum key, int typLen, bool byVal) | ||||
| uint32 | ||||
| ComputeHashFunc(Datum key, int typLen, bool byVal) | ||||
| { | ||||
| 	unsigned char *k; | ||||
|  | ||||
| @@ -681,7 +680,7 @@ hashFunc(Datum key, int typLen, bool byVal) | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			elog(ERROR, "hashFunc: Invalid typLen %d", typLen); | ||||
| 			elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen); | ||||
| 			k = NULL;			/* keep compiler quiet */ | ||||
| 		} | ||||
| 	} | ||||
|   | ||||
| @@ -15,7 +15,7 @@ | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.215 2002/11/06 00:00:43 tgl Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.216 2002/11/06 22:31:23 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -524,6 +524,7 @@ _copyAgg(Agg *from) | ||||
| 		memcpy(newnode->grpColIdx, from->grpColIdx, | ||||
| 			   from->numCols * sizeof(AttrNumber)); | ||||
| 	} | ||||
| 	newnode->numGroups = from->numGroups; | ||||
|  | ||||
| 	return newnode; | ||||
| } | ||||
|   | ||||
| @@ -5,7 +5,7 @@ | ||||
|  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  *	$Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.177 2002/11/06 00:00:44 tgl Exp $ | ||||
|  *	$Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.178 2002/11/06 22:31:24 tgl Exp $ | ||||
|  * | ||||
|  * NOTES | ||||
|  *	  Every (plan) node in POSTGRES has an associated "out" routine which | ||||
| @@ -597,8 +597,8 @@ _outAgg(StringInfo str, Agg *node) | ||||
| { | ||||
| 	appendStringInfo(str, " AGG "); | ||||
| 	_outPlanInfo(str, (Plan *) node); | ||||
| 	appendStringInfo(str, " :aggstrategy %d :numCols %d ", | ||||
| 					 (int) node->aggstrategy, node->numCols); | ||||
| 	appendStringInfo(str, " :aggstrategy %d :numCols %d :numGroups %ld ", | ||||
| 					 (int) node->aggstrategy, node->numCols, node->numGroups); | ||||
| } | ||||
|  | ||||
| static void | ||||
|   | ||||
| @@ -10,7 +10,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.120 2002/11/06 00:00:44 tgl Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.121 2002/11/06 22:31:24 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -1675,6 +1675,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy, | ||||
| 		plan->plan_rows *= 0.1; | ||||
| 		if (plan->plan_rows < 1) | ||||
| 			plan->plan_rows = 1; | ||||
| 		node->numGroups = (long) plan->plan_rows; | ||||
| 	} | ||||
|  | ||||
| 	plan->state = (EState *) NULL; | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.126 2002/11/06 00:00:44 tgl Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.127 2002/11/06 22:31:24 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -931,6 +931,7 @@ grouping_planner(Query *parse, double tuple_fraction) | ||||
| 		AttrNumber *groupColIdx = NULL; | ||||
| 		Path	   *cheapest_path; | ||||
| 		Path	   *sorted_path; | ||||
| 		bool		use_hashed_grouping = false; | ||||
|  | ||||
| 		/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */ | ||||
| 		tlist = preprocess_targetlist(tlist, | ||||
| @@ -1209,6 +1210,29 @@ grouping_planner(Query *parse, double tuple_fraction) | ||||
| 		group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys); | ||||
| 		sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); | ||||
|  | ||||
| 		/* | ||||
| 		 * Consider whether we might want to use hashed grouping. | ||||
| 		 */ | ||||
| 		if (parse->groupClause) | ||||
| 		{ | ||||
| 			/* | ||||
| 			 * Executor doesn't support hashed aggregation with DISTINCT | ||||
| 			 * aggregates.  (Doing so would imply storing *all* the input | ||||
| 			 * values in the hash table, which seems like a certain loser.) | ||||
| 			 */ | ||||
| 			if (parse->hasAggs && | ||||
| 				(contain_distinct_agg_clause((Node *) tlist) || | ||||
| 				 contain_distinct_agg_clause(parse->havingQual))) | ||||
| 				use_hashed_grouping = false; | ||||
| 			else | ||||
| 			{ | ||||
| #if 0							/* much more to do here */ | ||||
| 				/* TEMPORARY HOTWIRE FOR TESTING */ | ||||
| 				use_hashed_grouping = true; | ||||
| #endif | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		/* | ||||
| 		 * Select the best path and create a plan to execute it. | ||||
| 		 * | ||||
| @@ -1279,22 +1303,30 @@ grouping_planner(Query *parse, double tuple_fraction) | ||||
| 		} | ||||
|  | ||||
| 		/* | ||||
| 		 * If any aggregate is present, insert the Agg node, plus an explicit | ||||
| 		 * sort if necessary. | ||||
| 		 * Insert AGG or GROUP node if needed, plus an explicit sort step | ||||
| 		 * if necessary. | ||||
| 		 * | ||||
| 		 * HAVING clause, if any, becomes qual of the Agg node | ||||
| 		 */ | ||||
| 		if (parse->hasAggs) | ||||
| 		if (use_hashed_grouping) | ||||
| 		{ | ||||
| 			/* Hashed aggregate plan --- no sort needed */ | ||||
| 			result_plan = (Plan *) make_agg(tlist, | ||||
| 											(List *) parse->havingQual, | ||||
| 											AGG_HASHED, | ||||
| 											length(parse->groupClause), | ||||
| 											groupColIdx, | ||||
| 											result_plan); | ||||
| 			/* Hashed aggregation produces randomly-ordered results */ | ||||
| 			current_pathkeys = NIL; | ||||
| 		} | ||||
| 		else if (parse->hasAggs) | ||||
| 		{ | ||||
| 			/* Plain aggregate plan --- sort if needed */ | ||||
| 			AggStrategy aggstrategy; | ||||
|  | ||||
| 			if (parse->groupClause) | ||||
| 			{ | ||||
| 				aggstrategy = AGG_SORTED; | ||||
| 				/* | ||||
| 				 * Add an explicit sort if we couldn't make the path come out | ||||
| 				 * the way the AGG node needs it. | ||||
| 				 */ | ||||
| 				if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) | ||||
| 				{ | ||||
| 					result_plan = make_groupsortplan(parse, | ||||
| @@ -1303,9 +1335,18 @@ grouping_planner(Query *parse, double tuple_fraction) | ||||
| 													 result_plan); | ||||
| 					current_pathkeys = group_pathkeys; | ||||
| 				} | ||||
| 				aggstrategy = AGG_SORTED; | ||||
| 				/* | ||||
| 				 * The AGG node will not change the sort ordering of its | ||||
| 				 * groups, so current_pathkeys describes the result too. | ||||
| 				 */ | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				aggstrategy = AGG_PLAIN; | ||||
| 				/* Result will be only one row anyway; no sort order */ | ||||
| 				current_pathkeys = NIL; | ||||
| 			} | ||||
|  | ||||
| 			result_plan = (Plan *) make_agg(tlist, | ||||
| 											(List *) parse->havingQual, | ||||
| @@ -1313,10 +1354,6 @@ grouping_planner(Query *parse, double tuple_fraction) | ||||
| 											length(parse->groupClause), | ||||
| 											groupColIdx, | ||||
| 											result_plan); | ||||
| 			/* | ||||
| 			 * Note: plain or grouped Agg does not affect any existing | ||||
| 			 * sort order of the tuples | ||||
| 			 */ | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.109 2002/09/11 14:48:54 tgl Exp $ | ||||
|  *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.110 2002/11/06 22:31:24 tgl Exp $ | ||||
|  * | ||||
|  * HISTORY | ||||
|  *	  AUTHOR			DATE			MAJOR EVENT | ||||
| @@ -46,6 +46,7 @@ typedef struct | ||||
| } check_subplans_for_ungrouped_vars_context; | ||||
|  | ||||
| static bool contain_agg_clause_walker(Node *node, void *context); | ||||
| static bool contain_distinct_agg_clause_walker(Node *node, void *context); | ||||
| static bool pull_agg_clause_walker(Node *node, List **listptr); | ||||
| static bool expression_returns_set_walker(Node *node, void *context); | ||||
| static bool contain_subplans_walker(Node *node, void *context); | ||||
| @@ -410,6 +411,32 @@ contain_agg_clause_walker(Node *node, void *context) | ||||
| 	return expression_tree_walker(node, contain_agg_clause_walker, context); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * contain_distinct_agg_clause | ||||
|  *	  Recursively search for DISTINCT Aggref nodes within a clause. | ||||
|  * | ||||
|  *	  Returns true as soon as an Aggref with aggdistinct set is found; | ||||
|  *	  a NULL clause yields false.  Otherwise the result is whatever | ||||
|  *	  expression_tree_walker returns for the node's children. | ||||
|  * | ||||
|  *	  The work is done by contain_distinct_agg_clause_walker, which hands | ||||
|  *	  itself to expression_tree_walker to visit sub-nodes.  An Aggref | ||||
|  *	  without aggdistinct is not a stopping point: the walker falls | ||||
|  *	  through and keeps searching below it.  The walker's context | ||||
|  *	  argument is only passed along, never examined here; the entry | ||||
|  *	  point supplies NULL for it. | ||||
|  */ | ||||
| bool | ||||
| contain_distinct_agg_clause(Node *clause) | ||||
| { | ||||
| 	return contain_distinct_agg_clause_walker(clause, NULL); | ||||
| } | ||||
|  | ||||
| static bool | ||||
| contain_distinct_agg_clause_walker(Node *node, void *context) | ||||
| { | ||||
| 	if (node == NULL) | ||||
| 		return false; | ||||
| 	if (IsA(node, Aggref)) | ||||
| 	{ | ||||
| 		if (((Aggref *) node)->aggdistinct) | ||||
| 			return true;		/* abort the tree traversal and return | ||||
| 								 * true */ | ||||
| 	} | ||||
| 	return expression_tree_walker(node, contain_distinct_agg_clause_walker, context); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * pull_agg_clause | ||||
|  *	  Recursively pulls all Aggref nodes from an expression tree. | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $Id: nodeHash.h,v 1.24 2002/06/20 20:29:49 momjian Exp $ | ||||
|  * $Id: nodeHash.h,v 1.25 2002/11/06 22:31:24 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -36,5 +36,6 @@ extern void ExecChooseHashTableSize(double ntuples, int tupwidth, | ||||
| 						int *virtualbuckets, | ||||
| 						int *physicalbuckets, | ||||
| 						int *numbatches); | ||||
| extern uint32 ComputeHashFunc(Datum key, int typLen, bool byVal); | ||||
|  | ||||
| #endif   /* NODEHASH_H */ | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $Id: execnodes.h,v 1.76 2002/11/06 00:00:44 tgl Exp $ | ||||
|  * $Id: execnodes.h,v 1.77 2002/11/06 22:31:24 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -661,12 +661,18 @@ typedef struct MaterialState | ||||
|  * | ||||
|  *	csstate.css_ScanTupleSlot refers to output of underlying plan. | ||||
|  * | ||||
|  *	Note: the associated ExprContext contains ecxt_aggvalues and ecxt_aggnulls | ||||
|  *	arrays, which hold the computed agg values for the current input group | ||||
|  *	during evaluation of an Agg node's output tuple(s). | ||||
|  *	Note: csstate.cstate.cs_ExprContext contains ecxt_aggvalues and | ||||
|  *	ecxt_aggnulls arrays, which hold the computed agg values for the current | ||||
|  *	input group during evaluation of an Agg node's output tuple(s).  We | ||||
|  *	create a second ExprContext, tmpcontext, in which to evaluate input | ||||
|  *	expressions and run the aggregate transition functions. | ||||
|  * ------------------------- | ||||
|  */ | ||||
| typedef struct AggStatePerAggData *AggStatePerAgg;		/* private in nodeAgg.c */ | ||||
| /* these structs are private in nodeAgg.c: */ | ||||
| typedef struct AggStatePerAggData *AggStatePerAgg; | ||||
| typedef struct AggStatePerGroupData *AggStatePerGroup; | ||||
| typedef struct AggHashEntryData *AggHashEntry; | ||||
| typedef struct AggHashTableData *AggHashTable; | ||||
|  | ||||
| typedef struct AggState | ||||
| { | ||||
| @@ -674,13 +680,18 @@ typedef struct AggState | ||||
| 	List	   *aggs;			/* all Aggref nodes in targetlist & quals */ | ||||
| 	int			numaggs;		/* length of list (could be zero!) */ | ||||
| 	FmgrInfo   *eqfunctions;	/* per-grouping-field equality fns */ | ||||
| 	HeapTuple	grp_firstTuple;	/* copy of first tuple of current group */ | ||||
| 	AggStatePerAgg peragg;		/* per-Aggref working state */ | ||||
| 	MemoryContext tup_cxt;		/* context for per-output-tuple | ||||
| 								 * expressions */ | ||||
| 	MemoryContext agg_cxt[2];	/* pair of expression eval memory contexts */ | ||||
| 	int			which_cxt;		/* 0 or 1, indicates current agg_cxt */ | ||||
| 	AggStatePerAgg peragg;		/* per-Aggref information */ | ||||
| 	MemoryContext aggcontext;	/* memory context for long-lived data */ | ||||
| 	ExprContext *tmpcontext;	/* econtext for input expressions */ | ||||
| 	bool		agg_done;		/* indicates completion of Agg scan */ | ||||
| 	/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ | ||||
| 	AggStatePerGroup pergroup;	/* per-Aggref-per-group working state */ | ||||
| 	HeapTuple	grp_firstTuple;	/* copy of first tuple of current group */ | ||||
| 	/* these fields are used in AGG_HASHED mode: */ | ||||
| 	AggHashTable hashtable;		/* hash table with one entry per group */ | ||||
| 	bool		table_filled;	/* hash table filled yet? */ | ||||
| 	AggHashEntry next_hash_entry; /* next entry in current chain */ | ||||
| 	int			next_hash_bucket; /* next chain */ | ||||
| } AggState; | ||||
|  | ||||
| /* --------------------- | ||||
| @@ -691,9 +702,8 @@ typedef struct GroupState | ||||
| { | ||||
| 	CommonScanState csstate;	/* its first field is NodeTag */ | ||||
| 	FmgrInfo   *eqfunctions;	/* per-field lookup data for equality fns */ | ||||
| 	bool		grp_useFirstTuple;		/* first tuple not processed yet */ | ||||
| 	bool		grp_done; | ||||
| 	HeapTuple	grp_firstTuple;	/* copy of first tuple of current group */ | ||||
| 	bool		grp_done;		/* indicates completion of Group scan */ | ||||
| } GroupState; | ||||
|  | ||||
| /* ---------------- | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $Id: plannodes.h,v 1.59 2002/11/06 00:00:44 tgl Exp $ | ||||
|  * $Id: plannodes.h,v 1.60 2002/11/06 22:31:24 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -349,6 +349,7 @@ typedef struct Agg | ||||
| 	AggStrategy	aggstrategy; | ||||
| 	int			numCols;		/* number of grouping columns */ | ||||
| 	AttrNumber *grpColIdx;		/* their indexes in the target list */ | ||||
| 	long		numGroups;		/* estimated number of groups in input */ | ||||
| 	AggState   *aggstate; | ||||
| } Agg; | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $Id: clauses.h,v 1.54 2002/09/11 14:48:55 tgl Exp $ | ||||
|  * $Id: clauses.h,v 1.55 2002/11/06 22:31:24 tgl Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -40,6 +40,7 @@ extern Expr *make_ands_explicit(List *andclauses); | ||||
| extern List *make_ands_implicit(Expr *clause); | ||||
|  | ||||
| extern bool contain_agg_clause(Node *clause); | ||||
| extern bool contain_distinct_agg_clause(Node *clause); | ||||
| extern List *pull_agg_clause(Node *clause); | ||||
|  | ||||
| extern bool expression_returns_set(Node *clause); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user