mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
This commit is contained in:
		@@ -1914,13 +1914,14 @@ void
 | 
			
		||||
ExecReScanAgg(AggState *node)
 | 
			
		||||
{
 | 
			
		||||
	ExprContext *econtext = node->ss.ps.ps_ExprContext;
 | 
			
		||||
	Agg		   *aggnode = (Agg *) node->ss.ps.plan;
 | 
			
		||||
	int			aggno;
 | 
			
		||||
 | 
			
		||||
	node->agg_done = false;
 | 
			
		||||
 | 
			
		||||
	node->ss.ps.ps_TupFromTlist = false;
 | 
			
		||||
 | 
			
		||||
	if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
 | 
			
		||||
	if (aggnode->aggstrategy == AGG_HASHED)
 | 
			
		||||
	{
 | 
			
		||||
		/*
 | 
			
		||||
		 * In the hashed case, if we haven't yet built the hash table then we
 | 
			
		||||
@@ -1932,11 +1933,13 @@ ExecReScanAgg(AggState *node)
 | 
			
		||||
			return;
 | 
			
		||||
 | 
			
		||||
		/*
 | 
			
		||||
		 * If we do have the hash table and the subplan does not have any
 | 
			
		||||
		 * parameter changes, then we can just rescan the existing hash table;
 | 
			
		||||
		 * no need to build it again.
 | 
			
		||||
		 * If we do have the hash table, and the subplan does not have any
 | 
			
		||||
		 * parameter changes, and none of our own parameter changes affect
 | 
			
		||||
		 * input expressions of the aggregated functions, then we can just
 | 
			
		||||
		 * rescan the existing hash table; no need to build it again.
 | 
			
		||||
		 */
 | 
			
		||||
		if (node->ss.ps.lefttree->chgParam == NULL)
 | 
			
		||||
		if (node->ss.ps.lefttree->chgParam == NULL &&
 | 
			
		||||
			!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
 | 
			
		||||
		{
 | 
			
		||||
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
 | 
			
		||||
			return;
 | 
			
		||||
@@ -1973,7 +1976,7 @@ ExecReScanAgg(AggState *node)
 | 
			
		||||
	 */
 | 
			
		||||
	MemoryContextResetAndDeleteChildren(node->aggcontext);
 | 
			
		||||
 | 
			
		||||
	if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
 | 
			
		||||
	if (aggnode->aggstrategy == AGG_HASHED)
 | 
			
		||||
	{
 | 
			
		||||
		/* Rebuild an empty hash table */
 | 
			
		||||
		build_hash_table(node);
 | 
			
		||||
 
 | 
			
		||||
@@ -781,6 +781,7 @@ _copyAgg(const Agg *from)
 | 
			
		||||
		COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
 | 
			
		||||
	}
 | 
			
		||||
	COPY_SCALAR_FIELD(numGroups);
 | 
			
		||||
	COPY_BITMAPSET_FIELD(aggParams);
 | 
			
		||||
 | 
			
		||||
	return newnode;
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -645,6 +645,7 @@ _outAgg(StringInfo str, const Agg *node)
 | 
			
		||||
		appendStringInfo(str, " %u", node->grpOperators[i]);
 | 
			
		||||
 | 
			
		||||
	WRITE_LONG_FIELD(numGroups);
 | 
			
		||||
	WRITE_BITMAPSET_FIELD(aggParams);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
 
 | 
			
		||||
@@ -4311,6 +4311,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
 | 
			
		||||
	node->grpColIdx = grpColIdx;
 | 
			
		||||
	node->grpOperators = grpOperators;
 | 
			
		||||
	node->numGroups = numGroups;
 | 
			
		||||
	node->aggParams = NULL;		/* SS_finalize_plan() will fill this */
 | 
			
		||||
 | 
			
		||||
	copy_plan_costsize(plan, lefttree); /* only care about copying size */
 | 
			
		||||
	cost_agg(&agg_path, root,
 | 
			
		||||
 
 | 
			
		||||
@@ -80,6 +80,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
 | 
			
		||||
			  Bitmapset *valid_params,
 | 
			
		||||
			  Bitmapset *scan_params);
 | 
			
		||||
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
 | 
			
		||||
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
@@ -2356,6 +2357,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
 | 
			
		||||
										 locally_added_param);
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		case T_Agg:
 | 
			
		||||
			{
 | 
			
		||||
				Agg		   *agg = (Agg *) plan;
 | 
			
		||||
 | 
			
		||||
				/*
 | 
			
		||||
				 * AGG_HASHED plans need to know which Params are referenced
 | 
			
		||||
				 * in aggregate calls.  Do a separate scan to identify them.
 | 
			
		||||
				 */
 | 
			
		||||
				if (agg->aggstrategy == AGG_HASHED)
 | 
			
		||||
				{
 | 
			
		||||
					finalize_primnode_context aggcontext;
 | 
			
		||||
 | 
			
		||||
					aggcontext.root = root;
 | 
			
		||||
					aggcontext.paramids = NULL;
 | 
			
		||||
					finalize_agg_primnode((Node *) agg->plan.targetlist,
 | 
			
		||||
										  &aggcontext);
 | 
			
		||||
					finalize_agg_primnode((Node *) agg->plan.qual,
 | 
			
		||||
										  &aggcontext);
 | 
			
		||||
					agg->aggParams = aggcontext.paramids;
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		case T_WindowAgg:
 | 
			
		||||
			finalize_primnode(((WindowAgg *) plan)->startOffset,
 | 
			
		||||
							  &context);
 | 
			
		||||
@@ -2364,7 +2388,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
 | 
			
		||||
			break;
 | 
			
		||||
 | 
			
		||||
		case T_Hash:
 | 
			
		||||
		case T_Agg:
 | 
			
		||||
		case T_Material:
 | 
			
		||||
		case T_Sort:
 | 
			
		||||
		case T_Unique:
 | 
			
		||||
@@ -2508,6 +2531,28 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
 | 
			
		||||
								  (void *) context);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * finalize_agg_primnode: find all Aggref nodes in the given expression tree,
 | 
			
		||||
 * and add IDs of all PARAM_EXEC params appearing within their aggregated
 | 
			
		||||
 * arguments to the result set.
 | 
			
		||||
 */
 | 
			
		||||
static bool
 | 
			
		||||
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
 | 
			
		||||
{
 | 
			
		||||
	if (node == NULL)
 | 
			
		||||
		return false;
 | 
			
		||||
	if (IsA(node, Aggref))
 | 
			
		||||
	{
 | 
			
		||||
		Aggref	   *agg = (Aggref *) node;
 | 
			
		||||
 | 
			
		||||
		/* we should not consider the direct arguments, if any */
 | 
			
		||||
		finalize_primnode((Node *) agg->args, context);
 | 
			
		||||
		return false;			/* there can't be any Aggrefs below here */
 | 
			
		||||
	}
 | 
			
		||||
	return expression_tree_walker(node, finalize_agg_primnode,
 | 
			
		||||
								  (void *) context);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
 | 
			
		||||
 *
 | 
			
		||||
 
 | 
			
		||||
@@ -633,6 +633,8 @@ typedef struct Agg
 | 
			
		||||
	AttrNumber *grpColIdx;		/* their indexes in the target list */
 | 
			
		||||
	Oid		   *grpOperators;	/* equality operators to compare with */
 | 
			
		||||
	long		numGroups;		/* estimated number of groups in input */
 | 
			
		||||
	Bitmapset  *aggParams;		/* IDs of Params used in Aggref inputs */
 | 
			
		||||
	/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
 | 
			
		||||
} Agg;
 | 
			
		||||
 | 
			
		||||
/* ----------------
 | 
			
		||||
 
 | 
			
		||||
@@ -305,6 +305,79 @@ from tenk1 o;
 | 
			
		||||
 9999
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
-- Test handling of Params within aggregate arguments in hashed aggregation.
 | 
			
		||||
-- Per bug report from Jeevan Chalke.
 | 
			
		||||
explain (verbose, costs off)
 | 
			
		||||
select s1, s2, sm
 | 
			
		||||
from generate_series(1, 3) s1,
 | 
			
		||||
     lateral (select s2, sum(s1 + s2) sm
 | 
			
		||||
              from generate_series(1, 3) s2 group by s2) ss
 | 
			
		||||
order by 1, 2;
 | 
			
		||||
                            QUERY PLAN                            
 | 
			
		||||
------------------------------------------------------------------
 | 
			
		||||
 Sort
 | 
			
		||||
   Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
 | 
			
		||||
   Sort Key: s1.s1, s2.s2
 | 
			
		||||
   ->  Nested Loop
 | 
			
		||||
         Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
 | 
			
		||||
         ->  Function Scan on pg_catalog.generate_series s1
 | 
			
		||||
               Output: s1.s1
 | 
			
		||||
               Function Call: generate_series(1, 3)
 | 
			
		||||
         ->  HashAggregate
 | 
			
		||||
               Output: s2.s2, sum((s1.s1 + s2.s2))
 | 
			
		||||
               ->  Function Scan on pg_catalog.generate_series s2
 | 
			
		||||
                     Output: s2.s2
 | 
			
		||||
                     Function Call: generate_series(1, 3)
 | 
			
		||||
(13 rows)
 | 
			
		||||
 | 
			
		||||
select s1, s2, sm
 | 
			
		||||
from generate_series(1, 3) s1,
 | 
			
		||||
     lateral (select s2, sum(s1 + s2) sm
 | 
			
		||||
              from generate_series(1, 3) s2 group by s2) ss
 | 
			
		||||
order by 1, 2;
 | 
			
		||||
 s1 | s2 | sm 
 | 
			
		||||
----+----+----
 | 
			
		||||
  1 |  1 |  2
 | 
			
		||||
  1 |  2 |  3
 | 
			
		||||
  1 |  3 |  4
 | 
			
		||||
  2 |  1 |  3
 | 
			
		||||
  2 |  2 |  4
 | 
			
		||||
  2 |  3 |  5
 | 
			
		||||
  3 |  1 |  4
 | 
			
		||||
  3 |  2 |  5
 | 
			
		||||
  3 |  3 |  6
 | 
			
		||||
(9 rows)
 | 
			
		||||
 | 
			
		||||
explain (verbose, costs off)
 | 
			
		||||
select array(select sum(x+y) s
 | 
			
		||||
            from generate_series(1,3) y group by y order by s)
 | 
			
		||||
  from generate_series(1,3) x;
 | 
			
		||||
                            QUERY PLAN                             
 | 
			
		||||
-------------------------------------------------------------------
 | 
			
		||||
 Function Scan on pg_catalog.generate_series x
 | 
			
		||||
   Output: (SubPlan 1)
 | 
			
		||||
   Function Call: generate_series(1, 3)
 | 
			
		||||
   SubPlan 1
 | 
			
		||||
     ->  Sort
 | 
			
		||||
           Output: (sum((x.x + y.y))), y.y
 | 
			
		||||
           Sort Key: (sum((x.x + y.y)))
 | 
			
		||||
           ->  HashAggregate
 | 
			
		||||
                 Output: sum((x.x + y.y)), y.y
 | 
			
		||||
                 ->  Function Scan on pg_catalog.generate_series y
 | 
			
		||||
                       Output: y.y
 | 
			
		||||
                       Function Call: generate_series(1, 3)
 | 
			
		||||
(12 rows)
 | 
			
		||||
 | 
			
		||||
select array(select sum(x+y) s
 | 
			
		||||
            from generate_series(1,3) y group by y order by s)
 | 
			
		||||
  from generate_series(1,3) x;
 | 
			
		||||
  array  
 | 
			
		||||
---------
 | 
			
		||||
 {2,3,4}
 | 
			
		||||
 {3,4,5}
 | 
			
		||||
 {4,5,6}
 | 
			
		||||
(3 rows)
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
-- test for bitwise integer aggregates
 | 
			
		||||
--
 | 
			
		||||
 
 | 
			
		||||
@@ -86,6 +86,28 @@ select
 | 
			
		||||
  (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
 | 
			
		||||
from tenk1 o;
 | 
			
		||||
 | 
			
		||||
-- Test handling of Params within aggregate arguments in hashed aggregation.
 | 
			
		||||
-- Per bug report from Jeevan Chalke.
 | 
			
		||||
explain (verbose, costs off)
 | 
			
		||||
select s1, s2, sm
 | 
			
		||||
from generate_series(1, 3) s1,
 | 
			
		||||
     lateral (select s2, sum(s1 + s2) sm
 | 
			
		||||
              from generate_series(1, 3) s2 group by s2) ss
 | 
			
		||||
order by 1, 2;
 | 
			
		||||
select s1, s2, sm
 | 
			
		||||
from generate_series(1, 3) s1,
 | 
			
		||||
     lateral (select s2, sum(s1 + s2) sm
 | 
			
		||||
              from generate_series(1, 3) s2 group by s2) ss
 | 
			
		||||
order by 1, 2;
 | 
			
		||||
 | 
			
		||||
explain (verbose, costs off)
 | 
			
		||||
select array(select sum(x+y) s
 | 
			
		||||
            from generate_series(1,3) y group by y order by s)
 | 
			
		||||
  from generate_series(1,3) x;
 | 
			
		||||
select array(select sum(x+y) s
 | 
			
		||||
            from generate_series(1,3) y group by y order by s)
 | 
			
		||||
  from generate_series(1,3) x;
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
-- test for bitwise integer aggregates
 | 
			
		||||
--
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user