mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	When dealing with multiple grouping columns coming from the same table,
clamp the estimated number of groups to the table's row count divided by 10, instead of the full table row count. This reflects a heuristic that people probably won't group over a near-unique set of columns, and the knowledge that we currently have no way to estimate the correlation between the columns better than guessing. This change causes a trivial plan change in one of the regression tests.
This commit is contained in:
		@@ -15,7 +15,7 @@
 | 
				
			|||||||
 *
 | 
					 *
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * IDENTIFICATION
 | 
					 * IDENTIFICATION
 | 
				
			||||||
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.169 2004/12/31 22:01:22 pgsql Exp $
 | 
					 *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.170 2005/01/28 20:34:25 tgl Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *-------------------------------------------------------------------------
 | 
					 *-------------------------------------------------------------------------
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
@@ -1937,10 +1937,13 @@ add_unique_group_var(Query *root, List *varinfos,
 | 
				
			|||||||
 *		if we considered ones of the same rel, we'd be double-counting the
 | 
					 *		if we considered ones of the same rel, we'd be double-counting the
 | 
				
			||||||
 *		restriction selectivity of the equality in the next step.
 | 
					 *		restriction selectivity of the equality in the next step.
 | 
				
			||||||
 *	3.	For Vars within a single source rel, we multiply together the numbers
 | 
					 *	3.	For Vars within a single source rel, we multiply together the numbers
 | 
				
			||||||
 *		of values, clamp to the number of rows in the rel, and then multiply
 | 
					 *		of values, clamp to the number of rows in the rel (divided by 10 if
 | 
				
			||||||
 *		by the selectivity of the restriction clauses for that rel.  The
 | 
					 *		more than one Var), and then multiply by the selectivity of the
 | 
				
			||||||
 *		initial product is probably too high (it's the worst case) but since
 | 
					 *		restriction clauses for that rel.  When there's more than one Var,
 | 
				
			||||||
 *		we can clamp to the rel's rows it won't be hugely bad.	Multiplying
 | 
					 *		the initial product is probably too high (it's the worst case) but
 | 
				
			||||||
 | 
					 *		clamping to a fraction of the rel's rows seems to be a helpful
 | 
				
			||||||
 | 
					 *		heuristic for not letting the estimate get out of hand.  (The factor
 | 
				
			||||||
 | 
					 *		of 10 is derived from pre-Postgres-7.4 practice.)  Multiplying
 | 
				
			||||||
 *		by the restriction selectivity is effectively assuming that the
 | 
					 *		by the restriction selectivity is effectively assuming that the
 | 
				
			||||||
 *		restriction clauses are independent of the grouping, which is a crummy
 | 
					 *		restriction clauses are independent of the grouping, which is a crummy
 | 
				
			||||||
 *		assumption, but it's hard to do better.
 | 
					 *		assumption, but it's hard to do better.
 | 
				
			||||||
@@ -2040,6 +2043,7 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
 | 
				
			|||||||
		GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
 | 
							GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
 | 
				
			||||||
		RelOptInfo *rel = varinfo1->rel;
 | 
							RelOptInfo *rel = varinfo1->rel;
 | 
				
			||||||
		double		reldistinct = varinfo1->ndistinct;
 | 
							double		reldistinct = varinfo1->ndistinct;
 | 
				
			||||||
 | 
							int			relvarcount = 1;
 | 
				
			||||||
		List	   *newvarinfos = NIL;
 | 
							List	   *newvarinfos = NIL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
@@ -2051,7 +2055,10 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
 | 
				
			|||||||
			GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
 | 
								GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if (varinfo2->rel == varinfo1->rel)
 | 
								if (varinfo2->rel == varinfo1->rel)
 | 
				
			||||||
 | 
								{
 | 
				
			||||||
				reldistinct *= varinfo2->ndistinct;
 | 
									reldistinct *= varinfo2->ndistinct;
 | 
				
			||||||
 | 
									relvarcount++;
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
			else
 | 
								else
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				/* not time to process varinfo2 yet */
 | 
									/* not time to process varinfo2 yet */
 | 
				
			||||||
@@ -2066,10 +2073,20 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
 | 
				
			|||||||
		if (rel->tuples > 0)
 | 
							if (rel->tuples > 0)
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
			 * Clamp to size of rel, multiply by restriction selectivity.
 | 
								 * Clamp to size of rel, or size of rel / 10 if multiple Vars.
 | 
				
			||||||
 | 
								 * The fudge factor is because the Vars are probably correlated
 | 
				
			||||||
 | 
								 * but we don't know by how much.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								double		clamp = rel->tuples;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (relvarcount > 1)
 | 
				
			||||||
 | 
									clamp *= 0.1;
 | 
				
			||||||
 | 
								if (reldistinct > clamp)
 | 
				
			||||||
 | 
									reldistinct = clamp;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * Multiply by restriction selectivity.
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			if (reldistinct > rel->tuples)
 | 
					 | 
				
			||||||
				reldistinct = rel->tuples;
 | 
					 | 
				
			||||||
			reldistinct *= rel->rows / rel->tuples;
 | 
								reldistinct *= rel->rows / rel->tuples;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			/*
 | 
								/*
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -134,11 +134,11 @@ SELECT '' AS five, f1 AS "Correlated Field"
 | 
				
			|||||||
                     WHERE f3 IS NOT NULL);
 | 
					                     WHERE f3 IS NOT NULL);
 | 
				
			||||||
 five | Correlated Field 
 | 
					 five | Correlated Field 
 | 
				
			||||||
------+------------------
 | 
					------+------------------
 | 
				
			||||||
      |                2
 | 
					 | 
				
			||||||
      |                3
 | 
					      |                3
 | 
				
			||||||
      |                1
 | 
					      |                1
 | 
				
			||||||
      |                2
 | 
					 | 
				
			||||||
      |                3
 | 
					      |                3
 | 
				
			||||||
 | 
					      |                2
 | 
				
			||||||
 | 
					      |                2
 | 
				
			||||||
(5 rows)
 | 
					(5 rows)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
--
 | 
					--
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user