Change the division of labor between grouping_planner and query_planner

so that the latter estimates the number of groups that grouping will produce. This is needed because it is primarily query_planner that makes the decision between fast-start and fast-finish plans, and in the original coding it was unable to make more than a crude rule-of-thumb choice when the query involved grouping. This revision helps us make saner choices for queries like SELECT ... GROUP BY ... LIMIT, as in a recent example from Mark Kirkwood. Also move the responsibility for canonicalizing sort_pathkeys and group_pathkeys into query_planner; this information has to be available anyway to support the first change, and doing it this way lets us get rid of compare_noncanonical_pathkeys entirely.
2025-07-15 19:21:59 +03:00 · 2005-08-27 22:13:44 +00:00
parent 9e56c5a4cf
commit 4e5fbb34b3
7 changed files with 144 additions and 206 deletions
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -11,7 +11,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.71 2005/07/28 22:27:00 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.72 2005/08/27 22:13:43 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -800,54 +800,6 @@ compare_pathkeys(List *keys1, List *keys2)
 	return PATHKEYS_BETTER2;	/* key2 is longer */
 }

-/*
- * compare_noncanonical_pathkeys
- *	  Compare two pathkeys to see if they are equivalent, and if not whether
- *	  one is "better" than the other.  This is used when we must compare
- *	  non-canonicalized pathkeys.
- *
- *	  A pathkey can be considered better than another if it is a superset:
- *	  it contains all the keys of the other plus more.	For example, either
- *	  ((A) (B)) or ((A B)) is better than ((A)).
- *
- *	  Currently, the only user of this routine is grouping_planner(),
- *	  and it will only pass single-element sublists (from
- *	  make_pathkeys_for_sortclauses).  Therefore we don't have to do the
- *	  full two-way-subset-inclusion test on each pair of sublists that is
- *	  implied by the above statement.  Instead we just verify they are
- *	  singleton lists and then do an equal().  This could be improved if
- *	  necessary.
- */
-PathKeysComparison
-compare_noncanonical_pathkeys(List *keys1, List *keys2)
-{
-	ListCell   *key1,
-			   *key2;
-
-	forboth(key1, keys1, key2, keys2)
-	{
-		List	   *subkey1 = (List *) lfirst(key1);
-		List	   *subkey2 = (List *) lfirst(key2);
-
-		Assert(list_length(subkey1) == 1);
-		Assert(list_length(subkey2) == 1);
-		if (!equal(subkey1, subkey2))
-			return PATHKEYS_DIFFERENT;	/* no need to keep looking */
-	}
-
-	/*
-	 * If we reached the end of only one list, the other is longer and
-	 * therefore not a subset.	(We assume the additional sublist(s) of
-	 * the other list are not NIL --- no pathkey list should ever have a
-	 * NIL sublist.)
-	 */
-	if (key1 == NULL && key2 == NULL)
-		return PATHKEYS_EQUAL;
-	if (key1 != NULL)
-		return PATHKEYS_BETTER1;	/* key1 is longer */
-	return PATHKEYS_BETTER2;	/* key2 is longer */
-}
-
 /*
 * pathkeys_contained_in
 *	  Common special case of compare_pathkeys: we just want to know
@@ -867,24 +819,6 @@ pathkeys_contained_in(List *keys1, List *keys2)
 	return false;
 }

-/*
- * noncanonical_pathkeys_contained_in
- *	  The same, when we don't have canonical pathkeys.
- */
-bool
-noncanonical_pathkeys_contained_in(List *keys1, List *keys2)
-{
-	switch (compare_noncanonical_pathkeys(keys1, keys2))
-	{
-		case PATHKEYS_EQUAL:
-		case PATHKEYS_BETTER2:
-			return true;
-		default:
-			break;
-	}
-	return false;
-}
-
 /*
 * get_cheapest_path_for_pathkeys
 *	  Find the cheapest path (according to the specified criterion) that