mirror of
https://github.com/postgres/postgres.git
synced 2025-05-01 01:04:50 +03:00
Remove useless "rough estimate" path from mcelem_array_contained_selec.
The code in this function that tried to cope with a missing count histogram was quite ineffective for anything except a perfectly flat distribution. Furthermore, since we were already punting for a missing MCELEM slot, it's rather useless to sweat over a missing DECHIST slot: there are no cases where ANALYZE will create the first but not the second. So just simplify the code by punting rather than pretending we can do something useful.
This commit is contained in:
parent
4fb694aebc
commit
e2eed78910
@ -242,8 +242,7 @@ scalararraysel_containment(PlannerInfo *root,
|
||||
}
|
||||
|
||||
/*
|
||||
* arraycontsel -- restriction selectivity for "arraycolumn @> const",
|
||||
* "arraycolumn && const" or "arraycolumn <@ const"
|
||||
* arraycontsel -- restriction selectivity for array @>, &&, <@ operators
|
||||
*/
|
||||
Datum
|
||||
arraycontsel(PG_FUNCTION_ARGS)
|
||||
@ -323,8 +322,7 @@ arraycontsel(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
/*
|
||||
* arraycontjoinsel -- join selectivity for "arraycolumn @> const",
|
||||
* "arraycolumn && const" or "arraycolumn <@ const"
|
||||
* arraycontjoinsel -- join selectivity for array @>, &&, <@ operators
|
||||
*/
|
||||
Datum
|
||||
arraycontjoinsel(PG_FUNCTION_ARGS)
|
||||
@ -744,6 +742,10 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
|
||||
if (numbers == NULL || nnumbers != nmcelem + 3)
|
||||
return DEFAULT_CONTAIN_SEL;
|
||||
|
||||
/* Can't do much without a count histogram, either */
|
||||
if (hist == NULL || nhist < 3)
|
||||
return DEFAULT_CONTAIN_SEL;
|
||||
|
||||
/*
|
||||
* Grab some of the summary statistics that compute_array_stats() stores:
|
||||
* lowest frequency, frequency of null elements, and average distinct
|
||||
@ -751,11 +753,7 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
|
||||
*/
|
||||
minfreq = numbers[nmcelem];
|
||||
nullelem_freq = numbers[nmcelem + 2];
|
||||
|
||||
if (hist && nhist > 0)
|
||||
avg_count = hist[nhist - 1];
|
||||
else
|
||||
avg_count = 10.0f; /* default assumption */
|
||||
|
||||
/*
|
||||
* "rest" will be the sum of the frequencies of all elements not
|
||||
@ -853,28 +851,23 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
|
||||
*/
|
||||
mult *= exp(-rest);
|
||||
|
||||
/* Check we have nonempty distinct element count histogram */
|
||||
if (hist && nhist >= 3)
|
||||
{
|
||||
/*----------
|
||||
* Using the distinct element count histogram requires
|
||||
* O(unique_nitems * (nmcelem + unique_nitems))
|
||||
* operations. Beyond a certain computational cost threshold, it's
|
||||
* reasonable to sacrifice accuracy for decreased planning time.
|
||||
* We limit the number of operations to EFFORT * nmcelem; since
|
||||
* nmcelem is limited by the column's statistics target, the work
|
||||
* done is user-controllable.
|
||||
* reasonable to sacrifice accuracy for decreased planning time. We limit
|
||||
* the number of operations to EFFORT * nmcelem; since nmcelem is limited
|
||||
* by the column's statistics target, the work done is user-controllable.
|
||||
*
|
||||
* If the number of operations would be too large, we can reduce it
|
||||
* without losing all accuracy by reducing unique_nitems and
|
||||
* considering only the most-common elements of the constant array.
|
||||
* To make the results exactly match what we would have gotten with
|
||||
* only those elements to start with, we'd have to remove any
|
||||
* discarded elements' frequencies from "mult", but since this is only
|
||||
* an approximation anyway, we don't bother with that. Therefore it's
|
||||
* sufficient to qsort elem_selec[] and take the largest elements.
|
||||
* (They will no longer match up with the elements of array_data[],
|
||||
* but we don't care.)
|
||||
* without losing all accuracy by reducing unique_nitems and considering
|
||||
* only the most-common elements of the constant array. To make the
|
||||
* results exactly match what we would have gotten with only those
|
||||
* elements to start with, we'd have to remove any discarded elements'
|
||||
* frequencies from "mult", but since this is only an approximation
|
||||
* anyway, we don't bother with that. Therefore it's sufficient to qsort
|
||||
* elem_selec[] and take the largest elements. (They will no longer match
|
||||
* up with the elements of array_data[], but we don't care.)
|
||||
*----------
|
||||
*/
|
||||
#define EFFORT 100
|
||||
@ -883,8 +876,8 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
|
||||
unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
|
||||
{
|
||||
/*
|
||||
* Use the quadratic formula to solve for largest allowable N;
|
||||
* we have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
|
||||
* Use the quadratic formula to solve for largest allowable N. We
|
||||
* have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
|
||||
*/
|
||||
double b = (double) nmcelem;
|
||||
int n;
|
||||
@ -905,7 +898,7 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
|
||||
dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
|
||||
mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
|
||||
|
||||
/* ignore hist[nhist-1], which is the avg not a histogram member */
|
||||
/* ignore hist[nhist-1], which is the average not a histogram member */
|
||||
hist_part = calc_hist(hist, nhist - 1, unique_nitems);
|
||||
|
||||
selec = 0.0f;
|
||||
@ -923,13 +916,6 @@ mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
|
||||
pfree(dist);
|
||||
pfree(mcelem_dist);
|
||||
pfree(hist_part);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We don't have histogram. Use a rough estimate. */
|
||||
selec = mult;
|
||||
}
|
||||
|
||||
pfree(elem_selec);
|
||||
|
||||
/* Take into account occurrence of NULL element. */
|
||||
|
Loading…
x
Reference in New Issue
Block a user