mirror of
https://github.com/postgres/postgres.git
synced 2025-05-05 09:19:17 +03:00
Make ANALYZE compute basic statistics even for types with no "=" operator.
Previously, ANALYZE simply ignored columns of datatypes that have neither a btree nor hash opclass (which means they have no recognized equality operator). Without a notion of equality, we can't identify most-common values nor estimate the number of distinct values. But we can still count nulls and compute the average physical column width, and those stats might be of value. Moreover there are some tools out there that don't work so well if rows are missing from pg_statistic. So let's add suitable logic for this case. While this is arguably a bug fix, it also has the potential to change query plans, and the gain seems not worth taking a risk of that in stable branches. So back-patch into 9.5 but not further. Oleksandr Shulgin, rewritten a bit by me.
This commit is contained in:
parent
fe6d2ab473
commit
cfb2024ae4
@ -1689,7 +1689,11 @@ typedef struct
|
|||||||
} CompareScalarsContext;
|
} CompareScalarsContext;
|
||||||
|
|
||||||
|
|
||||||
static void compute_minimal_stats(VacAttrStatsP stats,
|
static void compute_trivial_stats(VacAttrStatsP stats,
|
||||||
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
|
int samplerows,
|
||||||
|
double totalrows);
|
||||||
|
static void compute_distinct_stats(VacAttrStatsP stats,
|
||||||
AnalyzeAttrFetchFunc fetchfunc,
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
int samplerows,
|
int samplerows,
|
||||||
double totalrows);
|
double totalrows);
|
||||||
@ -1723,21 +1727,17 @@ std_typanalyze(VacAttrStats *stats)
|
|||||||
<opr, &eqopr, NULL,
|
<opr, &eqopr, NULL,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
/* If column has no "=" operator, we can't do much of anything */
|
|
||||||
if (!OidIsValid(eqopr))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* Save the operator info for compute_stats routines */
|
/* Save the operator info for compute_stats routines */
|
||||||
mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
|
mystats = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
|
||||||
mystats->eqopr = eqopr;
|
mystats->eqopr = eqopr;
|
||||||
mystats->eqfunc = get_opcode(eqopr);
|
mystats->eqfunc = OidIsValid(eqopr) ? get_opcode(eqopr) : InvalidOid;
|
||||||
mystats->ltopr = ltopr;
|
mystats->ltopr = ltopr;
|
||||||
stats->extra_data = mystats;
|
stats->extra_data = mystats;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Determine which standard statistics algorithm to use
|
* Determine which standard statistics algorithm to use
|
||||||
*/
|
*/
|
||||||
if (OidIsValid(ltopr))
|
if (OidIsValid(eqopr) && OidIsValid(ltopr))
|
||||||
{
|
{
|
||||||
/* Seems to be a scalar datatype */
|
/* Seems to be a scalar datatype */
|
||||||
stats->compute_stats = compute_scalar_stats;
|
stats->compute_stats = compute_scalar_stats;
|
||||||
@ -1762,10 +1762,17 @@ std_typanalyze(VacAttrStats *stats)
|
|||||||
*/
|
*/
|
||||||
stats->minrows = 300 * attr->attstattarget;
|
stats->minrows = 300 * attr->attstattarget;
|
||||||
}
|
}
|
||||||
|
else if (OidIsValid(eqopr))
|
||||||
|
{
|
||||||
|
/* We can still recognize distinct values */
|
||||||
|
stats->compute_stats = compute_distinct_stats;
|
||||||
|
/* Might as well use the same minrows as above */
|
||||||
|
stats->minrows = 300 * attr->attstattarget;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Can't do much but the minimal stuff */
|
/* Can't do much but the trivial stuff */
|
||||||
stats->compute_stats = compute_minimal_stats;
|
stats->compute_stats = compute_trivial_stats;
|
||||||
/* Might as well use the same minrows as above */
|
/* Might as well use the same minrows as above */
|
||||||
stats->minrows = 300 * attr->attstattarget;
|
stats->minrows = 300 * attr->attstattarget;
|
||||||
}
|
}
|
||||||
@ -1773,8 +1780,91 @@ std_typanalyze(VacAttrStats *stats)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* compute_minimal_stats() -- compute minimal column statistics
|
* compute_trivial_stats() -- compute very basic column statistics
|
||||||
|
*
|
||||||
|
* We use this when we cannot find a hash "=" operator for the datatype.
|
||||||
|
*
|
||||||
|
* We determine the fraction of non-null rows and the average datum width.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
compute_trivial_stats(VacAttrStatsP stats,
|
||||||
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
|
int samplerows,
|
||||||
|
double totalrows)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int null_cnt = 0;
|
||||||
|
int nonnull_cnt = 0;
|
||||||
|
double total_width = 0;
|
||||||
|
bool is_varlena = (!stats->attrtype->typbyval &&
|
||||||
|
stats->attrtype->typlen == -1);
|
||||||
|
bool is_varwidth = (!stats->attrtype->typbyval &&
|
||||||
|
stats->attrtype->typlen < 0);
|
||||||
|
|
||||||
|
for (i = 0; i < samplerows; i++)
|
||||||
|
{
|
||||||
|
Datum value;
|
||||||
|
bool isnull;
|
||||||
|
|
||||||
|
vacuum_delay_point();
|
||||||
|
|
||||||
|
value = fetchfunc(stats, i, &isnull);
|
||||||
|
|
||||||
|
/* Check for null/nonnull */
|
||||||
|
if (isnull)
|
||||||
|
{
|
||||||
|
null_cnt++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
nonnull_cnt++;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If it's a variable-width field, add up widths for average width
|
||||||
|
* calculation. Note that if the value is toasted, we use the toasted
|
||||||
|
* width. We don't bother with this calculation if it's a fixed-width
|
||||||
|
* type.
|
||||||
|
*/
|
||||||
|
if (is_varlena)
|
||||||
|
{
|
||||||
|
total_width += VARSIZE_ANY(DatumGetPointer(value));
|
||||||
|
}
|
||||||
|
else if (is_varwidth)
|
||||||
|
{
|
||||||
|
/* must be cstring */
|
||||||
|
total_width += strlen(DatumGetCString(value)) + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We can only compute average width if we found some non-null values. */
|
||||||
|
if (nonnull_cnt > 0)
|
||||||
|
{
|
||||||
|
stats->stats_valid = true;
|
||||||
|
/* Do the simple null-frac and width stats */
|
||||||
|
stats->stanullfrac = (double) null_cnt / (double) samplerows;
|
||||||
|
if (is_varwidth)
|
||||||
|
stats->stawidth = total_width / (double) nonnull_cnt;
|
||||||
|
else
|
||||||
|
stats->stawidth = stats->attrtype->typlen;
|
||||||
|
stats->stadistinct = 0.0; /* "unknown" */
|
||||||
|
}
|
||||||
|
else if (null_cnt > 0)
|
||||||
|
{
|
||||||
|
/* We found only nulls; assume the column is entirely null */
|
||||||
|
stats->stats_valid = true;
|
||||||
|
stats->stanullfrac = 1.0;
|
||||||
|
if (is_varwidth)
|
||||||
|
stats->stawidth = 0; /* "unknown" */
|
||||||
|
else
|
||||||
|
stats->stawidth = stats->attrtype->typlen;
|
||||||
|
stats->stadistinct = 0.0; /* "unknown" */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* compute_distinct_stats() -- compute column statistics including ndistinct
|
||||||
*
|
*
|
||||||
* We use this when we can find only an "=" operator for the datatype.
|
* We use this when we can find only an "=" operator for the datatype.
|
||||||
*
|
*
|
||||||
@ -1789,7 +1879,7 @@ std_typanalyze(VacAttrStats *stats)
|
|||||||
* depend mainly on the length of the list we are willing to keep.
|
* depend mainly on the length of the list we are willing to keep.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
compute_minimal_stats(VacAttrStatsP stats,
|
compute_distinct_stats(VacAttrStatsP stats,
|
||||||
AnalyzeAttrFetchFunc fetchfunc,
|
AnalyzeAttrFetchFunc fetchfunc,
|
||||||
int samplerows,
|
int samplerows,
|
||||||
double totalrows)
|
double totalrows)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user