1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-02 09:02:37 +03:00

Add hooks for type-specific calculation of ANALYZE statistics.

Idea and coding by Mark Cave-Ayland, some kibitzing by Tom Lane. initdb forced
due to new column in pg_type.
This commit is contained in:
Tom Lane
2004-02-12 23:41:04 +00:00
parent d27471fe0a
commit 69946411d3
13 changed files with 740 additions and 501 deletions

View File

@ -1,14 +1,14 @@
/*-------------------------------------------------------------------------
*
* analyze.c
* the postgres statistics generator
* the Postgres statistics generator
*
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.67 2004/02/10 03:42:43 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.68 2004/02/12 23:41:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -23,8 +23,6 @@
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "parser/parse_oper.h"
@ -38,91 +36,13 @@
#include "utils/tuplesort.h"
/*
* Analysis algorithms supported.
*
* One of these codes is stored per column in VacAttrStats.algcode, and
* analyze_rel() switches on it to choose which statistics routine to run.
*/
typedef enum
{
ALG_MINIMAL = 1, /* Compute only most-common-values; dispatched
* to compute_minimal_stats() */
ALG_SCALAR /* Compute MCV, histogram, sort
* correlation; dispatched to
* compute_scalar_stats() */
} AlgCode;
/*
* To avoid consuming too much memory during analysis and/or too much space
* in the resulting pg_statistic rows, we ignore varlena datums that are wider
* than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
* and distinct-value calculations since a wide value is unlikely to be
* duplicated at all, much less be a most-common value. For the same reason,
* ignoring wide values will not affect our estimates of histogram bin
* boundaries very much.
*
* NOTE(review): the unit is presumably bytes of the detoasted datum; the
* comparison against this constant is not visible in this excerpt -- confirm.
*/
#define WIDTH_THRESHOLD 1024
/*
* We build one of these structs for each attribute (column) that is to be
* analyzed. The struct and subsidiary data are in anl_context,
* so they live until the end of the ANALYZE operation.
*
* The first group of fields is filled by examine_attribute(); the second
* group is filled by the statistics routine selected through algcode and is
* later copied into a pg_statistic row by update_attstats().
*/
typedef struct
{
/* These fields are set up by examine_attribute */
int attnum; /* attribute number */
AlgCode algcode; /* Which algorithm to use for this column */
int minrows; /* Minimum # of rows wanted for stats */
Form_pg_attribute attr; /* copy of pg_attribute row for column */
Form_pg_type attrtype; /* copy of pg_type row for column */
Oid eqopr; /* '=' operator for datatype, if any */
Oid eqfunc; /* and associated function */
Oid ltopr; /* '<' operator for datatype, if any */
/*
* These fields are filled in by the actual statistics-gathering
* routine
*/
bool stats_valid; /* if false, update_attstats() skips the column */
float4 stanullfrac; /* fraction of entries that are NULL */
int4 stawidth; /* average width */
float4 stadistinct; /* # distinct values */
/* Per-slot results; index k corresponds to pg_statistic slot k */
int2 stakind[STATISTIC_NUM_SLOTS]; /* written to the stakindN columns */
Oid staop[STATISTIC_NUM_SLOTS]; /* written to the staopN columns */
int numnumbers[STATISTIC_NUM_SLOTS]; /* length of stanumbers[k]; <= 0
* makes stanumbersN NULL */
float4 *stanumbers[STATISTIC_NUM_SLOTS]; /* becomes a float4 array */
int numvalues[STATISTIC_NUM_SLOTS]; /* length of stavalues[k]; <= 0
* makes stavaluesN NULL */
Datum *stavalues[STATISTIC_NUM_SLOTS]; /* becomes an array of the
* column's own type */
} VacAttrStats;
/*
* One sampled value together with the sample row it was taken from.
* compute_scalar_stats() allocates an array of these, one per sample row.
*/
typedef struct
{
Datum value; /* a data value */
int tupno; /* position index for tuple it came from */
} ScalarItem;
/*
* One most-common-value candidate. compute_scalar_stats() allocates an
* array of these sized by the column's statistics target (num_mcv).
*/
typedef struct
{
int count; /* # of duplicates */
int first; /* values[] index of first occurrence */
} ScalarMCVItem;
/* Exchange two int lvalues in place; expands to a single statement. */
#define swapInt(a,b) do { int _tmp = (a); (a) = (b); (b) = _tmp; } while (0)
/* Exchange two Datum lvalues in place; expands to a single statement. */
#define swapDatum(a,b) do { Datum _tmp = (a); (a) = (b); (b) = _tmp; } while (0)
/*
* Default statistics target (GUC parameter). Substituted for a column's
* attstattarget when that value is negative.
*/
int default_statistics_target = 10;
/* NOTE(review): presumably the message level used for ANALYZE progress
* reports; where it is assigned is not visible in this excerpt -- confirm. */
static int elevel = -1;
/* Memory context holding the per-column VacAttrStats structs and the result
* arrays copied into them; lives until the end of the ANALYZE operation. */
static MemoryContext anl_context = NULL;
/* context information for compare_scalars() */
/* NOTE(review): these are presumably set by compute_scalar_stats() before it
* sorts; neither the assignment nor compare_scalars() is visible here. */
static FmgrInfo *datumCmpFn;
static SortFunctionKind datumCmpFnKind;
static int *datumCmpTupnoLink;
static VacAttrStats *examine_attribute(Relation onerel, int attnum);
static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
@ -131,16 +51,10 @@ static double random_fract(void);
static double init_selection_state(int n);
static double select_next_random_record(double t, int n, double *stateptr);
static int compare_rows(const void *a, const void *b);
static int compare_scalars(const void *a, const void *b);
static int compare_mcvs(const void *a, const void *b);
static void compute_minimal_stats(VacAttrStats *stats,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows);
static void compute_scalar_stats(VacAttrStats *stats,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows);
static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
static bool std_typanalyze(VacAttrStats *stats);
/*
* analyze_rel() -- analyze one relation
@ -345,19 +259,12 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
old_context = MemoryContextSwitchTo(col_context);
for (i = 0; i < attr_cnt; i++)
{
switch (vacattrstats[i]->algcode)
{
case ALG_MINIMAL:
compute_minimal_stats(vacattrstats[i],
onerel->rd_att, totalrows,
rows, numrows);
break;
case ALG_SCALAR:
compute_scalar_stats(vacattrstats[i],
onerel->rd_att, totalrows,
rows, numrows);
break;
}
(*vacattrstats[i]->compute_stats) (vacattrstats[i],
vacattrstats[i]->tupattnum,
onerel->rd_att,
totalrows,
rows,
numrows);
MemoryContextResetAndDeleteChildren(col_context);
}
MemoryContextSwitchTo(old_context);
@ -390,14 +297,11 @@ static VacAttrStats *
examine_attribute(Relation onerel, int attnum)
{
Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];
Operator func_operator;
HeapTuple typtuple;
Oid eqopr = InvalidOid;
Oid eqfunc = InvalidOid;
Oid ltopr = InvalidOid;
VacAttrStats *stats;
bool ok;
/* Don't analyze dropped columns */
/* Never analyze dropped columns */
if (attr->attisdropped)
return NULL;
@ -405,23 +309,10 @@ examine_attribute(Relation onerel, int attnum)
if (attr->attstattarget == 0)
return NULL;
/* If column has no "=" operator, we can't do much of anything */
func_operator = equality_oper(attr->atttypid, true);
if (func_operator != NULL)
{
eqopr = oprid(func_operator);
eqfunc = oprfuncid(func_operator);
ReleaseSysCache(func_operator);
}
if (!OidIsValid(eqfunc))
return NULL;
/*
* If we have "=" then we're at least able to do the minimal
* algorithm, so start filling in a VacAttrStats struct.
* Create the VacAttrStats struct.
*/
stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
stats->attnum = attnum;
stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
typtuple = SearchSysCache(TYPEOID,
@ -432,57 +323,25 @@ examine_attribute(Relation onerel, int attnum)
stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
ReleaseSysCache(typtuple);
stats->eqopr = eqopr;
stats->eqfunc = eqfunc;
/* If the attstattarget column is negative, use the default value */
if (stats->attr->attstattarget < 0)
stats->attr->attstattarget = default_statistics_target;
/* Is there a "<" operator with suitable semantics? */
func_operator = ordering_oper(attr->atttypid, true);
if (func_operator != NULL)
{
ltopr = oprid(func_operator);
ReleaseSysCache(func_operator);
}
stats->ltopr = ltopr;
stats->anl_context = anl_context;
stats->tupattnum = attnum;
/*
* Determine the algorithm to use (this will get more complicated
* later)
* Call the type-specific typanalyze function. If none is specified,
* use std_typanalyze().
*/
if (OidIsValid(ltopr))
{
/* Seems to be a scalar datatype */
stats->algcode = ALG_SCALAR;
/*--------------------
* The following choice of minrows is based on the paper
* "Random sampling for histogram construction: how much is enough?"
* by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
* Proceedings of ACM SIGMOD International Conference on Management
* of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5
* says that for table size n, histogram size k, maximum relative
* error in bin size f, and error probability gamma, the minimum
* random sample size is
* r = 4 * k * ln(2*n/gamma) / f^2
* Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
* r = 305.82 * k
* Note that because of the log function, the dependence on n is
* quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
* bin size error with probability 0.99. So there's no real need to
* scale for n, which is a good thing because we don't necessarily
* know it at this point.
*--------------------
*/
stats->minrows = 300 * stats->attr->attstattarget;
}
if (OidIsValid(stats->attrtype->typanalyze))
ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
PointerGetDatum(stats)));
else
ok = std_typanalyze(stats);
if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
{
/* Can't do much but the minimal stuff */
stats->algcode = ALG_MINIMAL;
/* Might as well use the same minrows as above */
stats->minrows = 300 * stats->attr->attstattarget;
pfree(stats->attrtype);
pfree(stats->attr);
pfree(stats);
return NULL;
}
return stats;
@ -851,6 +710,304 @@ compare_rows(const void *a, const void *b)
}
/*
 * update_attstats() -- update attribute statistics for one relation
 *
 * Writes one pg_statistic row per analyzed column of relation "relid".
 * "vacattrstats" holds "natts" per-column result structs; entries whose
 * stats_valid flag is false are skipped.
 *
 * Statistics are stored in several places: the pg_class row for the
 * relation has stats about the whole relation, and there is a
 * pg_statistic row for each (non-system) attribute that has ever
 * been analyzed.  The pg_class values are updated by VACUUM, not here.
 *
 * pg_statistic rows are just added or updated normally.  This means
 * that pg_statistic will probably contain some deleted rows at the
 * completion of a vacuum cycle, unless it happens to get vacuumed last.
 *
 * To keep things simple, we punt for pg_statistic, and don't try
 * to compute or store rows for pg_statistic itself in pg_statistic.
 * This could possibly be made to work, but it's not worth the trouble.
 * Note analyze_rel() has seen to it that we won't come here when
 * vacuuming pg_statistic itself.
 *
 * Note: if two backends concurrently try to analyze the same relation,
 * the second one is likely to fail here with a "tuple concurrently
 * updated" error.  This is slightly annoying, but no real harm is done.
 * We could prevent the problem by using a stronger lock on the
 * relation for ANALYZE (ie, ShareUpdateExclusiveLock instead
 * of AccessShareLock); but that cure seems worse than the disease,
 * especially now that ANALYZE doesn't start a new transaction
 * for each relation.  The lock could be held for a long time...
 */
static void
update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
{
    Relation    pgstat;
    int         idx;

    pgstat = heap_openr(StatisticRelationName, RowExclusiveLock);

    for (idx = 0; idx < natts; idx++)
    {
        VacAttrStats *colstats = vacattrstats[idx];
        HeapTuple   newtup;
        HeapTuple   existing;
        int         col;
        int         slot;
        int         j;
        Datum       values[Natts_pg_statistic];
        char        nulls[Natts_pg_statistic];
        char        replaces[Natts_pg_statistic];

        /* Nothing to store for columns whose stats could not be collected */
        if (!colstats->stats_valid)
            continue;

        /* Start with every column non-null and marked for replacement */
        for (col = 0; col < Natts_pg_statistic; col++)
        {
            nulls[col] = ' ';
            replaces[col] = 'r';
        }

        /* Fill values[] in pg_statistic column order; col is the cursor */
        col = 0;
        values[col++] = ObjectIdGetDatum(relid);                    /* starelid */
        values[col++] = Int16GetDatum(colstats->attr->attnum);      /* staattnum */
        values[col++] = Float4GetDatum(colstats->stanullfrac);      /* stanullfrac */
        values[col++] = Int32GetDatum(colstats->stawidth);          /* stawidth */
        values[col++] = Float4GetDatum(colstats->stadistinct);      /* stadistinct */

        for (slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
            values[col++] = Int16GetDatum(colstats->stakind[slot]); /* stakindN */

        for (slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
            values[col++] = ObjectIdGetDatum(colstats->staop[slot]);    /* staopN */

        /* stanumbersN: float4 arrays, NULL when the slot has no numbers */
        for (slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
        {
            int         count = colstats->numnumbers[slot];
            Datum      *elems;
            ArrayType  *arr;

            if (count <= 0)
            {
                nulls[col] = 'n';
                values[col++] = (Datum) 0;
                continue;
            }

            elems = (Datum *) palloc(count * sizeof(Datum));
            for (j = 0; j < count; j++)
                elems[j] = Float4GetDatum(colstats->stanumbers[slot][j]);
            /* XXX knows more than it should about type float4: */
            arr = construct_array(elems, count,
                                  FLOAT4OID,
                                  sizeof(float4), false, 'i');
            values[col++] = PointerGetDatum(arr);
        }

        /* stavaluesN: arrays of the column's own type, NULL when empty */
        for (slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
        {
            ArrayType  *arr;

            if (colstats->numvalues[slot] <= 0)
            {
                nulls[col] = 'n';
                values[col++] = (Datum) 0;
                continue;
            }

            arr = construct_array(colstats->stavalues[slot],
                                  colstats->numvalues[slot],
                                  colstats->attr->atttypid,
                                  colstats->attrtype->typlen,
                                  colstats->attrtype->typbyval,
                                  colstats->attrtype->typalign);
            values[col++] = PointerGetDatum(arr);
        }

        /* Insert a fresh row, or overwrite the one already on file */
        existing = SearchSysCache(STATRELATT,
                                  ObjectIdGetDatum(relid),
                                  Int16GetDatum(colstats->attr->attnum),
                                  0, 0);
        if (!HeapTupleIsValid(existing))
        {
            newtup = heap_formtuple(pgstat->rd_att, values, nulls);
            simple_heap_insert(pgstat, newtup);
        }
        else
        {
            newtup = heap_modifytuple(existing,
                                      pgstat,
                                      values,
                                      nulls,
                                      replaces);
            ReleaseSysCache(existing);
            simple_heap_update(pgstat, &newtup->t_self, newtup);
        }

        /* update indexes too */
        CatalogUpdateIndexes(pgstat, newtup);

        heap_freetuple(newtup);
    }

    heap_close(pgstat, RowExclusiveLock);
}
/*==========================================================================
*
* Code below this point represents the "standard" type-specific statistics
* analysis algorithms. This code can be replaced on a per-data-type basis
* by setting a nonzero value in pg_type.typanalyze.
*
*==========================================================================
*/
/*
* To avoid consuming too much memory during analysis and/or too much space
* in the resulting pg_statistic rows, we ignore varlena datums that are wider
* than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
* and distinct-value calculations since a wide value is unlikely to be
* duplicated at all, much less be a most-common value. For the same reason,
* ignoring wide values will not affect our estimates of histogram bin
* boundaries very much.
*
* NOTE(review): the unit is presumably bytes of the detoasted datum; the
* comparison against this constant is not visible in this excerpt -- confirm.
*/
#define WIDTH_THRESHOLD 1024
/* Exchange two int lvalues in place; expands to a single statement. */
#define swapInt(a,b) do { int _tmp = (a); (a) = (b); (b) = _tmp; } while (0)
/* Exchange two Datum lvalues in place; expands to a single statement. */
#define swapDatum(a,b) do { Datum _tmp = (a); (a) = (b); (b) = _tmp; } while (0)
/*
* Extra information used by the default analysis routines.
*
* std_typanalyze() allocates one of these per column and hangs it off
* VacAttrStats.extra_data; compute_minimal_stats() and
* compute_scalar_stats() cast extra_data back to this type.
*/
typedef struct
{
Oid eqopr; /* '=' operator for datatype, if any; recorded as
* staop of the MCV slot */
Oid eqfunc; /* and associated function; looked up with
* fmgr_info() by compute_minimal_stats() */
Oid ltopr; /* '<' operator for datatype, if any; selects the
* sort function and is recorded as staop of the
* histogram and correlation slots */
} StdAnalyzeData;
/*
* One sampled value together with the sample row it was taken from.
* compute_scalar_stats() allocates an array of these, one per sample row.
*/
typedef struct
{
Datum value; /* a data value */
int tupno; /* position index for tuple it came from */
} ScalarItem;
/*
* One most-common-value candidate. compute_scalar_stats() allocates an
* array of these sized by the column's statistics target (num_mcv).
*/
typedef struct
{
int count; /* # of duplicates */
int first; /* values[] index of first occurrence */
} ScalarMCVItem;
/* context information for compare_scalars() */
/* NOTE(review): these are presumably set by compute_scalar_stats() before it
* sorts, because a two-argument comparison callback has no other way to
* receive them; neither the assignment nor compare_scalars() itself is
* visible in this excerpt -- confirm. */
static FmgrInfo *datumCmpFn;
static SortFunctionKind datumCmpFnKind;
static int *datumCmpTupnoLink;
/*
* The two standard statistics routines. std_typanalyze() stores one of them
* in VacAttrStats.compute_stats, and analyze_rel() calls it through that
* pointer with (stats, tupattnum, tuple descriptor, totalrows, rows, numrows).
*/
static void compute_minimal_stats(VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows);
static void compute_scalar_stats(VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows);
/* Comparison callbacks taking two untyped element pointers; defined later
* in the file. */
static int compare_scalars(const void *a, const void *b);
static int compare_mcvs(const void *a, const void *b);
/*
 * std_typanalyze -- the default type-specific typanalyze function
 *
 * Used by examine_attribute() when the column's type names no typanalyze
 * function of its own.  Returns false if the type has no usable "="
 * operator, in which case nothing is stored in *stats apart from the
 * defaulted attstattarget.  Otherwise it attaches a StdAnalyzeData to
 * stats->extra_data, chooses the compute_stats routine, sets minrows and
 * returns true.
 */
static bool
std_typanalyze(VacAttrStats *stats)
{
    Form_pg_attribute attr = stats->attr;
    StdAnalyzeData *opinfo;
    Operator    oper;
    Oid         eq_opr = InvalidOid;
    Oid         eq_func = InvalidOid;
    Oid         lt_opr = InvalidOid;

    /*
     * A negative attstattarget means "use the default".  Overwriting the
     * field is fine because stats->attr is a private copy.
     */
    if (attr->attstattarget < 0)
        attr->attstattarget = default_statistics_target;

    /* Without an "=" operator we can't do much of anything */
    oper = equality_oper(attr->atttypid, true);
    if (oper != NULL)
    {
        eq_opr = oprid(oper);
        eq_func = oprfuncid(oper);
        ReleaseSysCache(oper);
    }
    if (!OidIsValid(eq_func))
        return false;

    /* Look for a "<" operator with suitable semantics */
    oper = ordering_oper(attr->atttypid, true);
    if (oper != NULL)
    {
        lt_opr = oprid(oper);
        ReleaseSysCache(oper);
    }

    /* Hand the operator info to the compute_stats routines */
    opinfo = (StdAnalyzeData *) palloc(sizeof(StdAnalyzeData));
    opinfo->eqopr = eq_opr;
    opinfo->eqfunc = eq_func;
    opinfo->ltopr = lt_opr;
    stats->extra_data = opinfo;

    /*
     * Pick the standard algorithm: a type with "<" looks like a scalar
     * datatype and gets the full treatment; otherwise we can't do much
     * but the minimal stuff.
     */
    stats->compute_stats = OidIsValid(lt_opr) ? compute_scalar_stats
                                              : compute_minimal_stats;

    /*--------------------
     * The following choice of minrows is based on the paper
     * "Random sampling for histogram construction: how much is enough?"
     * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
     * Proceedings of ACM SIGMOD International Conference on Management
     * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
     * says that for table size n, histogram size k, maximum relative
     * error in bin size f, and error probability gamma, the minimum
     * random sample size is
     *      r = 4 * k * ln(2*n/gamma) / f^2
     * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
     *      r = 305.82 * k
     * Note that because of the log function, the dependence on n is
     * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
     * bin size error with probability 0.99.  So there's no real need to
     * scale for n, which is a good thing because we don't necessarily
     * know it at this point.
     *
     * The minimal algorithm might as well use the same minrows.
     *--------------------
     */
    stats->minrows = 300 * attr->attstattarget;

    return true;
}
/*
* compute_minimal_stats() -- compute minimal column statistics
*
@ -867,7 +1024,7 @@ compare_rows(const void *a, const void *b)
* depend mainly on the length of the list we are willing to keep.
*/
static void
compute_minimal_stats(VacAttrStats *stats,
compute_minimal_stats(VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows)
{
@ -890,6 +1047,7 @@ compute_minimal_stats(VacAttrStats *stats,
int track_cnt,
track_max;
int num_mcv = stats->attr->attstattarget;
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
/*
* We track up to 2*n values for an n-element MCV list; but at least
@ -901,7 +1059,7 @@ compute_minimal_stats(VacAttrStats *stats,
track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
track_cnt = 0;
fmgr_info(stats->eqfunc, &f_cmpeq);
fmgr_info(mystats->eqfunc, &f_cmpeq);
for (i = 0; i < numrows; i++)
{
@ -914,7 +1072,7 @@ compute_minimal_stats(VacAttrStats *stats,
vacuum_delay_point();
value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
value = heap_getattr(tuple, attnum, tupDesc, &isnull);
/* Check for null/nonnull */
if (isnull)
@ -1137,7 +1295,7 @@ compute_minimal_stats(VacAttrStats *stats,
float4 *mcv_freqs;
/* Must copy the target values into anl_context */
old_context = MemoryContextSwitchTo(anl_context);
old_context = MemoryContextSwitchTo(stats->anl_context);
mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
for (i = 0; i < num_mcv; i++)
@ -1150,7 +1308,7 @@ compute_minimal_stats(VacAttrStats *stats,
MemoryContextSwitchTo(old_context);
stats->stakind[0] = STATISTIC_KIND_MCV;
stats->staop[0] = stats->eqopr;
stats->staop[0] = mystats->eqopr;
stats->stanumbers[0] = mcv_freqs;
stats->numnumbers[0] = num_mcv;
stats->stavalues[0] = mcv_values;
@ -1175,7 +1333,7 @@ compute_minimal_stats(VacAttrStats *stats,
* data values into order.
*/
static void
compute_scalar_stats(VacAttrStats *stats,
compute_scalar_stats(VacAttrStats *stats, int attnum,
TupleDesc tupDesc, double totalrows,
HeapTuple *rows, int numrows)
{
@ -1199,12 +1357,13 @@ compute_scalar_stats(VacAttrStats *stats,
int track_cnt = 0;
int num_mcv = stats->attr->attstattarget;
int num_bins = stats->attr->attstattarget;
StdAnalyzeData *mystats = (StdAnalyzeData *) stats->extra_data;
values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
tupnoLink = (int *) palloc(numrows * sizeof(int));
track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
SelectSortFunction(mystats->ltopr, &cmpFn, &cmpFnKind);
fmgr_info(cmpFn, &f_cmpfn);
/* Initial scan to find sortable values */
@ -1216,7 +1375,7 @@ compute_scalar_stats(VacAttrStats *stats,
vacuum_delay_point();
value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
value = heap_getattr(tuple, attnum, tupDesc, &isnull);
/* Check for null/nonnull */
if (isnull)
@ -1469,7 +1628,7 @@ compute_scalar_stats(VacAttrStats *stats,
float4 *mcv_freqs;
/* Must copy the target values into anl_context */
old_context = MemoryContextSwitchTo(anl_context);
old_context = MemoryContextSwitchTo(stats->anl_context);
mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
for (i = 0; i < num_mcv; i++)
@ -1482,7 +1641,7 @@ compute_scalar_stats(VacAttrStats *stats,
MemoryContextSwitchTo(old_context);
stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
stats->staop[slot_idx] = stats->eqopr;
stats->staop[slot_idx] = mystats->eqopr;
stats->stanumbers[slot_idx] = mcv_freqs;
stats->numnumbers[slot_idx] = num_mcv;
stats->stavalues[slot_idx] = mcv_values;
@ -1555,7 +1714,7 @@ compute_scalar_stats(VacAttrStats *stats,
Assert(nvals >= num_hist);
/* Must copy the target values into anl_context */
old_context = MemoryContextSwitchTo(anl_context);
old_context = MemoryContextSwitchTo(stats->anl_context);
hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
for (i = 0; i < num_hist; i++)
{
@ -1569,7 +1728,7 @@ compute_scalar_stats(VacAttrStats *stats,
MemoryContextSwitchTo(old_context);
stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
stats->staop[slot_idx] = stats->ltopr;
stats->staop[slot_idx] = mystats->ltopr;
stats->stavalues[slot_idx] = hist_values;
stats->numvalues[slot_idx] = num_hist;
slot_idx++;
@ -1584,7 +1743,7 @@ compute_scalar_stats(VacAttrStats *stats,
corr_x2sum;
/* Must copy the target values into anl_context */
old_context = MemoryContextSwitchTo(anl_context);
old_context = MemoryContextSwitchTo(stats->anl_context);
corrs = (float4 *) palloc(sizeof(float4));
MemoryContextSwitchTo(old_context);
@ -1607,7 +1766,7 @@ compute_scalar_stats(VacAttrStats *stats,
(values_cnt * corr_x2sum - corr_xsum * corr_xsum);
stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
stats->staop[slot_idx] = stats->ltopr;
stats->staop[slot_idx] = mystats->ltopr;
stats->stanumbers[slot_idx] = corrs;
stats->numnumbers[slot_idx] = 1;
slot_idx++;
@ -1665,155 +1824,3 @@ compare_mcvs(const void *a, const void *b)
return da - db;
}
/*
 * update_attstats() -- update attribute statistics for one relation
 *
 * Writes one pg_statistic row per analyzed column of relation "relid".
 * "vacattrstats" holds "natts" per-column result structs; entries whose
 * stats_valid flag is false are skipped.
 *
 * Statistics are stored in several places: the pg_class row for the
 * relation has stats about the whole relation, and there is a
 * pg_statistic row for each (non-system) attribute that has ever
 * been analyzed.  The pg_class values are updated by VACUUM, not here.
 *
 * pg_statistic rows are just added or updated normally.  This means
 * that pg_statistic will probably contain some deleted rows at the
 * completion of a vacuum cycle, unless it happens to get vacuumed last.
 *
 * To keep things simple, we punt for pg_statistic, and don't try
 * to compute or store rows for pg_statistic itself in pg_statistic.
 * This could possibly be made to work, but it's not worth the trouble.
 * Note analyze_rel() has seen to it that we won't come here when
 * vacuuming pg_statistic itself.
 *
 * Note: if two backends concurrently try to analyze the same relation,
 * the second one is likely to fail here with a "tuple concurrently
 * updated" error.  This is slightly annoying, but no real harm is done.
 * We could prevent the problem by using a stronger lock on the
 * relation for ANALYZE (ie, ShareUpdateExclusiveLock instead
 * of AccessShareLock); but that cure seems worse than the disease,
 * especially now that ANALYZE doesn't start a new transaction
 * for each relation.  The lock could be held for a long time...
 */
static void
update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
{
    Relation    pgstat;
    int         idx;

    pgstat = heap_openr(StatisticRelationName, RowExclusiveLock);

    for (idx = 0; idx < natts; idx++)
    {
        VacAttrStats *colstats = vacattrstats[idx];
        HeapTuple   newtup;
        HeapTuple   existing;
        int         col;
        int         slot;
        int         j;
        Datum       values[Natts_pg_statistic];
        char        nulls[Natts_pg_statistic];
        char        replaces[Natts_pg_statistic];

        /* Nothing to store for columns whose stats could not be collected */
        if (!colstats->stats_valid)
            continue;

        /* Start with every column non-null and marked for replacement */
        for (col = 0; col < Natts_pg_statistic; col++)
        {
            nulls[col] = ' ';
            replaces[col] = 'r';
        }

        /* Fill values[] in pg_statistic column order; col is the cursor */
        col = 0;
        values[col++] = ObjectIdGetDatum(relid);                    /* starelid */
        values[col++] = Int16GetDatum(colstats->attnum);            /* staattnum */
        values[col++] = Float4GetDatum(colstats->stanullfrac);      /* stanullfrac */
        values[col++] = Int32GetDatum(colstats->stawidth);          /* stawidth */
        values[col++] = Float4GetDatum(colstats->stadistinct);      /* stadistinct */

        for (slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
            values[col++] = Int16GetDatum(colstats->stakind[slot]); /* stakindN */

        for (slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
            values[col++] = ObjectIdGetDatum(colstats->staop[slot]);    /* staopN */

        /* stanumbersN: float4 arrays, NULL when the slot has no numbers */
        for (slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
        {
            int         count = colstats->numnumbers[slot];
            Datum      *elems;
            ArrayType  *arr;

            if (count <= 0)
            {
                nulls[col] = 'n';
                values[col++] = (Datum) 0;
                continue;
            }

            elems = (Datum *) palloc(count * sizeof(Datum));
            for (j = 0; j < count; j++)
                elems[j] = Float4GetDatum(colstats->stanumbers[slot][j]);
            /* XXX knows more than it should about type float4: */
            arr = construct_array(elems, count,
                                  FLOAT4OID,
                                  sizeof(float4), false, 'i');
            values[col++] = PointerGetDatum(arr);
        }

        /* stavaluesN: arrays of the column's own type, NULL when empty */
        for (slot = 0; slot < STATISTIC_NUM_SLOTS; slot++)
        {
            ArrayType  *arr;

            if (colstats->numvalues[slot] <= 0)
            {
                nulls[col] = 'n';
                values[col++] = (Datum) 0;
                continue;
            }

            arr = construct_array(colstats->stavalues[slot],
                                  colstats->numvalues[slot],
                                  colstats->attr->atttypid,
                                  colstats->attrtype->typlen,
                                  colstats->attrtype->typbyval,
                                  colstats->attrtype->typalign);
            values[col++] = PointerGetDatum(arr);
        }

        /* Insert a fresh row, or overwrite the one already on file */
        existing = SearchSysCache(STATRELATT,
                                  ObjectIdGetDatum(relid),
                                  Int16GetDatum(colstats->attnum),
                                  0, 0);
        if (!HeapTupleIsValid(existing))
        {
            newtup = heap_formtuple(pgstat->rd_att, values, nulls);
            simple_heap_insert(pgstat, newtup);
        }
        else
        {
            newtup = heap_modifytuple(existing,
                                      pgstat,
                                      values,
                                      nulls,
                                      replaces);
            ReleaseSysCache(existing);
            simple_heap_update(pgstat, &newtup->t_self, newtup);
        }

        /* update indexes too */
        CatalogUpdateIndexes(pgstat, newtup);

        heap_freetuple(newtup);
    }

    heap_close(pgstat, RowExclusiveLock);
}