1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Revert analyze support for partitioned tables

This reverts the following commits:
1b5617eb84 Describe (auto-)analyze behavior for partitioned tables
0e69f705cc Set pg_class.reltuples for partitioned tables
41badeaba8 Document ANALYZE storage parameters for partitioned tables
0827e8af70 autovacuum: handle analyze for partitioned tables

There are efficiency issues in this code when handling databases with
large numbers of partitions, and it doesn't look like there is any
trivial way to handle those.  There are some other issues as well.  It's
now too late in the cycle for nontrivial fixes, so we'll have to let
Postgres 14 users continue to manually run ANALYZE on their
partitioned tables, and hopefully we can fix the issues for Postgres 15.

I kept [most of] be280cdad2 ("Don't reset relhasindex for partitioned
tables on ANALYZE") because while we added it due to 0827e8af70, it is
a good bugfix in its own right, since it affects manual analyze as well
as autovacuum-induced analyze, and there's no reason to revert it.

I retained the addition of relkind 'p' to tables included by
pg_stat_user_tables, because reverting that would require a catversion
bump.
Also, in pg14 only, I keep a struct member that was added to
PgStat_StatTabEntry to avoid breaking compatibility with existing stat
files.

Backpatch to 14.

Discussion: https://postgr.es/m/20210722205458.f2bug3z6qzxzpx2s@alap3.anarazel.de
This commit is contained in:
Alvaro Herrera
2021-08-16 17:27:52 -04:00
parent f83d80ea1e
commit b3d24cc0f0
11 changed files with 57 additions and 320 deletions

View File

@ -108,7 +108,7 @@ static relopt_bool boolRelOpts[] =
{
"autovacuum_enabled",
"Enables autovacuum in this relation",
RELOPT_KIND_HEAP | RELOPT_KIND_TOAST | RELOPT_KIND_PARTITIONED,
RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
ShareUpdateExclusiveLock
},
true
@ -237,7 +237,7 @@ static relopt_int intRelOpts[] =
{
"autovacuum_analyze_threshold",
"Minimum number of tuple inserts, updates or deletes prior to analyze",
RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED,
RELOPT_KIND_HEAP,
ShareUpdateExclusiveLock
},
-1, 0, INT_MAX
@ -411,7 +411,7 @@ static relopt_real realRelOpts[] =
{
"autovacuum_analyze_scale_factor",
"Number of tuple inserts, updates or deletes prior to analyze as a fraction of reltuples",
RELOPT_KIND_HEAP | RELOPT_KIND_PARTITIONED,
RELOPT_KIND_HEAP,
ShareUpdateExclusiveLock
},
-1, 0.0, 100.0
@ -1979,11 +1979,12 @@ bytea *
partitioned_table_reloptions(Datum reloptions, bool validate)
{
/*
* autovacuum_enabled, autovacuum_analyze_threshold and
* autovacuum_analyze_scale_factor are supported for partitioned tables.
* There are no options for partitioned tables yet, but this is able to do
* some validation.
*/
return default_reloptions(reloptions, validate, RELOPT_KIND_PARTITIONED);
return (bytea *) build_reloptions(reloptions, validate,
RELOPT_KIND_PARTITIONED,
0, NULL, 0);
}
/*

View File

@ -626,8 +626,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
/*
* Update pages/tuples stats in pg_class ... but not if we're doing
* inherited stats.
* Update pages/tuples stats in pg_class, and report ANALYZE to the stats
* collector ... but not if we're doing inherited stats.
*
* We assume that VACUUM hasn't set pg_class.reltuples already, even
* during a VACUUM ANALYZE. Although VACUUM often updates pg_class,
@ -668,47 +668,19 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
InvalidMultiXactId,
in_outer_xact);
}
}
else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
/*
* Partitioned tables don't have storage, so we don't set any fields
* in their pg_class entries except for reltuples, which is necessary
* for auto-analyze to work properly, and relhasindex.
*/
vac_update_relstats(onerel, -1, totalrows,
0, hasindex, InvalidTransactionId,
InvalidMultiXactId,
in_outer_xact);
}
/*
* Now report ANALYZE to the stats collector. For regular tables, we do
* it only if not doing inherited stats. For partitioned tables, we only
* do it for inherited stats. (We're never called for not-inherited stats
* on partitioned tables anyway.)
*
* Reset the changes_since_analyze counter only if we analyzed all
* columns; otherwise, there is still work for auto-analyze to do.
*/
if (!inh || onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
/*
* Now report ANALYZE to the stats collector.
*
* We deliberately don't report to the stats collector when doing
* inherited stats, because the stats collector only tracks per-table
* stats.
*
* Reset the changes_since_analyze counter only if we analyzed all
* columns; otherwise, there is still work for auto-analyze to do.
*/
pgstat_report_analyze(onerel, totalrows, totaldeadrows,
(va_cols == NIL));
/*
* If this is a manual analyze of all columns of a permanent leaf
* partition, and not doing inherited stats, also let the collector know
* about the ancestor tables of this partition. Autovacuum does the
* equivalent of this at the start of its run, so there's no reason to do
* it there.
*/
if (!inh && !IsAutoVacuumWorkerProcess() &&
(va_cols == NIL) &&
onerel->rd_rel->relispartition &&
onerel->rd_rel->relkind == RELKIND_RELATION &&
onerel->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
{
pgstat_report_anl_ancestors(RelationGetRelid(onerel));
}
/*

View File

@ -335,7 +335,6 @@ typedef struct ForeignTruncateInfo
static void truncate_check_rel(Oid relid, Form_pg_class reltuple);
static void truncate_check_perms(Oid relid, Form_pg_class reltuple);
static void truncate_check_activity(Relation rel);
static void truncate_update_partedrel_stats(List *parted_rels);
static void RangeVarCallbackForTruncate(const RangeVar *relation,
Oid relId, Oid oldRelId, void *arg);
static List *MergeAttributes(List *schema, List *supers, char relpersistence,
@ -1739,7 +1738,6 @@ ExecuteTruncateGuts(List *explicit_rels,
{
List *rels;
List *seq_relids = NIL;
List *parted_rels = NIL;
HTAB *ft_htab = NULL;
EState *estate;
ResultRelInfo *resultRelInfos;
@ -1888,15 +1886,9 @@ ExecuteTruncateGuts(List *explicit_rels,
{
Relation rel = (Relation) lfirst(cell);
/*
* Save OID of partitioned tables for later; nothing else to do for
* them here.
*/
/* Skip partitioned tables as there is nothing to do */
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
parted_rels = lappend_oid(parted_rels, RelationGetRelid(rel));
continue;
}
/*
* Build the lists of foreign tables belonging to each foreign server
@ -2044,9 +2036,6 @@ ExecuteTruncateGuts(List *explicit_rels,
ResetSequence(seq_relid);
}
/* Reset partitioned tables' pg_class.reltuples */
truncate_update_partedrel_stats(parted_rels);
/*
* Write a WAL record to allow this set of actions to be logically
* decoded.
@ -2193,40 +2182,6 @@ truncate_check_activity(Relation rel)
CheckTableNotInUse(rel, "TRUNCATE");
}
/*
* Update pg_class.reltuples for all the given partitioned tables to 0.
*/
static void
truncate_update_partedrel_stats(List *parted_rels)
{
Relation pg_class;
ListCell *lc;
pg_class = table_open(RelationRelationId, RowExclusiveLock);
foreach(lc, parted_rels)
{
Oid relid = lfirst_oid(lc);
HeapTuple tuple;
Form_pg_class rd_rel;
tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "could not find tuple for relation %u", relid);
rd_rel = (Form_pg_class) GETSTRUCT(tuple);
if (rd_rel->reltuples != (float4) 0)
{
rd_rel->reltuples = (float4) 0;
heap_inplace_update(pg_class, tuple);
}
heap_freetuple(tuple);
}
table_close(pg_class, RowExclusiveLock);
}
/*
* storage_name
* returns the name corresponding to a typstorage/attstorage enum value

View File

@ -75,7 +75,6 @@
#include "catalog/dependency.h"
#include "catalog/namespace.h"
#include "catalog/pg_database.h"
#include "catalog/pg_inherits.h"
#include "commands/dbcommands.h"
#include "commands/vacuum.h"
#include "lib/ilist.h"
@ -1970,7 +1969,6 @@ do_autovacuum(void)
int effective_multixact_freeze_max_age;
bool did_vacuum = false;
bool found_concurrent_worker = false;
bool updated = false;
int i;
/*
@ -2056,19 +2054,12 @@ do_autovacuum(void)
/*
* Scan pg_class to determine which tables to vacuum.
*
* We do this in three passes: First we let pgstat collector know about
* the partitioned table ancestors of all partitions that have recently
* acquired rows for analyze. This informs the second pass about the
* total number of tuple count in partitioning hierarchies.
*
* On the second pass, we collect the list of plain relations,
* materialized views and partitioned tables. On the third one we collect
* TOAST tables.
*
* The reason for doing the third pass is that during it we want to use
* the main relation's pg_class.reloptions entry if the TOAST table does
* not have any, and we cannot obtain it unless we know beforehand what's
* the main table OID.
* We do this in two passes: on the first one we collect the list of plain
* relations and materialized views, and on the second one we collect
* TOAST tables. The reason for doing the second pass is that during it we
* want to use the main relation's pg_class.reloptions entry if the TOAST
* table does not have any, and we cannot obtain it unless we know
* beforehand what's the main table OID.
*
* We need to check TOAST tables separately because in cases with short,
* wide tables there might be proportionally much more activity in the
@ -2077,44 +2068,7 @@ do_autovacuum(void)
relScan = table_beginscan_catalog(classRel, 0, NULL);
/*
* First pass: before collecting the list of tables to vacuum, let stat
* collector know about partitioned-table ancestors of each partition.
*/
while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
{
Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
Oid relid = classForm->oid;
PgStat_StatTabEntry *tabentry;
/* Only consider permanent leaf partitions */
if (!classForm->relispartition ||
classForm->relkind == RELKIND_PARTITIONED_TABLE ||
classForm->relpersistence == RELPERSISTENCE_TEMP)
continue;
/*
* No need to do this for partitions that haven't acquired any rows.
*/
tabentry = pgstat_fetch_stat_tabentry(relid);
if (tabentry &&
tabentry->changes_since_analyze -
tabentry->changes_since_analyze_reported > 0)
{
pgstat_report_anl_ancestors(relid);
updated = true;
}
}
/* Acquire fresh stats for the next passes, if needed */
if (updated)
{
autovac_refresh_stats();
dbentry = pgstat_fetch_stat_dbentry(MyDatabaseId);
shared = pgstat_fetch_stat_dbentry(InvalidOid);
}
/*
* On the second pass, we collect main tables to vacuum, and also the main
* On the first pass, we collect main tables to vacuum, and also the main
* table relid to TOAST relid mapping.
*/
while ((tuple = heap_getnext(relScan, ForwardScanDirection)) != NULL)
@ -2128,8 +2082,7 @@ do_autovacuum(void)
bool wraparound;
if (classForm->relkind != RELKIND_RELATION &&
classForm->relkind != RELKIND_MATVIEW &&
classForm->relkind != RELKIND_PARTITIONED_TABLE)
classForm->relkind != RELKIND_MATVIEW)
continue;
relid = classForm->oid;
@ -2204,7 +2157,7 @@ do_autovacuum(void)
table_endscan(relScan);
/* third pass: check TOAST tables */
/* second pass: check TOAST tables */
ScanKeyInit(&key,
Anum_pg_class_relkind,
BTEqualStrategyNumber, F_CHAREQ,
@ -2797,7 +2750,6 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc)
Assert(((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_RELATION ||
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_MATVIEW ||
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_PARTITIONED_TABLE ||
((Form_pg_class) GETSTRUCT(tup))->relkind == RELKIND_TOASTVALUE);
relopts = extractRelOptions(tup, pg_class_desc, NULL);

View File

@ -38,7 +38,6 @@
#include "access/transam.h"
#include "access/twophase_rmgr.h"
#include "access/xact.h"
#include "catalog/partition.h"
#include "catalog/pg_database.h"
#include "catalog/pg_proc.h"
#include "common/ip.h"
@ -345,7 +344,6 @@ static void pgstat_recv_resetreplslotcounter(PgStat_MsgResetreplslotcounter *msg
static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
static void pgstat_recv_anl_ancestors(PgStat_MsgAnlAncestors *msg, int len);
static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
static void pgstat_recv_wal(PgStat_MsgWal *msg, int len);
@ -1599,9 +1597,6 @@ pgstat_report_vacuum(Oid tableoid, bool shared,
*
* Caller must provide new live- and dead-tuples estimates, as well as a
* flag indicating whether to reset the changes_since_analyze counter.
* Exceptional support only changes_since_analyze for partitioned tables,
* though they don't have any data. This counter will tell us whether
* partitioned tables need autoanalyze or not.
* --------
*/
void
@ -1623,31 +1618,21 @@ pgstat_report_analyze(Relation rel,
* be double-counted after commit. (This approach also ensures that the
* collector ends up with the right numbers if we abort instead of
* committing.)
*
* For partitioned tables, we don't report live and dead tuples, because
* such tables don't have any data.
*/
if (rel->pgstat_info != NULL)
{
PgStat_TableXactStatus *trans;
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
/* If this rel is partitioned, skip modifying */
livetuples = deadtuples = 0;
else
for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
{
for (trans = rel->pgstat_info->trans; trans; trans = trans->upper)
{
livetuples -= trans->tuples_inserted - trans->tuples_deleted;
deadtuples -= trans->tuples_updated + trans->tuples_deleted;
}
/* count stuff inserted by already-aborted subxacts, too */
deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
/* Since ANALYZE's counts are estimates, we could have underflowed */
livetuples = Max(livetuples, 0);
deadtuples = Max(deadtuples, 0);
livetuples -= trans->tuples_inserted - trans->tuples_deleted;
deadtuples -= trans->tuples_updated + trans->tuples_deleted;
}
/* count stuff inserted by already-aborted subxacts, too */
deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples;
/* Since ANALYZE's counts are estimates, we could have underflowed */
livetuples = Max(livetuples, 0);
deadtuples = Max(deadtuples, 0);
}
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
@ -1659,48 +1644,6 @@ pgstat_report_analyze(Relation rel,
msg.m_live_tuples = livetuples;
msg.m_dead_tuples = deadtuples;
pgstat_send(&msg, sizeof(msg));
}
/*
* pgstat_report_anl_ancestors
*
* Send list of partitioned table ancestors of the given partition to the
* collector. The collector is in charge of propagating the analyze tuple
* counts from the partition to its ancestors. This is necessary so that
* other processes can decide whether to analyze the partitioned tables.
*/
void
pgstat_report_anl_ancestors(Oid relid)
{
PgStat_MsgAnlAncestors msg;
List *ancestors;
ListCell *lc;
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANL_ANCESTORS);
msg.m_databaseid = MyDatabaseId;
msg.m_tableoid = relid;
msg.m_nancestors = 0;
ancestors = get_partition_ancestors(relid);
foreach(lc, ancestors)
{
Oid ancestor = lfirst_oid(lc);
msg.m_ancestors[msg.m_nancestors] = ancestor;
if (++msg.m_nancestors >= PGSTAT_NUM_ANCESTORENTRIES)
{
pgstat_send(&msg, offsetof(PgStat_MsgAnlAncestors, m_ancestors[0]) +
msg.m_nancestors * sizeof(Oid));
msg.m_nancestors = 0;
}
}
if (msg.m_nancestors > 0)
pgstat_send(&msg, offsetof(PgStat_MsgAnlAncestors, m_ancestors[0]) +
msg.m_nancestors * sizeof(Oid));
list_free(ancestors);
}
/* --------
@ -2039,8 +1982,7 @@ pgstat_initstats(Relation rel)
char relkind = rel->rd_rel->relkind;
/* We only count stats for things that have storage */
if (!RELKIND_HAS_STORAGE(relkind) &&
relkind != RELKIND_PARTITIONED_TABLE)
if (!RELKIND_HAS_STORAGE(relkind))
{
rel->pgstat_info = NULL;
return;
@ -3370,10 +3312,6 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_recv_analyze(&msg.msg_analyze, len);
break;
case PGSTAT_MTYPE_ANL_ANCESTORS:
pgstat_recv_anl_ancestors(&msg.msg_anl_ancestors, len);
break;
case PGSTAT_MTYPE_ARCHIVER:
pgstat_recv_archiver(&msg.msg_archiver, len);
break;
@ -3588,7 +3526,6 @@ pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
result->n_live_tuples = 0;
result->n_dead_tuples = 0;
result->changes_since_analyze = 0;
result->changes_since_analyze_reported = 0;
result->inserts_since_vacuum = 0;
result->blocks_fetched = 0;
result->blocks_hit = 0;
@ -4870,7 +4807,6 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
tabentry->n_live_tuples = tabmsg->t_counts.t_delta_live_tuples;
tabentry->n_dead_tuples = tabmsg->t_counts.t_delta_dead_tuples;
tabentry->changes_since_analyze = tabmsg->t_counts.t_changed_tuples;
tabentry->changes_since_analyze_reported = 0;
tabentry->inserts_since_vacuum = tabmsg->t_counts.t_tuples_inserted;
tabentry->blocks_fetched = tabmsg->t_counts.t_blocks_fetched;
tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
@ -5268,10 +5204,7 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
* have no good way to estimate how many of those there were.
*/
if (msg->m_resetcounter)
{
tabentry->changes_since_analyze = 0;
tabentry->changes_since_analyze_reported = 0;
}
if (msg->m_autovacuum)
{
@ -5285,29 +5218,6 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
}
}
static void
pgstat_recv_anl_ancestors(PgStat_MsgAnlAncestors *msg, int len)
{
PgStat_StatDBEntry *dbentry;
PgStat_StatTabEntry *tabentry;
dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
for (int i = 0; i < msg->m_nancestors; i++)
{
Oid ancestor_relid = msg->m_ancestors[i];
PgStat_StatTabEntry *ancestor;
ancestor = pgstat_get_tab_entry(dbentry, ancestor_relid, true);
ancestor->changes_since_analyze +=
tabentry->changes_since_analyze - tabentry->changes_since_analyze_reported;
}
tabentry->changes_since_analyze_reported = tabentry->changes_since_analyze;
}
/* ----------
* pgstat_recv_archiver() -

View File

@ -69,7 +69,6 @@ typedef enum StatMsgType
PGSTAT_MTYPE_AUTOVAC_START,
PGSTAT_MTYPE_VACUUM,
PGSTAT_MTYPE_ANALYZE,
PGSTAT_MTYPE_ANL_ANCESTORS,
PGSTAT_MTYPE_ARCHIVER,
PGSTAT_MTYPE_BGWRITER,
PGSTAT_MTYPE_WAL,
@ -107,7 +106,7 @@ typedef int64 PgStat_Counter;
*
* tuples_inserted/updated/deleted/hot_updated count attempted actions,
* regardless of whether the transaction committed. delta_live_tuples,
* delta_dead_tuples, changed_tuples are set depending on commit or abort.
* delta_dead_tuples, and changed_tuples are set depending on commit or abort.
* Note that delta_live_tuples and delta_dead_tuples can be negative!
* ----------
*/
@ -430,25 +429,6 @@ typedef struct PgStat_MsgAnalyze
PgStat_Counter m_dead_tuples;
} PgStat_MsgAnalyze;
/* ----------
* PgStat_MsgAnlAncestors Sent by the backend or autovacuum daemon
* to inform partitioned tables that are
* ancestors of a partition, to propagate
* analyze counters
* ----------
*/
#define PGSTAT_NUM_ANCESTORENTRIES \
((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(Oid) - sizeof(int)) \
/ sizeof(Oid))
typedef struct PgStat_MsgAnlAncestors
{
PgStat_MsgHdr m_hdr;
Oid m_databaseid;
Oid m_tableoid;
int m_nancestors;
Oid m_ancestors[PGSTAT_NUM_ANCESTORENTRIES];
} PgStat_MsgAnlAncestors;
/* ----------
* PgStat_MsgArchiver Sent by the archiver to update statistics.
@ -697,7 +677,6 @@ typedef union PgStat_Msg
PgStat_MsgAutovacStart msg_autovacuum_start;
PgStat_MsgVacuum msg_vacuum;
PgStat_MsgAnalyze msg_analyze;
PgStat_MsgAnlAncestors msg_anl_ancestors;
PgStat_MsgArchiver msg_archiver;
PgStat_MsgBgWriter msg_bgwriter;
PgStat_MsgWal msg_wal;
@ -793,7 +772,7 @@ typedef struct PgStat_StatTabEntry
PgStat_Counter n_live_tuples;
PgStat_Counter n_dead_tuples;
PgStat_Counter changes_since_analyze;
PgStat_Counter changes_since_analyze_reported;
PgStat_Counter unused_counter; /* kept for ABI compatibility */
PgStat_Counter inserts_since_vacuum;
PgStat_Counter blocks_fetched;
@ -1002,7 +981,6 @@ extern void pgstat_report_vacuum(Oid tableoid, bool shared,
extern void pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples,
bool resetcounter);
extern void pgstat_report_anl_ancestors(Oid relid);
extern void pgstat_report_recovery_conflict(int reason);
extern void pgstat_report_deadlock(void);