mirror of
https://github.com/postgres/postgres.git
synced 2025-10-21 02:52:47 +03:00
Fix VACUUM so that it always updates pg_class.reltuples/relpages.
When we added the ability for vacuum to skip heap pages by consulting the visibility map, we made it just not update the reltuples/relpages statistics if it skipped any pages. But this could leave us with extremely out-of-date stats for a table that contains any unchanging areas, especially for TOAST tables which never get processed by ANALYZE. In particular this could result in autovacuum making poor decisions about when to process the table, as in a recent report from Florian Helmberger. And in general it's a bad idea to not update the stats at all. Instead, use the previous values of reltuples/relpages as an estimate of the tuple density in unvisited pages. This approach results in a "moving average" estimate of reltuples, which should converge to the correct value over multiple VACUUM and ANALYZE cycles even when individual measurements aren't very good. This new method for updating reltuples is used by both VACUUM and ANALYZE, with the result that we no longer need the grotty interconnections that caused ANALYZE to not update the stats depending on what had happened in the parent VACUUM command. Also, fix the logic for skipping all-visible pages during VACUUM so that it looks ahead rather than behind to decide what to do, as per a suggestion from Greg Stark. This eliminates useless scanning of all-visible pages at the start of the relation or just after a not-all-visible page. In particular, the first few pages of the relation will not be invariably included in the scanned pages, which seems to help in not overweighting them in the reltuples estimate. Back-patch to 8.4, where the visibility map was introduced.
This commit is contained in:
@@ -20,6 +20,8 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "access/clog.h"
|
||||
#include "access/genam.h"
|
||||
#include "access/heapam.h"
|
||||
@@ -62,7 +64,7 @@ static BufferAccessStrategy vac_strategy;
|
||||
static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
|
||||
static void vac_truncate_clog(TransactionId frozenXID);
|
||||
static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
|
||||
bool for_wraparound, bool *scanned_all);
|
||||
bool for_wraparound);
|
||||
|
||||
|
||||
/*
|
||||
@@ -219,12 +221,10 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
|
||||
foreach(cur, relations)
|
||||
{
|
||||
Oid relid = lfirst_oid(cur);
|
||||
bool scanned_all = false;
|
||||
|
||||
if (vacstmt->options & VACOPT_VACUUM)
|
||||
{
|
||||
if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound,
|
||||
&scanned_all))
|
||||
if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -241,7 +241,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
|
||||
PushActiveSnapshot(GetTransactionSnapshot());
|
||||
}
|
||||
|
||||
analyze_rel(relid, vacstmt, vac_strategy, !scanned_all);
|
||||
analyze_rel(relid, vacstmt, vac_strategy);
|
||||
|
||||
if (use_own_xacts)
|
||||
{
|
||||
@@ -453,6 +453,79 @@ vacuum_set_xid_limits(int freeze_min_age,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
|
||||
*
|
||||
* If we scanned the whole relation then we should just use the count of
|
||||
* live tuples seen; but if we did not, we should not trust the count
|
||||
* unreservedly, especially not in VACUUM, which may have scanned a quite
|
||||
* nonrandom subset of the table. When we have only partial information,
|
||||
* we take the old value of pg_class.reltuples as a measurement of the
|
||||
* tuple density in the unscanned pages.
|
||||
*
|
||||
* This routine is shared by VACUUM and ANALYZE.
|
||||
*/
|
||||
double
|
||||
vac_estimate_reltuples(Relation relation, bool is_analyze,
|
||||
BlockNumber total_pages,
|
||||
BlockNumber scanned_pages,
|
||||
double scanned_tuples)
|
||||
{
|
||||
BlockNumber old_rel_pages = relation->rd_rel->relpages;
|
||||
double old_rel_tuples = relation->rd_rel->reltuples;
|
||||
double old_density;
|
||||
double new_density;
|
||||
double multiplier;
|
||||
double updated_density;
|
||||
|
||||
/* If we did scan the whole table, just use the count as-is */
|
||||
if (scanned_pages >= total_pages)
|
||||
return scanned_tuples;
|
||||
|
||||
/*
|
||||
* If scanned_pages is zero but total_pages isn't, keep the existing
|
||||
* value of reltuples.
|
||||
*/
|
||||
if (scanned_pages == 0)
|
||||
return old_rel_tuples;
|
||||
|
||||
/*
|
||||
* If old value of relpages is zero, old density is indeterminate; we
|
||||
* can't do much except scale up scanned_tuples to match total_pages.
|
||||
*/
|
||||
if (old_rel_pages == 0)
|
||||
return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
|
||||
|
||||
/*
|
||||
* Okay, we've covered the corner cases. The normal calculation is to
|
||||
* convert the old measurement to a density (tuples per page), then
|
||||
* update the density using an exponential-moving-average approach,
|
||||
* and finally compute reltuples as updated_density * total_pages.
|
||||
*
|
||||
* For ANALYZE, the moving average multiplier is just the fraction of
|
||||
* the table's pages we scanned. This is equivalent to assuming
|
||||
* that the tuple density in the unscanned pages didn't change. Of
|
||||
* course, it probably did, if the new density measurement is different.
|
||||
* But over repeated cycles, the value of reltuples will converge towards
|
||||
* the correct value, if repeated measurements show the same new density.
|
||||
*
|
||||
* For VACUUM, the situation is a bit different: we have looked at a
|
||||
* nonrandom sample of pages, but we know for certain that the pages we
|
||||
* didn't look at are precisely the ones that haven't changed lately.
|
||||
* Thus, there is a reasonable argument for doing exactly the same thing
|
||||
* as for the ANALYZE case, that is use the old density measurement as
|
||||
* the value for the unscanned pages.
|
||||
*
|
||||
* This logic could probably use further refinement.
|
||||
*/
|
||||
old_density = old_rel_tuples / old_rel_pages;
|
||||
new_density = scanned_tuples / scanned_pages;
|
||||
multiplier = (double) scanned_pages / (double) total_pages;
|
||||
updated_density = old_density + (new_density - old_density) * multiplier;
|
||||
return floor(updated_density * total_pages + 0.5);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* vac_update_relstats() -- update statistics for one relation
|
||||
*
|
||||
@@ -480,7 +553,7 @@ vacuum_set_xid_limits(int freeze_min_age,
|
||||
* somebody vacuuming pg_class might think they could delete a tuple
|
||||
* marked with xmin = our xid.
|
||||
*
|
||||
* This routine is shared by VACUUM and stand-alone ANALYZE.
|
||||
* This routine is shared by VACUUM and ANALYZE.
|
||||
*/
|
||||
void
|
||||
vac_update_relstats(Relation relation,
|
||||
@@ -758,14 +831,10 @@ vac_truncate_clog(TransactionId frozenXID)
|
||||
* many small transactions. Otherwise, two-phase locking would require
|
||||
* us to lock the entire database during one pass of the vacuum cleaner.
|
||||
*
|
||||
* We'll return true in *scanned_all if the vacuum scanned all heap
|
||||
* pages, and updated pg_class.
|
||||
*
|
||||
* At entry and exit, we are not inside a transaction.
|
||||
*/
|
||||
static bool
|
||||
vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
|
||||
bool *scanned_all)
|
||||
vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
|
||||
{
|
||||
LOCKMODE lmode;
|
||||
Relation onerel;
|
||||
@@ -775,9 +844,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
|
||||
int save_sec_context;
|
||||
int save_nestlevel;
|
||||
|
||||
if (scanned_all)
|
||||
*scanned_all = false;
|
||||
|
||||
/* Begin a transaction for vacuuming this relation */
|
||||
StartTransactionCommand();
|
||||
|
||||
@@ -971,7 +1037,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
|
||||
vacstmt->freeze_min_age, vacstmt->freeze_table_age);
|
||||
}
|
||||
else
|
||||
lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all);
|
||||
lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
|
||||
|
||||
/* Roll back any GUC changes executed by index functions */
|
||||
AtEOXact_GUC(false, save_nestlevel);
|
||||
@@ -997,7 +1063,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
|
||||
* totally unimportant for toast relations.
|
||||
*/
|
||||
if (toast_relid != InvalidOid)
|
||||
vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL);
|
||||
vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
|
||||
|
||||
/*
|
||||
* Now release the session-level lock on the master table.
|
||||
|
Reference in New Issue
Block a user