diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 848263d3900..925d06ed8ce 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1215,19 +1215,22 @@ acquire_sample_rows(Relation onerel, int elevel,
         qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
 
     /*
-     * Estimate total numbers of rows in relation.  For live rows, use
-     * vac_estimate_reltuples; for dead rows, we have no source of old
-     * information, so we have to assume the density is the same in unseen
-     * pages as in the pages we scanned.
+     * Estimate total numbers of live and dead rows in relation, extrapolating
+     * on the assumption that the average tuple density in pages we didn't
+     * scan is the same as in the pages we did scan.  Since what we scanned is
+     * a random sample of the pages in the relation, this should be a good
+     * assumption.
     */
-    *totalrows = vac_estimate_reltuples(onerel, true,
-                                        totalblocks,
-                                        bs.m,
-                                        liverows);
     if (bs.m > 0)
+    {
+        *totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
         *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
+    }
     else
+    {
+        *totalrows = 0.0;
         *totaldeadrows = 0.0;
+    }
 
     /*
      * Emit some interesting relation info
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index d533cef6a6c..dd93ecb6046 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -685,13 +685,13 @@ vacuum_set_xid_limits(Relation rel,
  * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples
  *
  *      If we scanned the whole relation then we should just use the count of
- *      live tuples seen; but if we did not, we should not trust the count
- *      unreservedly, especially not in VACUUM, which may have scanned a quite
- *      nonrandom subset of the table.  When we have only partial information,
- *      we take the old value of pg_class.reltuples as a measurement of the
+ *      live tuples seen; but if we did not, we should not blindly extrapolate
+ *      from that number, since VACUUM may have scanned a quite nonrandom
+ *      subset of the table.  When we have only partial information, we take
+ *      the old value of pg_class.reltuples as a measurement of the
  *      tuple density in the unscanned pages.
  *
- *      This routine is shared by VACUUM and ANALYZE.
+ *      The is_analyze argument is historical.
  */
 double
 vac_estimate_reltuples(Relation relation, bool is_analyze,
@@ -702,9 +702,8 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
     BlockNumber old_rel_pages = relation->rd_rel->relpages;
     double      old_rel_tuples = relation->rd_rel->reltuples;
     double      old_density;
-    double      new_density;
-    double      multiplier;
-    double      updated_density;
+    double      unscanned_pages;
+    double      total_tuples;
 
     /* If we did scan the whole table, just use the count as-is */
     if (scanned_pages >= total_pages)
@@ -728,31 +727,14 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
 
     /*
      * Okay, we've covered the corner cases.  The normal calculation is to
-     * convert the old measurement to a density (tuples per page), then update
-     * the density using an exponential-moving-average approach, and finally
-     * compute reltuples as updated_density * total_pages.
-     *
-     * For ANALYZE, the moving average multiplier is just the fraction of the
-     * table's pages we scanned.  This is equivalent to assuming that the
-     * tuple density in the unscanned pages didn't change.  Of course, it
-     * probably did, if the new density measurement is different.  But over
-     * repeated cycles, the value of reltuples will converge towards the
-     * correct value, if repeated measurements show the same new density.
-     *
-     * For VACUUM, the situation is a bit different: we have looked at a
-     * nonrandom sample of pages, but we know for certain that the pages we
-     * didn't look at are precisely the ones that haven't changed lately.
-     * Thus, there is a reasonable argument for doing exactly the same thing
-     * as for the ANALYZE case, that is use the old density measurement as the
-     * value for the unscanned pages.
-     *
-     * This logic could probably use further refinement.
+     * convert the old measurement to a density (tuples per page), then
+     * estimate the number of tuples in the unscanned pages using that figure,
+     * and finally add on the number of tuples in the scanned pages.
     */
     old_density = old_rel_tuples / old_rel_pages;
-    new_density = scanned_tuples / scanned_pages;
-    multiplier = (double) scanned_pages / (double) total_pages;
-    updated_density = old_density + (new_density - old_density) * multiplier;
-    return floor(updated_density * total_pages + 0.5);
+    unscanned_pages = (double) total_pages - (double) scanned_pages;
+    total_tuples = old_density * unscanned_pages + scanned_tuples;
+    return floor(total_tuples + 0.5);
 }
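
Illustration only, not part of the patch: a minimal standalone C sketch of the two estimation paths this diff leaves in place, using hypothetical helper names (extrapolate_from_sample, estimate_reltuples) and made-up numbers; corner-case handling is simplified relative to the real vac_estimate_reltuples().

/*
 * Illustration only -- not part of the patch.
 * Build with: cc -o reltuples_demo reltuples_demo.c -lm
 */
#include <math.h>
#include <stdio.h>

/*
 * ANALYZE-style extrapolation (analyze.c hunk): scale the row counts seen
 * in the randomly sampled pages up to the whole table.
 */
static double
extrapolate_from_sample(double rows_seen, double pages_sampled,
                        double total_pages)
{
    if (pages_sampled <= 0)
        return 0.0;
    return floor((rows_seen / pages_sampled) * total_pages + 0.5);
}

/*
 * VACUUM-style estimate (vacuum.c hunk): assume the unscanned pages still
 * have the previously recorded tuple density, then add the tuples actually
 * counted in the scanned pages.  Corner cases simplified for the sketch.
 */
static double
estimate_reltuples(double old_rel_pages, double old_rel_tuples,
                   double total_pages, double scanned_pages,
                   double scanned_tuples)
{
    double old_density;
    double unscanned_pages;
    double total_tuples;

    if (scanned_pages >= total_pages)   /* scanned everything: trust count */
        return scanned_tuples;
    if (scanned_pages <= 0)             /* scanned nothing: keep old value */
        return old_rel_tuples;
    if (old_rel_pages <= 0)             /* no old data: plain scale-up */
        return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);

    old_density = old_rel_tuples / old_rel_pages;
    unscanned_pages = total_pages - scanned_pages;
    total_tuples = old_density * unscanned_pages + scanned_tuples;
    return floor(total_tuples + 0.5);
}

int
main(void)
{
    /* Made-up example: 1000-page table, old reltuples 100000 (density 100),
     * 100 pages visited containing 9500 live tuples. */
    printf("ANALYZE-style: %.0f\n",
           extrapolate_from_sample(9500, 100, 1000));          /* 95000 */
    printf("VACUUM-style:  %.0f\n",
           estimate_reltuples(1000, 100000, 1000, 100, 9500)); /* 99500 */
    return 0;
}

The two results differ because, per the patched comments, VACUUM may visit a quite nonrandom subset of pages (typically the recently modified ones), so it keeps the old density for the pages it skipped, whereas ANALYZE's pages are a random sample and can be scaled up directly.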