1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

vacuumdb: Add option for analyzing only relations missing stats.

This commit adds a new --missing-stats-only option that can be used
with --analyze-only or --analyze-in-stages.  When this option is
specified, vacuumdb will analyze a relation if it lacks any
statistics for a column, expression index, or extended statistics
object.  This new option is primarily intended for use after
pg_upgrade (since it can now retain most optimizer statistics), but
it might be useful in other situations, too.

Author: Corey Huinker <corey.huinker@gmail.com>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan
This commit is contained in:
Nathan Bossart
2025-03-18 16:32:56 -05:00
parent 9c03c8d187
commit edba754f05
4 changed files with 215 additions and 2 deletions

View File

@ -237,4 +237,64 @@ $node->command_fails_like(
qr/cannot vacuum all databases and a specific one at the same time/,
'cannot use option --all and a dbname as argument at the same time');
$node->safe_psql('postgres',
'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;');
$node->issues_sql_like(
[ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with missing stats');
$node->issues_sql_unlike(
[ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with no missing stats');
$node->safe_psql('postgres',
'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));');
$node->issues_sql_like(
[ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with missing index expression stats');
$node->issues_sql_unlike(
[ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with no missing index expression stats');
$node->safe_psql('postgres',
'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;');
$node->issues_sql_like(
[ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with missing extended stats');
$node->issues_sql_unlike(
[ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with no missing extended stats');
$node->safe_psql('postgres',
"CREATE TABLE regression_vacuumdb_child (a INT) INHERITS (regression_vacuumdb_test);\n"
. "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n"
. "ANALYZE regression_vacuumdb_child;\n");
$node->issues_sql_like(
[ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with missing inherited stats');
$node->issues_sql_unlike(
[ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with no missing inherited stats');
$node->safe_psql('postgres',
"CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n"
. "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n"
. "INSERT INTO regression_vacuumdb_parted VALUES (1);\n"
. "ANALYZE regression_vacuumdb_part1;\n");
$node->issues_sql_like(
[ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_parted', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with missing partition stats');
$node->issues_sql_unlike(
[ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_parted', 'postgres' ],
qr/statement:\ ANALYZE/sx,
'--missing-stats-only with no missing partition stats');
done_testing();

View File

@ -47,6 +47,7 @@ typedef struct vacuumingOptions
bool process_toast;
bool skip_database_stats;
char *buffer_usage_limit;
bool missing_stats_only;
} vacuumingOptions;
/* object filter options */
@ -134,6 +135,7 @@ main(int argc, char *argv[])
{"no-process-toast", no_argument, NULL, 11},
{"no-process-main", no_argument, NULL, 12},
{"buffer-usage-limit", required_argument, NULL, 13},
{"missing-stats-only", no_argument, NULL, 14},
{NULL, 0, NULL, 0}
};
@ -281,6 +283,9 @@ main(int argc, char *argv[])
case 13:
vacopts.buffer_usage_limit = escape_quotes(optarg);
break;
case 14:
vacopts.missing_stats_only = true;
break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@ -366,6 +371,14 @@ main(int argc, char *argv[])
pg_fatal("cannot use the \"%s\" option with the \"%s\" option",
"buffer-usage-limit", "full");
/*
* Prohibit --missing-stats-only without --analyze-only or
* --analyze-in-stages.
*/
if (vacopts.missing_stats_only && !vacopts.analyze_only)
pg_fatal("cannot use the \"%s\" option without \"%s\" or \"%s\"",
"missing-stats-only", "analyze-only", "analyze-in-stages");
/* fill cparams except for dbname, which is set below */
cparams.pghost = host;
cparams.pgport = port;
@ -406,12 +419,14 @@ main(int argc, char *argv[])
if (analyze_in_stages)
{
int stage;
SimpleStringList *found_objs = NULL;
for (stage = 0; stage < ANALYZE_NUM_STAGES; stage++)
{
vacuum_one_database(&cparams, &vacopts,
stage,
&objects, NULL,
&objects,
vacopts.missing_stats_only ? &found_objs : NULL,
concurrentCons,
progname, echo, quiet);
}
@ -614,6 +629,13 @@ vacuum_one_database(ConnParams *cparams,
"--buffer-usage-limit", "16");
}
if (vacopts->missing_stats_only && PQserverVersion(conn) < 150000)
{
PQfinish(conn);
pg_fatal("cannot use the \"%s\" option on server versions older than PostgreSQL %s",
"--missing-stats-only", "15");
}
/* skip_database_stats is used automatically if server supports it */
vacopts->skip_database_stats = (PQserverVersion(conn) >= 160000);
@ -838,6 +860,9 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
" FROM pg_catalog.pg_class c\n"
" JOIN pg_catalog.pg_namespace ns"
" ON c.relnamespace OPERATOR(pg_catalog.=) ns.oid\n"
" CROSS JOIN LATERAL (SELECT c.relkind IN ("
CppAsString2(RELKIND_PARTITIONED_TABLE) ", "
CppAsString2(RELKIND_PARTITIONED_INDEX) ")) as p (inherited)\n"
" LEFT JOIN pg_catalog.pg_class t"
" ON c.reltoastrelid OPERATOR(pg_catalog.=) t.oid\n");
@ -921,6 +946,84 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
vacopts->min_mxid_age);
}
if (vacopts->missing_stats_only)
{
appendPQExpBufferStr(&catalog_query, " AND (\n");
/* regular stats */
appendPQExpBufferStr(&catalog_query,
" EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n"
" WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n"
" AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
" AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
" AND NOT a.attisdropped\n"
" AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
" AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
" WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
" AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
" AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n");
/* extended stats */
appendPQExpBufferStr(&catalog_query,
" OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n"
" WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n"
" AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
" AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
" AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n"
" WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n"
" AND d.stxdinherit OPERATOR(pg_catalog.=) p.inherited))\n");
/* expression indexes */
appendPQExpBufferStr(&catalog_query,
" OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n"
" JOIN pg_catalog.pg_index i"
" ON i.indexrelid OPERATOR(pg_catalog.=) a.attrelid\n"
" WHERE i.indrelid OPERATOR(pg_catalog.=) c.oid\n"
" AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
" AND i.indkey[a.attnum OPERATOR(pg_catalog.-) 1::pg_catalog.int2]"
" OPERATOR(pg_catalog.=) 0::pg_catalog.int2\n"
" AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
" AND NOT a.attisdropped\n"
" AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
" AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
" WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
" AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
" AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n");
/* inheritance and regular stats */
appendPQExpBufferStr(&catalog_query,
" OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n"
" WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n"
" AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
" AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n"
" AND NOT a.attisdropped\n"
" AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
" AND c.relhassubclass\n"
" AND NOT p.inherited\n"
" AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n"
" WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n"
" AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n"
" WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n"
" AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n"
" AND s.stainherit))\n");
/* inheritance and extended stats */
appendPQExpBufferStr(&catalog_query,
" OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n"
" WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n"
" AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n"
" AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n"
" AND c.relhassubclass\n"
" AND NOT p.inherited\n"
" AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n"
" WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n"
" AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n"
" WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n"
" AND d.stxdinherit))\n");
appendPQExpBufferStr(&catalog_query, " )\n");
}
/*
* Execute the catalog query. We use the default search_path for this
* query for consistency with table lookups done elsewhere by the user.
@ -983,6 +1086,11 @@ vacuum_all_databases(ConnParams *cparams,
if (analyze_in_stages)
{
SimpleStringList **found_objs = NULL;
if (vacopts->missing_stats_only)
found_objs = palloc0(PQntuples(result) * sizeof(SimpleStringList *));
/*
* When analyzing all databases in stages, we analyze them all in the
* fastest stage first, so that initial statistics become available
@ -999,7 +1107,8 @@ vacuum_all_databases(ConnParams *cparams,
vacuum_one_database(cparams, vacopts,
stage,
objects, NULL,
objects,
vacopts->missing_stats_only ? &found_objs[i] : NULL,
concurrentCons,
progname, echo, quiet);
}
@ -1239,6 +1348,7 @@ help(const char *progname)
printf(_(" -j, --jobs=NUM use this many concurrent connections to vacuum\n"));
printf(_(" --min-mxid-age=MXID_AGE minimum multixact ID age of tables to vacuum\n"));
printf(_(" --min-xid-age=XID_AGE minimum transaction ID age of tables to vacuum\n"));
printf(_(" --missing-stats-only only analyze relations with missing statistics\n"));
printf(_(" --no-index-cleanup don't remove index entries that point to dead tuples\n"));
printf(_(" --no-process-main skip the main relation\n"));
printf(_(" --no-process-toast skip the TOAST table associated with the table to vacuum\n"));

View File

@ -2849,6 +2849,33 @@ sub issues_sql_like
=pod
=item $node->issues_sql_unlike(cmd, unexpected_sql, test_name)
Run a command on the node, then verify that $unexpected_sql does not appear in
the server log file.
=cut
sub issues_sql_unlike
{
local $Test::Builder::Level = $Test::Builder::Level + 1;
my ($self, $cmd, $unexpected_sql, $test_name) = @_;
local %ENV = $self->_get_env();
my $log_location = -s $self->logfile;
my $result = PostgreSQL::Test::Utils::run_log($cmd);
ok($result, "@$cmd exit code 0");
my $log =
PostgreSQL::Test::Utils::slurp_file($self->logfile, $log_location);
unlike($log, $unexpected_sql, "$test_name: SQL not found in server log");
return;
}
=pod
=item $node->log_content()
Returns the contents of log of the node