1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-05 07:41:25 +03:00

vacuumdb: Teach vacuum_one_database() to reuse query results.

Presently, each call to vacuum_one_database() queries the catalogs
to retrieve the list of tables to process.  A follow-up commit will
add a "missing stats only" feature to --analyze-in-stages, which
requires saving the catalog query results (since tables without
statistics will have them after the first stage).  This commit adds
a new parameter to vacuum_one_database() that specifies either a
previously-retrieved list or a place to return the catalog query
results.  Note that nothing uses this new parameter yet.

Author: Corey Huinker <corey.huinker@gmail.com>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan
This commit is contained in:
Nathan Bossart
2025-03-18 16:32:55 -05:00
parent a6524105d2
commit 9c03c8d187

View File

@@ -62,10 +62,16 @@ typedef enum
static VacObjFilter objfilter = OBJFILTER_NONE; static VacObjFilter objfilter = OBJFILTER_NONE;
static SimpleStringList *retrieve_objects(PGconn *conn,
vacuumingOptions *vacopts,
SimpleStringList *objects,
bool echo);
static void vacuum_one_database(ConnParams *cparams, static void vacuum_one_database(ConnParams *cparams,
vacuumingOptions *vacopts, vacuumingOptions *vacopts,
int stage, int stage,
SimpleStringList *objects, SimpleStringList *objects,
SimpleStringList **found_objs,
int concurrentCons, int concurrentCons,
const char *progname, bool echo, bool quiet); const char *progname, bool echo, bool quiet);
@@ -405,7 +411,7 @@ main(int argc, char *argv[])
{ {
vacuum_one_database(&cparams, &vacopts, vacuum_one_database(&cparams, &vacopts,
stage, stage,
&objects, &objects, NULL,
concurrentCons, concurrentCons,
progname, echo, quiet); progname, echo, quiet);
} }
@@ -413,7 +419,7 @@ main(int argc, char *argv[])
else else
vacuum_one_database(&cparams, &vacopts, vacuum_one_database(&cparams, &vacopts,
ANALYZE_NO_STAGE, ANALYZE_NO_STAGE,
&objects, &objects, NULL,
concurrentCons, concurrentCons,
progname, echo, quiet); progname, echo, quiet);
} }
@@ -461,8 +467,36 @@ escape_quotes(const char *src)
/* /*
* vacuum_one_database * vacuum_one_database
* *
* Process tables in the given database. If the 'objects' list is empty, * Process tables in the given database.
* process all tables in the database. *
* There are two ways to specify the list of objects to process:
*
* 1) The "found_objs" parameter is a double pointer to a fully qualified list
* of objects to process, as returned by a previous call to
* vacuum_one_database().
*
* a) If both "found_objs" (the double pointer) and "*found_objs" (the
* once-dereferenced double pointer) are not NULL, this list takes
* priority, and anything specified in "objects" is ignored.
*
* b) If "found_objs" (the double pointer) is not NULL but "*found_objs"
* (the once-dereferenced double pointer) _is_ NULL, the "objects"
* parameter takes priority, and the results of the catalog query
* described in (2) are stored in "found_objs".
*
* c) If "found_objs" (the double pointer) is NULL, the "objects"
* parameter again takes priority, and the results of the catalog query
* are not saved.
*
* 2) The "objects" parameter is a user-specified list of objects to process.
* When (1b) or (1c) applies, this function performs a catalog query to
* retrieve a fully qualified list of objects to process, as described
* below.
*
* a) If "objects" is not NULL, the catalog query gathers only the objects
* listed in "objects".
*
* b) If "objects" is NULL, all tables in the database are gathered.
* *
* Note that this function is only concerned with running exactly one stage * Note that this function is only concerned with running exactly one stage
* when in analyze-in-stages mode; caller must iterate on us if necessary. * when in analyze-in-stages mode; caller must iterate on us if necessary.
@@ -475,22 +509,18 @@ vacuum_one_database(ConnParams *cparams,
vacuumingOptions *vacopts, vacuumingOptions *vacopts,
int stage, int stage,
SimpleStringList *objects, SimpleStringList *objects,
SimpleStringList **found_objs,
int concurrentCons, int concurrentCons,
const char *progname, bool echo, bool quiet) const char *progname, bool echo, bool quiet)
{ {
PQExpBufferData sql; PQExpBufferData sql;
PQExpBufferData buf;
PQExpBufferData catalog_query;
PGresult *res;
PGconn *conn; PGconn *conn;
SimpleStringListCell *cell; SimpleStringListCell *cell;
ParallelSlotArray *sa; ParallelSlotArray *sa;
SimpleStringList dbtables = {NULL, NULL}; int ntups = 0;
int i;
int ntups;
bool failed = false; bool failed = false;
bool objects_listed = false;
const char *initcmd; const char *initcmd;
SimpleStringList *ret = NULL;
const char *stage_commands[] = { const char *stage_commands[] = {
"SET default_statistics_target=1; SET vacuum_cost_delay=0;", "SET default_statistics_target=1; SET vacuum_cost_delay=0;",
"SET default_statistics_target=10; RESET vacuum_cost_delay;", "SET default_statistics_target=10; RESET vacuum_cost_delay;",
@@ -599,19 +629,155 @@ vacuum_one_database(ConnParams *cparams,
} }
/* /*
* Prepare the list of tables to process by querying the catalogs. * If the caller provided the results of a previous catalog query, just
* * use that. Otherwise, run the catalog query ourselves and set the
* Since we execute the constructed query with the default search_path * return variable if provided.
* (which could be unsafe), everything in this query MUST be fully
* qualified.
*
* First, build a WITH clause for the catalog query if any tables were
* specified, with a set of values made of relation names and their
* optional set of columns. This is used to match any provided column
* lists with the generated qualified identifiers and to filter for the
* tables provided via --table. If a listed table does not exist, the
* catalog query will fail.
*/ */
if (found_objs && *found_objs)
ret = *found_objs;
else
{
ret = retrieve_objects(conn, vacopts, objects, echo);
if (found_objs)
*found_objs = ret;
}
/*
* Count the number of objects in the catalog query result. If there are
* none, we are done.
*/
for (cell = ret ? ret->head : NULL; cell; cell = cell->next)
ntups++;
if (ntups == 0)
{
PQfinish(conn);
return;
}
/*
* Ensure concurrentCons is sane. If there are more connections than
* vacuumable relations, we don't need to use them all.
*/
if (concurrentCons > ntups)
concurrentCons = ntups;
if (concurrentCons <= 0)
concurrentCons = 1;
/*
* All slots need to be prepared to run the appropriate analyze stage, if
* caller requested that mode. We have to prepare the initial connection
* ourselves before setting up the slots.
*/
if (stage == ANALYZE_NO_STAGE)
initcmd = NULL;
else
{
initcmd = stage_commands[stage];
executeCommand(conn, initcmd, echo);
}
/*
* Setup the database connections. We reuse the connection we already have
* for the first slot. If not in parallel mode, the first slot in the
* array contains the connection.
*/
sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd);
ParallelSlotsAdoptConn(sa, conn);
initPQExpBuffer(&sql);
cell = ret->head;
do
{
const char *tabname = cell->val;
ParallelSlot *free_slot;
if (CancelRequested)
{
failed = true;
goto finish;
}
free_slot = ParallelSlotsGetIdle(sa, NULL);
if (!free_slot)
{
failed = true;
goto finish;
}
prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection),
vacopts, tabname);
/*
* Execute the vacuum. All errors are handled in processQueryResult
* through ParallelSlotsGetIdle.
*/
ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
run_vacuum_command(free_slot->connection, sql.data,
echo, tabname);
cell = cell->next;
} while (cell != NULL);
if (!ParallelSlotsWaitCompletion(sa))
{
failed = true;
goto finish;
}
/* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */
if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE)
{
const char *cmd = "VACUUM (ONLY_DATABASE_STATS);";
ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL);
if (!free_slot)
{
failed = true;
goto finish;
}
ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
run_vacuum_command(free_slot->connection, cmd, echo, NULL);
if (!ParallelSlotsWaitCompletion(sa))
failed = true;
}
finish:
ParallelSlotsTerminate(sa);
pg_free(sa);
termPQExpBuffer(&sql);
if (failed)
exit(1);
}
/*
* Prepare the list of tables to process by querying the catalogs.
*
* Since we execute the constructed query with the default search_path (which
* could be unsafe), everything in this query MUST be fully qualified.
*
* First, build a WITH clause for the catalog query if any tables were
* specified, with a set of values made of relation names and their optional
* set of columns. This is used to match any provided column lists with the
* generated qualified identifiers and to filter for the tables provided via
* --table. If a listed table does not exist, the catalog query will fail.
*/
static SimpleStringList *
retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
SimpleStringList *objects, bool echo)
{
PQExpBufferData buf;
PQExpBufferData catalog_query;
PGresult *res;
SimpleStringListCell *cell;
SimpleStringList *found_objs = palloc0(sizeof(SimpleStringList));
bool objects_listed = false;
initPQExpBuffer(&catalog_query); initPQExpBuffer(&catalog_query);
for (cell = objects ? objects->head : NULL; cell; cell = cell->next) for (cell = objects ? objects->head : NULL; cell; cell = cell->next)
{ {
@@ -765,23 +931,12 @@ vacuum_one_database(ConnParams *cparams,
termPQExpBuffer(&catalog_query); termPQExpBuffer(&catalog_query);
PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo)); PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo));
/*
* If no rows are returned, there are no matching tables, so we are done.
*/
ntups = PQntuples(res);
if (ntups == 0)
{
PQclear(res);
PQfinish(conn);
return;
}
/* /*
* Build qualified identifiers for each table, including the column list * Build qualified identifiers for each table, including the column list
* if given. * if given.
*/ */
initPQExpBuffer(&buf); initPQExpBuffer(&buf);
for (i = 0; i < ntups; i++) for (int i = 0; i < PQntuples(res); i++)
{ {
appendPQExpBufferStr(&buf, appendPQExpBufferStr(&buf,
fmtQualifiedIdEnc(PQgetvalue(res, i, 1), fmtQualifiedIdEnc(PQgetvalue(res, i, 1),
@@ -791,110 +946,13 @@ vacuum_one_database(ConnParams *cparams,
if (objects_listed && !PQgetisnull(res, i, 2)) if (objects_listed && !PQgetisnull(res, i, 2))
appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2)); appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2));
simple_string_list_append(&dbtables, buf.data); simple_string_list_append(found_objs, buf.data);
resetPQExpBuffer(&buf); resetPQExpBuffer(&buf);
} }
termPQExpBuffer(&buf); termPQExpBuffer(&buf);
PQclear(res); PQclear(res);
/* return found_objs;
* Ensure concurrentCons is sane. If there are more connections than
* vacuumable relations, we don't need to use them all.
*/
if (concurrentCons > ntups)
concurrentCons = ntups;
if (concurrentCons <= 0)
concurrentCons = 1;
/*
* All slots need to be prepared to run the appropriate analyze stage, if
* caller requested that mode. We have to prepare the initial connection
* ourselves before setting up the slots.
*/
if (stage == ANALYZE_NO_STAGE)
initcmd = NULL;
else
{
initcmd = stage_commands[stage];
executeCommand(conn, initcmd, echo);
}
/*
* Setup the database connections. We reuse the connection we already have
* for the first slot. If not in parallel mode, the first slot in the
* array contains the connection.
*/
sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd);
ParallelSlotsAdoptConn(sa, conn);
initPQExpBuffer(&sql);
cell = dbtables.head;
do
{
const char *tabname = cell->val;
ParallelSlot *free_slot;
if (CancelRequested)
{
failed = true;
goto finish;
}
free_slot = ParallelSlotsGetIdle(sa, NULL);
if (!free_slot)
{
failed = true;
goto finish;
}
prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection),
vacopts, tabname);
/*
* Execute the vacuum. All errors are handled in processQueryResult
* through ParallelSlotsGetIdle.
*/
ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
run_vacuum_command(free_slot->connection, sql.data,
echo, tabname);
cell = cell->next;
} while (cell != NULL);
if (!ParallelSlotsWaitCompletion(sa))
{
failed = true;
goto finish;
}
/* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */
if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE)
{
const char *cmd = "VACUUM (ONLY_DATABASE_STATS);";
ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL);
if (!free_slot)
{
failed = true;
goto finish;
}
ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
run_vacuum_command(free_slot->connection, cmd, echo, NULL);
if (!ParallelSlotsWaitCompletion(sa))
failed = true;
}
finish:
ParallelSlotsTerminate(sa);
pg_free(sa);
termPQExpBuffer(&sql);
if (failed)
exit(1);
} }
/* /*
@@ -941,7 +999,7 @@ vacuum_all_databases(ConnParams *cparams,
vacuum_one_database(cparams, vacopts, vacuum_one_database(cparams, vacopts,
stage, stage,
objects, objects, NULL,
concurrentCons, concurrentCons,
progname, echo, quiet); progname, echo, quiet);
} }
@@ -955,7 +1013,7 @@ vacuum_all_databases(ConnParams *cparams,
vacuum_one_database(cparams, vacopts, vacuum_one_database(cparams, vacopts,
ANALYZE_NO_STAGE, ANALYZE_NO_STAGE,
objects, objects, NULL,
concurrentCons, concurrentCons,
progname, echo, quiet); progname, echo, quiet);
} }