vacuumdb: Teach vacuum_one_database() to reuse query results.

Presently, each call to vacuum_one_database() queries the catalogs to retrieve the list of tables to process. A follow-up commit will add a "missing stats only" feature to --analyze-in-stages, which requires saving the catalog query results (since tables without statistics will have them after the first stage). This commit adds a new parameter to vacuum_one_database() that specifies either a previously-retrieved list or a place to return the catalog query results. Note that nothing uses this new parameter yet.

Author: Corey Huinker <corey.huinker@gmail.com>
Co-authored-by: Nathan Bossart <nathandbossart@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan
2025-08-05 07:41:25 +03:00 · 2025-03-18 16:32:55 -05:00
parent a6524105d2
commit 9c03c8d187
1 changed files with 194 additions and 136 deletions
--- a/src/bin/scripts/vacuumdb.c
+++ b/src/bin/scripts/vacuumdb.c
@@ -62,10 +62,16 @@ typedef enum
 static VacObjFilter objfilter = OBJFILTER_NONE;
 static SimpleStringList *retrieve_objects(PGconn *conn,
 										  vacuumingOptions *vacopts,
 										  SimpleStringList *objects,
 										  bool echo);
 static void vacuum_one_database(ConnParams *cparams,
 								vacuumingOptions *vacopts,
 								int stage,
 								SimpleStringList *objects,
 								SimpleStringList **found_objs,
 								int concurrentCons,
 								const char *progname, bool echo, bool quiet);
@@ -405,7 +411,7 @@ main(int argc, char *argv[])
 			{
 				vacuum_one_database(&cparams, &vacopts,
 									stage,
-									&objects,
+									&objects, NULL,
 									concurrentCons,
 									progname, echo, quiet);
 			}
@@ -413,7 +419,7 @@ main(int argc, char *argv[])
 		else
 			vacuum_one_database(&cparams, &vacopts,
 								ANALYZE_NO_STAGE,
-								&objects,
+								&objects, NULL,
 								concurrentCons,
 								progname, echo, quiet);
 	}
@@ -461,8 +467,36 @@ escape_quotes(const char *src)
 /*
 * vacuum_one_database
 *
- * Process tables in the given database.  If the 'objects' list is empty,
+ * Process tables in the given database.
- * process all tables in the database.
+ *
 * There are two ways to specify the list of objects to process:
 *
 * 1) The "found_objs" parameter is a double pointer to a fully qualified list
 *    of objects to process, as returned by a previous call to
 *    vacuum_one_database().
 *
 *     a) If both "found_objs" (the double pointer) and "*found_objs" (the
 *        once-dereferenced double pointer) are not NULL, this list takes
 *        priority, and anything specified in "objects" is ignored.
 *
 *     b) If "found_objs" (the double pointer) is not NULL but "*found_objs"
 *        (the once-dereferenced double pointer) _is_ NULL, the "objects"
 *        parameter takes priority, and the results of the catalog query
 *        described in (2) are stored in "*found_objs".
 *
 *     c) If "found_objs" (the double pointer) is NULL, the "objects"
 *        parameter again takes priority, and the results of the catalog query
 *        are not saved.
 *
 * 2) The "objects" parameter is a user-specified list of objects to process.
 *    When (1b) or (1c) applies, this function performs a catalog query to
 *    retrieve a fully qualified list of objects to process, as described
 *    below.
 *
 *     a) If "objects" is a non-empty list, the catalog query gathers only
 *        the objects listed in "objects".
 *
 *     b) If "objects" is NULL or an empty list, all tables in the database
 *        are gathered.
 *
 * Note that this function is only concerned with running exactly one stage
 * when in analyze-in-stages mode; caller must iterate on us if necessary.
@@ -475,22 +509,18 @@ vacuum_one_database(ConnParams *cparams,
 					vacuumingOptions *vacopts,
 					int stage,
 					SimpleStringList *objects,
 					SimpleStringList **found_objs,
 					int concurrentCons,
 					const char *progname, bool echo, bool quiet)
 {
 	PQExpBufferData sql;
 	PQExpBufferData buf;
 	PQExpBufferData catalog_query;
 	PGresult   *res;
 	PGconn	   *conn;
 	SimpleStringListCell *cell;
 	ParallelSlotArray *sa;
-	SimpleStringList dbtables = {NULL, NULL};
+	int			ntups = 0;
 	int			i;
 	int			ntups;
 	bool		failed = false;
 	bool		objects_listed = false;
 	const char *initcmd;
 	SimpleStringList *ret = NULL;
 	const char *stage_commands[] = {
 		"SET default_statistics_target=1; SET vacuum_cost_delay=0;",
 		"SET default_statistics_target=10; RESET vacuum_cost_delay;",
@@ -599,19 +629,155 @@ vacuum_one_database(ConnParams *cparams,
 	}
 	/*
-	 * Prepare the list of tables to process by querying the catalogs.
+	 * If the caller provided the results of a previous catalog query, just
-	 *
+	 * use that.  Otherwise, run the catalog query ourselves and set the
-	 * Since we execute the constructed query with the default search_path
+	 * return variable if provided.
 	 * (which could be unsafe), everything in this query MUST be fully
 	 * qualified.
 	 *
 	 * First, build a WITH clause for the catalog query if any tables were
 	 * specified, with a set of values made of relation names and their
 	 * optional set of columns.  This is used to match any provided column
 	 * lists with the generated qualified identifiers and to filter for the
 	 * tables provided via --table.  If a listed table does not exist, the
 	 * catalog query will fail.
 	 */
 	if (found_objs && *found_objs)
 		ret = *found_objs;
 	else
 	{
 		ret = retrieve_objects(conn, vacopts, objects, echo);
 		if (found_objs)
 			*found_objs = ret;
 	}
 	/*
 	 * Count the number of objects in the catalog query result.  If there are
 	 * none, we are done.
 	 */
 	for (cell = ret ? ret->head : NULL; cell; cell = cell->next)
 		ntups++;
 	if (ntups == 0)
 	{
 		PQfinish(conn);
 		return;
 	}
 	/*
 	 * Ensure concurrentCons is sane.  If there are more connections than
 	 * vacuumable relations, we don't need to use them all.
 	 */
 	if (concurrentCons > ntups)
 		concurrentCons = ntups;
 	if (concurrentCons <= 0)
 		concurrentCons = 1;
 	/*
 	 * All slots need to be prepared to run the appropriate analyze stage, if
 	 * caller requested that mode.  We have to prepare the initial connection
 	 * ourselves before setting up the slots.
 	 */
 	if (stage == ANALYZE_NO_STAGE)
 		initcmd = NULL;
 	else
 	{
 		initcmd = stage_commands[stage];
 		executeCommand(conn, initcmd, echo);
 	}
 	/*
 	 * Setup the database connections. We reuse the connection we already have
 	 * for the first slot.  If not in parallel mode, the first slot in the
 	 * array contains the connection.
 	 */
 	sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd);
 	ParallelSlotsAdoptConn(sa, conn);
 	initPQExpBuffer(&sql);
 	cell = ret->head;
 	do
 	{
 		const char *tabname = cell->val;
 		ParallelSlot *free_slot;
 		if (CancelRequested)
 		{
 			failed = true;
 			goto finish;
 		}
 		free_slot = ParallelSlotsGetIdle(sa, NULL);
 		if (!free_slot)
 		{
 			failed = true;
 			goto finish;
 		}
 		prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection),
 							   vacopts, tabname);
 		/*
 		 * Execute the vacuum.  All errors are handled in processQueryResult
 		 * through ParallelSlotsGetIdle.
 		 */
 		ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
 		run_vacuum_command(free_slot->connection, sql.data,
 						   echo, tabname);
 		cell = cell->next;
 	} while (cell != NULL);
 	if (!ParallelSlotsWaitCompletion(sa))
 	{
 		failed = true;
 		goto finish;
 	}
 	/* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */
 	if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE)
 	{
 		const char *cmd = "VACUUM (ONLY_DATABASE_STATS);";
 		ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL);
 		if (!free_slot)
 		{
 			failed = true;
 			goto finish;
 		}
 		ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
 		run_vacuum_command(free_slot->connection, cmd, echo, NULL);
 		if (!ParallelSlotsWaitCompletion(sa))
 			failed = true;
 	}
 finish:
 	ParallelSlotsTerminate(sa);
 	pg_free(sa);
 	termPQExpBuffer(&sql);
 	if (failed)
 		exit(1);
 }
 /*
 * Prepare the list of tables to process by querying the catalogs.
 *
 * Since we execute the constructed query with the default search_path (which
 * could be unsafe), everything in this query MUST be fully qualified.
 *
 * First, build a WITH clause for the catalog query if any tables were
 * specified, with a set of values made of relation names and their optional
 * set of columns.  This is used to match any provided column lists with the
 * generated qualified identifiers and to filter for the tables provided via
 * --table.  If a listed table does not exist, the catalog query will fail.
 */
 static SimpleStringList *
 retrieve_objects(PGconn *conn, vacuumingOptions *vacopts,
 				 SimpleStringList *objects, bool echo)
 {
 	PQExpBufferData buf;
 	PQExpBufferData catalog_query;
 	PGresult   *res;
 	SimpleStringListCell *cell;
 	SimpleStringList *found_objs = palloc0(sizeof(SimpleStringList));
 	bool		objects_listed = false;
 	initPQExpBuffer(&catalog_query);
 	for (cell = objects ? objects->head : NULL; cell; cell = cell->next)
 	{
@@ -765,23 +931,12 @@ vacuum_one_database(ConnParams *cparams,
 	termPQExpBuffer(&catalog_query);
 	PQclear(executeQuery(conn, ALWAYS_SECURE_SEARCH_PATH_SQL, echo));
 	/*
 	 * If no rows are returned, there are no matching tables, so we are done.
 	 */
 	ntups = PQntuples(res);
 	if (ntups == 0)
 	{
 		PQclear(res);
 		PQfinish(conn);
 		return;
 	}
 	/*
 	 * Build qualified identifiers for each table, including the column list
 	 * if given.
 	 */
 	initPQExpBuffer(&buf);
-	for (i = 0; i < ntups; i++)
+	for (int i = 0; i < PQntuples(res); i++)
 	{
 		appendPQExpBufferStr(&buf,
 							 fmtQualifiedIdEnc(PQgetvalue(res, i, 1),
@@ -791,110 +946,13 @@ vacuum_one_database(ConnParams *cparams,
 		if (objects_listed && !PQgetisnull(res, i, 2))
 			appendPQExpBufferStr(&buf, PQgetvalue(res, i, 2));
-		simple_string_list_append(&dbtables, buf.data);
+		simple_string_list_append(found_objs, buf.data);
 		resetPQExpBuffer(&buf);
 	}
 	termPQExpBuffer(&buf);
 	PQclear(res);
-	/*
+	return found_objs;
 	 * Ensure concurrentCons is sane.  If there are more connections than
 	 * vacuumable relations, we don't need to use them all.
 	 */
 	if (concurrentCons > ntups)
 		concurrentCons = ntups;
 	if (concurrentCons <= 0)
 		concurrentCons = 1;
 	/*
 	 * All slots need to be prepared to run the appropriate analyze stage, if
 	 * caller requested that mode.  We have to prepare the initial connection
 	 * ourselves before setting up the slots.
 	 */
 	if (stage == ANALYZE_NO_STAGE)
 		initcmd = NULL;
 	else
 	{
 		initcmd = stage_commands[stage];
 		executeCommand(conn, initcmd, echo);
 	}
 	/*
 	 * Setup the database connections. We reuse the connection we already have
 	 * for the first slot.  If not in parallel mode, the first slot in the
 	 * array contains the connection.
 	 */
 	sa = ParallelSlotsSetup(concurrentCons, cparams, progname, echo, initcmd);
 	ParallelSlotsAdoptConn(sa, conn);
 	initPQExpBuffer(&sql);
 	cell = dbtables.head;
 	do
 	{
 		const char *tabname = cell->val;
 		ParallelSlot *free_slot;
 		if (CancelRequested)
 		{
 			failed = true;
 			goto finish;
 		}
 		free_slot = ParallelSlotsGetIdle(sa, NULL);
 		if (!free_slot)
 		{
 			failed = true;
 			goto finish;
 		}
 		prepare_vacuum_command(&sql, PQserverVersion(free_slot->connection),
 							   vacopts, tabname);
 		/*
 		 * Execute the vacuum.  All errors are handled in processQueryResult
 		 * through ParallelSlotsGetIdle.
 		 */
 		ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
 		run_vacuum_command(free_slot->connection, sql.data,
 						   echo, tabname);
 		cell = cell->next;
 	} while (cell != NULL);
 	if (!ParallelSlotsWaitCompletion(sa))
 	{
 		failed = true;
 		goto finish;
 	}
 	/* If we used SKIP_DATABASE_STATS, mop up with ONLY_DATABASE_STATS */
 	if (vacopts->skip_database_stats && stage == ANALYZE_NO_STAGE)
 	{
 		const char *cmd = "VACUUM (ONLY_DATABASE_STATS);";
 		ParallelSlot *free_slot = ParallelSlotsGetIdle(sa, NULL);
 		if (!free_slot)
 		{
 			failed = true;
 			goto finish;
 		}
 		ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
 		run_vacuum_command(free_slot->connection, cmd, echo, NULL);
 		if (!ParallelSlotsWaitCompletion(sa))
 			failed = true;
 	}
 finish:
 	ParallelSlotsTerminate(sa);
 	pg_free(sa);
 	termPQExpBuffer(&sql);
 	if (failed)
 		exit(1);
 }
 /*
@@ -941,7 +999,7 @@ vacuum_all_databases(ConnParams *cparams,
 				vacuum_one_database(cparams, vacopts,
 									stage,
-									objects,
+									objects, NULL,
 									concurrentCons,
 									progname, echo, quiet);
 			}
@@ -955,7 +1013,7 @@ vacuum_all_databases(ConnParams *cparams,
 			vacuum_one_database(cparams, vacopts,
 								ANALYZE_NO_STAGE,
-								objects,
+								objects, NULL,
 								concurrentCons,
 								progname, echo, quiet);
 		}