1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-11 20:28:21 +03:00

pg_upgrade: Parallelize data type checks.

This commit makes use of the new task framework in pg_upgrade to
parallelize the checks for incompatible data types, i.e., data
types whose on-disk format has changed, data types that have been
removed, etc.  This step will now process multiple databases
concurrently when pg_upgrade's --jobs option is provided a value
greater than 1.

Reviewed-by: Daniel Gustafsson, Ilya Gladyshev
Discussion: https://postgr.es/m/20240516211638.GA1688936%40nathanxps13
This commit is contained in:
Nathan Bossart
2024-09-16 16:10:33 -05:00
parent 6ab8f27bc7
commit bbf83cab98

View File

@@ -315,99 +315,25 @@ static DataTypesUsageChecks data_types_usage_checks[] =
};
/*
* check_for_data_types_usage()
* Detect whether there are any stored columns depending on given type(s)
*
* If so, write a report to the given file name and signal a failure to the
* user.
*
* The checks to run are defined in a DataTypesUsageChecks structure where
* each check has a metadata for explaining errors to the user, a base_query,
* a report filename and a function pointer hook for validating if the check
* should be executed given the cluster at hand.
*
* base_query should be a SELECT yielding a single column named "oid",
* containing the pg_type OIDs of one or more types that are known to have
* inconsistent on-disk representations across server versions.
*
* We check for the type(s) in tables, matviews, and indexes, but not views;
* there's no storage involved in a view.
* Private state for check_for_data_types_usage()'s UpgradeTask.
*/
static void
check_for_data_types_usage(ClusterInfo *cluster, DataTypesUsageChecks *checks)
struct data_type_check_state
{
bool found = false;
bool *results;
PQExpBufferData report;
DataTypesUsageChecks *tmp = checks;
int n_data_types_usage_checks = 0;
prep_status("Checking data type usage");
/* Gather number of checks to perform */
while (tmp->status != NULL)
{
n_data_types_usage_checks++;
tmp++;
}
/* Prepare an array to store the results of checks in */
results = pg_malloc0(sizeof(bool) * n_data_types_usage_checks);
DataTypesUsageChecks *check; /* the check for this step */
bool *result; /* true if check failed for any database */
PQExpBuffer *report; /* buffer for report on failed checks */
};
/*
* Connect to each database in the cluster and run all defined checks
* against that database before trying the next one.
* Returns a palloc'd query string for the data type check, for use by
* check_for_data_types_usage()'s UpgradeTask.
*/
for (int dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
static char *
data_type_check_query(int checknum)
{
DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
PGconn *conn = connectToServer(cluster, active_db->db_name);
DataTypesUsageChecks *check = &data_types_usage_checks[checknum];
for (int checknum = 0; checknum < n_data_types_usage_checks; checknum++)
{
PGresult *res;
int ntups;
int i_nspname;
int i_relname;
int i_attname;
FILE *script = NULL;
bool db_used = false;
char output_path[MAXPGPATH];
DataTypesUsageChecks *cur_check = &checks[checknum];
if (cur_check->threshold_version == MANUAL_CHECK)
{
Assert(cur_check->version_hook);
/*
* Make sure that the check applies to the current cluster
* version and skip if not. If no check hook has been defined
* we run the check for all versions.
*/
if (!cur_check->version_hook(cluster))
continue;
}
else if (cur_check->threshold_version != ALL_VERSIONS)
{
if (GET_MAJOR_VERSION(cluster->major_version) > cur_check->threshold_version)
continue;
}
else
Assert(cur_check->threshold_version == ALL_VERSIONS);
snprintf(output_path, sizeof(output_path), "%s/%s",
log_opts.basedir,
cur_check->report_filename);
/*
* The type(s) of interest might be wrapped in a domain, array,
* composite, or range, and these container types can be nested
* (to varying extents depending on server version, but that's not
* of concern here). To handle all these cases we need a
* recursive CTE.
*/
res = executeQueryOrDie(conn,
"WITH RECURSIVE oids AS ( "
return psprintf("WITH RECURSIVE oids AS ( "
/* start with the type(s) returned by base_query */
" %s "
" UNION ALL "
@@ -451,39 +377,55 @@ check_for_data_types_usage(ClusterInfo *cluster, DataTypesUsageChecks *checks)
" n.nspname !~ '^pg_toast_temp_' AND "
/* exclude system catalogs, too */
" n.nspname NOT IN ('pg_catalog', 'information_schema')",
cur_check->base_query);
ntups = PQntuples(res);
check->base_query);
}
/*
* The datatype was found, so extract the data and log to the
* requested filename. We need to open the file for appending
* since the check might have already found the type in another
* database earlier in the loop.
* Callback function for processing results of queries for
* check_for_data_types_usage()'s UpgradeTask. If the query returned any rows
* (i.e., the check failed), write the details to the report file.
*/
static void
process_data_type_check(DbInfo *dbinfo, PGresult *res, void *arg)
{
struct data_type_check_state *state = (struct data_type_check_state *) arg;
int ntups = PQntuples(res);
AssertVariableIsOfType(&process_data_type_check, UpgradeTaskProcessCB);
if (ntups)
{
/*
* Make sure we have a buffer to save reports to now that we
* found a first failing check.
*/
if (!found)
initPQExpBuffer(&report);
found = true;
char output_path[MAXPGPATH];
int i_nspname;
int i_relname;
int i_attname;
FILE *script = NULL;
bool db_used = false;
snprintf(output_path, sizeof(output_path), "%s/%s",
log_opts.basedir,
state->check->report_filename);
/*
* If this is the first time we see an error for the check in
* question then print a status message of the failure.
* Make sure we have a buffer to save reports to now that we found a
* first failing check.
*/
if (!results[checknum])
if (*state->report == NULL)
*state->report = createPQExpBuffer();
/*
* If this is the first time we see an error for the check in question
* then print a status message of the failure.
*/
if (!(*state->result))
{
pg_log(PG_REPORT, " failed check: %s", _(cur_check->status));
appendPQExpBuffer(&report, "\n%s\n%s %s\n",
_(cur_check->report_text),
pg_log(PG_REPORT, " failed check: %s", _(state->check->status));
appendPQExpBuffer(*state->report, "\n%s\n%s %s\n",
_(state->check->report_text),
_("A list of the problem columns is in the file:"),
output_path);
}
results[checknum] = true;
*state->result = true;
i_nspname = PQfnumber(res, "nspname");
i_relname = PQfnumber(res, "relname");
@@ -496,7 +438,7 @@ check_for_data_types_usage(ClusterInfo *cluster, DataTypesUsageChecks *checks)
if (!db_used)
{
fprintf(script, "In database: %s\n", active_db->db_name);
fprintf(script, "In database: %s\n", dbinfo->db_name);
db_used = true;
}
fprintf(script, " %s.%s.%s\n",
@@ -511,17 +453,106 @@ check_for_data_types_usage(ClusterInfo *cluster, DataTypesUsageChecks *checks)
script = NULL;
}
}
PQclear(res);
}
PQfinish(conn);
/*
* check_for_data_types_usage()
* Detect whether there are any stored columns depending on given type(s)
*
* If so, write a report to the given file name and signal a failure to the
* user.
*
* The checks to run are defined in a DataTypesUsageChecks structure where
* each check has a metadata for explaining errors to the user, a base_query,
* a report filename and a function pointer hook for validating if the check
* should be executed given the cluster at hand.
*
* base_query should be a SELECT yielding a single column named "oid",
* containing the pg_type OIDs of one or more types that are known to have
* inconsistent on-disk representations across server versions.
*
* We check for the type(s) in tables, matviews, and indexes, but not views;
* there's no storage involved in a view.
*/
static void
check_for_data_types_usage(ClusterInfo *cluster)
{
bool *results;
PQExpBuffer report = NULL;
DataTypesUsageChecks *tmp = data_types_usage_checks;
int n_data_types_usage_checks = 0;
UpgradeTask *task = upgrade_task_create();
char **queries = NULL;
struct data_type_check_state *states;
prep_status("Checking data type usage");
/* Gather number of checks to perform */
while (tmp->status != NULL)
{
n_data_types_usage_checks++;
tmp++;
}
if (found)
pg_fatal("Data type checks failed: %s", report.data);
/* Prepare an array to store the results of checks in */
results = pg_malloc0(sizeof(bool) * n_data_types_usage_checks);
queries = pg_malloc0(sizeof(char *) * n_data_types_usage_checks);
states = pg_malloc0(sizeof(struct data_type_check_state) * n_data_types_usage_checks);
for (int i = 0; i < n_data_types_usage_checks; i++)
{
DataTypesUsageChecks *check = &data_types_usage_checks[i];
if (check->threshold_version == MANUAL_CHECK)
{
Assert(check->version_hook);
/*
* Make sure that the check applies to the current cluster version
* and skip it if not.
*/
if (!check->version_hook(cluster))
continue;
}
else if (check->threshold_version != ALL_VERSIONS)
{
if (GET_MAJOR_VERSION(cluster->major_version) > check->threshold_version)
continue;
}
else
Assert(check->threshold_version == ALL_VERSIONS);
queries[i] = data_type_check_query(i);
states[i].check = check;
states[i].result = &results[i];
states[i].report = &report;
upgrade_task_add_step(task, queries[i], process_data_type_check,
true, &states[i]);
}
/*
* Connect to each database in the cluster and run all defined checks
* against that database before trying the next one.
*/
upgrade_task_run(task, cluster);
upgrade_task_free(task);
if (report)
{
pg_fatal("Data type checks failed: %s", report->data);
destroyPQExpBuffer(report);
}
pg_free(results);
for (int i = 0; i < n_data_types_usage_checks; i++)
{
if (queries[i])
pg_free(queries[i]);
}
pg_free(queries);
pg_free(states);
check_ok();
}
@@ -616,7 +647,7 @@ check_and_dump_old_cluster(void)
check_old_cluster_subscription_state();
}
check_for_data_types_usage(&old_cluster, data_types_usage_checks);
check_for_data_types_usage(&old_cluster);
/*
* PG 14 changed the function signature of encoding conversion functions.