1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-25 20:23:07 +03:00

pg_upgrade: Parallelize subscription check.

This commit makes use of the new task framework in pg_upgrade to
parallelize the part of check_old_cluster_subscription_state() that
verifies each of the subscribed tables is in the 'i' (initialize)
or 'r' (ready) state.  This check will now process multiple
databases concurrently when pg_upgrade's --jobs option is provided
a value greater than 1.

Reviewed-by: Daniel Gustafsson, Ilya Gladyshev
Discussion: https://postgr.es/m/20240516211638.GA1688936%40nathanxps13
This commit is contained in:
Nathan Bossart
2024-09-16 16:10:33 -05:00
parent 6d3d2e8e54
commit 7baa36de58

View File

@@ -1905,6 +1905,38 @@ check_old_cluster_for_valid_slots(void)
check_ok(); check_ok();
} }
/*
 * process_old_sub_state_check()
 *
 * UpgradeTask result callback used by check_old_cluster_subscription_state().
 *
 * Each row in "res" describes one subscription relation whose sync state
 * failed the check.  One line per row is appended to the report file
 * referenced by "arg" (an UpgradeTaskReport).  The report file is opened
 * only when the first offending row is seen, so an empty result leaves
 * report->file untouched.
 */
static void
process_old_sub_state_check(DbInfo *dbinfo, PGresult *res, void *arg)
{
	UpgradeTaskReport *report = (UpgradeTaskReport *) arg;
	int			nrows = PQntuples(res);
	int			col_state = PQfnumber(res, "srsubstate");
	int			col_sub = PQfnumber(res, "subname");
	int			col_nsp = PQfnumber(res, "nspname");
	int			col_rel = PQfnumber(res, "relname");
	int			row = 0;

	/* Compile-time check that this function matches the callback type. */
	AssertVariableIsOfType(&process_old_sub_state_check, UpgradeTaskProcessCB);

	while (row < nrows)
	{
		/* Open the report file on first use; give up if that fails. */
		if (report->file == NULL)
		{
			report->file = fopen_priv(report->path, "w");
			if (report->file == NULL)
				pg_fatal("could not open file \"%s\": %m", report->path);
		}

		fprintf(report->file,
				"The table sync state \"%s\" is not allowed for database:\"%s\" subscription:\"%s\" schema:\"%s\" relation:\"%s\"\n",
				PQgetvalue(res, row, col_state),
				dbinfo->db_name,
				PQgetvalue(res, row, col_sub),
				PQgetvalue(res, row, col_nsp),
				PQgetvalue(res, row, col_rel));

		row++;
	}
}
/* /*
* check_old_cluster_subscription_state() * check_old_cluster_subscription_state()
* *
@@ -1915,28 +1947,25 @@ check_old_cluster_for_valid_slots(void)
static void static void
check_old_cluster_subscription_state(void) check_old_cluster_subscription_state(void)
{ {
FILE *script = NULL; UpgradeTask *task = upgrade_task_create();
char output_path[MAXPGPATH]; UpgradeTaskReport report;
const char *query;
PGresult *res;
PGconn *conn;
int ntup; int ntup;
prep_status("Checking for subscription state"); prep_status("Checking for subscription state");
snprintf(output_path, sizeof(output_path), "%s/%s", report.file = NULL;
snprintf(report.path, sizeof(report.path), "%s/%s",
log_opts.basedir, log_opts.basedir,
"subs_invalid.txt"); "subs_invalid.txt");
for (int dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
{
PGresult *res;
DbInfo *active_db = &old_cluster.dbarr.dbs[dbnum];
PGconn *conn = connectToServer(&old_cluster, active_db->db_name);
/* We need to check for pg_replication_origin only once. */
if (dbnum == 0)
{
/* /*
* Check that all the subscriptions have their respective * Check that all the subscriptions have their respective replication
* replication origin. * origin. This check only needs to run once.
*/ */
conn = connectToServer(&old_cluster, old_cluster.dbarr.dbs[0].db_name);
res = executeQueryOrDie(conn, res = executeQueryOrDie(conn,
"SELECT d.datname, s.subname " "SELECT d.datname, s.subname "
"FROM pg_catalog.pg_subscription s " "FROM pg_catalog.pg_subscription s "
@@ -1945,49 +1974,48 @@ check_old_cluster_subscription_state(void)
"INNER JOIN pg_catalog.pg_database d " "INNER JOIN pg_catalog.pg_database d "
" ON d.oid = s.subdbid " " ON d.oid = s.subdbid "
"WHERE o.roname IS NULL;"); "WHERE o.roname IS NULL;");
ntup = PQntuples(res); ntup = PQntuples(res);
for (int i = 0; i < ntup; i++) for (int i = 0; i < ntup; i++)
{ {
if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) if (report.file == NULL &&
pg_fatal("could not open file \"%s\": %m", output_path); (report.file = fopen_priv(report.path, "w")) == NULL)
fprintf(script, "The replication origin is missing for database:\"%s\" subscription:\"%s\"\n", pg_fatal("could not open file \"%s\": %m", report.path);
fprintf(report.file, "The replication origin is missing for database:\"%s\" subscription:\"%s\"\n",
PQgetvalue(res, i, 0), PQgetvalue(res, i, 0),
PQgetvalue(res, i, 1)); PQgetvalue(res, i, 1));
} }
PQclear(res); PQclear(res);
} PQfinish(conn);
/* /*
* We don't allow upgrade if there is a risk of dangling slot or * We don't allow upgrade if there is a risk of dangling slot or origin
* origin corresponding to initial sync after upgrade. * corresponding to initial sync after upgrade.
* *
* A slot/origin not created yet refers to the 'i' (initialize) state, * A slot/origin not created yet refers to the 'i' (initialize) state,
* while 'r' (ready) state refers to a slot/origin created previously * while 'r' (ready) state refers to a slot/origin created previously but
* but already dropped. These states are supported for pg_upgrade. The * already dropped. These states are supported for pg_upgrade. The other
* other states listed below are not supported: * states listed below are not supported:
* *
* a) SUBREL_STATE_DATASYNC: A relation upgraded while in this state * a) SUBREL_STATE_DATASYNC: A relation upgraded while in this state would
* would retain a replication slot, which could not be dropped by the * retain a replication slot, which could not be dropped by the sync
* sync worker spawned after the upgrade because the subscription ID * worker spawned after the upgrade because the subscription ID used for
* used for the slot name won't match anymore. * the slot name won't match anymore.
* *
* b) SUBREL_STATE_SYNCDONE: A relation upgraded while in this state * b) SUBREL_STATE_SYNCDONE: A relation upgraded while in this state would
* would retain the replication origin when there is a failure in * retain the replication origin when there is a failure in tablesync
* tablesync worker immediately after dropping the replication slot in * worker immediately after dropping the replication slot in the
* the publisher. * publisher.
* *
* c) SUBREL_STATE_FINISHEDCOPY: A tablesync worker spawned to work on * c) SUBREL_STATE_FINISHEDCOPY: A tablesync worker spawned to work on a
* a relation upgraded while in this state would expect an origin ID * relation upgraded while in this state would expect an origin ID with
* with the OID of the subscription used before the upgrade, causing * the OID of the subscription used before the upgrade, causing it to
* it to fail. * fail.
* *
* d) SUBREL_STATE_SYNCWAIT, SUBREL_STATE_CATCHUP and * d) SUBREL_STATE_SYNCWAIT, SUBREL_STATE_CATCHUP and
* SUBREL_STATE_UNKNOWN: These states are not stored in the catalog, * SUBREL_STATE_UNKNOWN: These states are not stored in the catalog, so we
* so we need not allow these states. * need not allow these states.
*/ */
res = executeQueryOrDie(conn, query = "SELECT r.srsubstate, s.subname, n.nspname, c.relname "
"SELECT r.srsubstate, s.subname, n.nspname, c.relname "
"FROM pg_catalog.pg_subscription_rel r " "FROM pg_catalog.pg_subscription_rel r "
"LEFT JOIN pg_catalog.pg_subscription s" "LEFT JOIN pg_catalog.pg_subscription s"
" ON r.srsubid = s.oid " " ON r.srsubid = s.oid "
@@ -1996,34 +2024,22 @@ check_old_cluster_subscription_state(void)
"LEFT JOIN pg_catalog.pg_namespace n" "LEFT JOIN pg_catalog.pg_namespace n"
" ON c.relnamespace = n.oid " " ON c.relnamespace = n.oid "
"WHERE r.srsubstate NOT IN ('i', 'r') " "WHERE r.srsubstate NOT IN ('i', 'r') "
"ORDER BY s.subname"); "ORDER BY s.subname";
ntup = PQntuples(res); upgrade_task_add_step(task, query, process_old_sub_state_check,
for (int i = 0; i < ntup; i++) true, &report);
upgrade_task_run(task, &old_cluster);
upgrade_task_free(task);
if (report.file)
{ {
if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) fclose(report.file);
pg_fatal("could not open file \"%s\": %m", output_path);
fprintf(script, "The table sync state \"%s\" is not allowed for database:\"%s\" subscription:\"%s\" schema:\"%s\" relation:\"%s\"\n",
PQgetvalue(res, i, 0),
active_db->db_name,
PQgetvalue(res, i, 1),
PQgetvalue(res, i, 2),
PQgetvalue(res, i, 3));
}
PQclear(res);
PQfinish(conn);
}
if (script)
{
fclose(script);
pg_log(PG_REPORT, "fatal"); pg_log(PG_REPORT, "fatal");
pg_fatal("Your installation contains subscriptions without origin or having relations not in i (initialize) or r (ready) state.\n" pg_fatal("Your installation contains subscriptions without origin or having relations not in i (initialize) or r (ready) state.\n"
"You can allow the initial sync to finish for all relations and then restart the upgrade.\n" "You can allow the initial sync to finish for all relations and then restart the upgrade.\n"
"A list of the problematic subscriptions is in the file:\n" "A list of the problematic subscriptions is in the file:\n"
" %s", output_path); " %s", report.path);
} }
else else
check_ok(); check_ok();