Avoid syncing data twice for the 'publish_via_partition_root' option.

When a subscription has multiple publications, and one of them publishes via the parent table (by using publish_via_partition_root) while another directly publishes the child table, we end up copying the same data twice during initial synchronization. The reason is that we get both the parent and child tables from the publisher and try to copy the data for both of them. This patch extends the function pg_get_publication_tables() to take a publication list as its input parameter. This allows us to exclude a partition table whose ancestor is also published by the same publication list. This problem also exists in back-branches, but we decided to fix it there in a separate commit if required. The fix for back-branches requires quite complicated changes to fetch the required table information from the publisher, as we can't update the function pg_get_publication_tables() in back-branches. We are not sure whether we want to deviate and complicate the code in back-branches for this problem, as there are no field reports yet. Author: Wang wei Reviewed-by: Peter Smith, Jacob Champion, Kuroda Hayato, Vignesh C, Osumi Takamichi, Amit Kapila Discussion: https://postgr.es/m/OS0PR01MB57167F45D481F78CDC5986F794B99@OS0PR01MB5716.jpnprd01.prod.outlook.com
2025-07-15 19:21:59 +03:00 · 2023-03-29 10:46:58 +05:30
parent de5a47af2d
commit 062a844424
10 changed files with 274 additions and 99 deletions
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@ -1936,21 +1936,60 @@ fetch_table_list(WalReceiverConn *wrconn, List *publications)
 	WalRcvExecResult *res;
 	StringInfoData cmd;
 	TupleTableSlot *slot;
-	Oid			tableRow[3] = {TEXTOID, TEXTOID, NAMEARRAYOID};
+	Oid			tableRow[3] = {TEXTOID, TEXTOID, InvalidOid};
 	List	   *tablelist = NIL;
-	bool		check_columnlist = (walrcv_server_version(wrconn) >= 150000);
+	int			server_version = walrcv_server_version(wrconn);
+	bool		check_columnlist = (server_version >= 150000);

 	initStringInfo(&cmd);
-	appendStringInfoString(&cmd, "SELECT DISTINCT t.schemaname, t.tablename \n");

-	/* Get column lists for each relation if the publisher supports it */
-	if (check_columnlist)
-		appendStringInfoString(&cmd, ", t.attnames\n");
+	/* Get the list of tables from the publisher. */
+	if (server_version >= 160000)
+	{
+		StringInfoData pub_names;

-	appendStringInfoString(&cmd, "FROM pg_catalog.pg_publication_tables t\n"
-						   " WHERE t.pubname IN (");
-	get_publications_str(publications, &cmd, true);
-	appendStringInfoChar(&cmd, ')');
+		tableRow[2] = INT2VECTOROID;
+		initStringInfo(&pub_names);
+		get_publications_str(publications, &pub_names, true);
+
+		/*
+		 * From version 16, we allowed passing multiple publications to the
+		 * function pg_get_publication_tables. This helped to filter out the
+		 * partition table whose ancestor is also published in this
+		 * publication array.
+		 *
+		 * Join pg_get_publication_tables with pg_publication to exclude
+		 * non-existing publications.
+		 *
+		 * Note that attrs are always stored in sorted order so we don't need
+		 * to worry if different publications have specified them in a
+		 * different order. See publication_translate_columns.
+		 */
+		appendStringInfo(&cmd, "SELECT DISTINCT n.nspname, c.relname, gpt.attrs\n"
+						 "       FROM pg_class c\n"
+						 "         JOIN pg_namespace n ON n.oid = c.relnamespace\n"
+						 "         JOIN ( SELECT (pg_get_publication_tables(VARIADIC array_agg(pubname::text))).*\n"
+						 "                FROM pg_publication\n"
+						 "                WHERE pubname IN ( %s )) AS gpt\n"
+						 "             ON gpt.relid = c.oid\n",
+						 pub_names.data);
+
+		pfree(pub_names.data);
+	}
+	else
+	{
+		tableRow[2] = NAMEARRAYOID;
+		appendStringInfoString(&cmd, "SELECT DISTINCT t.schemaname, t.tablename \n");
+
+		/* Get column lists for each relation if the publisher supports it */
+		if (check_columnlist)
+			appendStringInfoString(&cmd, ", t.attnames\n");
+
+		appendStringInfoString(&cmd, "FROM pg_catalog.pg_publication_tables t\n"
+							   " WHERE t.pubname IN (");
+		get_publications_str(publications, &cmd, true);
+		appendStringInfoChar(&cmd, ')');
+	}

 	res = walrcv_exec(wrconn, cmd.data, check_columnlist ? 3 : 2, tableRow);
 	pfree(cmd.data);