From b81ffa13e35693af8accf00f1694bbeca6809fb6 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Thu, 3 Apr 2025 10:45:38 -0700 Subject: [PATCH] pg_upgrade check for Unicode-dependent relations. This check will not cause an upgrade failure, only a warning. Discussion: https://postgr.es/m/ef03d678b39a64392f4b12e0f59d1495c740969e.camel%40j-davis.com Reviewed-by: Peter Eisentraut --- src/bin/pg_upgrade/check.c | 180 +++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 02d9146e5ed..3189de4df96 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -13,6 +13,7 @@ #include "catalog/pg_class_d.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" +#include "common/unicode_version.h" static void check_new_cluster_is_empty(void); static void check_is_install_user(ClusterInfo *cluster); @@ -25,6 +26,7 @@ static void check_for_tables_with_oids(ClusterInfo *cluster); static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_new_tablespace_dir(void); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); +static void check_for_unicode_update(ClusterInfo *cluster); static void check_new_cluster_logical_replication_slots(void); static void check_new_cluster_subscription_configuration(void); static void check_old_cluster_for_valid_slots(void); @@ -633,6 +635,12 @@ check_and_dump_old_cluster(void) check_for_data_types_usage(&old_cluster); + /* + * Unicode updates can affect some objects that use expressions with + * functions dependent on Unicode. + */ + check_for_unicode_update(&old_cluster); + /* * PG 14 changed the function signature of encoding conversion functions. * Conversions from older versions cannot be upgraded automatically @@ -1754,6 +1762,178 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster) check_ok(); } +/* + * Callback function for processing results of query for + * check_for_unicode_update()'s UpgradeTask. If the query returned any rows + * (i.e., the check failed), write the details to the report file. + */ +static void +process_unicode_update(DbInfo *dbinfo, PGresult *res, void *arg) +{ + UpgradeTaskReport *report = (UpgradeTaskReport *) arg; + int ntups = PQntuples(res); + int i_reloid = PQfnumber(res, "reloid"); + int i_nspname = PQfnumber(res, "nspname"); + int i_relname = PQfnumber(res, "relname"); + + if (ntups == 0) + return; + + if (report->file == NULL && + (report->file = fopen_priv(report->path, "w")) == NULL) + pg_fatal("could not open file \"%s\": %m", report->path); + + fprintf(report->file, "In database: %s\n", dbinfo->db_name); + + for (int rowno = 0; rowno < ntups; rowno++) + fprintf(report->file, " (oid=%s) %s.%s\n", + PQgetvalue(res, rowno, i_reloid), + PQgetvalue(res, rowno, i_nspname), + PQgetvalue(res, rowno, i_relname)); +} + +/* + * Check if the Unicode version built into Postgres changed between the old + * cluster and the new cluster. + */ +static bool +unicode_version_changed(ClusterInfo *cluster) +{ + PGconn *conn_template1 = connectToServer(cluster, "template1"); + PGresult *res; + char *old_unicode_version; + bool unicode_updated; + + res = executeQueryOrDie(conn_template1, "SELECT unicode_version()"); + old_unicode_version = PQgetvalue(res, 0, 0); + unicode_updated = (strcmp(old_unicode_version, PG_UNICODE_VERSION) != 0); + + PQclear(res); + PQfinish(conn_template1); + + return unicode_updated; +} + +/* + * check_for_unicode_update() + * + * Check if the version of Unicode in the old server and the new server + * differ. If so, check for indexes, partitioned tables, or constraints that + * use expressions with functions dependent on Unicode behavior. + */ +static void +check_for_unicode_update(ClusterInfo *cluster) +{ + UpgradeTaskReport report; + UpgradeTask *task = upgrade_task_create(); + const char *query; + + /* + * The builtin provider did not exist prior to version 17. While there are + * still problems that could potentially be caught from earlier versions, + * such as an index on NORMALIZE(), we don't check for that here. + */ + if (GET_MAJOR_VERSION(cluster->major_version) < 1700) + return; + + prep_status("Checking for objects affected by Unicode update"); + + if (!unicode_version_changed(cluster)) + { + check_ok(); + return; + } + + report.file = NULL; + snprintf(report.path, sizeof(report.path), "%s/%s", + log_opts.basedir, + "unicode_dependent_rels.txt"); + + query = + /* collations that use built-in Unicode for character semantics */ + "WITH collations(collid) AS ( " + " SELECT oid FROM pg_collation " + " WHERE collprovider='b' AND colllocale IN ('C.UTF-8','PG_UNICODE_FAST') " + /* include default collation, if appropriate */ + " UNION " + " SELECT 'pg_catalog.default'::regcollation FROM pg_database " + " WHERE datname = current_database() AND " + " datlocprovider='b' AND datlocale IN ('C.UTF-8','PG_UNICODE_FAST') " + "), " + /* functions that use built-in Unicode */ + "functions(procid) AS ( " + " SELECT proc.oid FROM pg_proc proc " + " WHERE proname IN ('normalize','unicode_assigned','unicode_version','is_normalized') AND " + " pronamespace='pg_catalog'::regnamespace " + "), " + /* operators that use the input collation for character semantics */ + "coll_operators(operid, procid, collid) AS ( " + " SELECT oper.oid, oper.oprcode, collid FROM pg_operator oper, collations " + " WHERE oprname IN ('~', '~*', '!~', '!~*', '~~*', '!~~*') AND " + " oprnamespace='pg_catalog'::regnamespace AND " + " oprright='text'::regtype " + "), " + /* functions that use the input collation for character semantics */ + "coll_functions(procid, collid) AS ( " + " SELECT proc.oid, collid FROM pg_proc proc, collations " + " WHERE proname IN ('lower','initcap','upper') AND " + " pronamespace='pg_catalog'::regnamespace AND " + " proargtypes[0] = 'text'::regtype " + /* include functions behind the operators listed above */ + " UNION " + " SELECT procid, collid FROM coll_operators " + "), " + + /* + * Generate patterns to search a pg_node_tree for the above functions and + * operators. + */ + "patterns(p) AS ( " + " SELECT '{FUNCEXPR :funcid ' || procid::text || '[ }]' FROM functions " + " UNION " + " SELECT '{OPEXPR :opno ' || operid::text || ' (:\\w+ \\w+ )*' || " + " ':inputcollid ' || collid::text || '[ }]' FROM coll_operators " + " UNION " + " SELECT '{FUNCEXPR :funcid ' || procid::text || ' (:\\w+ \\w+ )*' || " + " ':inputcollid ' || collid::text || '[ }]' FROM coll_functions " + ") " + + /* + * Match the patterns against expressions used for relation contents. + */ + "SELECT reloid, relkind, nspname, relname " + " FROM ( " + " SELECT conrelid " + " FROM pg_constraint, patterns WHERE conbin::text ~ p " + " UNION " + " SELECT indexrelid " + " FROM pg_index, patterns WHERE indexprs::text ~ p OR indpred::text ~ p " + " UNION " + " SELECT partrelid " + " FROM pg_partitioned_table, patterns WHERE partexprs::text ~ p " + " ) s(reloid), pg_class c, pg_namespace n, pg_database d " + " WHERE s.reloid = c.oid AND c.relnamespace = n.oid AND " + " d.datname = current_database() AND " + " d.encoding = pg_char_to_encoding('UTF8');"; + + upgrade_task_add_step(task, query, + process_unicode_update, + true, &report); + upgrade_task_run(task, cluster); + upgrade_task_free(task); + + if (report.file) + { + fclose(report.file); + report_status(PG_WARNING, "warning"); + pg_log(PG_WARNING, "Your installation contains relations that may be affected by a new version of Unicode.\n" + "A list of potentially-affected relations is in the file:\n" + " %s", report.path); + } + else + check_ok(); +} + /* * check_new_cluster_logical_replication_slots() *