1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-20 00:42:27 +03:00

pg_upgrade check for Unicode-dependent relations.

This check will not cause an upgrade failure, only a warning.

Discussion: https://postgr.es/m/ef03d678b39a64392f4b12e0f59d1495c740969e.camel%40j-davis.com
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
This commit is contained in:
Jeff Davis 2025-04-03 10:45:38 -07:00
parent fd09c1316b
commit b81ffa13e3

View File

@ -13,6 +13,7 @@
#include "catalog/pg_class_d.h" #include "catalog/pg_class_d.h"
#include "fe_utils/string_utils.h" #include "fe_utils/string_utils.h"
#include "pg_upgrade.h" #include "pg_upgrade.h"
#include "common/unicode_version.h"
static void check_new_cluster_is_empty(void); static void check_new_cluster_is_empty(void);
static void check_is_install_user(ClusterInfo *cluster); static void check_is_install_user(ClusterInfo *cluster);
@ -25,6 +26,7 @@ static void check_for_tables_with_oids(ClusterInfo *cluster);
static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_pg_role_prefix(ClusterInfo *cluster);
static void check_for_new_tablespace_dir(void); static void check_for_new_tablespace_dir(void);
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
static void check_for_unicode_update(ClusterInfo *cluster);
static void check_new_cluster_logical_replication_slots(void); static void check_new_cluster_logical_replication_slots(void);
static void check_new_cluster_subscription_configuration(void); static void check_new_cluster_subscription_configuration(void);
static void check_old_cluster_for_valid_slots(void); static void check_old_cluster_for_valid_slots(void);
@ -633,6 +635,12 @@ check_and_dump_old_cluster(void)
check_for_data_types_usage(&old_cluster); check_for_data_types_usage(&old_cluster);
/*
* Unicode updates can affect some objects that use expressions with
* functions dependent on Unicode.
*/
check_for_unicode_update(&old_cluster);
/* /*
* PG 14 changed the function signature of encoding conversion functions. * PG 14 changed the function signature of encoding conversion functions.
* Conversions from older versions cannot be upgraded automatically * Conversions from older versions cannot be upgraded automatically
@ -1754,6 +1762,178 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
check_ok(); check_ok();
} }
/*
 * UpgradeTask result callback for check_for_unicode_update().
 *
 * "arg" is the UpgradeTaskReport shared by the whole check.  An empty result
 * set is ignored.  Otherwise the report file is opened on first use (a
 * failure to open it is fatal), a header naming the database is written, and
 * one line per returned row follows giving the relation's OID, schema and
 * name.
 */
static void
process_unicode_update(DbInfo *dbinfo, PGresult *res, void *arg)
{
	UpgradeTaskReport *rpt = (UpgradeTaskReport *) arg;
	int			nrows = PQntuples(res);
	int			col_oid;
	int			col_nsp;
	int			col_rel;
	int			row = 0;

	/* Nothing matched in this database: leave the report untouched. */
	if (nrows == 0)
		return;

	col_oid = PQfnumber(res, "reloid");
	col_nsp = PQfnumber(res, "nspname");
	col_rel = PQfnumber(res, "relname");

	/* The report file is created lazily, by the first database with hits. */
	if (rpt->file == NULL)
	{
		rpt->file = fopen_priv(rpt->path, "w");
		if (rpt->file == NULL)
			pg_fatal("could not open file \"%s\": %m", rpt->path);
	}

	fprintf(rpt->file, "In database: %s\n", dbinfo->db_name);

	while (row < nrows)
	{
		fprintf(rpt->file, " (oid=%s) %s.%s\n",
				PQgetvalue(res, row, col_oid),
				PQgetvalue(res, row, col_nsp),
				PQgetvalue(res, row, col_rel));
		row++;
	}
}
/*
 * Report whether the Unicode version of the given cluster differs from the
 * one this pg_upgrade binary was built with.
 *
 * Connects to "template1" in the cluster, runs unicode_version() there and
 * compares the result against PG_UNICODE_VERSION.  Returns true when the two
 * strings differ.
 */
static bool
unicode_version_changed(ClusterInfo *cluster)
{
	PGconn	   *conn = connectToServer(cluster, "template1");
	PGresult   *result = executeQueryOrDie(conn, "SELECT unicode_version()");
	bool		changed;

	/* Read and compare the value before the result is cleared below. */
	changed = strcmp(PQgetvalue(result, 0, 0), PG_UNICODE_VERSION) != 0;

	PQclear(result);
	PQfinish(conn);

	return changed;
}
/*
 * check_for_unicode_update()
 *
 * Check if the version of Unicode in the old server and the new server
 * differ. If so, check for indexes, partitioned tables, or constraints that
 * use expressions with functions dependent on Unicode behavior.
 *
 * Any matching relations are listed in "unicode_dependent_rels.txt" under the
 * log base directory, and a warning (not a failure) is reported.  If nothing
 * matches, or the Unicode version is unchanged, the check reports OK.
 */
static void
check_for_unicode_update(ClusterInfo *cluster)
{
	UpgradeTaskReport report;
	UpgradeTask *task;
	const char *query;

	/*
	 * The builtin provider did not exist prior to version 17. While there are
	 * still problems that could potentially be caught from earlier versions,
	 * such as an index on NORMALIZE(), we don't check for that here.
	 */
	if (GET_MAJOR_VERSION(cluster->major_version) < 1700)
		return;

	prep_status("Checking for objects affected by Unicode update");

	if (!unicode_version_changed(cluster))
	{
		check_ok();
		return;
	}

	report.file = NULL;
	snprintf(report.path, sizeof(report.path), "%s/%s",
			 log_opts.basedir,
			 "unicode_dependent_rels.txt");

	query =
	/* collations that use built-in Unicode for character semantics */
		"WITH collations(collid) AS ( "
		" SELECT oid FROM pg_collation "
		" WHERE collprovider='b' AND colllocale IN ('C.UTF-8','PG_UNICODE_FAST') "
	/* include default collation, if appropriate */
		" UNION "
		" SELECT 'pg_catalog.default'::regcollation FROM pg_database "
		" WHERE datname = current_database() AND "
		" datlocprovider='b' AND datlocale IN ('C.UTF-8','PG_UNICODE_FAST') "
		"), "
	/* functions that use built-in Unicode */
		"functions(procid) AS ( "
		" SELECT proc.oid FROM pg_proc proc "
		" WHERE proname IN ('normalize','unicode_assigned','unicode_version','is_normalized') AND "
		" pronamespace='pg_catalog'::regnamespace "
		"), "
	/* operators that use the input collation for character semantics */
		"coll_operators(operid, procid, collid) AS ( "
		" SELECT oper.oid, oper.oprcode, collid FROM pg_operator oper, collations "
		" WHERE oprname IN ('~', '~*', '!~', '!~*', '~~*', '!~~*') AND "
		" oprnamespace='pg_catalog'::regnamespace AND "
		" oprright='text'::regtype "
		"), "
	/* functions that use the input collation for character semantics */
		"coll_functions(procid, collid) AS ( "
		" SELECT proc.oid, collid FROM pg_proc proc, collations "
		" WHERE proname IN ('lower','initcap','upper') AND "
		" pronamespace='pg_catalog'::regnamespace AND "
		" proargtypes[0] = 'text'::regtype "
	/* include functions behind the operators listed above */
		" UNION "
		" SELECT procid, collid FROM coll_operators "
		"), "

	/*
	 * Generate patterns to search a pg_node_tree for the above functions and
	 * operators.
	 */
		"patterns(p) AS ( "
		" SELECT '{FUNCEXPR :funcid ' || procid::text || '[ }]' FROM functions "
		" UNION "
		" SELECT '{OPEXPR :opno ' || operid::text || ' (:\\w+ \\w+ )*' || "
		" ':inputcollid ' || collid::text || '[ }]' FROM coll_operators "
		" UNION "
		" SELECT '{FUNCEXPR :funcid ' || procid::text || ' (:\\w+ \\w+ )*' || "
		" ':inputcollid ' || collid::text || '[ }]' FROM coll_functions "
		") "

	/*
	 * Match the patterns against expressions used for relation contents.
	 */
		"SELECT reloid, relkind, nspname, relname "
		" FROM ( "
		" SELECT conrelid "
		" FROM pg_constraint, patterns WHERE conbin::text ~ p "
		" UNION "
		" SELECT indexrelid "
		" FROM pg_index, patterns WHERE indexprs::text ~ p OR indpred::text ~ p "
		" UNION "
		" SELECT partrelid "
		" FROM pg_partitioned_table, patterns WHERE partexprs::text ~ p "
		" ) s(reloid), pg_class c, pg_namespace n, pg_database d "
		" WHERE s.reloid = c.oid AND c.relnamespace = n.oid AND "
		" d.datname = current_database() AND "
		" d.encoding = pg_char_to_encoding('UTF8');";

	/*
	 * Create the task only now, after both early-exit checks above, so that
	 * every path which creates it also reaches upgrade_task_free() below.
	 */
	task = upgrade_task_create();
	upgrade_task_add_step(task, query,
						  process_unicode_update,
						  true, &report);
	upgrade_task_run(task, cluster);
	upgrade_task_free(task);

	/* The callback opens report.file only if some database had matches. */
	if (report.file)
	{
		fclose(report.file);
		report_status(PG_WARNING, "warning");
		pg_log(PG_WARNING, "Your installation contains relations that may be affected by a new version of Unicode.\n"
			   "A list of potentially-affected relations is in the file:\n"
			   " %s", report.path);
	}
	else
		check_ok();
}
/* /*
* check_new_cluster_logical_replication_slots() * check_new_cluster_logical_replication_slots()
* *