mirror of
https://github.com/postgres/postgres.git
synced 2025-06-05 23:56:58 +03:00
ICU: do not convert locale 'C' to 'en-US-u-va-posix'.
Older versions of ICU canonicalize "C" to "en-US-u-va-posix"; but starting in ICU version 64, the "C" locale is considered obsolete. Postgres commit ea1db8ae70 introduced code to always canonicalize "C" to "en-US-u-va-posix" for consistency and convenience, but it was deemed too confusing.

This commit removes that code, so that "C" is treated like other ICU locale names: canonicalization is attempted, and if it fails, the behavior is controlled by icu_validation_level.

A similar change was previously committed as f7faa9976c, then reverted due to an ICU-version-dependent test failure. This commit un-reverts it, omitting the test because we now expect the behavior to depend on the version of ICU being used.

Discussion: https://postgr.es/m/3a200aca-4672-4b37-fc91-5d198a323503%40eisentraut.org
Discussion: https://postgr.es/m/f83f089ee1e9acd5dbbbf3353294d24e1f196e95.camel@j-davis.com
Discussion: https://postgr.es/m/37520ec1ae9591f83132f82dbd625f3fc2d69c16.camel@j-davis.com
This commit is contained in:
parent
2535c74b1a
commit
f3a01af29b
@ -2784,26 +2784,10 @@ icu_language_tag(const char *loc_str, int elevel)
|
|||||||
{
|
{
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
UErrorCode status;
|
UErrorCode status;
|
||||||
char lang[ULOC_LANG_CAPACITY];
|
|
||||||
char *langtag;
|
char *langtag;
|
||||||
size_t buflen = 32; /* arbitrary starting buffer size */
|
size_t buflen = 32; /* arbitrary starting buffer size */
|
||||||
const bool strict = true;
|
const bool strict = true;
|
||||||
|
|
||||||
status = U_ZERO_ERROR;
|
|
||||||
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
|
|
||||||
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
|
|
||||||
{
|
|
||||||
if (elevel > 0)
|
|
||||||
ereport(elevel,
|
|
||||||
(errmsg("could not get language from locale \"%s\": %s",
|
|
||||||
loc_str, u_errorName(status))));
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
|
|
||||||
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
|
|
||||||
return pstrdup("en-US-u-va-posix");
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
|
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
|
||||||
* RFC5646 section 4.4). Additionally, in older ICU versions,
|
* RFC5646 section 4.4). Additionally, in older ICU versions,
|
||||||
@ -2884,8 +2868,7 @@ icu_validate_locale(const char *loc_str)
|
|||||||
|
|
||||||
/* check for special language name */
|
/* check for special language name */
|
||||||
if (strcmp(lang, "") == 0 ||
|
if (strcmp(lang, "") == 0 ||
|
||||||
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
|
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
|
||||||
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
|
|
||||||
found = true;
|
found = true;
|
||||||
|
|
||||||
/* search for matching language within ICU */
|
/* search for matching language within ICU */
|
||||||
|
@ -2244,24 +2244,10 @@ icu_language_tag(const char *loc_str)
|
|||||||
{
|
{
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
UErrorCode status;
|
UErrorCode status;
|
||||||
char lang[ULOC_LANG_CAPACITY];
|
|
||||||
char *langtag;
|
char *langtag;
|
||||||
size_t buflen = 32; /* arbitrary starting buffer size */
|
size_t buflen = 32; /* arbitrary starting buffer size */
|
||||||
const bool strict = true;
|
const bool strict = true;
|
||||||
|
|
||||||
status = U_ZERO_ERROR;
|
|
||||||
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
|
|
||||||
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
|
|
||||||
{
|
|
||||||
pg_fatal("could not get language from locale \"%s\": %s",
|
|
||||||
loc_str, u_errorName(status));
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
|
|
||||||
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
|
|
||||||
return pstrdup("en-US-u-va-posix");
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
|
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
|
||||||
* RFC5646 section 4.4). Additionally, in older ICU versions,
|
* RFC5646 section 4.4). Additionally, in older ICU versions,
|
||||||
@ -2326,8 +2312,7 @@ icu_validate_locale(const char *loc_str)
|
|||||||
|
|
||||||
/* check for special language name */
|
/* check for special language name */
|
||||||
if (strcmp(lang, "") == 0 ||
|
if (strcmp(lang, "") == 0 ||
|
||||||
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
|
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
|
||||||
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
|
|
||||||
found = true;
|
found = true;
|
||||||
|
|
||||||
/* search for matching language within ICU */
|
/* search for matching language within ICU */
|
||||||
|
@ -1020,6 +1020,7 @@ CREATE ROLE regress_test_role;
|
|||||||
CREATE SCHEMA test_schema;
|
CREATE SCHEMA test_schema;
|
||||||
-- We need to do this this way to cope with varying names for encodings:
|
-- We need to do this this way to cope with varying names for encodings:
|
||||||
SET client_min_messages TO WARNING;
|
SET client_min_messages TO WARNING;
|
||||||
|
SET icu_validation_level = disabled;
|
||||||
do $$
|
do $$
|
||||||
BEGIN
|
BEGIN
|
||||||
EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
|
EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
|
||||||
@ -1034,6 +1035,7 @@ BEGIN
|
|||||||
quote_literal((SELECT CASE WHEN datlocprovider='i' THEN daticulocale ELSE datcollate END FROM pg_database WHERE datname = current_database())) || ');';
|
quote_literal((SELECT CASE WHEN datlocprovider='i' THEN daticulocale ELSE datcollate END FROM pg_database WHERE datname = current_database())) || ');';
|
||||||
END
|
END
|
||||||
$$;
|
$$;
|
||||||
|
RESET icu_validation_level;
|
||||||
RESET client_min_messages;
|
RESET client_min_messages;
|
||||||
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
|
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
|
||||||
ERROR: parameter "locale" must be specified
|
ERROR: parameter "locale" must be specified
|
||||||
|
@ -358,6 +358,7 @@ CREATE SCHEMA test_schema;
|
|||||||
|
|
||||||
-- We need to do this this way to cope with varying names for encodings:
|
-- We need to do this this way to cope with varying names for encodings:
|
||||||
SET client_min_messages TO WARNING;
|
SET client_min_messages TO WARNING;
|
||||||
|
SET icu_validation_level = disabled;
|
||||||
|
|
||||||
do $$
|
do $$
|
||||||
BEGIN
|
BEGIN
|
||||||
@ -373,6 +374,7 @@ BEGIN
|
|||||||
END
|
END
|
||||||
$$;
|
$$;
|
||||||
|
|
||||||
|
RESET icu_validation_level;
|
||||||
RESET client_min_messages;
|
RESET client_min_messages;
|
||||||
|
|
||||||
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
|
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user