1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-17 06:41:09 +03:00

Windows support in pg_import_system_collations

Windows can enumerate the locales that are either installed or
supported by calling EnumSystemLocalesEx(), similar to what is already
done in the READ_LOCALE_A_OUTPUT switch.  We can refactor some of the
logic already used in that switch into a new function
create_collation_from_locale().

The enumerated locales have BCP 47 shape, that is with a hyphen
between language and territory, instead of POSIX's underscore.  The
created collations will retain the BCP 47 shape, but we will also
create a POSIX alias, so xx-YY will have an xx_YY alias.

A new test collate.windows.win1252 is added that is like
collate.linux.utf8.

Author: Juan Jose Santamaria Flecha <juanjo.santamaria@gmail.com>
Reviewed-by: Dmitry Koval <d.koval@postgrespro.ru>
Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>
Discussion: https://www.postgresql.org/message-id/flat/0050ec23-34d9-2765-9015-98c04f0e18ac@postgrespro.ru
This commit is contained in:
Peter Eisentraut
2023-01-03 14:21:40 +01:00
parent 33ab0a2a52
commit bf03cfd162
6 changed files with 1650 additions and 52 deletions

View File

@ -499,6 +499,12 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
#define READ_LOCALE_A_OUTPUT
#endif
/*
 * Will we use EnumSystemLocalesEx in pg_import_system_collations?
 *
 * ENUM_SYSTEM_LOCALE is defined on WIN32 builds only.  It guards the
 * enumeration callback win32_read_locale() and the EnumSystemLocalesEx()
 * call made from pg_import_system_collations().
 */
#ifdef WIN32
#define ENUM_SYSTEM_LOCALE
#endif
#ifdef READ_LOCALE_A_OUTPUT
/*
* "Normalize" a libc locale name, stripping off encoding tags such as
@ -610,6 +616,161 @@ get_icu_locale_comment(const char *localename)
#endif /* USE_ICU */
/*
 * create_collation_from_locale
 *		Subroutine for pg_import_system_collations(): try to turn one
 *		operating system locale into a libc collation of the same name.
 *
 * 'locale' is the locale name as reported by the operating system.
 *
 * 'nspid' is the id of the namespace in which the collation is created.
 *
 * '*nvalidp' is bumped once the locale has passed every check below, i.e.
 * it has an encoding we can use.
 *
 * '*ncreatedp' is bumped only when a collation was really created; an
 * already existing collation is silently left alone.
 *
 * Returns the encoding of the locale, or -1 when the locale cannot be used
 * for creating a collation.
 */
pg_attribute_unused()
static int
create_collation_from_locale(const char *locale, int nspid,
							 int *nvalidp, int *ncreatedp)
{
	int			encoding;
	char	   *version;
	Oid			new_collation;

	/*
	 * Locale names containing non-ASCII characters (such as "bokm&aring;l"
	 * or "fran&ccedil;ais") are of no use to us: interpreting those
	 * characters would require the very locale we are looking at.  Filter
	 * them out.
	 */
	if (!pg_is_ascii(locale))
	{
		elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
		return -1;
	}

	encoding = pg_get_encoding_from_locale(locale, false);

	if (encoding < 0)
	{
		elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
		return -1;
	}

	if (!PG_VALID_BE_ENCODING(encoding))
	{
		elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
		return -1;
	}

	/* C/POSIX are already in the catalog */
	if (enc_is_sql_ascii(encoding))
		return -1;

	/* one more valid locale found in the operating system */
	*nvalidp += 1;

	/*
	 * The collation gets the same name as the locale.  An existing collation
	 * of that name is not an error and produces no chatter: some versions of
	 * "locale -a" list the same name more than once, which matters even at
	 * initdb time, and later import runs normally just want to pick up
	 * whatever locales are new.
	 */
	version = get_collation_actual_version(COLLPROVIDER_LIBC, locale);
	new_collation = CollationCreate(locale, nspid, GetUserId(),
									COLLPROVIDER_LIBC, true, encoding,
									locale, locale, NULL,
									version,
									true, true);

	if (!OidIsValid(new_collation))
		return encoding;

	*ncreatedp += 1;

	/* Must do CCI between inserts to handle duplicates correctly */
	CommandCounterIncrement();

	return encoding;
}
#ifdef ENUM_SYSTEM_LOCALE
/*
 * Parameter to be passed to the callback function win32_read_locale().
 *
 * pg_import_system_collations() fills one of these in and hands its address
 * to EnumSystemLocalesEx(), which forwards it to the callback for every
 * enumerated locale.
 */
typedef struct
{
/* id of the namespace in which the collations are created */
Oid nspid;
/* points to the caller's count of collations actually created */
int *ncreatedp;
/* points to the caller's count of valid locales found */
int *nvalidp;
} CollParam;
/*
 * Callback function for EnumSystemLocalesEx() in
 * pg_import_system_collations().  Creates a collation for every valid locale
 * and a POSIX alias collation.
 *
 * 'pStr' is the name of the enumerated locale, as a wide-character string.
 *
 * 'dwFlags' is not used.
 *
 * 'lparam' carries the pointer to the CollParam that the caller passed to
 * EnumSystemLocalesEx().
 *
 * The callback contract is to return TRUE to continue enumerating and FALSE
 * to stop enumerating.  We always want to continue.
 */
static BOOL CALLBACK
win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
{
	CollParam  *param = (CollParam *) lparam;
	char		localebuf[NAMEDATALEN] = {0};
	int			result;
	int			enc;

	(void) dwFlags;

	result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
								 NULL, NULL);

	if (result == 0)
	{
		if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
		{
			/*
			 * After a failed conversion the buffer cannot be relied on to
			 * hold a NUL-terminated string, so terminate it ourselves before
			 * handing it to "%s".  The name shown is therefore truncated.
			 */
			localebuf[NAMEDATALEN - 1] = '\0';
			elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
		}
		return TRUE;
	}
	if (localebuf[0] == '\0')
		return TRUE;

	enc = create_collation_from_locale(localebuf, param->nspid,
									   param->nvalidp, param->ncreatedp);
	if (enc < 0)
		return TRUE;

	/*
	 * Windows will use hyphens between language and territory, where POSIX
	 * uses an underscore. Simply create a POSIX alias.
	 */
	if (strchr(localebuf, '-'))
	{
		char		alias[NAMEDATALEN];
		Oid			collid;

		/*
		 * A successful conversion above left a NUL-terminated string in
		 * localebuf, and alias has the same size, so the copy cannot
		 * overflow.
		 */
		strcpy(alias, localebuf);
		for (char *p = alias; *p; p++)
		{
			if (*p == '-')
				*p = '_';
		}

		/* The alias gets the POSIX-style name but keeps the Windows locale */
		collid = CollationCreate(alias, param->nspid, GetUserId(),
								 COLLPROVIDER_LIBC, true, enc,
								 localebuf, localebuf, NULL,
								 get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
								 true, true);
		if (OidIsValid(collid))
		{
			(*param->ncreatedp)++;

			/* Must do CCI between inserts to handle duplicates correctly */
			CommandCounterIncrement();
		}
	}

	return TRUE;
}
#endif /* ENUM_SYSTEM_LOCALE */
/*
* pg_import_system_collations: add known system collations to pg_collation
*/
@ -668,58 +829,9 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
}
localebuf[len - 1] = '\0';
/*
* Some systems have locale names that don't consist entirely of
* ASCII letters (such as "bokm&aring;l" or "fran&ccedil;ais").
* This is pretty silly, since we need the locale itself to
* interpret the non-ASCII characters. We can't do much with
* those, so we filter them out.
*/
if (!pg_is_ascii(localebuf))
{
elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf);
continue;
}
enc = pg_get_encoding_from_locale(localebuf, false);
enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
if (enc < 0)
{
elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"",
localebuf);
continue;
}
if (!PG_VALID_BE_ENCODING(enc))
{
elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf);
continue;
}
if (enc == PG_SQL_ASCII)
continue; /* C/POSIX are already in the catalog */
/* count valid locales found in operating system */
nvalid++;
/*
* Create a collation named the same as the locale, but quietly
* doing nothing if it already exists. This is the behavior we
* need even at initdb time, because some versions of "locale -a"
* can report the same locale name more than once. And it's
* convenient for later import runs, too, since you just about
* always want to add on new locales without a lot of chatter
* about existing ones.
*/
collid = CollationCreate(localebuf, nspid, GetUserId(),
COLLPROVIDER_LIBC, true, enc,
localebuf, localebuf, NULL,
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
true, true);
if (OidIsValid(collid))
{
ncreated++;
/* Must do CCI between inserts to handle duplicates correctly */
CommandCounterIncrement();
}
/*
* Generate aliases such as "en_US" in addition to "en_US.utf8"
@ -857,5 +969,30 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
}
#endif /* USE_ICU */
/* Load collations known to WIN32 */
#ifdef ENUM_SYSTEM_LOCALE
{
int nvalid = 0;
CollParam param;
param.nspid = nspid;
param.ncreatedp = &ncreated;
param.nvalidp = &nvalid;
/*
* Enumerate the locales that are either installed on or supported
* by the OS.
*/
if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
(LPARAM) &param, NULL))
_dosmaperr(GetLastError());
/* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
if (nvalid == 0)
ereport(WARNING,
(errmsg("no usable system locales were found")));
}
#endif /* ENUM_SYSTEM_LOCALE */
PG_RETURN_INT32(ncreated);
}