mirror of
https://github.com/postgres/postgres.git
synced 2025-05-06 19:59:18 +03:00
It emerges that recent versions of Windows (at least 2016 Standard) spell this locale name as "Norwegian Bokmål_Norway.1252", defeating our mapping code that translates "Norwegian (Bokmål)_Norway" to something that's all-ASCII (cf commits db29620d4 and aa1d2fc5e). Add another mapping entry to handle this spelling. Per bug #16068 from Robert Ford. Like the previous patches, back-patch to all supported branches. Discussion: https://postgr.es/m/16068-4cb6eeaa7eb46d93@postgresql.org
194 lines
5.9 KiB
C
194 lines
5.9 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* win32setlocale.c
|
|
* Wrapper to work around bugs in Windows setlocale() implementation
|
|
*
|
|
* Copyright (c) 2011-2018, PostgreSQL Global Development Group
|
|
*
|
|
* IDENTIFICATION
|
|
* src/port/win32setlocale.c
|
|
*
|
|
*
|
|
* The setlocale() function in Windows is broken in two ways. First, it
|
|
* has a problem with locale names that have a dot in the country name. For
|
|
* example:
|
|
*
|
|
* "Chinese (Traditional)_Hong Kong S.A.R..950"
|
|
*
|
|
* For some reason, setlocale() doesn't accept that as argument, even though
|
|
* setlocale(LC_ALL, NULL) returns exactly that. Fortunately, it accepts
|
|
* various alternative names for such countries, so to work around the broken
|
|
* setlocale() function, we map the troublemaking locale names to accepted
|
|
* aliases, before calling setlocale().
|
|
*
|
|
* The second problem is that the locale name for "Norwegian (Bokmål)"
|
|
* contains a non-ASCII character. That's problematic, because it's not clear
|
|
* what encoding the locale name itself is supposed to be in, when you
|
|
* haven't yet set a locale. Also, it causes problems when the cluster
|
|
* contains databases with different encodings, as the locale name is stored
|
|
* in the pg_database system catalog. To work around that, when setlocale()
|
|
* returns that locale name, map it to a pure-ASCII alias for the same
|
|
* locale.
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
#include "c.h"
|
|
|
|
#undef setlocale
|
|
|
|
struct locale_map
|
|
{
|
|
/*
|
|
* String in locale name to replace. Can be a single string (end is NULL),
|
|
* or separate start and end strings. If two strings are given, the locale
|
|
* name must contain both of them, and everything between them is
|
|
* replaced. This is used for a poor-man's regexp search, allowing
|
|
* replacement of "start.*end".
|
|
*/
|
|
const char *locale_name_start;
|
|
const char *locale_name_end;
|
|
|
|
const char *replacement; /* string to replace the match with */
|
|
};
|
|
|
|
/*
|
|
* Mappings applied before calling setlocale(), to the argument.
|
|
*/
|
|
static const struct locale_map locale_map_argument[] = {
|
|
/*
|
|
* "HKG" is listed here:
|
|
* http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx
|
|
* (Country/Region Strings).
|
|
*
|
|
* "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the
|
|
* above list, but seems to work anyway.
|
|
*/
|
|
{"Hong Kong S.A.R.", NULL, "HKG"},
|
|
{"U.A.E.", NULL, "ARE"},
|
|
|
|
/*
|
|
* The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't
|
|
* seem to recognize that. And Macau isn't listed in the table of accepted
|
|
* abbreviations linked above. Fortunately, "ZHM" seems to be accepted as
|
|
* an alias for "Chinese (Traditional)_Macau S.A.R..950". I'm not sure
|
|
* where "ZHM" comes from, must be some legacy naming scheme. But hey, it
|
|
* works.
|
|
*
|
|
* Note that unlike HKG and ARE, ZHM is an alias for the *whole* locale
|
|
* name, not just the country part.
|
|
*
|
|
* Some versions of Windows spell it "Macau", others "Macao".
|
|
*/
|
|
{"Chinese (Traditional)_Macau S.A.R..950", NULL, "ZHM"},
|
|
{"Chinese_Macau S.A.R..950", NULL, "ZHM"},
|
|
{"Chinese (Traditional)_Macao S.A.R..950", NULL, "ZHM"},
|
|
{"Chinese_Macao S.A.R..950", NULL, "ZHM"},
|
|
{NULL, NULL, NULL}
|
|
};
|
|
|
|
/*
|
|
* Mappings applied after calling setlocale(), to its return value.
|
|
*/
|
|
static const struct locale_map locale_map_result[] = {
|
|
/*
|
|
* "Norwegian (Bokmål)" locale name contains the a-ring character.
|
|
* Map it to a pure-ASCII alias.
|
|
*
|
|
* It's not clear what encoding setlocale() uses when it returns the
|
|
* locale name, so to play it safe, we search for "Norwegian (Bok*l)".
|
|
*
|
|
* Just to make life even more complicated, some versions of Windows spell
|
|
* the locale name without parentheses. Translate that too.
|
|
*/
|
|
{"Norwegian (Bokm", "l)_Norway", "Norwegian_Norway"},
|
|
{"Norwegian Bokm", "l_Norway", "Norwegian_Norway"},
|
|
{NULL, NULL, NULL}
|
|
};
|
|
|
|
#define MAX_LOCALE_NAME_LEN 100
|
|
|
|
static const char *
|
|
map_locale(const struct locale_map *map, const char *locale)
|
|
{
|
|
static char aliasbuf[MAX_LOCALE_NAME_LEN];
|
|
int i;
|
|
|
|
/* Check if the locale name matches any of the problematic ones. */
|
|
for (i = 0; map[i].locale_name_start != NULL; i++)
|
|
{
|
|
const char *needle_start = map[i].locale_name_start;
|
|
const char *needle_end = map[i].locale_name_end;
|
|
const char *replacement = map[i].replacement;
|
|
char *match;
|
|
char *match_start = NULL;
|
|
char *match_end = NULL;
|
|
|
|
match = strstr(locale, needle_start);
|
|
if (match)
|
|
{
|
|
/*
|
|
* Found a match for the first part. If this was a two-part
|
|
* replacement, find the second part.
|
|
*/
|
|
match_start = match;
|
|
if (needle_end)
|
|
{
|
|
match = strstr(match_start + strlen(needle_start), needle_end);
|
|
if (match)
|
|
match_end = match + strlen(needle_end);
|
|
else
|
|
match_start = NULL;
|
|
}
|
|
else
|
|
match_end = match_start + strlen(needle_start);
|
|
}
|
|
|
|
if (match_start)
|
|
{
|
|
/* Found a match. Replace the matched string. */
|
|
int matchpos = match_start - locale;
|
|
int replacementlen = strlen(replacement);
|
|
char *rest = match_end;
|
|
int restlen = strlen(rest);
|
|
|
|
/* check that the result fits in the static buffer */
|
|
if (matchpos + replacementlen + restlen + 1 > MAX_LOCALE_NAME_LEN)
|
|
return NULL;
|
|
|
|
memcpy(&aliasbuf[0], &locale[0], matchpos);
|
|
memcpy(&aliasbuf[matchpos], replacement, replacementlen);
|
|
/* includes null terminator */
|
|
memcpy(&aliasbuf[matchpos + replacementlen], rest, restlen + 1);
|
|
|
|
return aliasbuf;
|
|
}
|
|
}
|
|
|
|
/* no match, just return the original string */
|
|
return locale;
|
|
}
|
|
|
|
char *
|
|
pgwin32_setlocale(int category, const char *locale)
|
|
{
|
|
const char *argument;
|
|
char *result;
|
|
|
|
if (locale == NULL)
|
|
argument = NULL;
|
|
else
|
|
argument = map_locale(locale_map_argument, locale);
|
|
|
|
/* Call the real setlocale() function */
|
|
result = setlocale(category, argument);
|
|
|
|
/*
|
|
* setlocale() is specified to return a "char *" that the caller is
|
|
* forbidden to modify, so casting away the "const" is innocuous.
|
|
*/
|
|
if (result)
|
|
result = (char *) map_locale(locale_map_result, result);
|
|
|
|
return result;
|
|
}
|