mirror of
https://github.com/postgres/postgres.git
synced 2025-07-07 00:36:50 +03:00
Add support for collation attributes on older ICU versions
Starting in ICU 54, collation customization attributes can be specified in the locale string, for example "@colStrength=primary;colCaseLevel=yes". Add support for this for older ICU versions as well, by adding some minimal parsing of the attributes in the locale string and calling ucol_setAttribute() on them. This is essentially what never ICU versions do internally in ucol_open(). This was we can offer this functionality in a consistent way in all ICU versions supported by PostgreSQL. Also add some tests for ICU collation customization. Reported-by: Daniel Verite <daniel@manitou-mail.org> Discussion: https://www.postgresql.org/message-id/0270ebd4-f67c-8774-1a5a-91adfb9bb41f@2ndquadrant.com
This commit is contained in:
@ -58,6 +58,7 @@
|
||||
#include "catalog/pg_control.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/formatting.h"
|
||||
#include "utils/hsearch.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/memutils.h"
|
||||
@ -132,6 +133,9 @@ static HTAB *collation_cache = NULL;
|
||||
static char *IsoLocaleName(const char *); /* MSVC specific */
|
||||
#endif
|
||||
|
||||
#ifdef USE_ICU
|
||||
static void icu_set_collation_attributes(UCollator *collator, const char *loc);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* pg_perm_setlocale
|
||||
@ -1380,6 +1384,9 @@ pg_newlocale_from_collation(Oid collid)
|
||||
(errmsg("could not open collator for locale \"%s\": %s",
|
||||
collcollate, u_errorName(status))));
|
||||
|
||||
if (U_ICU_VERSION_MAJOR_NUM < 54)
|
||||
icu_set_collation_attributes(collator, collcollate);
|
||||
|
||||
/* We will leak this string if we get an error below :-( */
|
||||
result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
|
||||
collcollate);
|
||||
@ -1588,6 +1595,103 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
||||
return len_result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse collation attributes and apply them to the open collator. This takes
|
||||
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
|
||||
* applies the key-value arguments.
|
||||
*
|
||||
* Starting with ICU version 54, the attributes are processed automatically by
|
||||
* ucol_open(), so this is only necessary for emulating this behavior on older
|
||||
* versions.
|
||||
*/
|
||||
pg_attribute_unused()
|
||||
static void
|
||||
icu_set_collation_attributes(UCollator *collator, const char *loc)
|
||||
{
|
||||
char *str = asc_tolower(loc, strlen(loc));
|
||||
|
||||
str = strchr(str, '@');
|
||||
if (!str)
|
||||
return;
|
||||
str++;
|
||||
|
||||
for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
|
||||
{
|
||||
char *e = strchr(token, '=');
|
||||
|
||||
if (e)
|
||||
{
|
||||
char *name;
|
||||
char *value;
|
||||
UColAttribute uattr = -1;
|
||||
UColAttributeValue uvalue = -1;
|
||||
UErrorCode status;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
|
||||
*e = '\0';
|
||||
name = token;
|
||||
value = e + 1;
|
||||
|
||||
/*
|
||||
* See attribute name and value lists in ICU i18n/coll.cpp
|
||||
*/
|
||||
if (strcmp(name, "colstrength") == 0)
|
||||
uattr = UCOL_STRENGTH;
|
||||
else if (strcmp(name, "colbackwards") == 0)
|
||||
uattr = UCOL_FRENCH_COLLATION;
|
||||
else if (strcmp(name, "colcaselevel") == 0)
|
||||
uattr = UCOL_CASE_LEVEL;
|
||||
else if (strcmp(name, "colcasefirst") == 0)
|
||||
uattr = UCOL_CASE_FIRST;
|
||||
else if (strcmp(name, "colalternate") == 0)
|
||||
uattr = UCOL_ALTERNATE_HANDLING;
|
||||
else if (strcmp(name, "colnormalization") == 0)
|
||||
uattr = UCOL_NORMALIZATION_MODE;
|
||||
else if (strcmp(name, "colnumeric") == 0)
|
||||
uattr = UCOL_NUMERIC_COLLATION;
|
||||
/* ignore if unknown */
|
||||
|
||||
if (strcmp(value, "primary") == 0)
|
||||
uvalue = UCOL_PRIMARY;
|
||||
else if (strcmp(value, "secondary") == 0)
|
||||
uvalue = UCOL_SECONDARY;
|
||||
else if (strcmp(value, "tertiary") == 0)
|
||||
uvalue = UCOL_TERTIARY;
|
||||
else if (strcmp(value, "quaternary") == 0)
|
||||
uvalue = UCOL_QUATERNARY;
|
||||
else if (strcmp(value, "identical") == 0)
|
||||
uvalue = UCOL_IDENTICAL;
|
||||
else if (strcmp(value, "no") == 0)
|
||||
uvalue = UCOL_OFF;
|
||||
else if (strcmp(value, "yes") == 0)
|
||||
uvalue = UCOL_ON;
|
||||
else if (strcmp(value, "shifted") == 0)
|
||||
uvalue = UCOL_SHIFTED;
|
||||
else if (strcmp(value, "non-ignorable") == 0)
|
||||
uvalue = UCOL_NON_IGNORABLE;
|
||||
else if (strcmp(value, "lower") == 0)
|
||||
uvalue = UCOL_LOWER_FIRST;
|
||||
else if (strcmp(value, "upper") == 0)
|
||||
uvalue = UCOL_UPPER_FIRST;
|
||||
else
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
|
||||
if (uattr != -1 && uvalue != -1)
|
||||
ucol_setAttribute(collator, uattr, uvalue, &status);
|
||||
|
||||
/*
|
||||
* Pretend the error came from ucol_open(), for consistent error
|
||||
* message across ICU versions.
|
||||
*/
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("could not open collator for locale \"%s\": %s",
|
||||
loc, u_errorName(status))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* USE_ICU */
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user