/*----------------------------------------------------------------------- * * PostgreSQL locale utilities for ICU * * Portions Copyright (c) 2002-2025, PostgreSQL Global Development Group * * src/backend/utils/adt/pg_locale_icu.c * *----------------------------------------------------------------------- */ #include "postgres.h" #ifdef USE_ICU #include #include /* * ucol_strcollUTF8() was introduced in ICU 50, but it is buggy before ICU 53. * (see * ) */ #if U_ICU_VERSION_MAJOR_NUM >= 53 #define HAVE_UCOL_STRCOLLUTF8 1 #else #undef HAVE_UCOL_STRCOLLUTF8 #endif #endif #include "access/htup_details.h" #include "catalog/pg_database.h" #include "catalog/pg_collation.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/formatting.h" #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/syscache.h" /* * Size of stack buffer to use for string transformations, used to avoid heap * allocations in typical cases. This should be large enough that most strings * will fit, but small enough that we feel comfortable putting it on the * stack. 
 */
#define TEXTBUFLEN			1024

/*
 * Entry points that are defined later in this file and declared extern
 * here.
 */
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
						   ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
						   ssize_t srclen, pg_locale_t locale);
extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
						   ssize_t srclen, pg_locale_t locale);

/*
 * Everything from here down to the matching #endif (which follows the
 * collate_methods tables below) exists only in ICU-enabled builds.
 */
#ifdef USE_ICU

extern UCollator *pg_ucol_open(const char *loc_str);

/* Collation callbacks that are installed in the collate_methods tables. */
static int	strncoll_icu(const char *arg1, ssize_t len1,
						 const char *arg2, ssize_t len2,
						 pg_locale_t locale);
static size_t strnxfrm_icu(char *dest, size_t destsize,
						   const char *src, ssize_t srclen,
						   pg_locale_t locale);
static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
								  const char *src, ssize_t srclen,
								  pg_locale_t locale);
extern char *get_collation_actual_version_icu(const char *collcollate);

/*
 * Signature of the case-mapping callbacks handed to icu_convert_case()
 * (u_strToLower, u_strToUpper and u_strToTitle_default_BI in this file).
 */
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
									 const UChar *src, int32_t srcLength,
									 const char *locale,
									 UErrorCode *pErrorCode);

/*
 * Converter object for converting between ICU's UChar strings and C strings
 * in database encoding.  Since the database encoding doesn't change, we only
 * need one of these per session.
*/ static UConverter *icu_converter = NULL; static UCollator *make_icu_collator(const char *iculocstr, const char *icurules); static int strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); static size_t strnxfrm_prefix_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); #ifdef HAVE_UCOL_STRCOLLUTF8 static int strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); #endif static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); static void init_icu_converter(void); static size_t uchar_length(UConverter *converter, const char *str, int32_t len); static int32_t uchar_convert(UConverter *converter, UChar *dest, int32_t destlen, const char *src, int32_t srclen); static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); static size_t icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar); static void icu_set_collation_attributes(UCollator *collator, const char *loc, UErrorCode *status); static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, UChar **buff_dest, UChar *buff_source, int32_t len_source); static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode); static const struct collate_methods collate_methods_icu = { .strncoll = strncoll_icu, .strnxfrm = strnxfrm_icu, .strnxfrm_prefix = strnxfrm_prefix_icu, .strxfrm_is_safe = true, }; static const struct collate_methods collate_methods_icu_utf8 = { #ifdef HAVE_UCOL_STRCOLLUTF8 .strncoll = strncoll_icu_utf8, #else .strncoll = strncoll_icu, #endif .strnxfrm = strnxfrm_icu, .strnxfrm_prefix = strnxfrm_prefix_icu_utf8, .strxfrm_is_safe = true, }; #endif pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context) { #ifdef USE_ICU bool deterministic; const 
char *iculocstr; const char *icurules = NULL; UCollator *collator; pg_locale_t result; if (collid == DEFAULT_COLLATION_OID) { HeapTuple tp; Datum datum; bool isnull; tp = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for database %u", MyDatabaseId); /* default database collation is always deterministic */ deterministic = true; datum = SysCacheGetAttrNotNull(DATABASEOID, tp, Anum_pg_database_datlocale); iculocstr = TextDatumGetCString(datum); datum = SysCacheGetAttr(DATABASEOID, tp, Anum_pg_database_daticurules, &isnull); if (!isnull) icurules = TextDatumGetCString(datum); ReleaseSysCache(tp); } else { Form_pg_collation collform; HeapTuple tp; Datum datum; bool isnull; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for collation %u", collid); collform = (Form_pg_collation) GETSTRUCT(tp); deterministic = collform->collisdeterministic; datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); iculocstr = TextDatumGetCString(datum); datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); if (!isnull) icurules = TextDatumGetCString(datum); ReleaseSysCache(tp); } collator = make_icu_collator(iculocstr, icurules); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->info.icu.locale = MemoryContextStrdup(context, iculocstr); result->info.icu.ucol = collator; result->provider = COLLPROVIDER_ICU; result->deterministic = deterministic; result->collate_is_c = false; result->ctype_is_c = false; if (GetDatabaseEncoding() == PG_UTF8) result->collate = &collate_methods_icu_utf8; else result->collate = &collate_methods_icu; return result; #else /* could get here if a collation was created by a build with ICU */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("ICU is not supported in this build"))); return NULL; #endif } #ifdef USE_ICU /* * Wrapper around 
ucol_open() to handle API differences for older ICU * versions. * * Ensure that no path leaks a UCollator. */ UCollator * pg_ucol_open(const char *loc_str) { UCollator *collator; UErrorCode status; const char *orig_str = loc_str; char *fixed_str = NULL; /* * Must never open default collator, because it depends on the environment * and may change at any time. Should not happen, but check here to catch * bugs that might be hard to catch otherwise. * * NB: the default collator is not the same as the collator for the root * locale. The root locale may be specified as the empty string, "und", or * "root". The default collator is opened by passing NULL to ucol_open(). */ if (loc_str == NULL) elog(ERROR, "opening default collator is not supported"); /* * In ICU versions 54 and earlier, "und" is not a recognized spelling of * the root locale. If the first component of the locale is "und", replace * with "root" before opening. */ if (U_ICU_VERSION_MAJOR_NUM < 55) { char lang[ULOC_LANG_CAPACITY]; status = U_ZERO_ERROR; uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status); if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("could not get language from locale \"%s\": %s", loc_str, u_errorName(status)))); } if (strcmp(lang, "und") == 0) { const char *remainder = loc_str + strlen("und"); fixed_str = palloc(strlen("root") + strlen(remainder) + 1); strcpy(fixed_str, "root"); strcat(fixed_str, remainder); loc_str = fixed_str; } } status = U_ZERO_ERROR; collator = ucol_open(loc_str, &status); if (U_FAILURE(status)) ereport(ERROR, /* use original string for error report */ (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("could not open collator for locale \"%s\": %s", orig_str, u_errorName(status)))); if (U_ICU_VERSION_MAJOR_NUM < 54) { status = U_ZERO_ERROR; icu_set_collation_attributes(collator, loc_str, &status); /* * Pretend the error came from ucol_open(), for consistent error * message 
across ICU versions. */ if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { ucol_close(collator); ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("could not open collator for locale \"%s\": %s", orig_str, u_errorName(status)))); } } if (fixed_str != NULL) pfree(fixed_str); return collator; } /* * Create a UCollator with the given locale string and rules. * * Ensure that no path leaks a UCollator. */ static UCollator * make_icu_collator(const char *iculocstr, const char *icurules) { if (!icurules) { /* simple case without rules */ return pg_ucol_open(iculocstr); } else { UCollator *collator_std_rules; UCollator *collator_all_rules; const UChar *std_rules; UChar *my_rules; UChar *all_rules; int32_t length; int32_t total; UErrorCode status; /* * If rules are specified, we extract the rules of the standard * collation, add our own rules, and make a new collator with the * combined rules. */ icu_to_uchar(&my_rules, icurules, strlen(icurules)); collator_std_rules = pg_ucol_open(iculocstr); std_rules = ucol_getRules(collator_std_rules, &length); total = u_strlen(std_rules) + u_strlen(my_rules) + 1; /* avoid leaking collator on OOM */ all_rules = palloc_extended(sizeof(UChar) * total, MCXT_ALLOC_NO_OOM); if (!all_rules) { ucol_close(collator_std_rules); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); } u_strcpy(all_rules, std_rules); u_strcat(all_rules, my_rules); ucol_close(collator_std_rules); status = U_ZERO_ERROR; collator_all_rules = ucol_openRules(all_rules, u_strlen(all_rules), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status); if (U_FAILURE(status)) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s", iculocstr, icurules, u_errorName(status)))); } return collator_all_rules; } } size_t strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { int32_t len_uchar; int32_t len_conv; UChar 
*buff_uchar; UChar *buff_conv; size_t result_len; len_uchar = icu_to_uchar(&buff_uchar, src, srclen); len_conv = icu_convert_case(u_strToLower, locale, &buff_conv, buff_uchar, len_uchar); result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv); pfree(buff_uchar); pfree(buff_conv); return result_len; } size_t strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { int32_t len_uchar; int32_t len_conv; UChar *buff_uchar; UChar *buff_conv; size_t result_len; len_uchar = icu_to_uchar(&buff_uchar, src, srclen); len_conv = icu_convert_case(u_strToTitle_default_BI, locale, &buff_conv, buff_uchar, len_uchar); result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv); pfree(buff_uchar); pfree(buff_conv); return result_len; } size_t strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { int32_t len_uchar; int32_t len_conv; UChar *buff_uchar; UChar *buff_conv; size_t result_len; len_uchar = icu_to_uchar(&buff_uchar, src, srclen); len_conv = icu_convert_case(u_strToUpper, locale, &buff_conv, buff_uchar, len_uchar); result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv); pfree(buff_uchar); pfree(buff_conv); return result_len; } /* * strncoll_icu_utf8 * * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given * database encoding. An argument length of -1 means the string is * NUL-terminated. 
*/ #ifdef HAVE_UCOL_STRCOLLUTF8 int strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale) { int result; UErrorCode status; Assert(locale->provider == COLLPROVIDER_ICU); Assert(GetDatabaseEncoding() == PG_UTF8); status = U_ZERO_ERROR; result = ucol_strcollUTF8(locale->info.icu.ucol, arg1, len1, arg2, len2, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("collation failed: %s", u_errorName(status)))); return result; } #endif /* 'srclen' of -1 means the strings are NUL-terminated */ size_t strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { char sbuf[TEXTBUFLEN]; char *buf = sbuf; UChar *uchar; int32_t ulen; size_t uchar_bsize; Size result_bsize; Assert(locale->provider == COLLPROVIDER_ICU); init_icu_converter(); ulen = uchar_length(icu_converter, src, srclen); uchar_bsize = (ulen + 1) * sizeof(UChar); if (uchar_bsize > TEXTBUFLEN) buf = palloc(uchar_bsize); uchar = (UChar *) buf; ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); result_bsize = ucol_getSortKey(locale->info.icu.ucol, uchar, ulen, (uint8_t *) dest, destsize); /* * ucol_getSortKey() counts the nul-terminator in the result length, but * this function should not. 
*/ Assert(result_bsize > 0); result_bsize--; if (buf != sbuf) pfree(buf); /* if dest is defined, it should be nul-terminated */ Assert(result_bsize >= destsize || dest[result_bsize] == '\0'); return result_bsize; } /* 'srclen' of -1 means the strings are NUL-terminated */ size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { size_t result; UCharIterator iter; uint32_t state[2]; UErrorCode status; Assert(locale->provider == COLLPROVIDER_ICU); Assert(GetDatabaseEncoding() == PG_UTF8); uiter_setUTF8(&iter, src, srclen); state[0] = state[1] = 0; /* won't need that again */ status = U_ZERO_ERROR; result = ucol_nextSortKeyPart(locale->info.icu.ucol, &iter, state, (uint8_t *) dest, destsize, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("sort key generation failed: %s", u_errorName(status)))); return result; } char * get_collation_actual_version_icu(const char *collcollate) { UCollator *collator; UVersionInfo versioninfo; char buf[U_MAX_VERSION_STRING_LENGTH]; collator = pg_ucol_open(collcollate); ucol_getVersion(collator, versioninfo); ucol_close(collator); u_versionToString(versioninfo, buf); return pstrdup(buf); } /* * Convert a string in the database encoding into a string of UChars. * * The source string at buff is of length nbytes * (it needn't be nul-terminated) * * *buff_uchar receives a pointer to the palloc'd result string, and * the function's result is the number of UChars generated. * * The result string is nul-terminated, though most callers rely on the * result length instead. */ static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) { int32_t len_uchar; init_icu_converter(); len_uchar = uchar_length(icu_converter, buff, nbytes); *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar)); len_uchar = uchar_convert(icu_converter, *buff_uchar, len_uchar + 1, buff, nbytes); return len_uchar; } /* * Convert a string of UChars into the database encoding. 
* * The source string at buff_uchar is of length len_uchar * (it needn't be nul-terminated) * * *result receives a pointer to the palloc'd result string, and the * function's result is the number of bytes generated (not counting nul). * * The result string is nul-terminated. */ static size_t icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar) { UErrorCode status; int32_t len_result; init_icu_converter(); status = U_ZERO_ERROR; len_result = ucnv_fromUChars(icu_converter, NULL, 0, buff_uchar, len_uchar, &status); if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) ereport(ERROR, (errmsg("%s failed: %s", "ucnv_fromUChars", u_errorName(status)))); if (len_result + 1 > destsize) return len_result; status = U_ZERO_ERROR; len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1, buff_uchar, len_uchar, &status); if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) ereport(ERROR, (errmsg("%s failed: %s", "ucnv_fromUChars", u_errorName(status)))); return len_result; } static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, UChar **buff_dest, UChar *buff_source, int32_t len_source) { UErrorCode status; int32_t len_dest; len_dest = len_source; /* try first with same length */ *buff_dest = palloc(len_dest * sizeof(**buff_dest)); status = U_ZERO_ERROR; len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status); if (status == U_BUFFER_OVERFLOW_ERROR) { /* try again with adjusted length */ pfree(*buff_dest); *buff_dest = palloc(len_dest * sizeof(**buff_dest)); status = U_ZERO_ERROR; len_dest = func(*buff_dest, len_dest, buff_source, len_source, mylocale->info.icu.locale, &status); } if (U_FAILURE(status)) ereport(ERROR, (errmsg("case conversion failed: %s", u_errorName(status)))); return len_dest; } static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode) { 
return u_strToTitle(dest, destCapacity, src, srcLength, NULL, locale, pErrorCode); } /* * strncoll_icu * * Convert the arguments from the database encoding to UChar strings, then * call ucol_strcoll(). An argument length of -1 means that the string is * NUL-terminated. * * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(), * caller should call that instead. */ static int strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale) { char sbuf[TEXTBUFLEN]; char *buf = sbuf; int32_t ulen1; int32_t ulen2; size_t bufsize1; size_t bufsize2; UChar *uchar1, *uchar2; int result; Assert(locale->provider == COLLPROVIDER_ICU); /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */ #ifdef HAVE_UCOL_STRCOLLUTF8 Assert(GetDatabaseEncoding() != PG_UTF8); #endif init_icu_converter(); ulen1 = uchar_length(icu_converter, arg1, len1); ulen2 = uchar_length(icu_converter, arg2, len2); bufsize1 = (ulen1 + 1) * sizeof(UChar); bufsize2 = (ulen2 + 1) * sizeof(UChar); if (bufsize1 + bufsize2 > TEXTBUFLEN) buf = palloc(bufsize1 + bufsize2); uchar1 = (UChar *) buf; uchar2 = (UChar *) (buf + bufsize1); ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1); ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2); result = ucol_strcoll(locale->info.icu.ucol, uchar1, ulen1, uchar2, ulen2); if (buf != sbuf) pfree(buf); return result; } /* 'srclen' of -1 means the strings are NUL-terminated */ static size_t strnxfrm_prefix_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { char sbuf[TEXTBUFLEN]; char *buf = sbuf; UCharIterator iter; uint32_t state[2]; UErrorCode status; int32_t ulen = -1; UChar *uchar = NULL; size_t uchar_bsize; Size result_bsize; Assert(locale->provider == COLLPROVIDER_ICU); /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */ Assert(GetDatabaseEncoding() != PG_UTF8); init_icu_converter(); ulen = uchar_length(icu_converter, src, 
srclen); uchar_bsize = (ulen + 1) * sizeof(UChar); if (uchar_bsize > TEXTBUFLEN) buf = palloc(uchar_bsize); uchar = (UChar *) buf; ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen); uiter_setString(&iter, uchar, ulen); state[0] = state[1] = 0; /* won't need that again */ status = U_ZERO_ERROR; result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol, &iter, state, (uint8_t *) dest, destsize, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("sort key generation failed: %s", u_errorName(status)))); return result_bsize; } static void init_icu_converter(void) { const char *icu_encoding_name; UErrorCode status; UConverter *conv; if (icu_converter) return; /* already done */ icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding()); if (!icu_encoding_name) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("encoding \"%s\" not supported by ICU", pg_encoding_to_char(GetDatabaseEncoding())))); status = U_ZERO_ERROR; conv = ucnv_open(icu_encoding_name, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("could not open ICU converter for encoding \"%s\": %s", icu_encoding_name, u_errorName(status)))); icu_converter = conv; } /* * Find length, in UChars, of given string if converted to UChar string. * * A length of -1 indicates that the input string is NUL-terminated. */ static size_t uchar_length(UConverter *converter, const char *str, int32_t len) { UErrorCode status = U_ZERO_ERROR; int32_t ulen; ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status); if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) ereport(ERROR, (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status)))); return ulen; } /* * Convert the given source string into a UChar string, stored in dest, and * return the length (in UChars). * * A srclen of -1 indicates that the input string is NUL-terminated. 
*/ static int32_t uchar_convert(UConverter *converter, UChar *dest, int32_t destlen, const char *src, int32_t srclen) { UErrorCode status = U_ZERO_ERROR; int32_t ulen; status = U_ZERO_ERROR; ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status)))); return ulen; } /* * Parse collation attributes from the given locale string and apply them to * the open collator. * * First, the locale string is canonicalized to an ICU format locale ID such * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies * the key-value arguments. * * Starting with ICU version 54, the attributes are processed automatically by * ucol_open(), so this is only necessary for emulating this behavior on older * versions. */ pg_attribute_unused() static void icu_set_collation_attributes(UCollator *collator, const char *loc, UErrorCode *status) { int32_t len; char *icu_locale_id; char *lower_str; char *str; char *token; /* * The input locale may be a BCP 47 language tag, e.g. * "und-u-kc-ks-level1", which expresses the same attributes in a * different form. It will be converted to the equivalent ICU format * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by * uloc_canonicalize(). 
*/ *status = U_ZERO_ERROR; len = uloc_canonicalize(loc, NULL, 0, status); icu_locale_id = palloc(len + 1); *status = U_ZERO_ERROR; len = uloc_canonicalize(loc, icu_locale_id, len + 1, status); if (U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING) return; lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id)); pfree(icu_locale_id); str = strchr(lower_str, '@'); if (!str) return; str++; while ((token = strsep(&str, ";"))) { char *e = strchr(token, '='); if (e) { char *name; char *value; UColAttribute uattr; UColAttributeValue uvalue; *status = U_ZERO_ERROR; *e = '\0'; name = token; value = e + 1; /* * See attribute name and value lists in ICU i18n/coll.cpp */ if (strcmp(name, "colstrength") == 0) uattr = UCOL_STRENGTH; else if (strcmp(name, "colbackwards") == 0) uattr = UCOL_FRENCH_COLLATION; else if (strcmp(name, "colcaselevel") == 0) uattr = UCOL_CASE_LEVEL; else if (strcmp(name, "colcasefirst") == 0) uattr = UCOL_CASE_FIRST; else if (strcmp(name, "colalternate") == 0) uattr = UCOL_ALTERNATE_HANDLING; else if (strcmp(name, "colnormalization") == 0) uattr = UCOL_NORMALIZATION_MODE; else if (strcmp(name, "colnumeric") == 0) uattr = UCOL_NUMERIC_COLLATION; else /* ignore if unknown */ continue; if (strcmp(value, "primary") == 0) uvalue = UCOL_PRIMARY; else if (strcmp(value, "secondary") == 0) uvalue = UCOL_SECONDARY; else if (strcmp(value, "tertiary") == 0) uvalue = UCOL_TERTIARY; else if (strcmp(value, "quaternary") == 0) uvalue = UCOL_QUATERNARY; else if (strcmp(value, "identical") == 0) uvalue = UCOL_IDENTICAL; else if (strcmp(value, "no") == 0) uvalue = UCOL_OFF; else if (strcmp(value, "yes") == 0) uvalue = UCOL_ON; else if (strcmp(value, "shifted") == 0) uvalue = UCOL_SHIFTED; else if (strcmp(value, "non-ignorable") == 0) uvalue = UCOL_NON_IGNORABLE; else if (strcmp(value, "lower") == 0) uvalue = UCOL_LOWER_FIRST; else if (strcmp(value, "upper") == 0) uvalue = UCOL_UPPER_FIRST; else { *status = U_ILLEGAL_ARGUMENT_ERROR; break; } 
ucol_setAttribute(collator, uattr, uvalue, status); } } pfree(lower_str); } #endif /* USE_ICU */