1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-25 13:17:41 +03:00

Add support for Unicode case folding.

Expand case mapping tables to include entries for case folding, which
are parsed from CaseFolding.txt.

Discussion: https://postgr.es/m/a1886ddfcd8f60cb3e905c93009b646b4cfb74c5.camel%40j-davis.com
This commit is contained in:
Jeff Davis
2025-01-23 09:06:50 -08:00
parent 7921927bbb
commit 4e7f62bc38
7 changed files with 3280 additions and 3125 deletions

View File

@@ -51,6 +51,14 @@ unicode_uppercase_simple(pg_wchar code)
return map ? map->simplemap[CaseUpper] : code;
}
/*
 * unicode_casefold_simple()
 *
 * Return the simple case-folded form of code.
 *
 * The code point is looked up with find_case_map(). If an entry is found,
 * its simplemap[CaseFold] value is returned; otherwise code is returned
 * unchanged.
 */
pg_wchar
unicode_casefold_simple(pg_wchar code)
{
	const pg_case_map *entry = find_case_map(code);

	/* No entry for this code point: it folds to itself. */
	if (entry == NULL)
		return code;

	return entry->simplemap[CaseFold];
}
/*
* unicode_strlower()
*
@@ -142,6 +150,30 @@ unicode_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
NULL);
}
/*
 * unicode_strfold()
 *
 * Apply case folding to src and return the length of the folded result,
 * not counting the terminating NUL.
 *
 * src must be UTF-8 encoded. A negative srclen means src is NUL-terminated.
 *
 * The folded string is written to dst and is truncated if it does not fit
 * in dstsize bytes. dst is NUL-terminated only when dstsize exceeds the
 * result length.
 *
 * dst may be NULL when dstsize is zero, which lets a caller measure the
 * required buffer size before allocating it.
 *
 * The work is delegated to convert_case() with CaseFold as the requested
 * case kind; "full" is passed through as-is.
 */
size_t
unicode_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
				bool full)
{
	size_t		folded_len;

	/* The two trailing convert_case() arguments are not used here. */
	folded_len = convert_case(dst, dstsize, src, srclen,
							  CaseFold, full,
							  NULL, NULL);

	return folded_len;
}
/*
* Implement Unicode Default Case Conversion algorithm.
*