mirror of
https://github.com/postgres/postgres.git
synced 2025-06-07 11:02:12 +03:00
Refactor convert_case() to prepare for optimizations.
Upcoming optimizations will add complexity to convert_case(). This patch reorganizes slightly so that the complexity can be contained within the logic to convert the case of a single character, rather than mixing it in with logic to iterate through the string. Reviewed-by: Alexander Borisov <lex.borisov@gmail.com> Discussion: https://postgr.es/m/44005c3d-88f4-4a26-981f-fd82dfa8e313@gmail.com
This commit is contained in:
parent
3abe9dc188
commit
d3b2e5e1ab
@ -20,12 +20,20 @@
|
|||||||
#include "common/unicode_category.h"
|
#include "common/unicode_category.h"
|
||||||
#include "mb/pg_wchar.h"
|
#include "mb/pg_wchar.h"
|
||||||
|
|
||||||
|
enum CaseMapResult
|
||||||
|
{
|
||||||
|
CASEMAP_SELF,
|
||||||
|
CASEMAP_SIMPLE,
|
||||||
|
CASEMAP_SPECIAL,
|
||||||
|
};
|
||||||
|
|
||||||
static const pg_case_map *find_case_map(pg_wchar ucs);
|
static const pg_case_map *find_case_map(pg_wchar ucs);
|
||||||
static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||||
CaseKind str_casekind, bool full, WordBoundaryNext wbnext,
|
CaseKind str_casekind, bool full, WordBoundaryNext wbnext,
|
||||||
void *wbstate);
|
void *wbstate);
|
||||||
static bool check_special_conditions(int conditions, const char *str,
|
static enum CaseMapResult casemap(pg_wchar u1, CaseKind casekind, bool full,
|
||||||
size_t len, size_t offset);
|
const char *src, size_t srclen, size_t srcoff,
|
||||||
|
pg_wchar *u2, const pg_wchar **special);
|
||||||
|
|
||||||
pg_wchar
|
pg_wchar
|
||||||
unicode_lowercase_simple(pg_wchar code)
|
unicode_lowercase_simple(pg_wchar code)
|
||||||
@ -214,8 +222,9 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||||||
{
|
{
|
||||||
pg_wchar u1 = utf8_to_unicode((unsigned char *) src + srcoff);
|
pg_wchar u1 = utf8_to_unicode((unsigned char *) src + srcoff);
|
||||||
int u1len = unicode_utf8len(u1);
|
int u1len = unicode_utf8len(u1);
|
||||||
const pg_case_map *casemap = find_case_map(u1);
|
pg_wchar simple = 0;
|
||||||
const pg_special_case *special = NULL;
|
const pg_wchar *special = NULL;
|
||||||
|
enum CaseMapResult casemap_result;
|
||||||
|
|
||||||
if (str_casekind == CaseTitle)
|
if (str_casekind == CaseTitle)
|
||||||
{
|
{
|
||||||
@ -228,56 +237,47 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
|||||||
chr_casekind = CaseLower;
|
chr_casekind = CaseLower;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
casemap_result = casemap(u1, chr_casekind, full, src, srclen, srcoff,
|
||||||
* Find special case that matches the conditions, if any.
|
&simple, &special);
|
||||||
*
|
|
||||||
* Note: only a single special mapping per codepoint is currently
|
switch (casemap_result)
|
||||||
* supported, though Unicode allows for multiple special mappings for
|
|
||||||
* a single codepoint.
|
|
||||||
*/
|
|
||||||
if (full && casemap && casemap->special_case)
|
|
||||||
{
|
{
|
||||||
int16 conditions = casemap->special_case->conditions;
|
case CASEMAP_SELF:
|
||||||
|
/* no mapping; copy bytes from src */
|
||||||
|
Assert(simple == 0);
|
||||||
|
Assert(special == NULL);
|
||||||
|
if (result_len + u1len <= dstsize)
|
||||||
|
memcpy(dst + result_len, src + srcoff, u1len);
|
||||||
|
|
||||||
Assert(casemap->special_case->codepoint == u1);
|
result_len += u1len;
|
||||||
if (check_special_conditions(conditions, src, srclen, srcoff))
|
break;
|
||||||
special = casemap->special_case;
|
case CASEMAP_SIMPLE:
|
||||||
}
|
{
|
||||||
|
/* replace with single character */
|
||||||
|
pg_wchar u2 = simple;
|
||||||
|
pg_wchar u2len = unicode_utf8len(u2);
|
||||||
|
|
||||||
/* perform mapping, update result_len, and write to dst */
|
Assert(special == NULL);
|
||||||
if (special)
|
if (result_len + u2len <= dstsize)
|
||||||
{
|
unicode_to_utf8(u2, (unsigned char *) dst + result_len);
|
||||||
for (int i = 0; i < MAX_CASE_EXPANSION; i++)
|
|
||||||
{
|
|
||||||
pg_wchar u2 = special->map[chr_casekind][i];
|
|
||||||
size_t u2len = unicode_utf8len(u2);
|
|
||||||
|
|
||||||
if (u2 == '\0')
|
result_len += u2len;
|
||||||
break;
|
}
|
||||||
|
break;
|
||||||
|
case CASEMAP_SPECIAL:
|
||||||
|
/* replace with up to MAX_CASE_EXPANSION characters */
|
||||||
|
Assert(simple == 0);
|
||||||
|
for (int i = 0; i < MAX_CASE_EXPANSION && special[i]; i++)
|
||||||
|
{
|
||||||
|
pg_wchar u2 = special[i];
|
||||||
|
size_t u2len = unicode_utf8len(u2);
|
||||||
|
|
||||||
if (result_len + u2len <= dstsize)
|
if (result_len + u2len <= dstsize)
|
||||||
unicode_to_utf8(u2, (unsigned char *) dst + result_len);
|
unicode_to_utf8(u2, (unsigned char *) dst + result_len);
|
||||||
|
|
||||||
result_len += u2len;
|
result_len += u2len;
|
||||||
}
|
}
|
||||||
}
|
break;
|
||||||
else if (casemap)
|
|
||||||
{
|
|
||||||
pg_wchar u2 = casemap->simplemap[chr_casekind];
|
|
||||||
pg_wchar u2len = unicode_utf8len(u2);
|
|
||||||
|
|
||||||
if (result_len + u2len <= dstsize)
|
|
||||||
unicode_to_utf8(u2, (unsigned char *) dst + result_len);
|
|
||||||
|
|
||||||
result_len += u2len;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* no mapping; copy bytes from src */
|
|
||||||
if (result_len + u1len <= dstsize)
|
|
||||||
memcpy(dst + result_len, src + srcoff, u1len);
|
|
||||||
|
|
||||||
result_len += u1len;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
srcoff += u1len;
|
srcoff += u1len;
|
||||||
@ -351,6 +351,10 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unicode allows for special casing to be applied only under certain
|
||||||
|
* circumstances. The only currently-supported condition is Final_Sigma.
|
||||||
|
*/
|
||||||
static bool
|
static bool
|
||||||
check_special_conditions(int conditions, const char *str, size_t len,
|
check_special_conditions(int conditions, const char *str, size_t len,
|
||||||
size_t offset)
|
size_t offset)
|
||||||
@ -365,6 +369,51 @@ check_special_conditions(int conditions, const char *str, size_t len,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Map the given character to the requested case.
|
||||||
|
*
|
||||||
|
* If full is true, and a special case mapping is found and the conditions are
|
||||||
|
* met, 'special' is set to the mapping result (which is an array of up to
|
||||||
|
* MAX_CASE_EXPANSION characters) and CASEMAP_SPECIAL is returned.
|
||||||
|
*
|
||||||
|
* Otherwise, search for a simple mapping, and if found, set 'simple' to the
|
||||||
|
* result and return CASEMAP_SIMPLE.
|
||||||
|
*
|
||||||
|
* If no mapping is found, return CASEMAP_SELF, and the caller should copy the
|
||||||
|
* character without modification.
|
||||||
|
*/
|
||||||
|
static enum CaseMapResult
|
||||||
|
casemap(pg_wchar u1, CaseKind casekind, bool full,
|
||||||
|
const char *src, size_t srclen, size_t srcoff,
|
||||||
|
pg_wchar *simple, const pg_wchar **special)
|
||||||
|
{
|
||||||
|
const pg_case_map *map;
|
||||||
|
|
||||||
|
if (u1 < 0x80)
|
||||||
|
{
|
||||||
|
*simple = case_map[u1].simplemap[casekind];
|
||||||
|
|
||||||
|
return CASEMAP_SIMPLE;
|
||||||
|
}
|
||||||
|
|
||||||
|
map = find_case_map(u1);
|
||||||
|
|
||||||
|
if (map == NULL)
|
||||||
|
return CASEMAP_SELF;
|
||||||
|
|
||||||
|
if (full && map->special_case != NULL &&
|
||||||
|
check_special_conditions(map->special_case->conditions,
|
||||||
|
src, srclen, srcoff))
|
||||||
|
{
|
||||||
|
*special = map->special_case->map[casekind];
|
||||||
|
return CASEMAP_SPECIAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
*simple = map->simplemap[casekind];
|
||||||
|
|
||||||
|
return CASEMAP_SIMPLE;
|
||||||
|
}
|
||||||
|
|
||||||
/* find entry in simple case map, if any */
|
/* find entry in simple case map, if any */
|
||||||
static const pg_case_map *
|
static const pg_case_map *
|
||||||
find_case_map(pg_wchar ucs)
|
find_case_map(pg_wchar ucs)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user