mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Remove ts_locale.c's lowerstr()
lowerstr() and lowerstr_with_len() in ts_locale.c do the same thing as str_tolower() that the rest of the system uses, except that the former don't use the common locale provider framework but instead use the global libc locale settings. This patch replaces uses of lowerstr*() with str_tolower(..., DEFAULT_COLLATION_OID). For instances that use a libc locale globally, this will result in exactly the same behavior. For instances that use other locale providers, you now get consistent behavior and are no longer dependent on the libc locale settings (for this case; there are others). Most uses of these functions are for processing dictionary and configuration files. In those cases, using the default collation seems appropriate. At least we don't have a more specific collation available. But the code in contrib/pg_trgm should really depend on the collation of the columns being processed. This is not done here; it can be done in a separate patch. (You can probably construct some edge cases where this change would create some locale-related upgrade incompatibility, for example if before you used a combination of ICU and a differently-behaving libc locale. We can document this in the release notes, but I don't think there is anything more we can do about this.) Reviewed-by: Jeff Davis <pgsql@j-davis.com> Discussion: https://www.postgresql.org/message-id/flat/653f3b84-fc87-45a7-9a0c-bfb4fcab3e7d%40eisentraut.org
This commit is contained in:
@@ -13,11 +13,12 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/pg_collation_d.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "tsearch/dicts/spell.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_public.h"
|
||||
#include "utils/fmgrprotos.h"
|
||||
#include "utils/formatting.h"
|
||||
|
||||
|
||||
typedef struct
|
||||
@@ -72,7 +73,7 @@ dispell_init(PG_FUNCTION_ARGS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("multiple StopWords parameters")));
|
||||
readstoplist(defGetString(defel), &(d->stoplist), lowerstr);
|
||||
readstoplist(defGetString(defel), &(d->stoplist), str_tolower);
|
||||
stoploaded = true;
|
||||
}
|
||||
else
|
||||
@@ -121,7 +122,7 @@ dispell_lexize(PG_FUNCTION_ARGS)
|
||||
if (len <= 0)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
txt = lowerstr_with_len(in, len);
|
||||
txt = str_tolower(in, len, DEFAULT_COLLATION_OID);
|
||||
res = NINormalizeWord(&(d->obj), txt);
|
||||
|
||||
if (res == NULL)
|
||||
|
||||
@@ -13,10 +13,11 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/pg_collation_d.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_public.h"
|
||||
#include "utils/fmgrprotos.h"
|
||||
#include "utils/formatting.h"
|
||||
|
||||
|
||||
typedef struct
|
||||
@@ -47,7 +48,7 @@ dsimple_init(PG_FUNCTION_ARGS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("multiple StopWords parameters")));
|
||||
readstoplist(defGetString(defel), &d->stoplist, lowerstr);
|
||||
readstoplist(defGetString(defel), &d->stoplist, str_tolower);
|
||||
stoploaded = true;
|
||||
}
|
||||
else if (strcmp(defel->defname, "accept") == 0)
|
||||
@@ -80,7 +81,7 @@ dsimple_lexize(PG_FUNCTION_ARGS)
|
||||
char *txt;
|
||||
TSLexeme *res;
|
||||
|
||||
txt = lowerstr_with_len(in, len);
|
||||
txt = str_tolower(in, len, DEFAULT_COLLATION_OID);
|
||||
|
||||
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
|
||||
{
|
||||
|
||||
@@ -13,10 +13,12 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/pg_collation_d.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_public.h"
|
||||
#include "utils/fmgrprotos.h"
|
||||
#include "utils/formatting.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -183,8 +185,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
|
||||
}
|
||||
else
|
||||
{
|
||||
d->syn[cur].in = lowerstr(starti);
|
||||
d->syn[cur].out = lowerstr(starto);
|
||||
d->syn[cur].in = str_tolower(starti, strlen(starti), DEFAULT_COLLATION_OID);
|
||||
d->syn[cur].out = str_tolower(starto, strlen(starto), DEFAULT_COLLATION_OID);
|
||||
}
|
||||
|
||||
d->syn[cur].outlen = strlen(starto);
|
||||
@@ -223,7 +225,7 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
|
||||
if (d->case_sensitive)
|
||||
key.in = pnstrdup(in, len);
|
||||
else
|
||||
key.in = lowerstr_with_len(in, len);
|
||||
key.in = str_tolower(in, len, DEFAULT_COLLATION_OID);
|
||||
|
||||
key.out = NULL;
|
||||
|
||||
|
||||
@@ -66,6 +66,7 @@
|
||||
#include "miscadmin.h"
|
||||
#include "tsearch/dicts/spell.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "utils/formatting.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
@@ -169,7 +170,7 @@ cpstrdup(IspellDict *Conf, const char *str)
|
||||
|
||||
|
||||
/*
|
||||
* Apply lowerstr(), producing a temporary result (in the buildCxt).
|
||||
* Apply str_tolower(), producing a temporary result (in the buildCxt).
|
||||
*/
|
||||
static char *
|
||||
lowerstr_ctx(IspellDict *Conf, const char *src)
|
||||
@@ -178,7 +179,7 @@ lowerstr_ctx(IspellDict *Conf, const char *src)
|
||||
char *dst;
|
||||
|
||||
saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
|
||||
dst = lowerstr(src);
|
||||
dst = str_tolower(src, strlen(src), DEFAULT_COLLATION_OID);
|
||||
MemoryContextSwitchTo(saveCtx);
|
||||
|
||||
return dst;
|
||||
@@ -1449,7 +1450,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
|
||||
|
||||
while ((recoded = tsearch_readline(&trst)) != NULL)
|
||||
{
|
||||
pstr = lowerstr(recoded);
|
||||
pstr = str_tolower(recoded, strlen(recoded), DEFAULT_COLLATION_OID);
|
||||
|
||||
/* Skip comments and empty lines */
|
||||
if (*pstr == '#' || *pstr == '\n')
|
||||
|
||||
@@ -197,92 +197,3 @@ tsearch_readline_callback(void *arg)
|
||||
stp->lineno,
|
||||
stp->filename);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* lowerstr --- fold null-terminated string to lower case
|
||||
*
|
||||
* Returned string is palloc'd
|
||||
*/
|
||||
char *
|
||||
lowerstr(const char *str)
|
||||
{
|
||||
return lowerstr_with_len(str, strlen(str));
|
||||
}
|
||||
|
||||
/*
|
||||
* lowerstr_with_len --- fold string to lower case
|
||||
*
|
||||
* Input string need not be null-terminated.
|
||||
*
|
||||
* Returned string is palloc'd
|
||||
*/
|
||||
char *
|
||||
lowerstr_with_len(const char *str, int len)
|
||||
{
|
||||
char *out;
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (len == 0)
|
||||
return pstrdup("");
|
||||
|
||||
/*
|
||||
* Use wide char code only when max encoding length > 1 and ctype != C.
|
||||
* Some operating systems fail with multi-byte encodings and a C locale.
|
||||
* Also, for a C locale there is no need to process as multibyte. From
|
||||
* backend/utils/adt/oracle_compat.c Teodor
|
||||
*/
|
||||
if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
|
||||
{
|
||||
wchar_t *wstr,
|
||||
*wptr;
|
||||
int wlen;
|
||||
|
||||
/*
|
||||
* alloc number of wchar_t for worst case, len contains number of
|
||||
* bytes >= number of characters and alloc 1 wchar_t for 0, because
|
||||
* wchar2char wants zero-terminated string
|
||||
*/
|
||||
wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
|
||||
|
||||
wlen = char2wchar(wstr, len + 1, str, len, mylocale);
|
||||
Assert(wlen <= len);
|
||||
|
||||
while (*wptr)
|
||||
{
|
||||
*wptr = towlower((wint_t) *wptr);
|
||||
wptr++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Alloc result string for worst case + '\0'
|
||||
*/
|
||||
len = pg_database_encoding_max_length() * wlen + 1;
|
||||
out = (char *) palloc(len);
|
||||
|
||||
wlen = wchar2char(out, wstr, len, mylocale);
|
||||
|
||||
pfree(wstr);
|
||||
|
||||
if (wlen < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||
errmsg("conversion from wchar_t to server encoding failed: %m")));
|
||||
Assert(wlen < len);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char *ptr = str;
|
||||
char *outptr;
|
||||
|
||||
outptr = out = (char *) palloc(sizeof(char) * (len + 1));
|
||||
while ((ptr - str) < len && *ptr)
|
||||
{
|
||||
*outptr++ = tolower(TOUCHAR(ptr));
|
||||
ptr++;
|
||||
}
|
||||
*outptr = '\0';
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "catalog/pg_collation_d.h"
|
||||
#include "miscadmin.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_public.h"
|
||||
@@ -65,7 +66,7 @@ get_tsearch_config_filename(const char *basename,
|
||||
* or palloc a new version.
|
||||
*/
|
||||
void
|
||||
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
|
||||
readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *, size_t, Oid))
|
||||
{
|
||||
char **stop = NULL;
|
||||
|
||||
@@ -115,7 +116,7 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
|
||||
|
||||
if (wordop)
|
||||
{
|
||||
stop[s->len] = wordop(line);
|
||||
stop[s->len] = wordop(line, strlen(line), DEFAULT_COLLATION_OID);
|
||||
if (stop[s->len] != line)
|
||||
pfree(line);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user