1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00

Remove ts_locale.c's t_isdigit(), t_isspace(), t_isprint()

These do the same thing as the standard isdigit(), isspace(), and
isprint() but with multibyte and encoding support.  But all the
callers are only interested in analyzing single-byte ASCII characters.
So this extra layer is overkill and we can replace the uses with the
standard functions.

All the t_is*() functions in ts_locale.c are under scrutiny because
they don't use the common locale provider framework but instead use
the global libc locale settings.  For the functions being touched by
this patch, we don't need all that anyway, as mentioned above, so the
simplest solution is to just remove them.  The few remaining t_is*()
functions will need a different treatment in a separate patch.

pg_trgm has some compile-time options with macros such as
KEEPONLYALNUM.  These are not documented, and the non-default variant
is not supported by any test cases.  As part of this undertaking, I'm
removing the non-default variant, as it is in the way of cleanup.  So
in this case, the not-KEEPONLYALNUM code path is gone.

Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://www.postgresql.org/message-id/flat/653f3b84-fc87-45a7-9a0c-bfb4fcab3e7d%40eisentraut.org
This commit is contained in:
Peter Eisentraut
2024-12-17 12:48:58 +01:00
parent 60be3f9f0a
commit d3aad4ac57
13 changed files with 49 additions and 103 deletions

View File

@@ -47,7 +47,7 @@ findwrd(char *in, char **end, uint16 *flags)
char *lastchar;
/* Skip leading spaces */
while (*in && t_isspace(in))
while (*in && isspace((unsigned char) *in))
in += pg_mblen(in);
/* Return NULL on empty lines */
@@ -60,7 +60,7 @@ findwrd(char *in, char **end, uint16 *flags)
lastchar = start = in;
/* Find end of word */
while (*in && !t_isspace(in))
while (*in && !isspace((unsigned char) *in))
{
lastchar = in;
in += pg_mblen(in);

View File

@@ -190,7 +190,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
ptr = line;
/* is it a comment? */
while (*ptr && t_isspace(ptr))
while (*ptr && isspace((unsigned char) *ptr))
ptr += pg_mblen(ptr);
if (t_iseq(ptr, '#') || *ptr == '\0' ||
@@ -212,7 +212,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
errmsg("unexpected delimiter")));
state = TR_WAITSUBS;
}
else if (!t_isspace(ptr))
else if (!isspace((unsigned char) *ptr))
{
beginwrd = ptr;
state = TR_INLEX;
@@ -225,7 +225,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
state = TR_WAITSUBS;
}
else if (t_isspace(ptr))
else if (isspace((unsigned char) *ptr))
{
newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
state = TR_WAITLEX;
@@ -245,7 +245,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
state = TR_INSUBS;
beginwrd = ptr + pg_mblen(ptr);
}
else if (!t_isspace(ptr))
else if (!isspace((unsigned char) *ptr))
{
useasis = false;
beginwrd = ptr;
@@ -254,7 +254,7 @@ thesaurusRead(const char *filename, DictThesaurus *d)
}
else if (state == TR_INSUBS)
{
if (t_isspace(ptr))
if (isspace((unsigned char) *ptr))
{
if (ptr == beginwrd)
ereport(ERROR,

View File

@@ -390,7 +390,7 @@ getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
*sflagset = next;
while (**sflagset)
{
if (t_isdigit(*sflagset))
if (isdigit((unsigned char) **sflagset))
{
if (!met_comma)
ereport(ERROR,
@@ -408,7 +408,7 @@ getNextFlagFromString(IspellDict *Conf, const char **sflagset, char *sflag)
*sflagset)));
met_comma = true;
}
else if (!t_isspace(*sflagset))
else if (!isspace((unsigned char) **sflagset))
{
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
@@ -542,7 +542,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
while (*s)
{
/* we allow only single-byte encoded flags, for faster processing */
if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s))
if (pg_mblen(s) == 1 && isprint((unsigned char) *s) && !isspace((unsigned char) *s))
s++;
else
{
@@ -558,7 +558,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename)
s = line;
while (*s)
{
if (t_isspace(s))
if (isspace((unsigned char) *s))
{
*s = '\0';
break;
@@ -799,7 +799,7 @@ get_nextfield(char **str, char *next)
{
if (t_iseq(*str, '#'))
return false;
else if (!t_isspace(*str))
else if (!isspace((unsigned char) **str))
{
int clen = pg_mblen(*str);
@@ -814,7 +814,7 @@ get_nextfield(char **str, char *next)
}
else /* state == PAE_INMASK */
{
if (t_isspace(*str))
if (isspace((unsigned char) **str))
{
*next = '\0';
return true;
@@ -925,7 +925,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
{
if (t_iseq(str, '#'))
return false;
else if (!t_isspace(str))
else if (!isspace((unsigned char) *str))
{
COPYCHAR(pmask, str);
pmask += pg_mblen(str);
@@ -939,7 +939,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
*pmask = '\0';
state = PAE_WAIT_FIND;
}
else if (!t_isspace(str))
else if (!isspace((unsigned char) *str))
{
COPYCHAR(pmask, str);
pmask += pg_mblen(str);
@@ -957,7 +957,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
prepl += pg_mblen(str);
state = PAE_INREPL;
}
else if (!t_isspace(str))
else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
@@ -974,7 +974,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
COPYCHAR(pfind, str);
pfind += pg_mblen(str);
}
else if (!t_isspace(str))
else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
@@ -991,7 +991,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
prepl += pg_mblen(str);
state = PAE_INREPL;
}
else if (!t_isspace(str))
else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
@@ -1008,7 +1008,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl)
COPYCHAR(prepl, str);
prepl += pg_mblen(str);
}
else if (!t_isspace(str))
else if (!isspace((unsigned char) *str))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("syntax error")));
@@ -1070,7 +1070,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
char *sflag;
int clen;
while (*s && t_isspace(s))
while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (!*s)
@@ -1080,7 +1080,7 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val)
/* Get flag without \n */
sflag = sbuf;
while (*s && !t_isspace(s) && *s != '\n')
while (*s && !isspace((unsigned char) *s) && *s != '\n')
{
clen = pg_mblen(s);
COPYCHAR(sflag, s);
@@ -1225,7 +1225,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
while ((recoded = tsearch_readline(&trst)) != NULL)
{
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
{
pfree(recoded);
continue;
@@ -1262,7 +1262,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
{
char *s = recoded + strlen("FLAG");
while (*s && t_isspace(s))
while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s)
@@ -1298,7 +1298,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
{
int fields_read;
if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#'))
if (*recoded == '\0' || isspace((unsigned char) *recoded) || t_iseq(recoded, '#'))
goto nextline;
fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
@@ -1461,9 +1461,9 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
s = findchar2(recoded, 'l', 'L');
if (s)
{
while (*s && !t_isspace(s))
while (*s && !isspace((unsigned char) *s))
s += pg_mblen(s);
while (*s && t_isspace(s))
while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s && pg_mblen(s) == 1)
@@ -1494,7 +1494,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
s = recoded + 4; /* we need non-lowercased string */
flagflags = 0;
while (*s && t_isspace(s))
while (*s && isspace((unsigned char) *s))
s += pg_mblen(s);
if (*s == '*')
@@ -1523,7 +1523,7 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
s++;
if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' ||
t_isspace(s))
isspace((unsigned char) *s))
{
oldformat = true;
goto nextline;
@@ -1750,7 +1750,7 @@ NISortDictionary(IspellDict *Conf)
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",
Conf->Spell[i]->p.flag)));
if (*end != '\0' && !t_isdigit(end) && !t_isspace(end))
if (*end != '\0' && !isdigit((unsigned char) *end) && !isspace((unsigned char) *end))
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid affix alias \"%s\"",

View File

@@ -31,36 +31,6 @@ static void tsearch_readline_callback(void *arg);
*/
#define WC_BUF_LEN 3
int
t_isdigit(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isdigit(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswdigit((wint_t) character[0]);
}
int
t_isspace(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isspace(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswspace((wint_t) character[0]);
}
int
t_isalpha(const char *ptr)
{
@@ -91,21 +61,6 @@ t_isalnum(const char *ptr)
return iswalnum((wint_t) character[0]);
}
int
t_isprint(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || database_ctype_is_c)
return isprint(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
return iswprint((wint_t) character[0]);
}
/*
* Set up to read a file using tsearch_readline(). This facility is

View File

@@ -88,7 +88,7 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
char *pbuf = line;
/* Trim trailing space (truncate the line at the first whitespace character) */
while (*pbuf && !t_isspace(pbuf))
while (*pbuf && !isspace((unsigned char) *pbuf))
pbuf += pg_mblen(pbuf);
*pbuf = '\0';