1
0
mirror of https://github.com/postgres/postgres.git synced 2026-01-26 09:41:40 +03:00

fuzzystrmatch: use pg_ascii_toupper().

fuzzystrmatch is designed for ASCII, so no need to rely on the global
LC_CTYPE setting.

Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/dd0cdd1f-e786-426e-b336-1ffa9b2f1fc6%40eisentraut.org
This commit is contained in:
Jeff Davis
2026-01-12 08:54:04 -08:00
parent 2defd00062
commit b96a9fd76f

View File

@@ -62,7 +62,7 @@ static const char *const soundex_table = "01230120022455012623010202";
static char
soundex_code(char letter)
{
letter = toupper((unsigned char) letter);
letter = pg_ascii_toupper((unsigned char) letter);
/* Defend against non-ASCII letters */
if (letter >= 'A' && letter <= 'Z')
return soundex_table[letter - 'A'];
@@ -122,16 +122,21 @@ static const char _codes[26] = {
static int
getcode(char c)
{
if (isalpha((unsigned char) c))
{
c = toupper((unsigned char) c);
/* Defend against non-ASCII letters */
if (c >= 'A' && c <= 'Z')
return _codes[c - 'A'];
}
c = pg_ascii_toupper((unsigned char) c);
/* Defend against non-ASCII letters */
if (c >= 'A' && c <= 'Z')
return _codes[c - 'A'];
return 0;
}
/*
 * ascii_isalpha
 *		Report whether c is one of the 52 ASCII letters.
 *
 * Only the ranges 'a'..'z' and 'A'..'Z' are accepted; every other value
 * yields false.  The test is done with plain character comparisons and
 * makes no <ctype.h> call.
 */
static bool
ascii_isalpha(char c)
{
	/* lower-case range */
	if (c >= 'a' && c <= 'z')
		return true;

	/* upper-case range */
	if (c >= 'A' && c <= 'Z')
		return true;

	return false;
}
#define isvowel(c) (getcode(c) & 1) /* AEIOU */
/* These letters are passed through unchanged */
@@ -301,18 +306,18 @@ metaphone(PG_FUNCTION_ARGS)
* accessing the array directly... */
/* Look at the next letter in the word */
#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
#define Next_Letter (pg_ascii_toupper((unsigned char) word[w_idx+1]))
/* Look at the current letter in the word */
#define Curr_Letter (toupper((unsigned char) word[w_idx]))
#define Curr_Letter (pg_ascii_toupper((unsigned char) word[w_idx]))
/* Go N letters back. */
#define Look_Back_Letter(n) \
(w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
(w_idx >= (n) ? pg_ascii_toupper((unsigned char) word[w_idx-(n)]) : '\0')
/* Previous letter. I dunno, should this return null on failure? */
#define Prev_Letter (Look_Back_Letter(1))
/* Look two letters down. It makes sure you don't walk off the string. */
#define After_Next_Letter \
(Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
(Next_Letter != '\0' ? pg_ascii_toupper((unsigned char) word[w_idx+2]) : '\0')
#define Look_Ahead_Letter(n) pg_ascii_toupper((unsigned char) Lookahead(word+w_idx, n))
/* Allows us to safely look ahead an arbitrary # of letters */
@@ -340,7 +345,7 @@ Lookahead(char *word, int how_far)
#define Phone_Len (p_idx)
/* Note if a letter is a 'break' in the word */
#define Isbreak(c) (!isalpha((unsigned char) (c)))
#define Isbreak(c) (!ascii_isalpha((unsigned char) (c)))
static void
@@ -379,7 +384,7 @@ _metaphone(char *word, /* IN */
/*-- The first phoneme has to be processed specially. --*/
/* Find our first letter */
for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)
for (; !ascii_isalpha((unsigned char) (Curr_Letter)); w_idx++)
{
/* On the off chance we were given nothing but crap... */
if (Curr_Letter == '\0')
@@ -478,7 +483,7 @@ _metaphone(char *word, /* IN */
*/
/* Ignore non-alphas */
if (!isalpha((unsigned char) (Curr_Letter)))
if (!ascii_isalpha((unsigned char) (Curr_Letter)))
continue;
/* Drop duplicates, except CC */
@@ -731,7 +736,7 @@ _soundex(const char *instr, char *outstr)
Assert(outstr);
/* Skip leading non-alphabetic characters */
while (*instr && !isalpha((unsigned char) *instr))
while (*instr && !ascii_isalpha((unsigned char) *instr))
++instr;
/* If no string left, return all-zeroes buffer */
@@ -742,12 +747,12 @@ _soundex(const char *instr, char *outstr)
}
/* Take the first letter as is */
*outstr++ = (char) toupper((unsigned char) *instr++);
*outstr++ = (char) pg_ascii_toupper((unsigned char) *instr++);
count = 1;
while (*instr && count < SOUNDEX_LEN)
{
if (isalpha((unsigned char) *instr) &&
if (ascii_isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1)))
{
*outstr = soundex_code(*instr);