mirror of
https://github.com/postgres/postgres.git
synced 2026-01-26 09:41:40 +03:00
fuzzystrmatch: use pg_ascii_toupper().
fuzzystrmatch is designed for ASCII, so no need to rely on the global LC_CTYPE setting.

Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/dd0cdd1f-e786-426e-b336-1ffa9b2f1fc6%40eisentraut.org
This commit is contained in:
@@ -62,7 +62,7 @@ static const char *const soundex_table = "01230120022455012623010202";
|
||||
static char
|
||||
soundex_code(char letter)
|
||||
{
|
||||
letter = toupper((unsigned char) letter);
|
||||
letter = pg_ascii_toupper((unsigned char) letter);
|
||||
/* Defend against non-ASCII letters */
|
||||
if (letter >= 'A' && letter <= 'Z')
|
||||
return soundex_table[letter - 'A'];
|
||||
@@ -122,16 +122,21 @@ static const char _codes[26] = {
|
||||
static int
|
||||
getcode(char c)
|
||||
{
|
||||
if (isalpha((unsigned char) c))
|
||||
{
|
||||
c = toupper((unsigned char) c);
|
||||
/* Defend against non-ASCII letters */
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
return _codes[c - 'A'];
|
||||
}
|
||||
c = pg_ascii_toupper((unsigned char) c);
|
||||
/* Defend against non-ASCII letters */
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
return _codes[c - 'A'];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
ascii_isalpha(char c)
|
||||
{
|
||||
return (c >= 'A' && c <= 'Z') ||
|
||||
(c >= 'a' && c <= 'z');
|
||||
}
|
||||
|
||||
#define isvowel(c) (getcode(c) & 1) /* AEIOU */
|
||||
|
||||
/* These letters are passed through unchanged */
|
||||
@@ -301,18 +306,18 @@ metaphone(PG_FUNCTION_ARGS)
|
||||
* accessing the array directly... */
|
||||
|
||||
/* Look at the next letter in the word */
|
||||
#define Next_Letter (toupper((unsigned char) word[w_idx+1]))
|
||||
#define Next_Letter (pg_ascii_toupper((unsigned char) word[w_idx+1]))
|
||||
/* Look at the current letter in the word */
|
||||
#define Curr_Letter (toupper((unsigned char) word[w_idx]))
|
||||
#define Curr_Letter (pg_ascii_toupper((unsigned char) word[w_idx]))
|
||||
/* Go N letters back. */
|
||||
#define Look_Back_Letter(n) \
|
||||
(w_idx >= (n) ? toupper((unsigned char) word[w_idx-(n)]) : '\0')
|
||||
(w_idx >= (n) ? pg_ascii_toupper((unsigned char) word[w_idx-(n)]) : '\0')
|
||||
/* Previous letter. I dunno, should this return null on failure? */
|
||||
#define Prev_Letter (Look_Back_Letter(1))
|
||||
/* Look two letters down. It makes sure you don't walk off the string. */
|
||||
#define After_Next_Letter \
|
||||
(Next_Letter != '\0' ? toupper((unsigned char) word[w_idx+2]) : '\0')
|
||||
#define Look_Ahead_Letter(n) toupper((unsigned char) Lookahead(word+w_idx, n))
|
||||
(Next_Letter != '\0' ? pg_ascii_toupper((unsigned char) word[w_idx+2]) : '\0')
|
||||
#define Look_Ahead_Letter(n) pg_ascii_toupper((unsigned char) Lookahead(word+w_idx, n))
|
||||
|
||||
|
||||
/* Allows us to safely look ahead an arbitrary # of letters */
|
||||
@@ -340,7 +345,7 @@ Lookahead(char *word, int how_far)
|
||||
#define Phone_Len (p_idx)
|
||||
|
||||
/* Note if a letter is a 'break' in the word */
|
||||
#define Isbreak(c) (!isalpha((unsigned char) (c)))
|
||||
#define Isbreak(c) (!ascii_isalpha((unsigned char) (c)))
|
||||
|
||||
|
||||
static void
|
||||
@@ -379,7 +384,7 @@ _metaphone(char *word, /* IN */
|
||||
|
||||
/*-- The first phoneme has to be processed specially. --*/
|
||||
/* Find our first letter */
|
||||
for (; !isalpha((unsigned char) (Curr_Letter)); w_idx++)
|
||||
for (; !ascii_isalpha((unsigned char) (Curr_Letter)); w_idx++)
|
||||
{
|
||||
/* On the off chance we were given nothing but crap... */
|
||||
if (Curr_Letter == '\0')
|
||||
@@ -478,7 +483,7 @@ _metaphone(char *word, /* IN */
|
||||
*/
|
||||
|
||||
/* Ignore non-alphas */
|
||||
if (!isalpha((unsigned char) (Curr_Letter)))
|
||||
if (!ascii_isalpha((unsigned char) (Curr_Letter)))
|
||||
continue;
|
||||
|
||||
/* Drop duplicates, except CC */
|
||||
@@ -731,7 +736,7 @@ _soundex(const char *instr, char *outstr)
|
||||
Assert(outstr);
|
||||
|
||||
/* Skip leading non-alphabetic characters */
|
||||
while (*instr && !isalpha((unsigned char) *instr))
|
||||
while (*instr && !ascii_isalpha((unsigned char) *instr))
|
||||
++instr;
|
||||
|
||||
/* If no string left, return all-zeroes buffer */
|
||||
@@ -742,12 +747,12 @@ _soundex(const char *instr, char *outstr)
|
||||
}
|
||||
|
||||
/* Take the first letter as is */
|
||||
*outstr++ = (char) toupper((unsigned char) *instr++);
|
||||
*outstr++ = (char) pg_ascii_toupper((unsigned char) *instr++);
|
||||
|
||||
count = 1;
|
||||
while (*instr && count < SOUNDEX_LEN)
|
||||
{
|
||||
if (isalpha((unsigned char) *instr) &&
|
||||
if (ascii_isalpha((unsigned char) *instr) &&
|
||||
soundex_code(*instr) != soundex_code(*(instr - 1)))
|
||||
{
|
||||
*outstr = soundex_code(*instr);
|
||||
|
||||
Reference in New Issue
Block a user