1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00

Ensure Soundex difference() function handles empty input sanely.

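fuzzystrmatch's difference() function assumes that _soundex()
always fully initializes its output buffer.  That was not true for
a string containing no alphabetic characters: the early-exit path
set only outstr[0] (plus the terminator at outstr[SOUNDEX_LEN]),
leaving the bytes in between uninitialized.  The result was
unstable output and Valgrind complaints.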

Fix by using memset() to fill the whole buffer in the early-exit
case.  Also make some cosmetic improvements (I didn't care for the
random switches between "instr[0]" and "*instr" notation).

Report and diagnosis by Alexander Lakhin (bug #17935).
Back-patch to all supported branches.

Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org
This commit is contained in:
Tom Lane
2023-05-16 10:53:42 -04:00
parent 7deeb02f6d
commit ccd3623256
3 changed files with 15 additions and 7 deletions

View File

@@ -23,6 +23,12 @@ SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
A500 | M626 | 0 A500 | M626 | 0
(1 row) (1 row)
SELECT soundex(''), difference('', '');
soundex | difference
---------+------------
| 4
(1 row)
SELECT levenshtein('GUMBO', 'GAMBOL'); SELECT levenshtein('GUMBO', 'GAMBOL');
levenshtein levenshtein
------------- -------------

View File

@@ -727,16 +727,14 @@ _soundex(const char *instr, char *outstr)
AssertArg(instr); AssertArg(instr);
AssertArg(outstr); AssertArg(outstr);
outstr[SOUNDEX_LEN] = '\0';
/* Skip leading non-alphabetic characters */ /* Skip leading non-alphabetic characters */
while (!isalpha((unsigned char) instr[0]) && instr[0]) while (*instr && !isalpha((unsigned char) *instr))
++instr; ++instr;
/* No string left */ /* If no string left, return all-zeroes buffer */
if (!instr[0]) if (!*instr)
{ {
outstr[0] = (char) 0; memset(outstr, '\0', SOUNDEX_LEN + 1);
return; return;
} }
@@ -749,7 +747,7 @@ _soundex(const char *instr, char *outstr)
if (isalpha((unsigned char) *instr) && if (isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1))) soundex_code(*instr) != soundex_code(*(instr - 1)))
{ {
*outstr = soundex_code(instr[0]); *outstr = soundex_code(*instr);
if (*outstr != '0') if (*outstr != '0')
{ {
++outstr; ++outstr;
@@ -766,6 +764,9 @@ _soundex(const char *instr, char *outstr)
++outstr; ++outstr;
++count; ++count;
} }
/* And null-terminate */
*outstr = '\0';
} }
PG_FUNCTION_INFO_V1(difference); PG_FUNCTION_INFO_V1(difference);

View File

@@ -6,6 +6,7 @@ SELECT soundex('hello world!');
SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann'); SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew'); SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret'); SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
SELECT soundex(''), difference('', '');
SELECT levenshtein('GUMBO', 'GAMBOL'); SELECT levenshtein('GUMBO', 'GAMBOL');