1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-11 20:28:21 +03:00

Sorry - I should have gotten to this sooner. Here's a patch which you should

be able to apply against what you just committed. It rolls soundex into
fuzzystrmatch.

Remove soundex/metaphone and merge into fuzzystrmatch.

Joe Conway
This commit is contained in:
Bruce Momjian
2001-08-07 18:16:01 +00:00
parent fb5b85a8f2
commit cdd02cdf00
14 changed files with 167 additions and 689 deletions

View File

@ -20,6 +20,11 @@
* Metaphone was originally created by Lawrence Philips and presented in article
* in "Computer Language" December 1990 issue.
*
* soundex()
* -----------
* Folded existing soundex contrib into this one. Renamed text_soundex() (C function)
* to soundex() for consistency.
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written agreement
* is hereby granted, provided that the above copyright notice and this
@ -40,12 +45,15 @@
*/
Version 0.1 (3 August, 2001):
Version 0.2 (7 August, 2001):
Functions to calculate the degree to which two strings match in a "fuzzy" way
Tested under Linux (Red Hat 6.2 and 7.0) and PostgreSQL 7.2devel
Release Notes:
Version 0.2
- folded soundex contrib into this one
Version 0.1
- initial release

View File

@ -0,0 +1,62 @@
NOTE: Modified August 07, 2001 by Joe Conway. Updated for accuracy
after combining the soundex code into the fuzzystrmatch contrib.
---------------------------------------------------------------------
The Soundex system is a method of matching similar sounding names
(or any words) to the same code. It was initially used by the
United States Census in 1880, 1900, and 1910, but it has little use
beyond English names (or the English pronunciation of names), and
it is not a linguistic tool.
The following are some usage examples:
SELECT soundex('hello world!');
CREATE TABLE s (nm text)\g
insert into s values ('john')\g
insert into s values ('joan')\g
insert into s values ('wobbly')\g
select * from s
where soundex(nm) = soundex('john')\g
select nm from s a, s b
where soundex(a.nm) = soundex(b.nm)
and a.oid <> b.oid\g
CREATE FUNCTION text_sx_eq(text, text) RETURNS bool AS
'select soundex($1) = soundex($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_sx_lt(text,text) RETURNS bool AS
'select soundex($1) < soundex($2)'
LANGUAGE 'sql'\g
CREATE FUNCTION text_sx_gt(text,text) RETURNS bool AS
'select soundex($1) > soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_le(text,text) RETURNS bool AS
'select soundex($1) <= soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_ge(text,text) RETURNS bool AS
'select soundex($1) >= soundex($2)'
LANGUAGE 'sql';
CREATE FUNCTION text_sx_ne(text,text) RETURNS bool AS
'select soundex($1) <> soundex($2)'
LANGUAGE 'sql';
DROP OPERATOR #= (text,text)\g
-- The commutator clause names an operator, not a function; since
-- soundex equality is symmetric, #= is its own commutator.
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq,
commutator = #=)\g
SELECT *
FROM s
WHERE text_sx_eq(nm,'john')\g
SELECT *
from s
where s.nm #= 'john';

View File

@ -629,3 +629,71 @@ int _metaphone (
return(META_SUCCESS);
} /* END metaphone */
/*
 * soundex
 *
 * SQL-callable entry point: soundex(text) returns text.
 *
 * Converts the text argument to a C string, computes its Soundex code
 * into a local buffer of SOUNDEX_LEN + 1 bytes via _soundex(), and
 * returns that buffer converted back to text.
 */
PG_FUNCTION_INFO_V1(soundex);

Datum
soundex(PG_FUNCTION_ARGS)
{
	char		code[SOUNDEX_LEN + 1];
	char	   *input = _textout(PG_GETARG_TEXT_P(0));

	_soundex(input, code);

	PG_RETURN_TEXT_P(_textin(code));
}
static void
_soundex(const char *instr, char *outstr)
{
int count;
AssertArg(instr);
AssertArg(outstr);
outstr[SOUNDEX_LEN] = '\0';
/* Skip leading non-alphabetic characters */
while (!isalpha((unsigned char) instr[0]) && instr[0])
++instr;
/* No string left */
if (!instr[0])
{
outstr[0] = (char) 0;
return;
}
/* Take the first letter as is */
*outstr++ = (char) toupper((unsigned char) *instr++);
count = 1;
while (*instr && count < SOUNDEX_LEN)
{
if (isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1)))
{
*outstr = soundex_code(instr[0]);
if (*outstr != '0')
{
++outstr;
++count;
}
}
++instr;
}
/* Fill with 0's */
while (count < SOUNDEX_LEN)
{
*outstr = '0';
++outstr;
++count;
}
}

View File

@ -51,32 +51,43 @@
#include "utils/builtins.h"
/*
 * Length limits (255 each).  Presumably the maximum input string lengths
 * accepted by levenshtein() and metaphone(); the enforcing code is not
 * visible here -- confirm against the callers.
 */
#define MAX_LEVENSHTEIN_STRLEN 255
#define MAX_METAPHONE_STRLEN 255
/*
 * One matrix cell holding a single int.  Presumably the element type of
 * the Levenshtein distance matrix -- confirm against levenshtein().
 */
typedef struct dynmatrix
{
int value;
} dynmat;
/*
* External declarations
*
* SQL-callable entry points, declared with the PG_FUNCTION_ARGS signature.
*/
extern Datum levenshtein(PG_FUNCTION_ARGS);
extern Datum metaphone(PG_FUNCTION_ARGS);
extern Datum soundex(PG_FUNCTION_ARGS);
/*
* Internal declarations
* Soundex
*/
/* Worker routine: writes the code for instr into the caller's outstr buffer. */
static void _soundex(const char *instr, char *outstr);
/* Number of characters in a Soundex code, not counting the terminating NUL. */
#define SOUNDEX_LEN 4
/* Invoke textin on a C string through DirectFunctionCall1; yields a Datum. */
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
/* Invoke textout on a text pointer through DirectFunctionCall1; yields a pointer. */
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
/*
 * Code character for each letter, indexed by (upper-case letter - 'A').
 * The alphabet below labels the 26 table positions in order.
 */
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *soundex_table = "01230120022455012623010202";
/*
 * Look up the code character for a letter.  There is no range check: the
 * argument must upper-case to 'A'..'Z', otherwise the index falls outside
 * soundex_table.
 */
#define soundex_code(letter) soundex_table[toupper((unsigned char) (letter)) - 'A']
/*
* Levenshtein
*
* Thin string helpers: length via strlen(), comparison of the characters
* two pointers point at, and post-increment of a pointer.
*/
#define STRLEN(p) strlen(p)
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define NextChar(p) ((p)++)
/* NOTE(review): same name and value as the definition near the top of this span. */
#define MAX_LEVENSHTEIN_STRLEN 255
/*
* Metaphone
*/
/* NOTE(review): same name and value as the definition near the top of this span. */
#define MAX_METAPHONE_STRLEN 255
/*
* Original code by Michael G Schwern starts here.
* Code slightly modified for use as PostgreSQL

View File

@ -3,3 +3,9 @@ CREATE FUNCTION levenshtein (text,text) RETURNS int
-- metaphone(text, int) returns text: bound to the C symbol 'metaphone'
-- in the module at MODULE_PATHNAME; declared iscachable and isstrict.
CREATE FUNCTION metaphone (text,int) RETURNS text
AS 'MODULE_PATHNAME','metaphone' LANGUAGE 'c' with (iscachable, isstrict);
-- soundex(text) returns text: bound to the C symbol 'soundex' in the
-- same module; declared iscachable and isstrict.
CREATE FUNCTION soundex(text) RETURNS text
AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);
-- text_soundex(text) returns text: a second SQL name bound to the same C
-- symbol 'soundex'.  It is declared isstrict so that a NULL argument
-- yields NULL without calling the C function, which fetches its argument
-- without a NULL check; iscachable matches the soundex() declaration.
CREATE FUNCTION text_soundex(text) RETURNS text
AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);