mirror of
https://github.com/postgres/postgres.git
synced 2025-06-11 20:28:21 +03:00
Sorry - I should have gotten to this sooner. Here's a patch which you should
be able to apply against what you just committed. It rolls soundex into fuzzystrmatch. Remove soundex/metaphone and merge into fuzzystrmatch. Joe Conway
This commit is contained in:
@ -20,6 +20,11 @@
|
||||
* Metaphone was originally created by Lawrence Philips and presented in an article
|
||||
* in the December 1990 issue of "Computer Language".
|
||||
*
|
||||
* soundex()
|
||||
* -----------
|
||||
* Folded existing soundex contrib into this one. Renamed text_soundex() (C function)
|
||||
* to soundex() for consistency.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software and its
|
||||
* documentation for any purpose, without fee, and without a written agreement
|
||||
* is hereby granted, provided that the above copyright notice and this
|
||||
@ -40,12 +45,15 @@
|
||||
*/
|
||||
|
||||
|
||||
Version 0.1 (3 August, 2001):
|
||||
Version 0.2 (7 August, 2001):
|
||||
Functions to calculate the degree to which two strings match in a "fuzzy" way
|
||||
Tested under Linux (Red Hat 6.2 and 7.0) and PostgreSQL 7.2devel
|
||||
|
||||
Release Notes:
|
||||
|
||||
Version 0.2
|
||||
- folded soundex contrib into this one
|
||||
|
||||
Version 0.1
|
||||
- initial release
|
||||
|
||||
|
62
contrib/fuzzystrmatch/README.soundex
Normal file
62
contrib/fuzzystrmatch/README.soundex
Normal file
@ -0,0 +1,62 @@
|
||||
NOTE: Modified August 07, 2001 by Joe Conway. Updated for accuracy
|
||||
after combining soundex code into the fuzzystrmatch contrib
|
||||
---------------------------------------------------------------------
|
||||
The Soundex system is a method of matching similar sounding names
|
||||
(or any words) to the same code. It was initially used by the
|
||||
United States Census in 1880, 1900, and 1910, but it has little use
|
||||
beyond English names (or the English pronunciation of names), and
|
||||
it is not a linguistic tool.
|
||||
|
||||
The following are some usage examples:
|
||||
|
||||
SELECT soundex('hello world!');
|
||||
|
||||
CREATE TABLE s (nm text)\g
|
||||
|
||||
insert into s values ('john')\g
|
||||
insert into s values ('joan')\g
|
||||
insert into s values ('wobbly')\g
|
||||
|
||||
select * from s
|
||||
where soundex(nm) = soundex('john')\g
|
||||
|
||||
-- List every pair of distinct rows whose names share a Soundex code.
-- The output columns are qualified because "s" is joined to itself, so a
-- bare "nm" would be ambiguous between the two aliases.
select a.nm, b.nm from s a, s b
where soundex(a.nm) = soundex(b.nm)
and a.oid <> b.oid\g
|
||||
|
||||
CREATE FUNCTION text_sx_eq(text, text) RETURNS bool AS
|
||||
'select soundex($1) = soundex($2)'
|
||||
LANGUAGE 'sql'\g
|
||||
|
||||
CREATE FUNCTION text_sx_lt(text,text) RETURNS bool AS
|
||||
'select soundex($1) < soundex($2)'
|
||||
LANGUAGE 'sql'\g
|
||||
|
||||
CREATE FUNCTION text_sx_gt(text,text) RETURNS bool AS
|
||||
'select soundex($1) > soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_le(text,text) RETURNS bool AS
|
||||
'select soundex($1) <= soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_ge(text,text) RETURNS bool AS
|
||||
'select soundex($1) >= soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
CREATE FUNCTION text_sx_ne(text,text) RETURNS bool AS
|
||||
'select soundex($1) <> soundex($2)'
|
||||
LANGUAGE 'sql';
|
||||
|
||||
DROP OPERATOR #= (text,text)\g
|
||||
|
||||
-- "Sounds like" operator on text, implemented by text_sx_eq().
-- COMMUTATOR takes an operator name, not a function name; the comparison is
-- symmetric, so #= is its own commutator.
CREATE OPERATOR #= (leftarg=text, rightarg=text, procedure=text_sx_eq,
commutator = #=)\g
|
||||
|
||||
SELECT *
|
||||
FROM s
|
||||
WHERE text_sx_eq(nm,'john')\g
|
||||
|
||||
SELECT *
|
||||
from s
|
||||
where s.nm #= 'john';
|
@ -629,3 +629,71 @@ int _metaphone (
|
||||
|
||||
return(META_SUCCESS);
|
||||
} /* END metaphone */
|
||||
|
||||
|
||||
/*
|
||||
* SQL function: soundex(text) returns text
|
||||
*/
|
||||
PG_FUNCTION_INFO_V1(soundex);
|
||||
|
||||
Datum
|
||||
soundex(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char outstr[SOUNDEX_LEN + 1];
|
||||
char *arg;
|
||||
|
||||
arg = _textout(PG_GETARG_TEXT_P(0));
|
||||
|
||||
_soundex(arg, outstr);
|
||||
|
||||
PG_RETURN_TEXT_P(_textin(outstr));
|
||||
}
|
||||
|
||||
static void
|
||||
_soundex(const char *instr, char *outstr)
|
||||
{
|
||||
int count;
|
||||
|
||||
AssertArg(instr);
|
||||
AssertArg(outstr);
|
||||
|
||||
outstr[SOUNDEX_LEN] = '\0';
|
||||
|
||||
/* Skip leading non-alphabetic characters */
|
||||
while (!isalpha((unsigned char) instr[0]) && instr[0])
|
||||
++instr;
|
||||
|
||||
/* No string left */
|
||||
if (!instr[0])
|
||||
{
|
||||
outstr[0] = (char) 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Take the first letter as is */
|
||||
*outstr++ = (char) toupper((unsigned char) *instr++);
|
||||
|
||||
count = 1;
|
||||
while (*instr && count < SOUNDEX_LEN)
|
||||
{
|
||||
if (isalpha((unsigned char) *instr) &&
|
||||
soundex_code(*instr) != soundex_code(*(instr - 1)))
|
||||
{
|
||||
*outstr = soundex_code(instr[0]);
|
||||
if (*outstr != '0')
|
||||
{
|
||||
++outstr;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
++instr;
|
||||
}
|
||||
|
||||
/* Fill with 0's */
|
||||
while (count < SOUNDEX_LEN)
|
||||
{
|
||||
*outstr = '0';
|
||||
++outstr;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
@ -51,32 +51,43 @@
|
||||
#include "utils/builtins.h"
|
||||
|
||||
|
||||
#define MAX_LEVENSHTEIN_STRLEN 255
|
||||
#define MAX_METAPHONE_STRLEN 255
|
||||
|
||||
typedef struct dynmatrix
|
||||
{
|
||||
int value;
|
||||
} dynmat;
|
||||
|
||||
|
||||
/*
|
||||
* External declarations
|
||||
*/
|
||||
extern Datum levenshtein(PG_FUNCTION_ARGS);
|
||||
extern Datum metaphone(PG_FUNCTION_ARGS);
|
||||
extern Datum soundex(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
|
||||
* Internal declarations
|
||||
* Soundex
|
||||
*/
|
||||
static void _soundex(const char *instr, char *outstr);
|
||||
|
||||
#define SOUNDEX_LEN 4
|
||||
#define _textin(str) DirectFunctionCall1(textin, CStringGetDatum(str))
|
||||
#define _textout(str) DatumGetPointer(DirectFunctionCall1(textout, PointerGetDatum(str)))
|
||||
|
||||
/*
 * Soundex digit for each letter A-Z; the entry for a letter is found at
 * index (letter - 'A').
 */
/*                                  ABCDEFGHIJKLMNOPQRSTUVWXYZ */
static const char *soundex_table = "01230120022455012623010202";

/*
 * soundex_code
 *
 * Return the Soundex digit for letter, ignoring case.
 *
 * This used to be a macro that indexed soundex_table with
 * (toupper(letter) - 'A') unconditionally, which reads outside the table
 * for anything that is not an ASCII letter.  Call sites keep the same
 * syntax, soundex_code(c), but the lookup is now range-checked:
 *
 *	- ASCII letters yield their table digit;
 *	- ASCII digits yield '\0', so that a literal digit in the input can
 *	  never compare equal to a real Soundex digit;
 *	- every other character is handed back upper-cased, which cannot
 *	  collide with a table digit either.
 */
static char
soundex_code(char letter)
{
	int			upper = toupper((unsigned char) letter);

	if (upper >= 'A' && upper <= 'Z')
		return soundex_table[upper - 'A'];

	if (upper >= '0' && upper <= '9')
		return '\0';

	return (char) upper;
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Levenshtein
|
||||
*/
|
||||
#define STRLEN(p) strlen(p)
|
||||
#define CHAREQ(p1, p2) (*(p1) == *(p2))
|
||||
#define NextChar(p) ((p)++)
|
||||
#define MAX_LEVENSHTEIN_STRLEN 255
|
||||
|
||||
|
||||
/*
|
||||
* Metaphone
|
||||
*/
|
||||
#define MAX_METAPHONE_STRLEN 255
|
||||
|
||||
/*
|
||||
* Original code by Michael G Schwern starts here.
|
||||
* Code slightly modified for use as PostgreSQL
|
||||
|
@ -3,3 +3,9 @@ CREATE FUNCTION levenshtein (text,text) RETURNS int
|
||||
|
||||
-- metaphone(text, int): bound to the C symbol 'metaphone' in this module.
CREATE FUNCTION metaphone (text, int)
  RETURNS text
  AS 'MODULE_PATHNAME', 'metaphone'
  LANGUAGE 'c' WITH (iscachable, isstrict);
|
||||
|
||||
-- soundex(text): bound to the C symbol 'soundex' in this module.
CREATE FUNCTION soundex (text)
  RETURNS text
  AS 'MODULE_PATHNAME', 'soundex'
  LANGUAGE 'c' WITH (iscachable, isstrict);
|
||||
|
||||
-- text_soundex(text): the old name of soundex(), bound to the same C symbol.
-- It carries the same attributes as soundex(): the C function fetches its
-- argument without a NULL check, so it must be declared isstrict to keep
-- NULL input from ever reaching it.
CREATE FUNCTION text_soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex' LANGUAGE 'c' with (iscachable, isstrict);
|
||||
|
Reference in New Issue
Block a user