1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-11 20:28:21 +03:00

Pgindent run for 8.0.

This commit is contained in:
Bruce Momjian
2004-08-29 05:07:03 +00:00
parent 90cb9c3051
commit b6b71b85bc
527 changed files with 20550 additions and 18283 deletions

View File

@ -1,36 +1,36 @@
/*
* This is a port of the Double Metaphone algorithm for use in PostgreSQL.
*
*
* Double Metaphone computes 2 "sounds like" strings - a primary and an
* alternate. In most cases they are the same, but for foreign names
* especially they can be a bit different, depending on pronunciation.
*
* Information on using Double Metaphone can be found at
* http://www.codeproject.com/useritems/dmetaphone1.asp
* http://www.codeproject.com/useritems/dmetaphone1.asp
* and the original article describing it can be found at
* http://www.cuj.com/documents/s=8038/cuj0006philips/
* http://www.cuj.com/documents/s=8038/cuj0006philips/
*
* For PostgrSQL we provide 2 functions - one for the primary and one for
* the alternate. That way the functions are pure text->text mappings that
* are useful in functional indexes. These are 'dmetaphone' for the
* primary and 'dmetaphone_alt' for the alternate.
*
* Assuming that dmetaphone.so is in $libdir, the SQL to set up the
* Assuming that dmetaphone.so is in $libdir, the SQL to set up the
* functions looks like this:
*
* CREATE FUNCTION dmetaphone (text) RETURNS text
* LANGUAGE C IMMUTABLE STRICT
* AS '$libdir/dmetaphone', 'dmetaphone';
* CREATE FUNCTION dmetaphone (text) RETURNS text
* LANGUAGE C IMMUTABLE STRICT
* AS '$libdir/dmetaphone', 'dmetaphone';
*
* CREATE FUNCTION dmetaphone_alt (text) RETURNS text
* LANGUAGE C IMMUTABLE STRICT
* AS '$libdir/dmetaphone', 'dmetaphone_alt';
* CREATE FUNCTION dmetaphone_alt (text) RETURNS text
* LANGUAGE C IMMUTABLE STRICT
* AS '$libdir/dmetaphone', 'dmetaphone_alt';
*
* Note that you have to declare the functions IMMUTABLE if you want to
* use them in functional indexes, and you have to declare them as STRICT
* as they do not check for NULL input, and will segfault if given NULL input.
* (See below for alternative ) Declaring them as STRICT means PostgreSQL
* will never call them with NULL, but instead assume the result is NULL,
* as they do not check for NULL input, and will segfault if given NULL input.
* (See below for alternative ) Declaring them as STRICT means PostgreSQL
* will never call them with NULL, but instead assume the result is NULL,
* which is what we (I) want.
*
* Alternatively, compile with -DDMETAPHONE_NOSTRICT and the functions
@ -42,25 +42,25 @@
* need. That's the way the perl module was written, because perl can handle
* a list return more easily than we can in PostgreSQL. The result has been
* fast enough for my needs, but it could maybe be optimized a bit to remove
* that behaviour.
* that behaviour.
*
*/
/*
* $Revision: 1.2 $
* $Id: dmetaphone.c,v 1.2 2004/08/20 19:48:14 momjian Exp $
* $Revision: 1.3 $
* $Id: dmetaphone.c,v 1.3 2004/08/29 05:06:35 momjian Exp $
*/
/***************************** COPYRIGHT NOTICES ***********************
Most of this code is directly from the Text::DoubleMetaphone perl module
version 0.05 available from http://www.cpan.org.
version 0.05 available from http://www.cpan.org.
It bears this copyright notice:
Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
All rights reserved.
This code is based heavily on the C++ implementation by
@ -73,7 +73,7 @@ It bears this copyright notice:
The remaining code is authored by Andrew Dunstan <amdunstan@ncshp.org> and
<andrew@dunslane.net> and is covered this copyright:
Copyright 2003, North Carolina State Highway Patrol.
Copyright 2003, North Carolina State Highway Patrol.
All rights reserved.
Permission to use, copy, modify, and distribute this software and its
@ -81,14 +81,14 @@ The remaining code is authored by Andrew Dunstan <amdunstan@ncshp.org> and
is hereby granted, provided that the above copyright notice and this
paragraph and the following two paragraphs appear in all copies.
IN NO EVENT SHALL THE NORTH CAROLINA STATE HIGHWAY PATROL BE LIABLE TO ANY
PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
IN NO EVENT SHALL THE NORTH CAROLINA STATE HIGHWAY PATROL BE LIABLE TO ANY
PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
DOCUMENTATION, EVEN IF THE NORTH CAROLINA STATE HIGHWAY PATROL HAS BEEN
DOCUMENTATION, EVEN IF THE NORTH CAROLINA STATE HIGHWAY PATROL HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
THE NORTH CAROLINA STATE HIGHWAY PATROL SPECIFICALLY DISCLAIMS ANY
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
THE NORTH CAROLINA STATE HIGHWAY PATROL SPECIFICALLY DISCLAIMS ANY
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED
HEREUNDER IS ON AN "AS IS" BASIS, AND THE NORTH CAROLINA STATE HIGHWAY PATROL
HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
@ -109,7 +109,6 @@ The remaining code is authored by Andrew Dunstan <amdunstan@ncshp.org> and
/* turn off assertions for embedded function */
#define NDEBUG
#endif
#include <stdio.h>
@ -124,7 +123,7 @@ extern Datum dmetaphone_alt(PG_FUNCTION_ARGS);
/* prototype for the main function we got from the perl module */
static void
DoubleMetaphone(char *, char **);
DoubleMetaphone(char *, char **);
#ifndef DMETAPHONE_MAIN
@ -138,37 +137,41 @@ PG_FUNCTION_INFO_V1(dmetaphone);
Datum
dmetaphone(PG_FUNCTION_ARGS)
{
text * arg, * result;
int alen, rsize;
char * aptr, *codes[2], * code, * rptr;
text *arg,
*result;
int alen,
rsize;
char *aptr,
*codes[2],
*code,
*rptr;
#ifdef DMETAPHONE_NOSTRICT
if (PG_ARGISNULL(0))
PG_RETURNNULL();
#endif
arg = PG_GETARG_TEXT_P(0);
alen = VARSIZE(arg)-VARHDRSZ;
alen = VARSIZE(arg) - VARHDRSZ;
/*
* Postgres' string values might not have trailing nuls.
* The VARSIZE will not include the nul in any case
* so we copy things out and add a trailing nul.
* When we copy back we ignore the nul
* (and we don't make space for it).
/*
* Postgres' string values might not have trailing nuls. The VARSIZE
* will not include the nul in any case so we copy things out and add
* a trailing nul. When we copy back we ignore the nul (and we don't
* make space for it).
*/
aptr = palloc(alen+1);
memcpy(aptr,VARDATA(arg),alen);
aptr[alen]=0;
DoubleMetaphone(aptr,codes);
aptr = palloc(alen + 1);
memcpy(aptr, VARDATA(arg), alen);
aptr[alen] = 0;
DoubleMetaphone(aptr, codes);
code = codes[0];
if (!code)
code = "";
rsize = VARHDRSZ + strlen(code) ;
rsize = VARHDRSZ + strlen(code);
result = (text *) palloc(rsize);
memset(result,0,rsize);
memset(result, 0, rsize);
rptr = VARDATA(result);
memcpy(rptr,code,strlen(code));
memcpy(rptr, code, strlen(code));
VARATT_SIZEP(result) = rsize;
PG_RETURN_TEXT_P(result);
}
@ -183,28 +186,33 @@ PG_FUNCTION_INFO_V1(dmetaphone_alt);
Datum
dmetaphone_alt(PG_FUNCTION_ARGS)
{
text * arg, * result;
int alen, rsize;
char * aptr, * codes[2], * code, * rptr;
text *arg,
*result;
int alen,
rsize;
char *aptr,
*codes[2],
*code,
*rptr;
#ifdef DMETAPHONE_NOSTRICT
if (PG_ARGISNULL(0))
PG_RETURNNULL();
#endif
arg = PG_GETARG_TEXT_P(0);
alen = VARSIZE(arg)-VARHDRSZ;
aptr = palloc(alen+1);
memcpy(aptr,VARDATA(arg),alen);
aptr[alen]=0;
DoubleMetaphone(aptr,codes);
alen = VARSIZE(arg) - VARHDRSZ;
aptr = palloc(alen + 1);
memcpy(aptr, VARDATA(arg), alen);
aptr[alen] = 0;
DoubleMetaphone(aptr, codes);
code = codes[1];
if (!code)
code = "";
rsize = VARHDRSZ + strlen(code) ;
rsize = VARHDRSZ + strlen(code);
result = (text *) palloc(rsize);
memset(result,0,rsize);
memset(result, 0, rsize);
rptr = VARDATA(result);
memcpy(rptr,code,strlen(code));
memcpy(rptr, code, strlen(code));
VARATT_SIZEP(result) = rsize;
PG_RETURN_TEXT_P(result);
}
@ -215,141 +223,139 @@ dmetaphone_alt(PG_FUNCTION_ARGS)
/* all memory handling is done with these macros */
#define META_MALLOC(v,n,t) \
(v = (t*)palloc(((n)*sizeof(t))))
(v = (t*)palloc(((n)*sizeof(t))))
#define META_REALLOC(v,n,t) \
(v = (t*)repalloc((v),((n)*sizeof(t))))
(v = (t*)repalloc((v),((n)*sizeof(t))))
/*
/*
* Don't do pfree - it seems to cause a segv sometimes - which might have just
* been caused by reloading the module in development.
* So we rely on context cleanup - Tom Lane says pfree shouldn't be necessary
* in a case like this.
*/
#define META_FREE(x) /* pfree((x)) */
#define META_FREE(x) /* pfree((x)) */
#else /* not defined DMETAPHONE_MAIN */
#else /* not defined DMETAPHONE_MAIN */
/* use the standard malloc library when not running in PostgreSQL */
#define META_MALLOC(v,n,t) \
(v = (t*)malloc(((n)*sizeof(t))))
(v = (t*)malloc(((n)*sizeof(t))))
#define META_REALLOC(v,n,t) \
(v = (t*)realloc((v),((n)*sizeof(t))))
(v = (t*)realloc((v),((n)*sizeof(t))))
#define META_FREE(x) free((x))
#endif /* defined DMETAPHONE_MAIN */
#endif /* defined DMETAPHONE_MAIN */
/* this typedef was orignally in the perl module's .h file */
/* this typedef was orignally in the perl module's .h file */
typedef struct
{
char *str;
int length;
int bufsize;
int free_string_on_destroy;
char *str;
int length;
int bufsize;
int free_string_on_destroy;
}
metastring;
/*
metastring;
/*
* remaining perl module funcs unchanged except for declaring them static
* and reformatting to PostgreSQL indentation and to fit in 80 cols.
*
*/
*/
static metastring *
NewMetaString(char *init_str)
{
metastring *s;
char empty_string[] = "";
metastring *s;
char empty_string[] = "";
META_MALLOC(s, 1, metastring);
assert( s != NULL );
META_MALLOC(s, 1, metastring);
assert(s != NULL);
if (init_str == NULL)
if (init_str == NULL)
init_str = empty_string;
s->length = strlen(init_str);
/* preallocate a bit more for potential growth */
s->bufsize = s->length + 7;
s->length = strlen(init_str);
/* preallocate a bit more for potential growth */
s->bufsize = s->length + 7;
META_MALLOC(s->str, s->bufsize, char);
assert( s->str != NULL );
strncpy(s->str, init_str, s->length + 1);
s->free_string_on_destroy = 1;
META_MALLOC(s->str, s->bufsize, char);
assert(s->str != NULL);
return s;
strncpy(s->str, init_str, s->length + 1);
s->free_string_on_destroy = 1;
return s;
}
static void
DestroyMetaString(metastring * s)
{
if (s == NULL)
if (s == NULL)
return;
if (s->free_string_on_destroy && (s->str != NULL))
if (s->free_string_on_destroy && (s->str != NULL))
META_FREE(s->str);
META_FREE(s);
META_FREE(s);
}
static void
IncreaseBuffer(metastring * s, int chars_needed)
{
META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
assert( s->str != NULL );
s->bufsize = s->bufsize + chars_needed + 10;
META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
assert(s->str != NULL);
s->bufsize = s->bufsize + chars_needed + 10;
}
static void
MakeUpper(metastring * s)
{
char *i;
char *i;
for (i = s->str; *i; i++)
{
for (i = s->str; *i; i++)
*i = toupper(*i);
}
}
static int
IsVowel(metastring * s, int pos)
{
char c;
char c;
if ((pos < 0) || (pos >= s->length))
if ((pos < 0) || (pos >= s->length))
return 0;
c = *(s->str + pos);
if ((c == 'A') || (c == 'E') || (c == 'I') || (c =='O') ||
(c =='U') || (c == 'Y'))
c = *(s->str + pos);
if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') ||
(c == 'U') || (c == 'Y'))
return 1;
return 0;
return 0;
}
static int
SlavoGermanic(metastring * s)
{
if ((char *) strstr(s->str, "W"))
if ((char *) strstr(s->str, "W"))
return 1;
else if ((char *) strstr(s->str, "K"))
else if ((char *) strstr(s->str, "K"))
return 1;
else if ((char *) strstr(s->str, "CZ"))
else if ((char *) strstr(s->str, "CZ"))
return 1;
else if ((char *) strstr(s->str, "WITZ"))
else if ((char *) strstr(s->str, "WITZ"))
return 1;
else
else
return 0;
}
@ -357,117 +363,115 @@ SlavoGermanic(metastring * s)
static char
GetAt(metastring * s, int pos)
{
if ((pos < 0) || (pos >= s->length))
if ((pos < 0) || (pos >= s->length))
return '\0';
return ((char) *(s->str + pos));
return ((char) *(s->str + pos));
}
static void
SetAt(metastring * s, int pos, char c)
{
if ((pos < 0) || (pos >= s->length))
if ((pos < 0) || (pos >= s->length))
return;
*(s->str + pos) = c;
*(s->str + pos) = c;
}
/*
/*
Caveats: the START value is 0 based
*/
static int
StringAt(metastring * s, int start, int length, ...)
StringAt(metastring * s, int start, int length,...)
{
char *test;
char *pos;
va_list ap;
char *test;
char *pos;
va_list ap;
if ((start < 0) || (start >= s->length))
return 0;
if ((start < 0) || (start >= s->length))
return 0;
pos = (s->str + start);
va_start(ap, length);
pos = (s->str + start);
va_start(ap, length);
do
do
{
test = va_arg(ap, char *);
if (*test && (strncmp(pos, test, length) == 0))
return 1;
}
while (strcmp(test, ""));
while (strcmp(test, ""));
va_end(ap);
va_end(ap);
return 0;
return 0;
}
static void
MetaphAdd(metastring * s, char *new_str)
{
int add_length;
int add_length;
if (new_str == NULL)
if (new_str == NULL)
return;
add_length = strlen(new_str);
if ((s->length + add_length) > (s->bufsize - 1))
{
add_length = strlen(new_str);
if ((s->length + add_length) > (s->bufsize - 1))
IncreaseBuffer(s, add_length);
}
strcat(s->str, new_str);
s->length += add_length;
strcat(s->str, new_str);
s->length += add_length;
}
static void
DoubleMetaphone(char *str, char **codes)
{
int length;
metastring *original;
metastring *primary;
metastring *secondary;
int current;
int last;
int length;
metastring *original;
metastring *primary;
metastring *secondary;
int current;
int last;
current = 0;
/* we need the real length and last prior to padding */
length = strlen(str);
last = length - 1;
original = NewMetaString(str);
/* Pad original so we can index beyond end */
MetaphAdd(original, " ");
current = 0;
/* we need the real length and last prior to padding */
length = strlen(str);
last = length - 1;
original = NewMetaString(str);
/* Pad original so we can index beyond end */
MetaphAdd(original, " ");
primary = NewMetaString("");
secondary = NewMetaString("");
primary->free_string_on_destroy = 0;
secondary->free_string_on_destroy = 0;
primary = NewMetaString("");
secondary = NewMetaString("");
primary->free_string_on_destroy = 0;
secondary->free_string_on_destroy = 0;
MakeUpper(original);
MakeUpper(original);
/* skip these when at start of word */
if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
/* skip these when at start of word */
if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
current += 1;
/* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
if (GetAt(original, 0) == 'X')
/* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
if (GetAt(original, 0) == 'X')
{
MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */
MetaphAdd(secondary, "S");
current += 1;
}
/* main loop */
while ((primary->length < 4) || (secondary->length < 4))
/* main loop */
while ((primary->length < 4) || (secondary->length < 4))
{
if (current >= length)
break;
switch (GetAt(original, current))
{
{
case 'A':
case 'E':
case 'I':
@ -508,8 +512,8 @@ DoubleMetaphone(char *str, char **codes)
&& StringAt(original, (current - 1), 3, "ACH", "")
&& ((GetAt(original, current + 2) != 'I')
&& ((GetAt(original, current + 2) != 'E')
|| StringAt(original, (current - 2), 6, "BACHER",
"MACHER", ""))))
|| StringAt(original, (current - 2), 6, "BACHER",
"MACHER", ""))))
{
MetaphAdd(primary, "K");
MetaphAdd(secondary, "K");
@ -550,7 +554,7 @@ DoubleMetaphone(char *str, char **codes)
/* greek roots e.g. 'chemistry', 'chorus' */
if ((current == 0)
&& (StringAt(original, (current + 1), 5,
&& (StringAt(original, (current + 1), 5,
"HARAC", "HARIS", "")
|| StringAt(original, (current + 1), 3, "HOR",
"HYM", "HIA", "HEM", ""))
@ -566,19 +570,21 @@ DoubleMetaphone(char *str, char **codes)
if (
(StringAt(original, 0, 4, "VAN ", "VON ", "")
|| StringAt(original, 0, 3, "SCH", ""))
/* 'architect but not 'arch', 'orchestra', 'orchid' */
/* 'architect but not 'arch', 'orchestra', 'orchid' */
|| StringAt(original, (current - 2), 6, "ORCHES",
"ARCHIT", "ORCHID", "")
|| StringAt(original, (current + 2), 1, "T", "S",
"")
|| ((StringAt(original, (current - 1), 1,
"A", "O", "U", "E", "")
|| ((StringAt(original, (current - 1), 1,
"A", "O", "U", "E", "")
|| (current == 0))
/* e.g., 'wachtler', 'wechsler',
but not 'tichner' */
&& StringAt(original, (current + 2), 1, "L", "R",
"N", "M", "B", "H", "F", "V", "W",
" ", "")))
/*
* e.g., 'wachtler', 'wechsler', but not 'tichner'
*/
&& StringAt(original, (current + 2), 1, "L", "R",
"N", "M", "B", "H", "F", "V", "W",
" ", "")))
{
MetaphAdd(primary, "K");
MetaphAdd(secondary, "K");
@ -633,14 +639,14 @@ DoubleMetaphone(char *str, char **codes)
{
/* 'bellocchio' but not 'bacchus' */
if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
&& !StringAt(original, (current + 2), 2, "HU", ""))
&& !StringAt(original, (current + 2), 2, "HU", ""))
{
/* 'accident', 'accede' 'succeed' */
if (
((current == 1)
&& (GetAt(original, current - 1) == 'A'))
|| StringAt(original, (current - 1), 5, "UCCEE",
"UCCES", ""))
|| StringAt(original, (current - 1), 5, "UCCEE",
"UCCES", ""))
{
MetaphAdd(primary, "KS");
MetaphAdd(secondary, "KS");
@ -655,7 +661,7 @@ DoubleMetaphone(char *str, char **codes)
break;
}
else
{ /* Pierce's rule */
{ /* Pierce's rule */
MetaphAdd(primary, "K");
MetaphAdd(secondary, "K");
current += 2;
@ -696,19 +702,18 @@ DoubleMetaphone(char *str, char **codes)
/* name sent in 'mac caffrey', 'mac gregor */
if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
current += 3;
else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
&& !StringAt(original, (current + 1), 2,
"CE", "CI", ""))
current += 2;
else
if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
&& !StringAt(original, (current + 1), 2,
"CE", "CI", ""))
current += 2;
else
current += 1;
current += 1;
break;
case 'D':
if (StringAt(original, current, 2, "DG", ""))
{
if (StringAt(original, (current + 2), 1,
if (StringAt(original, (current + 2), 1,
"I", "E", "Y", ""))
{
/* e.g. 'edge' */
@ -780,19 +785,22 @@ DoubleMetaphone(char *str, char **codes)
break;
}
}
/* Parker's rule (with some further refinements) -
e.g., 'hugh' */
/*
* Parker's rule (with some further refinements) -
* e.g., 'hugh'
*/
if (
((current > 1)
&& StringAt(original, (current - 2), 1,
&& StringAt(original, (current - 2), 1,
"B", "H", "D", ""))
/* e.g., 'bough' */
/* e.g., 'bough' */
|| ((current > 2)
&& StringAt(original, (current - 3), 1,
&& StringAt(original, (current - 3), 1,
"B", "H", "D", ""))
/* e.g., 'broughton' */
/* e.g., 'broughton' */
|| ((current > 3)
&& StringAt(original, (current - 4), 1,
&& StringAt(original, (current - 4), 1,
"B", "H", "")))
{
current += 2;
@ -800,8 +808,10 @@ DoubleMetaphone(char *str, char **codes)
}
else
{
/* e.g., 'laugh', 'McLaughlin', 'cough',
'gough', 'rough', 'tough' */
/*
* e.g., 'laugh', 'McLaughlin', 'cough', 'gough',
* 'rough', 'tough'
*/
if ((current > 2)
&& (GetAt(original, current - 1) == 'U')
&& StringAt(original, (current - 3), 1, "C",
@ -837,15 +847,15 @@ DoubleMetaphone(char *str, char **codes)
if (!StringAt(original, (current + 2), 2, "EY", "")
&& (GetAt(original, current + 1) != 'Y')
&& !SlavoGermanic(original))
{
MetaphAdd(primary, "N");
MetaphAdd(secondary, "KN");
}
else
{
MetaphAdd(primary, "KN");
MetaphAdd(secondary, "KN");
}
{
MetaphAdd(primary, "N");
MetaphAdd(secondary, "KN");
}
else
{
MetaphAdd(primary, "KN");
MetaphAdd(secondary, "KN");
}
current += 2;
break;
}
@ -863,9 +873,9 @@ DoubleMetaphone(char *str, char **codes)
/* -ges-,-gep-,-gel-, -gie- at beginning */
if ((current == 0)
&& ((GetAt(original, current + 1) == 'Y')
|| StringAt(original, (current + 1), 2, "ES", "EP",
"EB", "EL", "EY", "IB", "IL", "IN", "IE",
"EI", "ER", "")))
|| StringAt(original, (current + 1), 2, "ES", "EP",
"EB", "EL", "EY", "IB", "IL", "IN", "IE",
"EI", "ER", "")))
{
MetaphAdd(primary, "K");
MetaphAdd(secondary, "J");
@ -873,11 +883,11 @@ DoubleMetaphone(char *str, char **codes)
break;
}
/* -ger-, -gy- */
/* -ger-, -gy- */
if (
(StringAt(original, (current + 1), 2, "ER", "")
|| (GetAt(original, current + 1) == 'Y'))
&& !StringAt(original, 0, 6,
&& !StringAt(original, 0, 6,
"DANGER", "RANGER", "MANGER", "")
&& !StringAt(original, (current - 1), 1, "E", "I", "")
&& !StringAt(original, (current - 1), 3, "RGY", "OGY",
@ -889,9 +899,9 @@ DoubleMetaphone(char *str, char **codes)
break;
}
/* italian e.g, 'biaggi' */
/* italian e.g, 'biaggi' */
if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
|| StringAt(original, (current - 1), 4,
|| StringAt(original, (current - 1), 4,
"AGGI", "OGGI", ""))
{
/* obvious germanic */
@ -939,7 +949,8 @@ DoubleMetaphone(char *str, char **codes)
MetaphAdd(secondary, "H");
current += 2;
}
else /* also takes care of 'HH' */
else
/* also takes care of 'HH' */
current += 1;
break;
@ -991,9 +1002,9 @@ DoubleMetaphone(char *str, char **codes)
else
{
if (!StringAt(original, (current + 1), 1, "L", "T",
"K", "S", "N", "M", "B", "Z", "")
"K", "S", "N", "M", "B", "Z", "")
&& !StringAt(original, (current - 1), 1,
"S", "K", "L", ""))
"S", "K", "L", ""))
{
MetaphAdd(primary, "J");
MetaphAdd(secondary, "J");
@ -1002,7 +1013,7 @@ DoubleMetaphone(char *str, char **codes)
}
}
if (GetAt(original, current + 1) == 'J') /* it could happen! */
if (GetAt(original, current + 1) == 'J') /* it could happen! */
current += 2;
else
current += 1;
@ -1024,10 +1035,10 @@ DoubleMetaphone(char *str, char **codes)
if (((current == (length - 3))
&& StringAt(original, (current - 1), 4, "ILLO",
"ILLA", "ALLE", ""))
|| ((StringAt(original, (last - 1), 2, "AS", "OS", "")
|| StringAt(original, last, 1, "A", "O", ""))
&& StringAt(original, (current - 1), 4,
"ALLE", "")))
|| ((StringAt(original, (last - 1), 2, "AS", "OS", "")
|| StringAt(original, last, 1, "A", "O", ""))
&& StringAt(original, (current - 1), 4,
"ALLE", "")))
{
MetaphAdd(primary, "L");
MetaphAdd(secondary, "");
@ -1045,8 +1056,8 @@ DoubleMetaphone(char *str, char **codes)
case 'M':
if ((StringAt(original, (current - 1), 3, "UMB", "")
&& (((current + 1) == last)
|| StringAt(original, (current + 2), 2, "ER", "")))
/* 'dumb','thumb' */
|| StringAt(original, (current + 2), 2, "ER", "")))
/* 'dumb','thumb' */
|| (GetAt(original, current + 1) == 'M'))
current += 2;
else
@ -1102,7 +1113,7 @@ DoubleMetaphone(char *str, char **codes)
if ((current == last)
&& !SlavoGermanic(original)
&& StringAt(original, (current - 2), 2, "IE", "")
&& !StringAt(original, (current - 4), 2, "ME", "MA", ""))
&& !StringAt(original, (current - 4), 2, "ME", "MA", ""))
{
MetaphAdd(primary, "");
MetaphAdd(secondary, "R");
@ -1141,8 +1152,8 @@ DoubleMetaphone(char *str, char **codes)
{
/* germanic */
if (StringAt
(original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
"HOLZ", ""))
(original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
"HOLZ", ""))
{
MetaphAdd(primary, "S");
MetaphAdd(secondary, "S");
@ -1174,12 +1185,13 @@ DoubleMetaphone(char *str, char **codes)
break;
}
/* german & anglicisations, e.g. 'smith' match 'schmidt',
'snider' match 'schneider'
also, -sz- in slavic language altho in hungarian it is
pronounced 's' */
/*
* german & anglicisations, e.g. 'smith' match 'schmidt',
* 'snider' match 'schneider' also, -sz- in slavic
* language altho in hungarian it is pronounced 's'
*/
if (((current == 0)
&& StringAt(original, (current + 1), 1,
&& StringAt(original, (current + 1), 1,
"M", "N", "L", "W", ""))
|| StringAt(original, (current + 1), 1, "Z", ""))
{
@ -1198,12 +1210,12 @@ DoubleMetaphone(char *str, char **codes)
if (GetAt(original, current + 2) == 'H')
{
/* dutch origin, e.g. 'school', 'schooner' */
if (StringAt(original, (current + 3), 2,
if (StringAt(original, (current + 3), 2,
"OO", "ER", "EN",
"UY", "ED", "EM", ""))
{
/* 'schermerhorn', 'schenker' */
if (StringAt(original, (current + 3), 2,
if (StringAt(original, (current + 3), 2,
"ER", "EN", ""))
{
MetaphAdd(primary, "X");
@ -1235,7 +1247,7 @@ DoubleMetaphone(char *str, char **codes)
}
}
if (StringAt(original, (current + 2), 1,
if (StringAt(original, (current + 2), 1,
"I", "E", "Y", ""))
{
MetaphAdd(primary, "S");
@ -1252,7 +1264,7 @@ DoubleMetaphone(char *str, char **codes)
/* french e.g. 'resnais', 'artois' */
if ((current == last)
&& StringAt(original, (current - 2), 2, "AI", "OI", ""))
&& StringAt(original, (current - 2), 2, "AI", "OI", ""))
{
MetaphAdd(primary, "");
MetaphAdd(secondary, "S");
@ -1353,8 +1365,8 @@ DoubleMetaphone(char *str, char **codes)
/* Arnow should match Arnoff */
if (((current == last) && IsVowel(original, current - 1))
|| StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
"OWSKI", "OWSKY", "")
|| StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
"OWSKI", "OWSKY", "")
|| StringAt(original, 0, 3, "SCH", ""))
{
MetaphAdd(primary, "");
@ -1379,15 +1391,15 @@ DoubleMetaphone(char *str, char **codes)
case 'X':
/* french e.g. breaux */
if (!((current == last)
&& (StringAt(original, (current - 3), 3,
&& (StringAt(original, (current - 3), 3,
"IAU", "EAU", "")
|| StringAt(original, (current - 2), 2,
|| StringAt(original, (current - 2), 2,
"AU", "OU", ""))))
{
MetaphAdd(primary, "KS");
MetaphAdd(secondary, "KS");
}
if (StringAt(original, (current + 1), 1, "C", "X", ""))
current += 2;
@ -1404,7 +1416,7 @@ DoubleMetaphone(char *str, char **codes)
current += 2;
break;
}
else if (StringAt(original, (current + 1), 2,
else if (StringAt(original, (current + 1), 2,
"ZO", "ZI", "ZA", "")
|| (SlavoGermanic(original)
&& ((current > 0)
@ -1427,38 +1439,42 @@ DoubleMetaphone(char *str, char **codes)
default:
current += 1;
}
/* printf("PRIMARY: %s\n", primary->str);
printf("SECONDARY: %s\n", secondary->str); */
}
/*
* printf("PRIMARY: %s\n", primary->str); printf("SECONDARY:
* %s\n", secondary->str);
*/
}
if (primary->length > 4)
if (primary->length > 4)
SetAt(primary, 4, '\0');
if (secondary->length > 4)
if (secondary->length > 4)
SetAt(secondary, 4, '\0');
*codes = primary->str;
*++codes = secondary->str;
*codes = primary->str;
*++codes = secondary->str;
DestroyMetaString(original);
DestroyMetaString(primary);
DestroyMetaString(secondary);
DestroyMetaString(original);
DestroyMetaString(primary);
DestroyMetaString(secondary);
}
#ifdef DMETAPHONE_MAIN
/* just for testing - not part of the perl code */
main(int argc, char ** argv)
main(int argc, char **argv)
{
char * codes[2];
char *codes[2];
if (argc > 1)
{
DoubleMetaphone(argv[1],codes);
printf("%s|%s\n",codes[0],codes[1]);
}
DoubleMetaphone(argv[1], codes);
printf("%s|%s\n", codes[0], codes[1]);
}
}
#endif