1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-25 21:42:33 +03:00

Add a CaseSensitive parameter to synonym dictionaries.

Simon Riggs
This commit is contained in:
Tom Lane 2008-03-10 03:01:28 +00:00
parent 2fc2795456
commit 7953fdcd9e
2 changed files with 34 additions and 8 deletions

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ -->
<chapter id="textsearch"> <chapter id="textsearch">
<title id="textsearch-title">Full Text Search</title> <title id="textsearch-title">Full Text Search</title>
@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The');
dictionary can be used to overcome linguistic problems, for example, to dictionary can be used to overcome linguistic problems, for example, to
prevent an English stemmer dictionary from reducing the word 'Paris' to prevent an English stemmer dictionary from reducing the word 'Paris' to
'pari'. It is enough to have a <literal>Paris paris</literal> line in the 'pari'. It is enough to have a <literal>Paris paris</literal> line in the
synonym dictionary and put it before the <literal>english_stem</> dictionary: synonym dictionary and put it before the <literal>english_stem</>
dictionary. For example:
<programlisting> <programlisting>
SELECT * FROM ts_debug('english', 'Paris'); SELECT * FROM ts_debug('english', 'Paris');
@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris');
<productname>PostgreSQL</> installation's shared-data directory). <productname>PostgreSQL</> installation's shared-data directory).
The file format is just one line The file format is just one line
per word to be substituted, with the word followed by its synonym, per word to be substituted, with the word followed by its synonym,
separated by white space. Blank lines and trailing spaces are ignored, separated by white space. Blank lines and trailing spaces are ignored.
and upper case is folded to lower case.
</para> </para>
<para>
The <literal>synonym</> template also has an optional parameter
<literal>CaseSensitive</>, which defaults to <literal>false</>. When
<literal>CaseSensitive</> is <literal>false</>, words in the synonym file
are folded to lower case, as are input tokens. When it is
<literal>true</>, words and tokens are not folded to lower case,
but are compared as-is.
</para>
</sect2> </sect2>
<sect2 id="textsearch-thesaurus"> <sect2 id="textsearch-thesaurus">

View File

@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -30,6 +30,7 @@ typedef struct
{ {
int len; /* length of syn array */ int len; /* length of syn array */
Syn *syn; Syn *syn;
bool case_sensitive;
} DictSyn; } DictSyn;
/* /*
@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
DictSyn *d; DictSyn *d;
ListCell *l; ListCell *l;
char *filename = NULL; char *filename = NULL;
bool case_sensitive = false;
FILE *fin; FILE *fin;
char *starti, char *starti,
*starto, *starto,
@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
if (pg_strcasecmp("Synonyms", defel->defname) == 0) if (pg_strcasecmp("Synonyms", defel->defname) == 0)
filename = defGetString(defel); filename = defGetString(defel);
else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
case_sensitive = defGetBoolean(defel);
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS)
} }
} }
d->syn[cur].in = lowerstr(starti); if (case_sensitive)
d->syn[cur].out = lowerstr(starto); {
d->syn[cur].in = pstrdup(starti);
d->syn[cur].out = pstrdup(starto);
}
else
{
d->syn[cur].in = lowerstr(starti);
d->syn[cur].out = lowerstr(starto);
}
cur++; cur++;
@ -168,6 +180,8 @@ skipline:
d->len = cur; d->len = cur;
qsort(d->syn, d->len, sizeof(Syn), compareSyn); qsort(d->syn, d->len, sizeof(Syn), compareSyn);
d->case_sensitive = case_sensitive;
PG_RETURN_POINTER(d); PG_RETURN_POINTER(d);
} }
@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
if (len <= 0 || d->len <= 0) if (len <= 0 || d->len <= 0)
PG_RETURN_POINTER(NULL); PG_RETURN_POINTER(NULL);
key.in = lowerstr_with_len(in, len); if (d->case_sensitive)
key.in = pnstrdup(in, len);
else
key.in = lowerstr_with_len(in, len);
key.out = NULL; key.out = NULL;
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn); found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);