mirror of
https://github.com/postgres/postgres.git
synced 2025-04-25 21:42:33 +03:00
Add a CaseSensitive parameter to synonym dictionaries.
Simon Riggs
This commit is contained in:
parent
2fc2795456
commit
7953fdcd9e
@ -1,4 +1,4 @@
|
|||||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ -->
|
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ -->
|
||||||
|
|
||||||
<chapter id="textsearch">
|
<chapter id="textsearch">
|
||||||
<title id="textsearch-title">Full Text Search</title>
|
<title id="textsearch-title">Full Text Search</title>
|
||||||
@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The');
|
|||||||
dictionary can be used to overcome linguistic problems, for example, to
|
dictionary can be used to overcome linguistic problems, for example, to
|
||||||
prevent an English stemmer dictionary from reducing the word 'Paris' to
|
prevent an English stemmer dictionary from reducing the word 'Paris' to
|
||||||
'pari'. It is enough to have a <literal>Paris paris</literal> line in the
|
'pari'. It is enough to have a <literal>Paris paris</literal> line in the
|
||||||
synonym dictionary and put it before the <literal>english_stem</> dictionary:
|
synonym dictionary and put it before the <literal>english_stem</>
|
||||||
|
dictionary. For example:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
SELECT * FROM ts_debug('english', 'Paris');
|
SELECT * FROM ts_debug('english', 'Paris');
|
||||||
@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris');
|
|||||||
<productname>PostgreSQL</> installation's shared-data directory).
|
<productname>PostgreSQL</> installation's shared-data directory).
|
||||||
The file format is just one line
|
The file format is just one line
|
||||||
per word to be substituted, with the word followed by its synonym,
|
per word to be substituted, with the word followed by its synonym,
|
||||||
separated by white space. Blank lines and trailing spaces are ignored,
|
separated by white space. Blank lines and trailing spaces are ignored.
|
||||||
and upper case is folded to lower case.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The <literal>synonym</> template also has an optional parameter
|
||||||
|
<literal>CaseSensitive</>, which defaults to <literal>false</>. When
|
||||||
|
<literal>CaseSensitive</> is <literal>false</>, words in the synonym file
|
||||||
|
are folded to lower case, as are input tokens. When it is
|
||||||
|
<literal>true</>, words and tokens are not folded to lower case,
|
||||||
|
but are compared as-is.
|
||||||
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="textsearch-thesaurus">
|
<sect2 id="textsearch-thesaurus">
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -30,6 +30,7 @@ typedef struct
|
|||||||
{
|
{
|
||||||
int len; /* length of syn array */
|
int len; /* length of syn array */
|
||||||
Syn *syn;
|
Syn *syn;
|
||||||
|
bool case_sensitive;
|
||||||
} DictSyn;
|
} DictSyn;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
|
|||||||
DictSyn *d;
|
DictSyn *d;
|
||||||
ListCell *l;
|
ListCell *l;
|
||||||
char *filename = NULL;
|
char *filename = NULL;
|
||||||
|
bool case_sensitive = false;
|
||||||
FILE *fin;
|
FILE *fin;
|
||||||
char *starti,
|
char *starti,
|
||||||
*starto,
|
*starto,
|
||||||
@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
if (pg_strcasecmp("Synonyms", defel->defname) == 0)
|
if (pg_strcasecmp("Synonyms", defel->defname) == 0)
|
||||||
filename = defGetString(defel);
|
filename = defGetString(defel);
|
||||||
|
else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
|
||||||
|
case_sensitive = defGetBoolean(defel);
|
||||||
else
|
else
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
d->syn[cur].in = lowerstr(starti);
|
if (case_sensitive)
|
||||||
d->syn[cur].out = lowerstr(starto);
|
{
|
||||||
|
d->syn[cur].in = pstrdup(starti);
|
||||||
|
d->syn[cur].out = pstrdup(starto);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
d->syn[cur].in = lowerstr(starti);
|
||||||
|
d->syn[cur].out = lowerstr(starto);
|
||||||
|
}
|
||||||
|
|
||||||
cur++;
|
cur++;
|
||||||
|
|
||||||
@ -168,6 +180,8 @@ skipline:
|
|||||||
d->len = cur;
|
d->len = cur;
|
||||||
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
|
qsort(d->syn, d->len, sizeof(Syn), compareSyn);
|
||||||
|
|
||||||
|
d->case_sensitive = case_sensitive;
|
||||||
|
|
||||||
PG_RETURN_POINTER(d);
|
PG_RETURN_POINTER(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
|
|||||||
if (len <= 0 || d->len <= 0)
|
if (len <= 0 || d->len <= 0)
|
||||||
PG_RETURN_POINTER(NULL);
|
PG_RETURN_POINTER(NULL);
|
||||||
|
|
||||||
key.in = lowerstr_with_len(in, len);
|
if (d->case_sensitive)
|
||||||
|
key.in = pnstrdup(in, len);
|
||||||
|
else
|
||||||
|
key.in = lowerstr_with_len(in, len);
|
||||||
|
|
||||||
key.out = NULL;
|
key.out = NULL;
|
||||||
|
|
||||||
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
|
found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user