Add a CaseSensitive parameter to synonym dictionaries.

Simon Riggs
2025-07-03 20:02:46 +03:00 · 2008-03-10 03:01:28 +00:00
parent 2fc2795456
commit 7953fdcd9e
2 changed files with 34 additions and 8 deletions
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.41 2008/03/04 03:17:18 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.42 2008/03/10 03:01:28 tgl Exp $ -->
 <chapter id="textsearch">
 <title id="textsearch-title">Full Text Search</title>
@@ -2209,7 +2209,8 @@ SELECT ts_lexize('public.simple_dict','The');
    dictionary can be used to overcome linguistic problems, for example, to
    prevent an English stemmer dictionary from reducing the word 'Paris' to
    'pari'.  It is enough to have a <literal>Paris paris</literal> line in the
-    synonym dictionary and put it before the <literal>english_stem</> dictionary:
+    synonym dictionary and put it before the <literal>english_stem</>
+    dictionary.  For example:
 <programlisting>
 SELECT * FROM ts_debug('english', 'Paris');
@@ -2242,10 +2243,17 @@ SELECT * FROM ts_debug('english', 'Paris');
    <productname>PostgreSQL</> installation's shared-data directory).
    The file format is just one line
    per word to be substituted, with the word followed by its synonym,
-    separated by white space.  Blank lines and trailing spaces are ignored,
+    separated by white space.  Blank lines and trailing spaces are ignored.
-    and upper case is folded to lower case.
+   </para>
+   <para>
+    The <literal>synonym</> template also has an optional parameter
+    <literal>CaseSensitive</>, which defaults to <literal>false</>.  When
+    <literal>CaseSensitive</> is <literal>false</>, words in the synonym file
+    are folded to lower case, as are input tokens.  When it is
+    <literal>true</>, words and tokens are not folded to lower case,
+    but are compared as-is.
   </para>
  </sect2>
  <sect2 id="textsearch-thesaurus">
--- a/src/backend/tsearch/dict_synonym.c
+++ b/src/backend/tsearch/dict_synonym.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.8 2008/03/10 03:01:28 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -30,6 +30,7 @@ typedef struct
 {
 	int			len;			/* length of syn array */
 	Syn		   *syn;
+	bool		case_sensitive;
 } DictSyn;
 /*
@@ -77,6 +78,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
 	DictSyn    *d;
 	ListCell   *l;
 	char	   *filename = NULL;
+	bool		case_sensitive = false;
 	FILE	   *fin;
 	char	   *starti,
 			   *starto,
@@ -90,6 +92,8 @@ dsynonym_init(PG_FUNCTION_ARGS)
 		if (pg_strcasecmp("Synonyms", defel->defname) == 0)
 			filename = defGetString(defel);
+		else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
+			case_sensitive = defGetBoolean(defel);
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -154,8 +158,16 @@ dsynonym_init(PG_FUNCTION_ARGS)
 			}
 		}
-		d->syn[cur].in = lowerstr(starti);
+		if (case_sensitive)
-		d->syn[cur].out = lowerstr(starto);
+		{
+			d->syn[cur].in = pstrdup(starti);
+			d->syn[cur].out = pstrdup(starto);
+		}
+		else
+		{
+			d->syn[cur].in = lowerstr(starti);
+			d->syn[cur].out = lowerstr(starto);
+		}
 		cur++;
@@ -168,6 +180,8 @@ skipline:
 	d->len = cur;
 	qsort(d->syn, d->len, sizeof(Syn), compareSyn);
+	d->case_sensitive = case_sensitive;
 	PG_RETURN_POINTER(d);
 }
@@ -185,7 +199,11 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
 	if (len <= 0 || d->len <= 0)
 		PG_RETURN_POINTER(NULL);
-	key.in = lowerstr_with_len(in, len);
+	if (d->case_sensitive)
+		key.in = pnstrdup(in, len);
+	else
+		key.in = lowerstr_with_len(in, len);
 	key.out = NULL;
 	found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);