mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Add sample text search dictionary templates and parsers, to replace the
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
This commit is contained in:
38
contrib/dict_xsyn/Makefile
Normal file
38
contrib/dict_xsyn/Makefile
Normal file
@ -0,0 +1,38 @@
|
||||
# $PostgreSQL: pgsql/contrib/dict_xsyn/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
MODULE_big = dict_xsyn
|
||||
OBJS = dict_xsyn.o
|
||||
DATA_built = dict_xsyn.sql
|
||||
DATA = uninstall_dict_xsyn.sql
|
||||
DOCS = README.dict_xsyn
|
||||
REGRESS = dict_xsyn
|
||||
|
||||
DICTDIR = tsearch_data
|
||||
DICTFILES = xsyn_sample.rules
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
else
|
||||
subdir = contrib/dict_xsyn
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
||||
|
||||
install: install-data
|
||||
|
||||
.PHONY: install-data
|
||||
install-data: $(DICTFILES)
|
||||
for i in $(DICTFILES); \
|
||||
do $(INSTALL_DATA) $(srcdir)/$$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i; \
|
||||
done
|
||||
|
||||
uninstall: uninstall-data
|
||||
|
||||
.PHONY: uninstall-data
|
||||
# Remove the rule files that install-data copied into $(datadir)/$(DICTDIR).
# DICTFILES names regular files only, so a non-recursive "rm -f" is enough;
# "-r" would needlessly allow a whole directory tree to be deleted if an
# entry ever resolved to a directory.
uninstall-data:
	for i in $(DICTFILES); \
	  do rm -f '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
	done
|
52
contrib/dict_xsyn/README.dict_xsyn
Normal file
52
contrib/dict_xsyn/README.dict_xsyn
Normal file
@ -0,0 +1,52 @@
|
||||
Extended Synonym dictionary
|
||||
===========================
|
||||
|
||||
This is a simple synonym dictionary. It replaces words with groups of their
|
||||
synonyms, and so makes it possible to search for a word using any of its
|
||||
synonyms.
|
||||
|
||||
* Configuration
|
||||
|
||||
It accepts the following options:
|
||||
|
||||
- KEEPORIG controls whether the original word is included, or only its
|
||||
synonyms. Default is 'true'.
|
||||
|
||||
- RULES is the base name of the file containing the list of synonyms.
|
||||
This file must be in $(prefix)/share/tsearch_data/, and its name must
|
||||
end in ".rules" (which is not included in the RULES parameter).
|
||||
|
||||
The rules file has the following format:
|
||||
|
||||
- Each line represents a group of synonyms for a single word, which is
|
||||
given first on the line. Synonyms are separated by whitespace:
|
||||
|
||||
word syn1 syn2 syn3
|
||||
|
||||
- The sharp sign ('#') is a comment delimiter. It may appear at the start of
the line or at the start of any later word; the rest of the line is skipped.
|
||||
|
||||
Look at xsyn_sample.rules, which is installed in $(prefix)/share/tsearch_data/,
|
||||
for an example.
|
||||
|
||||
* Usage
|
||||
|
||||
1. Compile and install
|
||||
|
||||
2. Load dictionary
|
||||
|
||||
psql mydb < dict_xsyn.sql
|
||||
|
||||
3. Test it
|
||||
|
||||
mydb=# SELECT ts_lexize('xsyn','word');
|
||||
ts_lexize
|
||||
----------------
|
||||
{word,syn1,syn2,syn3}
|
||||
|
||||
4. Change the dictionary options as you wish
|
||||
|
||||
mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (KEEPORIG=false);
|
||||
ALTER TEXT SEARCH DICTIONARY
|
||||
|
||||
That's all.
|
235
contrib/dict_xsyn/dict_xsyn.c
Normal file
235
contrib/dict_xsyn/dict_xsyn.c
Normal file
@ -0,0 +1,235 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* dict_xsyn.c
|
||||
* Extended synonym dictionary
|
||||
*
|
||||
* Copyright (c) 2007, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "commands/defrem.h"
|
||||
#include "fmgr.h"
|
||||
#include "storage/fd.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
/*
 * One row of the synonym table, built from a single line of the rules file.
 */
typedef struct Syn
{
	char	   *key;			/* the word being looked up (first word of the
								 * line) */
	char	   *value;			/* the lower-cased line, still unparsed: the
								 * word itself followed by its synonyms */
} Syn;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int len;
|
||||
Syn *syn;
|
||||
|
||||
bool keeporig;
|
||||
} DictSyn;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dxsyn_init);
|
||||
Datum dxsyn_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(dxsyn_lexize);
|
||||
Datum dxsyn_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
 * Locate the next whitespace-delimited word in the NUL-terminated string "in".
 *
 * Leading whitespace is skipped.  On success the return value points at the
 * first byte of the word and *end points at the byte just past it (either
 * whitespace or the terminating NUL).  Returns NULL, with *end set to NULL,
 * when nothing but whitespace remains or when the next word starts with '#',
 * i.e. a comment begins there.  The string itself is not modified.
 */
static char *
find_word(char *in, char **end)
{
	char	   *cursor = in;
	char	   *word;

	*end = NULL;

	/* step over leading whitespace, one pg_mblen()-sized character at a time */
	for (; *cursor != '\0' && t_isspace(cursor); cursor += pg_mblen(cursor))
		;

	/* end of string, or a comment: no further word on this line */
	if (*cursor == '\0' || *cursor == '#')
		return NULL;

	word = cursor;

	/* advance to the first whitespace character (or NUL) after the word */
	for (; *cursor != '\0' && !t_isspace(cursor); cursor += pg_mblen(cursor))
		;

	*end = cursor;
	return word;
}
|
||||
|
||||
static int
|
||||
compare_syn(const void *a, const void *b)
|
||||
{
|
||||
return strcmp(((Syn *) a)->key, ((Syn *) b)->key);
|
||||
}
|
||||
|
||||
static void
|
||||
read_dictionary(DictSyn *d, char *filename)
|
||||
{
|
||||
char *real_filename = get_tsearch_config_filename(filename, "rules");
|
||||
FILE *fin;
|
||||
char *line;
|
||||
int cur = 0;
|
||||
|
||||
if ((fin = AllocateFile(real_filename, "r")) == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
||||
errmsg("could not open synonym file \"%s\": %m",
|
||||
real_filename)));
|
||||
|
||||
while ((line = t_readline(fin)) != NULL)
|
||||
{
|
||||
char *value;
|
||||
char *key;
|
||||
char *end = NULL;
|
||||
|
||||
if (*line == '\0')
|
||||
continue;
|
||||
|
||||
value = lowerstr(line);
|
||||
pfree(line);
|
||||
|
||||
key = find_word(value, &end);
|
||||
if (!key)
|
||||
{
|
||||
pfree(value);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cur == d->len)
|
||||
{
|
||||
d->len = (d->len > 0) ? 2 * d->len : 16;
|
||||
if (d->syn)
|
||||
d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
|
||||
else
|
||||
d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
|
||||
}
|
||||
|
||||
d->syn[cur].key = pnstrdup(key, end - key);
|
||||
d->syn[cur].value = value;
|
||||
|
||||
cur++;
|
||||
}
|
||||
|
||||
FreeFile(fin);
|
||||
|
||||
d->len = cur;
|
||||
if (cur > 1)
|
||||
qsort(d->syn, d->len, sizeof(Syn), compare_syn);
|
||||
|
||||
pfree(real_filename);
|
||||
}
|
||||
|
||||
Datum
|
||||
dxsyn_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
||||
DictSyn *d;
|
||||
ListCell *l;
|
||||
|
||||
d = (DictSyn *) palloc0(sizeof(DictSyn));
|
||||
d->len = 0;
|
||||
d->syn = NULL;
|
||||
d->keeporig = true;
|
||||
|
||||
foreach(l, dictoptions)
|
||||
{
|
||||
DefElem *defel = (DefElem *) lfirst(l);
|
||||
|
||||
if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
|
||||
{
|
||||
d->keeporig = defGetBoolean(defel);
|
||||
}
|
||||
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
|
||||
{
|
||||
read_dictionary(d, defGetString(defel));
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unrecognized xsyn parameter: \"%s\"",
|
||||
defel->defname)));
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
dxsyn_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
int length = PG_GETARG_INT32(2);
|
||||
Syn word;
|
||||
Syn *found;
|
||||
TSLexeme *res = NULL;
|
||||
|
||||
if (!length || d->len == 0)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
/* Create search pattern */
|
||||
{
|
||||
char *temp = pnstrdup(in, length);
|
||||
|
||||
word.key = lowerstr(temp);
|
||||
pfree(temp);
|
||||
word.value = NULL;
|
||||
}
|
||||
|
||||
/* Look for matching syn */
|
||||
found = (Syn *)bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
|
||||
pfree(word.key);
|
||||
|
||||
if (!found)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
/* Parse string of synonyms and return array of words */
|
||||
{
|
||||
char *value = pstrdup(found->value);
|
||||
int value_length = strlen(value);
|
||||
char *pos = value;
|
||||
int nsyns = 0;
|
||||
bool is_first = true;
|
||||
|
||||
res = palloc(0);
|
||||
|
||||
while(pos < value + value_length)
|
||||
{
|
||||
char *end;
|
||||
char *syn = find_word(pos, &end);
|
||||
|
||||
if (!syn)
|
||||
break;
|
||||
*end = '\0';
|
||||
|
||||
res = repalloc(res, sizeof(TSLexeme)*(nsyns + 2));
|
||||
res[nsyns].lexeme = NULL;
|
||||
|
||||
/* first word is added to result only if KEEPORIG flag is set */
|
||||
if(d->keeporig || !is_first)
|
||||
{
|
||||
res[nsyns].lexeme = pstrdup(syn);
|
||||
res[nsyns + 1].lexeme = NULL;
|
||||
|
||||
nsyns++;
|
||||
}
|
||||
|
||||
is_first = false;
|
||||
|
||||
pos = end + 1;
|
||||
}
|
||||
|
||||
pfree(value);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
29
contrib/dict_xsyn/dict_xsyn.sql.in
Normal file
29
contrib/dict_xsyn/dict_xsyn.sql.in
Normal file
@ -0,0 +1,29 @@
|
||||
-- $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
-- Adjust this setting to control where the objects get created.
|
||||
SET search_path = public;
|
||||
|
||||
BEGIN;
|
||||
|
||||
CREATE FUNCTION dxsyn_init(internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION dxsyn_lexize(internal, internal, internal, internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE TEXT SEARCH TEMPLATE xsyn_template (
|
||||
LEXIZE = dxsyn_lexize,
|
||||
INIT = dxsyn_init
|
||||
);
|
||||
|
||||
CREATE TEXT SEARCH DICTIONARY xsyn (
|
||||
TEMPLATE = xsyn_template
|
||||
);
|
||||
|
||||
COMMENT ON TEXT SEARCH DICTIONARY xsyn IS 'eXtended synonym dictionary';
|
||||
|
||||
END;
|
22
contrib/dict_xsyn/expected/dict_xsyn.out
Normal file
22
contrib/dict_xsyn/expected/dict_xsyn.out
Normal file
@ -0,0 +1,22 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
RESET client_min_messages;
|
||||
--configuration
|
||||
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
|
||||
--lexize
|
||||
SELECT ts_lexize('xsyn', 'supernova');
|
||||
ts_lexize
|
||||
----------------
|
||||
{sn,sne,1987a}
|
||||
(1 row)
|
||||
|
||||
SELECT ts_lexize('xsyn', 'grb');
|
||||
ts_lexize
|
||||
-----------
|
||||
|
||||
(1 row)
|
||||
|
16
contrib/dict_xsyn/sql/dict_xsyn.sql
Normal file
16
contrib/dict_xsyn/sql/dict_xsyn.sql
Normal file
@ -0,0 +1,16 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
\i dict_xsyn.sql
|
||||
\set ECHO all
|
||||
RESET client_min_messages;
|
||||
|
||||
--configuration
|
||||
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
|
||||
|
||||
--lexize
|
||||
SELECT ts_lexize('xsyn', 'supernova');
|
||||
SELECT ts_lexize('xsyn', 'grb');
|
9
contrib/dict_xsyn/uninstall_dict_xsyn.sql
Normal file
9
contrib/dict_xsyn/uninstall_dict_xsyn.sql
Normal file
@ -0,0 +1,9 @@
|
||||
SET search_path = public;
|
||||
|
||||
DROP TEXT SEARCH DICTIONARY xsyn;
|
||||
|
||||
DROP TEXT SEARCH TEMPLATE xsyn_template;
|
||||
|
||||
DROP FUNCTION dxsyn_init(internal);
|
||||
|
||||
DROP FUNCTION dxsyn_lexize(internal,internal,internal,internal);
|
6
contrib/dict_xsyn/xsyn_sample.rules
Normal file
6
contrib/dict_xsyn/xsyn_sample.rules
Normal file
@ -0,0 +1,6 @@
|
||||
# Sample rules file for eXtended Synonym (xsyn) dictionary
|
||||
# format is as follows:
|
||||
#
|
||||
# word synonym1 synonym2 ...
|
||||
#
|
||||
supernova sn sne 1987a
|
Reference in New Issue
Block a user