mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Add sample text search dictionary templates and parsers, to replace the
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
This commit is contained in:
19
contrib/dict_int/Makefile
Normal file
19
contrib/dict_int/Makefile
Normal file
@ -0,0 +1,19 @@
|
||||
# $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
MODULE_big = dict_int
|
||||
OBJS = dict_int.o
|
||||
DATA_built = dict_int.sql
|
||||
DATA = uninstall_dict_int.sql
|
||||
DOCS = README.dict_int
|
||||
REGRESS = dict_int
|
||||
|
||||
ifdef USE_PGXS
|
||||
PG_CONFIG = pg_config
|
||||
PGXS := $(shell $(PG_CONFIG) --pgxs)
|
||||
include $(PGXS)
|
||||
else
|
||||
subdir = contrib/dict_int
|
||||
top_builddir = ../..
|
||||
include $(top_builddir)/src/Makefile.global
|
||||
include $(top_srcdir)/contrib/contrib-global.mk
|
||||
endif
|
41
contrib/dict_int/README.dict_int
Normal file
41
contrib/dict_int/README.dict_int
Normal file
@ -0,0 +1,41 @@
|
||||
Dictionary for integers
|
||||
=======================
|
||||
|
||||
The motivation for this example dictionary is to control the indexing of
|
||||
integers (signed and unsigned), and, consequently, to minimize the number of
|
||||
unique words, which greatly affects the performance of searching.
|
||||
|
||||
* Configuration
|
||||
|
||||
The dictionary accepts two options:
|
||||
|
||||
- The MAXLEN parameter specifies the maximum length (number of digits)
|
||||
allowed in an integer word. The default value is 6.
|
||||
|
||||
- The REJECTLONG parameter specifies if an overlength integer should be
|
||||
truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
|
||||
the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
|
||||
dictionary treats an overlength integer as a stop word, so that it will
|
||||
not be indexed.
|
||||
|
||||
* Usage
|
||||
|
||||
1. Compile and install
|
||||
|
||||
2. Load dictionary
|
||||
|
||||
psql mydb < dict_int.sql
|
||||
|
||||
3. Test it
|
||||
|
||||
mydb# select ts_lexize('intdict', '12345678');
|
||||
ts_lexize
|
||||
-----------
|
||||
{123456}
|
||||
|
||||
4. Change its options as you wish
|
||||
|
||||
mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
|
||||
ALTER TEXT SEARCH DICTIONARY
|
||||
|
||||
That's all.
|
99
contrib/dict_int/dict_int.c
Normal file
99
contrib/dict_int/dict_int.c
Normal file
@ -0,0 +1,99 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* dict_int.c
|
||||
* Text search dictionary for integers
|
||||
*
|
||||
* Copyright (c) 2007, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "commands/defrem.h"
|
||||
#include "fmgr.h"
|
||||
#include "tsearch/ts_public.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
|
||||
typedef struct {
|
||||
int maxlen;
|
||||
bool rejectlong;
|
||||
} DictInt;
|
||||
|
||||
|
||||
PG_FUNCTION_INFO_V1(dintdict_init);
|
||||
Datum dintdict_init(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(dintdict_lexize);
|
||||
Datum dintdict_lexize(PG_FUNCTION_ARGS);
|
||||
|
||||
Datum
|
||||
dintdict_init(PG_FUNCTION_ARGS)
|
||||
{
|
||||
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
||||
DictInt *d;
|
||||
ListCell *l;
|
||||
|
||||
d = (DictInt *) palloc0(sizeof(DictInt));
|
||||
d->maxlen = 6;
|
||||
d->rejectlong = false;
|
||||
|
||||
foreach(l, dictoptions)
|
||||
{
|
||||
DefElem *defel = (DefElem *) lfirst(l);
|
||||
|
||||
if (pg_strcasecmp(defel->defname, "MAXLEN") == 0)
|
||||
{
|
||||
d->maxlen = atoi(defGetString(defel));
|
||||
}
|
||||
else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0)
|
||||
{
|
||||
d->rejectlong = defGetBoolean(defel);
|
||||
}
|
||||
else
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("unrecognized intdict parameter: \"%s\"",
|
||||
defel->defname)));
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(d);
|
||||
}
|
||||
|
||||
Datum
|
||||
dintdict_lexize(PG_FUNCTION_ARGS)
|
||||
{
|
||||
DictInt *d = (DictInt*)PG_GETARG_POINTER(0);
|
||||
char *in = (char*)PG_GETARG_POINTER(1);
|
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
TSLexeme *res=palloc(sizeof(TSLexeme)*2);
|
||||
|
||||
res[1].lexeme = NULL;
|
||||
if (PG_GETARG_INT32(2) > d->maxlen)
|
||||
{
|
||||
if ( d->rejectlong )
|
||||
{
|
||||
/* reject by returning void array */
|
||||
pfree(txt);
|
||||
res[0].lexeme = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* trim integer */
|
||||
txt[d->maxlen] = '\0';
|
||||
res[0].lexeme = txt;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
res[0].lexeme = txt;
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
29
contrib/dict_int/dict_int.sql.in
Normal file
29
contrib/dict_int/dict_int.sql.in
Normal file
@ -0,0 +1,29 @@
|
||||
-- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
-- Adjust this setting to control where the objects get created.
|
||||
SET search_path = public;
|
||||
|
||||
BEGIN;
|
||||
|
||||
CREATE FUNCTION dintdict_init(internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal)
|
||||
RETURNS internal
|
||||
AS 'MODULE_PATHNAME'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE TEXT SEARCH TEMPLATE intdict_template (
|
||||
LEXIZE = dintdict_lexize,
|
||||
INIT = dintdict_init
|
||||
);
|
||||
|
||||
CREATE TEXT SEARCH DICTIONARY intdict (
|
||||
TEMPLATE = intdict_template
|
||||
);
|
||||
|
||||
COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers';
|
||||
|
||||
END;
|
308
contrib/dict_int/expected/dict_int.out
Normal file
308
contrib/dict_int/expected/dict_int.out
Normal file
@ -0,0 +1,308 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
RESET client_min_messages;
|
||||
--lexize
|
||||
select ts_lexize('intdict', '511673');
|
||||
ts_lexize
|
||||
-----------
|
||||
{511673}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '129');
|
||||
ts_lexize
|
||||
-----------
|
||||
{129}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '40865854');
|
||||
ts_lexize
|
||||
-----------
|
||||
{408658}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '952');
|
||||
ts_lexize
|
||||
-----------
|
||||
{952}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '654980341');
|
||||
ts_lexize
|
||||
-----------
|
||||
{654980}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '09810106');
|
||||
ts_lexize
|
||||
-----------
|
||||
{098101}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '14262713');
|
||||
ts_lexize
|
||||
-----------
|
||||
{142627}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '6532082986');
|
||||
ts_lexize
|
||||
-----------
|
||||
{653208}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '0150061');
|
||||
ts_lexize
|
||||
-----------
|
||||
{015006}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '7778');
|
||||
ts_lexize
|
||||
-----------
|
||||
{7778}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9547');
|
||||
ts_lexize
|
||||
-----------
|
||||
{9547}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '753395478');
|
||||
ts_lexize
|
||||
-----------
|
||||
{753395}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '647652');
|
||||
ts_lexize
|
||||
-----------
|
||||
{647652}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '6988655574');
|
||||
ts_lexize
|
||||
-----------
|
||||
{698865}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '1279');
|
||||
ts_lexize
|
||||
-----------
|
||||
{1279}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '1266645909');
|
||||
ts_lexize
|
||||
-----------
|
||||
{126664}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '7594193969');
|
||||
ts_lexize
|
||||
-----------
|
||||
{759419}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '16928207');
|
||||
ts_lexize
|
||||
-----------
|
||||
{169282}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '196850350328');
|
||||
ts_lexize
|
||||
-----------
|
||||
{196850}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '22026985592');
|
||||
ts_lexize
|
||||
-----------
|
||||
{220269}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '2063765');
|
||||
ts_lexize
|
||||
-----------
|
||||
{206376}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '242387310');
|
||||
ts_lexize
|
||||
-----------
|
||||
{242387}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '93595');
|
||||
ts_lexize
|
||||
-----------
|
||||
{93595}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9374');
|
||||
ts_lexize
|
||||
-----------
|
||||
{9374}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '996969');
|
||||
ts_lexize
|
||||
-----------
|
||||
{996969}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '353595982');
|
||||
ts_lexize
|
||||
-----------
|
||||
{353595}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '925860');
|
||||
ts_lexize
|
||||
-----------
|
||||
{925860}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '11848378337');
|
||||
ts_lexize
|
||||
-----------
|
||||
{118483}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '333');
|
||||
ts_lexize
|
||||
-----------
|
||||
{333}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '799287416765');
|
||||
ts_lexize
|
||||
-----------
|
||||
{799287}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '745939');
|
||||
ts_lexize
|
||||
-----------
|
||||
{745939}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '67601305734');
|
||||
ts_lexize
|
||||
-----------
|
||||
{676013}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '3361113');
|
||||
ts_lexize
|
||||
-----------
|
||||
{336111}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9033778607');
|
||||
ts_lexize
|
||||
-----------
|
||||
{903377}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '7507648');
|
||||
ts_lexize
|
||||
-----------
|
||||
{750764}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '1166');
|
||||
ts_lexize
|
||||
-----------
|
||||
{1166}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9360498');
|
||||
ts_lexize
|
||||
-----------
|
||||
{936049}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '917795');
|
||||
ts_lexize
|
||||
-----------
|
||||
{917795}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '9387894');
|
||||
ts_lexize
|
||||
-----------
|
||||
{938789}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '42764329');
|
||||
ts_lexize
|
||||
-----------
|
||||
{427643}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '564062');
|
||||
ts_lexize
|
||||
-----------
|
||||
{564062}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '5413377');
|
||||
ts_lexize
|
||||
-----------
|
||||
{541337}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '060965');
|
||||
ts_lexize
|
||||
-----------
|
||||
{060965}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '08273593');
|
||||
ts_lexize
|
||||
-----------
|
||||
{082735}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '593556010144');
|
||||
ts_lexize
|
||||
-----------
|
||||
{593556}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '17988843352');
|
||||
ts_lexize
|
||||
-----------
|
||||
{179888}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '252281774');
|
||||
ts_lexize
|
||||
-----------
|
||||
{252281}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '313425');
|
||||
ts_lexize
|
||||
-----------
|
||||
{313425}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '641439323669');
|
||||
ts_lexize
|
||||
-----------
|
||||
{641439}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '314532610153');
|
||||
ts_lexize
|
||||
-----------
|
||||
{314532}
|
||||
(1 row)
|
||||
|
61
contrib/dict_int/sql/dict_int.sql
Normal file
61
contrib/dict_int/sql/dict_int.sql
Normal file
@ -0,0 +1,61 @@
|
||||
--
|
||||
-- first, define the datatype. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
\i dict_int.sql
|
||||
\set ECHO all
|
||||
RESET client_min_messages;
|
||||
|
||||
--lexize
|
||||
select ts_lexize('intdict', '511673');
|
||||
select ts_lexize('intdict', '129');
|
||||
select ts_lexize('intdict', '40865854');
|
||||
select ts_lexize('intdict', '952');
|
||||
select ts_lexize('intdict', '654980341');
|
||||
select ts_lexize('intdict', '09810106');
|
||||
select ts_lexize('intdict', '14262713');
|
||||
select ts_lexize('intdict', '6532082986');
|
||||
select ts_lexize('intdict', '0150061');
|
||||
select ts_lexize('intdict', '7778');
|
||||
select ts_lexize('intdict', '9547');
|
||||
select ts_lexize('intdict', '753395478');
|
||||
select ts_lexize('intdict', '647652');
|
||||
select ts_lexize('intdict', '6988655574');
|
||||
select ts_lexize('intdict', '1279');
|
||||
select ts_lexize('intdict', '1266645909');
|
||||
select ts_lexize('intdict', '7594193969');
|
||||
select ts_lexize('intdict', '16928207');
|
||||
select ts_lexize('intdict', '196850350328');
|
||||
select ts_lexize('intdict', '22026985592');
|
||||
select ts_lexize('intdict', '2063765');
|
||||
select ts_lexize('intdict', '242387310');
|
||||
select ts_lexize('intdict', '93595');
|
||||
select ts_lexize('intdict', '9374');
|
||||
select ts_lexize('intdict', '996969');
|
||||
select ts_lexize('intdict', '353595982');
|
||||
select ts_lexize('intdict', '925860');
|
||||
select ts_lexize('intdict', '11848378337');
|
||||
select ts_lexize('intdict', '333');
|
||||
select ts_lexize('intdict', '799287416765');
|
||||
select ts_lexize('intdict', '745939');
|
||||
select ts_lexize('intdict', '67601305734');
|
||||
select ts_lexize('intdict', '3361113');
|
||||
select ts_lexize('intdict', '9033778607');
|
||||
select ts_lexize('intdict', '7507648');
|
||||
select ts_lexize('intdict', '1166');
|
||||
select ts_lexize('intdict', '9360498');
|
||||
select ts_lexize('intdict', '917795');
|
||||
select ts_lexize('intdict', '9387894');
|
||||
select ts_lexize('intdict', '42764329');
|
||||
select ts_lexize('intdict', '564062');
|
||||
select ts_lexize('intdict', '5413377');
|
||||
select ts_lexize('intdict', '060965');
|
||||
select ts_lexize('intdict', '08273593');
|
||||
select ts_lexize('intdict', '593556010144');
|
||||
select ts_lexize('intdict', '17988843352');
|
||||
select ts_lexize('intdict', '252281774');
|
||||
select ts_lexize('intdict', '313425');
|
||||
select ts_lexize('intdict', '641439323669');
|
||||
select ts_lexize('intdict', '314532610153');
|
9
contrib/dict_int/uninstall_dict_int.sql
Normal file
9
contrib/dict_int/uninstall_dict_int.sql
Normal file
@ -0,0 +1,9 @@
|
||||
SET search_path = public;
|
||||
|
||||
DROP TEXT SEARCH DICTIONARY intdict;
|
||||
|
||||
DROP TEXT SEARCH TEMPLATE intdict_template;
|
||||
|
||||
DROP FUNCTION dintdict_init(internal);
|
||||
|
||||
DROP FUNCTION dintdict_lexize(internal,internal,internal,internal);
|
Reference in New Issue
Block a user