1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Add sample text search dictionary templates and parsers, to replace the

hard-to-maintain textual examples currently in the SGML docs.  From
Sergey Karpov.
This commit is contained in:
Tom Lane
2007-10-15 21:36:50 +00:00
parent fb631dba2a
commit 5fcb079858
24 changed files with 1324 additions and 9 deletions

19
contrib/dict_int/Makefile Normal file
View File

@ -0,0 +1,19 @@
# $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
# Build description for the dict_int contrib module (example text search
# dictionary for integers).
# Name of the module and the object file(s) it is built from.
MODULE_big = dict_int
OBJS = dict_int.o
# dict_int.sql is listed as a built data file (its source carries the name
# dict_int.sql.in); the uninstall script is a plain data file.
DATA_built = dict_int.sql
DATA = uninstall_dict_int.sql
DOCS = README.dict_int
# Name of the regression test to run for this module.
REGRESS = dict_int
# Two build modes: with USE_PGXS defined, the makefile fragment is located by
# asking pg_config; otherwise the module is built relative to the top of the
# build tree two directories up.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/dict_int
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

View File

@ -0,0 +1,41 @@
Dictionary for integers
=======================
The motivation for this example dictionary is to control the indexing of
integers (signed and unsigned) and, consequently, to limit the number of
unique words, since a large number of unique words greatly degrades search
performance.
* Configuration
The dictionary accepts two options:
- The MAXLEN parameter specifies the maximum length (number of digits)
allowed in an integer word. The default value is 6.
- The REJECTLONG parameter specifies if an overlength integer should be
truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
dictionary treats an overlength integer as a stop word, so that it will
not be indexed.
* Usage
1. Compile and install
2. Load dictionary
psql mydb < dict_int.sql
3. Test it
mydb# select ts_lexize('intdict', '12345678');
ts_lexize
-----------
{123456}
4. Change its options as you wish
mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
ALTER TEXT SEARCH DICTIONARY
That's all.

View File

@ -0,0 +1,99 @@
/*-------------------------------------------------------------------------
*
* dict_int.c
* Text search dictionary for integers
*
* Copyright (c) 2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "commands/defrem.h"
#include "fmgr.h"
#include "tsearch/ts_public.h"
PG_MODULE_MAGIC;
/*
 * Per-dictionary state: allocated and filled in by dintdict_init, read by
 * dintdict_lexize.
 */
typedef struct {
/* maximum input length that is kept; longer input is cut to this length or rejected */
int maxlen;
/* true: overlength input produces no lexeme; false: it is truncated to maxlen */
bool rejectlong;
} DictInt;
/* Function-manager declarations for the two SQL-callable entry points. */
PG_FUNCTION_INFO_V1(dintdict_init);
Datum dintdict_init(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(dintdict_lexize);
Datum dintdict_lexize(PG_FUNCTION_ARGS);
/*
 * dintdict_init
 *      Build the dictionary state from its option list.
 *
 * Argument 0 is a List of DefElem options.  Option names are matched
 * case-insensitively:
 *      MAXLEN      maximum length kept in a lexeme; default 6, must be >= 1
 *      REJECTLONG  boolean; default false
 *
 * Raises ERRCODE_INVALID_PARAMETER_VALUE for an unrecognized option name or
 * for a MAXLEN below 1.  Returns a palloc'd DictInt as a pointer Datum.
 */
Datum
dintdict_init(PG_FUNCTION_ARGS)
{
    List       *dictoptions = (List *) PG_GETARG_POINTER(0);
    DictInt    *d;
    ListCell   *l;

    d = (DictInt *) palloc0(sizeof(DictInt));
    d->maxlen = 6;
    d->rejectlong = false;

    foreach(l, dictoptions)
    {
        DefElem    *defel = (DefElem *) lfirst(l);

        if (pg_strcasecmp(defel->defname, "MAXLEN") == 0)
        {
            d->maxlen = atoi(defGetString(defel));

            /*
             * dintdict_lexize truncates by storing a terminator at
             * txt[maxlen].  A negative value would write before the start
             * of the buffer and zero would turn every integer into an empty
             * lexeme, so refuse anything below 1.  atoi() yields 0 for
             * non-numeric input, which is rejected here as well.
             */
            if (d->maxlen < 1)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("maxlen value has to be >= 1")));
        }
        else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0)
        {
            d->rejectlong = defGetBoolean(defel);
        }
        else
        {
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("unrecognized intdict parameter: \"%s\"",
                            defel->defname)));
        }
    }

    PG_RETURN_POINTER(d);
}
/*
 * dintdict_lexize
 *      Turn one input token into the dictionary's output lexeme array.
 *
 * Arguments: 0 = DictInt state built by dintdict_init, 1 = pointer to the
 * token text (not assumed to be NUL-terminated; it is copied with
 * pnstrdup), 2 = token length.
 *
 * Returns a palloc'd two-element TSLexeme array; the second element is the
 * terminator (lexeme == NULL).  The first element holds
 *      - a copy of the token, if it is no longer than maxlen;
 *      - the first maxlen characters of it, if it is longer and rejectlong
 *        is false;
 *      - NULL (i.e. an empty array), if it is longer and rejectlong is true.
 */
Datum
dintdict_lexize(PG_FUNCTION_ARGS)
{
    DictInt    *d = (DictInt *) PG_GETARG_POINTER(0);
    char       *in = (char *) PG_GETARG_POINTER(1);
    int         len = PG_GETARG_INT32(2);
    char       *txt = pnstrdup(in, len);
    TSLexeme   *res;

    /*
     * Zero the whole array: only the lexeme members are assigned below, and
     * the remaining TSLexeme members must not be returned to the caller as
     * uninitialized memory.  This also leaves res[1].lexeme == NULL, which
     * terminates the array.
     */
    res = (TSLexeme *) palloc0(sizeof(TSLexeme) * 2);

    if (len > d->maxlen)
    {
        if (d->rejectlong)
        {
            /* reject by returning an empty array (res[0].lexeme stays NULL) */
            pfree(txt);
        }
        else
        {
            /* trim integer to its first maxlen characters */
            txt[d->maxlen] = '\0';
            res[0].lexeme = txt;
        }
    }
    else
    {
        res[0].lexeme = txt;
    }

    PG_RETURN_POINTER(res);
}

View File

@ -0,0 +1,29 @@
-- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
-- Installation script for the dict_int text search dictionary.
-- Everything below runs between BEGIN and END.
-- Adjust this setting to control where the objects get created.
SET search_path = public;
BEGIN;
-- C entry point that builds the dictionary state from its options.
-- NOTE(review): 'MODULE_PATHNAME' looks like a placeholder substituted when
-- dict_int.sql is generated from this .in file -- confirm against the build rules.
CREATE FUNCTION dintdict_init(internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
-- C entry point that converts one token into lexemes.  It is declared with
-- four internal arguments; the C implementation reads the first three
-- (dictionary state, token pointer, token length).
CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal)
RETURNS internal
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
-- Template tying the two support functions together.
CREATE TEXT SEARCH TEMPLATE intdict_template (
LEXIZE = dintdict_lexize,
INIT = dintdict_init
);
-- Dictionary created from the template with no options given here.
CREATE TEXT SEARCH DICTIONARY intdict (
TEMPLATE = intdict_template
);
COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers';
END;

View File

@ -0,0 +1,308 @@
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of this file.
--
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
--lexize
select ts_lexize('intdict', '511673');
ts_lexize
-----------
{511673}
(1 row)
select ts_lexize('intdict', '129');
ts_lexize
-----------
{129}
(1 row)
select ts_lexize('intdict', '40865854');
ts_lexize
-----------
{408658}
(1 row)
select ts_lexize('intdict', '952');
ts_lexize
-----------
{952}
(1 row)
select ts_lexize('intdict', '654980341');
ts_lexize
-----------
{654980}
(1 row)
select ts_lexize('intdict', '09810106');
ts_lexize
-----------
{098101}
(1 row)
select ts_lexize('intdict', '14262713');
ts_lexize
-----------
{142627}
(1 row)
select ts_lexize('intdict', '6532082986');
ts_lexize
-----------
{653208}
(1 row)
select ts_lexize('intdict', '0150061');
ts_lexize
-----------
{015006}
(1 row)
select ts_lexize('intdict', '7778');
ts_lexize
-----------
{7778}
(1 row)
select ts_lexize('intdict', '9547');
ts_lexize
-----------
{9547}
(1 row)
select ts_lexize('intdict', '753395478');
ts_lexize
-----------
{753395}
(1 row)
select ts_lexize('intdict', '647652');
ts_lexize
-----------
{647652}
(1 row)
select ts_lexize('intdict', '6988655574');
ts_lexize
-----------
{698865}
(1 row)
select ts_lexize('intdict', '1279');
ts_lexize
-----------
{1279}
(1 row)
select ts_lexize('intdict', '1266645909');
ts_lexize
-----------
{126664}
(1 row)
select ts_lexize('intdict', '7594193969');
ts_lexize
-----------
{759419}
(1 row)
select ts_lexize('intdict', '16928207');
ts_lexize
-----------
{169282}
(1 row)
select ts_lexize('intdict', '196850350328');
ts_lexize
-----------
{196850}
(1 row)
select ts_lexize('intdict', '22026985592');
ts_lexize
-----------
{220269}
(1 row)
select ts_lexize('intdict', '2063765');
ts_lexize
-----------
{206376}
(1 row)
select ts_lexize('intdict', '242387310');
ts_lexize
-----------
{242387}
(1 row)
select ts_lexize('intdict', '93595');
ts_lexize
-----------
{93595}
(1 row)
select ts_lexize('intdict', '9374');
ts_lexize
-----------
{9374}
(1 row)
select ts_lexize('intdict', '996969');
ts_lexize
-----------
{996969}
(1 row)
select ts_lexize('intdict', '353595982');
ts_lexize
-----------
{353595}
(1 row)
select ts_lexize('intdict', '925860');
ts_lexize
-----------
{925860}
(1 row)
select ts_lexize('intdict', '11848378337');
ts_lexize
-----------
{118483}
(1 row)
select ts_lexize('intdict', '333');
ts_lexize
-----------
{333}
(1 row)
select ts_lexize('intdict', '799287416765');
ts_lexize
-----------
{799287}
(1 row)
select ts_lexize('intdict', '745939');
ts_lexize
-----------
{745939}
(1 row)
select ts_lexize('intdict', '67601305734');
ts_lexize
-----------
{676013}
(1 row)
select ts_lexize('intdict', '3361113');
ts_lexize
-----------
{336111}
(1 row)
select ts_lexize('intdict', '9033778607');
ts_lexize
-----------
{903377}
(1 row)
select ts_lexize('intdict', '7507648');
ts_lexize
-----------
{750764}
(1 row)
select ts_lexize('intdict', '1166');
ts_lexize
-----------
{1166}
(1 row)
select ts_lexize('intdict', '9360498');
ts_lexize
-----------
{936049}
(1 row)
select ts_lexize('intdict', '917795');
ts_lexize
-----------
{917795}
(1 row)
select ts_lexize('intdict', '9387894');
ts_lexize
-----------
{938789}
(1 row)
select ts_lexize('intdict', '42764329');
ts_lexize
-----------
{427643}
(1 row)
select ts_lexize('intdict', '564062');
ts_lexize
-----------
{564062}
(1 row)
select ts_lexize('intdict', '5413377');
ts_lexize
-----------
{541337}
(1 row)
select ts_lexize('intdict', '060965');
ts_lexize
-----------
{060965}
(1 row)
select ts_lexize('intdict', '08273593');
ts_lexize
-----------
{082735}
(1 row)
select ts_lexize('intdict', '593556010144');
ts_lexize
-----------
{593556}
(1 row)
select ts_lexize('intdict', '17988843352');
ts_lexize
-----------
{179888}
(1 row)
select ts_lexize('intdict', '252281774');
ts_lexize
-----------
{252281}
(1 row)
select ts_lexize('intdict', '313425');
ts_lexize
-----------
{313425}
(1 row)
select ts_lexize('intdict', '641439323669');
ts_lexize
-----------
{641439}
(1 row)
select ts_lexize('intdict', '314532610153');
ts_lexize
-----------
{314532}
(1 row)

View File

@ -0,0 +1,61 @@
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of this file.
--
SET client_min_messages = warning;
\set ECHO none
-- Load the dictionary while echo is off: the lines between "ECHO none" and
-- "ECHO all" do not appear in the expected output file.
\i dict_int.sql
\set ECHO all
RESET client_min_messages;
--lexize
select ts_lexize('intdict', '511673');
select ts_lexize('intdict', '129');
select ts_lexize('intdict', '40865854');
select ts_lexize('intdict', '952');
select ts_lexize('intdict', '654980341');
select ts_lexize('intdict', '09810106');
select ts_lexize('intdict', '14262713');
select ts_lexize('intdict', '6532082986');
select ts_lexize('intdict', '0150061');
select ts_lexize('intdict', '7778');
select ts_lexize('intdict', '9547');
select ts_lexize('intdict', '753395478');
select ts_lexize('intdict', '647652');
select ts_lexize('intdict', '6988655574');
select ts_lexize('intdict', '1279');
select ts_lexize('intdict', '1266645909');
select ts_lexize('intdict', '7594193969');
select ts_lexize('intdict', '16928207');
select ts_lexize('intdict', '196850350328');
select ts_lexize('intdict', '22026985592');
select ts_lexize('intdict', '2063765');
select ts_lexize('intdict', '242387310');
select ts_lexize('intdict', '93595');
select ts_lexize('intdict', '9374');
select ts_lexize('intdict', '996969');
select ts_lexize('intdict', '353595982');
select ts_lexize('intdict', '925860');
select ts_lexize('intdict', '11848378337');
select ts_lexize('intdict', '333');
select ts_lexize('intdict', '799287416765');
select ts_lexize('intdict', '745939');
select ts_lexize('intdict', '67601305734');
select ts_lexize('intdict', '3361113');
select ts_lexize('intdict', '9033778607');
select ts_lexize('intdict', '7507648');
select ts_lexize('intdict', '1166');
select ts_lexize('intdict', '9360498');
select ts_lexize('intdict', '917795');
select ts_lexize('intdict', '9387894');
select ts_lexize('intdict', '42764329');
select ts_lexize('intdict', '564062');
select ts_lexize('intdict', '5413377');
select ts_lexize('intdict', '060965');
select ts_lexize('intdict', '08273593');
select ts_lexize('intdict', '593556010144');
select ts_lexize('intdict', '17988843352');
select ts_lexize('intdict', '252281774');
select ts_lexize('intdict', '313425');
select ts_lexize('intdict', '641439323669');
select ts_lexize('intdict', '314532610153');

View File

@ -0,0 +1,9 @@
-- Removes the objects created by dict_int.sql.
-- Order matters: the dictionary is dropped before the template it was created
-- from, and the template before the two functions it names.
SET search_path = public;
DROP TEXT SEARCH DICTIONARY intdict;
DROP TEXT SEARCH TEMPLATE intdict_template;
DROP FUNCTION dintdict_init(internal);
DROP FUNCTION dintdict_lexize(internal,internal,internal,internal);