mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Add sample text search dictionary templates and parsers, to replace the
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
This commit is contained in:
19
contrib/test_parser/Makefile
Normal file
19
contrib/test_parser/Makefile
Normal file
@ -0,0 +1,19 @@
|
||||
# $PostgreSQL: pgsql/contrib/test_parser/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $

# The module is a single shared library built from the objects in OBJS.
MODULE_big = test_parser
OBJS = test_parser.o

# Installed SQL scripts: test_parser.sql has to be built first
# (a test_parser.sql.in ships with the module); the uninstall script
# is installed as-is.
DATA_built = test_parser.sql
DATA = uninstall_test_parser.sql

DOCS = README.test_parser

# Name of the regression test (sql/test_parser.sql, expected/test_parser.out).
REGRESS = test_parser

# Build either against an installed server (USE_PGXS set) or from within
# the source tree, where this directory lives at contrib/test_parser.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = contrib/test_parser
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
|
52
contrib/test_parser/README.test_parser
Normal file
52
contrib/test_parser/README.test_parser
Normal file
@ -0,0 +1,52 @@
|
||||
Example parser
|
||||
==============
|
||||
|
||||
This is an example of a custom parser for full text search.
|
||||
|
||||
It recognizes space-delimited words and returns only two token types:
|
||||
|
||||
- 3, word, Word
|
||||
|
||||
- 12, blank, Space symbols
|
||||
|
||||
The token numbers have been chosen to keep compatibility with the default
|
||||
ts_headline() function, since we do not want to implement our own version.
|
||||
|
||||
* Configuration
|
||||
|
||||
The parser has no user-configurable parameters.
|
||||
|
||||
* Usage
|
||||
|
||||
1. Compile and install
|
||||
|
||||
2. Load the parser
|
||||
|
||||
psql mydb < test_parser.sql
|
||||
|
||||
3. Test it
|
||||
|
||||
mydb# SELECT * FROM ts_parse('testparser','That''s my first own parser');
|
||||
tokid | token
|
||||
-------+--------
|
||||
3 | That's
|
||||
12 |
|
||||
3 | my
|
||||
12 |
|
||||
3 | first
|
||||
12 |
|
||||
3 | own
|
||||
12 |
|
||||
3 | parser
|
||||
|
||||
mydb# SELECT to_tsvector('testcfg','That''s my first own parser');
|
||||
to_tsvector
|
||||
-------------------------------------------------
|
||||
'my':2 'own':4 'first':3 'parser':5 'that''s':1
|
||||
|
||||
mydb# SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'stars'));
|
||||
ts_headline
|
||||
-----------------------------------------------------------------
|
||||
Supernovae <b>stars</b> are the brightest phenomena in galaxies
|
||||
|
||||
That's all.
|
50
contrib/test_parser/expected/test_parser.out
Normal file
50
contrib/test_parser/expected/test_parser.out
Normal file
@ -0,0 +1,50 @@
|
||||
--
|
||||
-- first, define the parser. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
RESET client_min_messages;
|
||||
-- make test configuration using parser
|
||||
CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
|
||||
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
|
||||
-- ts_parse
|
||||
SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
|
||||
tokid | token
|
||||
-------+-----------------------
|
||||
3 | That's
|
||||
12 |
|
||||
3 | simple
|
||||
12 |
|
||||
3 | parser
|
||||
12 |
|
||||
3 | can't
|
||||
12 |
|
||||
3 | parse
|
||||
12 |
|
||||
3 | urls
|
||||
12 |
|
||||
3 | like
|
||||
12 |
|
||||
3 | http://some.url/here/
|
||||
(15 rows)
|
||||
|
||||
SELECT to_tsvector('testcfg','That''s my first own parser');
|
||||
to_tsvector
|
||||
-------------------------------------------------
|
||||
'my':2 'own':4 'first':3 'parser':5 'that''s':1
|
||||
(1 row)
|
||||
|
||||
SELECT to_tsquery('testcfg', 'star');
|
||||
to_tsquery
|
||||
------------
|
||||
'star'
|
||||
(1 row)
|
||||
|
||||
SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
|
||||
to_tsquery('testcfg', 'stars'));
|
||||
ts_headline
|
||||
-----------------------------------------------------------------
|
||||
Supernovae <b>stars</b> are the brightest phenomena in galaxies
|
||||
(1 row)
|
||||
|
26
contrib/test_parser/sql/test_parser.sql
Normal file
26
contrib/test_parser/sql/test_parser.sql
Normal file
@ -0,0 +1,26 @@
|
||||
--
|
||||
-- first, define the parser. Turn off echoing so that expected file
|
||||
-- does not depend on contents of this file.
|
||||
--
|
||||
SET client_min_messages = warning;
|
||||
\set ECHO none
|
||||
\i test_parser.sql
|
||||
\set ECHO all
|
||||
RESET client_min_messages;
|
||||
|
||||
-- make test configuration using parser
|
||||
|
||||
CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
|
||||
|
||||
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
|
||||
|
||||
-- ts_parse
|
||||
|
||||
SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
|
||||
|
||||
SELECT to_tsvector('testcfg','That''s my first own parser');
|
||||
|
||||
SELECT to_tsquery('testcfg', 'star');
|
||||
|
||||
SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies',
|
||||
to_tsquery('testcfg', 'stars'));
|
130
contrib/test_parser/test_parser.c
Normal file
130
contrib/test_parser/test_parser.c
Normal file
@ -0,0 +1,130 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* test_parser.c
|
||||
* Simple example of a text search parser
|
||||
*
|
||||
* Copyright (c) 2007, PostgreSQL Global Development Group
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/contrib/test_parser/test_parser.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "fmgr.h"
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
|
||||
/*
|
||||
* types
|
||||
*/
|
||||
|
||||
/*
 * Self-defined parser state: the text being parsed plus the current
 * read position within it.
 */
typedef struct
{
	char	   *buffer;			/* text to parse */
	int			len;			/* length of the text in buffer */
	int			pos;			/* position of the parser */
} ParserState;
|
||||
|
||||
/*
 * Description of one token type: numeric id, short alias and a
 * human-readable description.  Copy-paste from wparser.h of tsearch2.
 */
typedef struct
{
	int			lexid;
	char	   *alias;
	char	   *descr;
} LexDescr;
|
||||
|
||||
/*
|
||||
* prototypes
|
||||
*/
|
||||
PG_FUNCTION_INFO_V1(testprs_start);
|
||||
Datum testprs_start(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_getlexeme);
|
||||
Datum testprs_getlexeme(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_end);
|
||||
Datum testprs_end(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(testprs_lextype);
|
||||
Datum testprs_lextype(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
|
||||
* functions
|
||||
*/
|
||||
Datum testprs_start(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ParserState *pst = (ParserState *) palloc0(sizeof(ParserState));
|
||||
pst->buffer = (char *) PG_GETARG_POINTER(0);
|
||||
pst->len = PG_GETARG_INT32(1);
|
||||
pst->pos = 0;
|
||||
|
||||
PG_RETURN_POINTER(pst);
|
||||
}
|
||||
|
||||
Datum testprs_getlexeme(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
|
||||
char **t = (char **) PG_GETARG_POINTER(1);
|
||||
int *tlen = (int *) PG_GETARG_POINTER(2);
|
||||
int type;
|
||||
|
||||
*tlen = pst->pos;
|
||||
*t = pst->buffer + pst->pos;
|
||||
|
||||
if ((pst->buffer)[pst->pos] == ' ')
|
||||
{
|
||||
/* blank type */
|
||||
type = 12;
|
||||
/* go to the next non-white-space character */
|
||||
while ((pst->buffer)[pst->pos] == ' ' &&
|
||||
pst->pos < pst->len)
|
||||
(pst->pos)++;
|
||||
} else {
|
||||
/* word type */
|
||||
type = 3;
|
||||
/* go to the next white-space character */
|
||||
while ((pst->buffer)[pst->pos] != ' ' &&
|
||||
pst->pos < pst->len)
|
||||
(pst->pos)++;
|
||||
}
|
||||
|
||||
*tlen = pst->pos - *tlen;
|
||||
|
||||
/* we are finished if (*tlen == 0) */
|
||||
if (*tlen == 0)
|
||||
type=0;
|
||||
|
||||
PG_RETURN_INT32(type);
|
||||
}
|
||||
|
||||
Datum testprs_end(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
|
||||
pfree(pst);
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
Datum testprs_lextype(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/*
|
||||
* Remarks:
|
||||
* - we have to return the blanks for headline reason
|
||||
* - we use the same lexids like Teodor in the default
|
||||
* word parser; in this way we can reuse the headline
|
||||
* function of the default word parser.
|
||||
*/
|
||||
LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1));
|
||||
|
||||
/* there are only two types in this parser */
|
||||
descr[0].lexid = 3;
|
||||
descr[0].alias = pstrdup("word");
|
||||
descr[0].descr = pstrdup("Word");
|
||||
descr[1].lexid = 12;
|
||||
descr[1].alias = pstrdup("blank");
|
||||
descr[1].descr = pstrdup("Space symbols");
|
||||
descr[2].lexid = 0;
|
||||
|
||||
PG_RETURN_POINTER(descr);
|
||||
}
|
36
contrib/test_parser/test_parser.sql.in
Normal file
36
contrib/test_parser/test_parser.sql.in
Normal file
@ -0,0 +1,36 @@
|
||||
-- $PostgreSQL: pgsql/contrib/test_parser/test_parser.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
|
||||
|
||||
-- Adjust this setting to control where the objects get created.
SET search_path = public;

-- Create the support functions and the parser in a single transaction.
BEGIN;

-- Support function used as the parser's START method.
CREATE FUNCTION testprs_start(internal, int4)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT;

-- Support function used as the parser's GETTOKEN method.
CREATE FUNCTION testprs_getlexeme(internal, internal, internal)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT;

-- Support function used as the parser's END method.
CREATE FUNCTION testprs_end(internal)
    RETURNS void
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT;

-- Support function used as the parser's LEXTYPES method.
CREATE FUNCTION testprs_lextype(internal)
    RETURNS internal
    AS 'MODULE_PATHNAME'
    LANGUAGE C STRICT;

-- The parser itself.  HEADLINE uses the built-in pg_catalog.prsd_headline
-- rather than a function from this module.
CREATE TEXT SEARCH PARSER testparser (
    START    = testprs_start,
    GETTOKEN = testprs_getlexeme,
    END      = testprs_end,
    HEADLINE = pg_catalog.prsd_headline,
    LEXTYPES = testprs_lextype
);

END;
|
11
contrib/test_parser/uninstall_test_parser.sql
Normal file
11
contrib/test_parser/uninstall_test_parser.sql
Normal file
@ -0,0 +1,11 @@
|
||||
-- Must match the schema the objects were created in.
SET search_path = public;

-- Drop the parser first, then the functions it was defined with.
DROP TEXT SEARCH PARSER testparser;

DROP FUNCTION testprs_start(internal, int4);
DROP FUNCTION testprs_getlexeme(internal, internal, internal);
DROP FUNCTION testprs_end(internal);
DROP FUNCTION testprs_lextype(internal);
|
Reference in New Issue
Block a user