Add sample text search dictionary templates and parsers, to replace the

hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
2025-07-30 11:03:19 +03:00 · 2007-10-15 21:36:50 +00:00
parent fb631dba2a
commit 5fcb079858
24 changed files with 1324 additions and 9 deletions
--- a/contrib/test_parser/Makefile
+++ b/contrib/test_parser/Makefile
@ -0,0 +1,19 @@
+# $PostgreSQL: pgsql/contrib/test_parser/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+# Shared library to build, and the object file it is linked from.
+MODULE_big = test_parser
+OBJS = test_parser.o
+# test_parser.sql is produced during the build (presumably from
+# test_parser.sql.in); the uninstall script is shipped as-is.
+DATA_built = test_parser.sql
+DATA = uninstall_test_parser.sql
+DOCS = README.test_parser
+# Regression test to run: sql/test_parser.sql checked against
+# expected/test_parser.out.
+REGRESS = test_parser
+
+# Build against an installed server via PGXS when USE_PGXS is set;
+# otherwise build inside the source tree as part of contrib.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/test_parser
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
--- a/contrib/test_parser/README.test_parser
+++ b/contrib/test_parser/README.test_parser
@ -0,0 +1,52 @@
+Example parser
+==============
+
+This is an example of a custom parser for full text search.
+
+It recognizes space-delimited words and returns only two token types:
+
+ - 3,  word,  Word
+
+ - 12, blank, Space symbols
+
+The token numbers have been chosen to keep compatibility with the default
+ts_headline() function, since we do not want to implement our own version.
+
+* Configuration
+
+The parser has no user-configurable parameters.
+
+* Usage
+
+1. Compile and install
+
+2. Load the parser
+
+   psql mydb < test_parser.sql
+
+3. Test it
+
+   mydb# SELECT * FROM ts_parse('testparser','That''s my first own parser');
+    tokid | token
+   -------+--------
+        3 | That's
+       12 |
+        3 | my
+       12 |
+        3 | first
+       12 |
+        3 | own
+       12 |
+        3 | parser
+
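+   The remaining examples need a text search configuration that uses the
+   parser:
+
+   mydb# CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
+   mydb# ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
+
+   mydb# SELECT to_tsvector('testcfg','That''s my first own parser');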
+   to_tsvector
+   -------------------------------------------------
+   'my':2 'own':4 'first':3 'parser':5 'that''s':1
+   
+   mydb# SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'stars'));
+   ts_headline
+   -----------------------------------------------------------------
+   Supernovae <b>stars</b> are the brightest phenomena in galaxies
+   
+That's all.
--- a/contrib/test_parser/expected/test_parser.out
+++ b/contrib/test_parser/expected/test_parser.out
@ -0,0 +1,50 @@
+--
+-- first, define the parser.  Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+-- make test configuration using parser
+CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
+ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
+-- ts_parse
+SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
+ tokid |         token         
+-------+-----------------------
+     3 | That's
+    12 |  
+     3 | simple
+    12 |  
+     3 | parser
+    12 |  
+     3 | can't
+    12 |  
+     3 | parse
+    12 |  
+     3 | urls
+    12 |  
+     3 | like
+    12 |  
+     3 | http://some.url/here/
+(15 rows)
+
+SELECT to_tsvector('testcfg','That''s my first own parser');
+                   to_tsvector                   
+-------------------------------------------------
+ 'my':2 'own':4 'first':3 'parser':5 'that''s':1
+(1 row)
+
+SELECT to_tsquery('testcfg', 'star');
+ to_tsquery 
+------------
+ 'star'
+(1 row)
+
+SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', 
+       to_tsquery('testcfg', 'stars'));
+                           ts_headline                           
+-----------------------------------------------------------------
+ Supernovae <b>stars</b> are the brightest phenomena in galaxies
+(1 row)
+
--- a/contrib/test_parser/sql/test_parser.sql
+++ b/contrib/test_parser/sql/test_parser.sql
@ -0,0 +1,26 @@
+--
+-- first, define the parser.  Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+\i test_parser.sql
+\set ECHO all
+RESET client_min_messages;
+
+-- make test configuration using parser
+
+CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
+
+ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
+
+-- ts_parse
+
+SELECT * FROM ts_parse('testparser', 'That''s simple parser can''t parse urls like http://some.url/here/');
+
+SELECT to_tsvector('testcfg','That''s my first own parser');
+
+SELECT to_tsquery('testcfg', 'star');
+
+SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', 
+       to_tsquery('testcfg', 'stars'));
--- a/contrib/test_parser/test_parser.c
+++ b/contrib/test_parser/test_parser.c
@ -0,0 +1,130 @@
+/*-------------------------------------------------------------------------
+ *
+ * test_parser.c
+ *	  Simple example of a text search parser
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/contrib/test_parser/test_parser.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+
+
+/*
+ * types
+ */
+
+/*
+ * Working state of the parser.  It is allocated by testprs_start, advanced
+ * by each call to testprs_getlexeme, and released by testprs_end.
+ */
+typedef struct {
+	char *	buffer; /* text to parse; points at the caller's text, not a copy */
+	int		len;	/* length of the text in buffer */
+	int		pos;	/* offset in buffer of the next character to be scanned */
+} ParserState;
+
+/*
+ * Description of one token type, as returned by testprs_lextype.
+ * The layout is a copy-paste from wparser.h of tsearch2, so it must not be
+ * changed here.  testprs_lextype returns an array of these whose last entry
+ * has lexid 0.
+ */
+typedef struct {
+	int		lexid;	/* token type number, as returned by testprs_getlexeme */
+	char	*alias;	/* short name of the token type, e.g. "word" */
+	char	*descr;	/* human-readable description of the token type */
+} LexDescr;
+
+/*
+ * prototypes
+ *
+ * The four SQL-callable entry points of the parser.  Each one is registered
+ * with PG_FUNCTION_INFO_V1 and declared with the PG_FUNCTION_ARGS signature:
+ *   testprs_start     - begin parsing a piece of text
+ *   testprs_getlexeme - fetch the next token
+ *   testprs_end       - release the parser state
+ *   testprs_lextype   - describe the token types the parser can return
+ */
+PG_FUNCTION_INFO_V1(testprs_start);
+Datum testprs_start(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_getlexeme);
+Datum testprs_getlexeme(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_end);
+Datum testprs_end(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(testprs_lextype);
+Datum testprs_lextype(PG_FUNCTION_ARGS);
+
+/*
+ * functions
+ */
+/*
+ * testprs_start
+ *	  Begin parsing a piece of text.
+ *
+ * Argument 0 is a pointer to the text and argument 1 is its length.
+ * Returns a newly palloc'd ParserState positioned at the start of the text.
+ * The text itself is not copied; the state only keeps the pointer.
+ */
+Datum testprs_start(PG_FUNCTION_ARGS)
+{
+	char	   *input = (char *) PG_GETARG_POINTER(0);
+	int			inputlen = PG_GETARG_INT32(1);
+	ParserState *state;
+
+	state = (ParserState *) palloc0(sizeof(ParserState));
+	state->pos = 0;
+	state->len = inputlen;
+	state->buffer = input;
+
+	PG_RETURN_POINTER(state);
+}
+
+/*
+ * testprs_getlexeme
+ *	  Return the next token of the text being parsed.
+ *
+ * Argument 0 is the ParserState built by testprs_start.  Argument 1 is a
+ * char ** that receives a pointer to the start of the token inside the
+ * buffer, and argument 2 is an int * that receives the token length.
+ *
+ * The result is the token type: 12 for a run of spaces, 3 for a run of
+ * non-space characters, or 0 once the end of the buffer has been reached.
+ */
+Datum testprs_getlexeme(PG_FUNCTION_ARGS)
+{
+	ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
+	char	  **t = (char **) PG_GETARG_POINTER(1);
+	int		   *tlen = (int *) PG_GETARG_POINTER(2);
+	int			startpos = pst->pos;
+	int			type;
+
+	*t = pst->buffer + pst->pos;
+
+	/*
+	 * The text is delimited by pst->len, so buffer[pos] may only be read
+	 * after confirming pos < len.  Every test below therefore checks the
+	 * bound first; otherwise we would read one byte past the end of the
+	 * text on the final call and whenever the text ends inside a token.
+	 */
+	if (pst->pos < pst->len &&
+		(pst->buffer)[pst->pos] == ' ')
+	{
+		/* blank type */
+		type = 12;
+		/* go to the next non-space character */
+		while (pst->pos < pst->len &&
+			   (pst->buffer)[pst->pos] == ' ')
+			(pst->pos)++;
+	}
+	else
+	{
+		/* word type */
+		type = 3;
+		/* go to the next space character */
+		while (pst->pos < pst->len &&
+			   (pst->buffer)[pst->pos] != ' ')
+			(pst->pos)++;
+	}
+
+	*tlen = pst->pos - startpos;
+
+	/* nothing consumed means we were already at the end of the text */
+	if (*tlen == 0)
+		type = 0;
+
+	PG_RETURN_INT32(type);
+}
+
+/*
+ * testprs_end
+ *	  Finish parsing: free the ParserState passed as argument 0.
+ */
+Datum testprs_end(PG_FUNCTION_ARGS)
+{
+	ParserState *state = (ParserState *) PG_GETARG_POINTER(0);
+
+	pfree(state);
+
+	PG_RETURN_VOID();
+}
+
+/*
+ * testprs_lextype
+ *	  Describe the token types this parser can return.
+ *
+ * Returns a palloc'd array of LexDescr with one entry per token type,
+ * followed by a terminating entry whose lexid is 0 (its alias and descr
+ * are left unset).
+ *
+ * Blanks are reported as a token type of their own because the headline
+ * function needs them.  The type numbers (3 and 12) are the same ones the
+ * default word parser uses, which lets us reuse its headline function.
+ */
+Datum testprs_lextype(PG_FUNCTION_ARGS)
+{
+	/* two real token types, plus one slot for the terminator */
+	const int	ntypes = 2;
+	LexDescr   *types = (LexDescr *) palloc((ntypes + 1) * sizeof(LexDescr));
+	LexDescr   *entry = types;
+
+	entry->lexid = 3;
+	entry->alias = pstrdup("word");
+	entry->descr = pstrdup("Word");
+	entry++;
+
+	entry->lexid = 12;
+	entry->alias = pstrdup("blank");
+	entry->descr = pstrdup("Space symbols");
+	entry++;
+
+	/* end-of-list marker */
+	entry->lexid = 0;
+
+	PG_RETURN_POINTER(types);
+}
--- a/contrib/test_parser/test_parser.sql.in
+++ b/contrib/test_parser/test_parser.sql.in
@ -0,0 +1,36 @@
+-- $PostgreSQL: pgsql/contrib/test_parser/test_parser.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+-- The objects below are created in the schema named here; change it to
+-- install them somewhere else.
+SET search_path = public;
+
+BEGIN;
+
+-- Starts a parse: takes the text pointer and its length, returns the state.
+CREATE FUNCTION testprs_start(internal, integer)
+RETURNS internal
+LANGUAGE C STRICT
+AS 'MODULE_PATHNAME';
+
+-- Fetches the next token: takes the state plus two output pointers.
+CREATE FUNCTION testprs_getlexeme(internal, internal, internal)
+RETURNS internal
+LANGUAGE C STRICT
+AS 'MODULE_PATHNAME';
+
+-- Ends a parse: releases the state.
+CREATE FUNCTION testprs_end(internal)
+RETURNS void
+LANGUAGE C STRICT
+AS 'MODULE_PATHNAME';
+
+-- Lists the token types the parser can produce.
+CREATE FUNCTION testprs_lextype(internal)
+RETURNS internal
+LANGUAGE C STRICT
+AS 'MODULE_PATHNAME';
+
+-- Tie the functions together into a parser.  Headlines are produced by the
+-- built-in pg_catalog.prsd_headline rather than by code in this module.
+CREATE TEXT SEARCH PARSER testparser (
+    START = testprs_start,
+    GETTOKEN = testprs_getlexeme,
+    END = testprs_end,
+    HEADLINE = pg_catalog.prsd_headline,
+    LEXTYPES = testprs_lextype
+);
+
+COMMIT;
--- a/contrib/test_parser/uninstall_test_parser.sql
+++ b/contrib/test_parser/uninstall_test_parser.sql
@ -0,0 +1,11 @@
+-- Removes everything test_parser.sql created.  The schema named here must
+-- be the one the objects were installed into.
+SET search_path = public;
+
+-- The parser refers to the functions, so it has to go first.
+DROP TEXT SEARCH PARSER testparser;
+
+-- Support functions, dropped in the reverse of their creation order.
+DROP FUNCTION testprs_lextype(internal);
+DROP FUNCTION testprs_end(internal);
+DROP FUNCTION testprs_getlexeme(internal, internal, internal);
+DROP FUNCTION testprs_start(internal, integer);