Tsearch2 functionality migrates to core. The bulk of this work is by

Oleg Bartunov and Teodor Sigaev, but I did a lot of editorializing, so anything that's broken is probably my fault. Documentation is nonexistent as yet, but let's land the patch so we can get some portability testing done.
2025-09-02 04:21:28 +03:00 · 2007-08-21 01:11:32 +00:00
parent 4e94d1f952
commit 140d4ebcb4
200 changed files with 54388 additions and 147 deletions
--- a/src/backend/tsearch/Makefile
+++ b/src/backend/tsearch/Makefile
@@ -0,0 +1,51 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for backend/tsearch
+#
+# Copyright (c) 2006-2007, PostgreSQL Global Development Group
+#
+# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.1 2007/08/21 01:11:18 tgl Exp $
+#
+#-------------------------------------------------------------------------
+subdir = src/backend/tsearch
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+DICTDIR=tsearch_data
+
+# Sample dictionary files copied into $(datadir)/$(DICTDIR) by install-data
+DICTFILES=synonym.syn.sample thesaurus.ths.sample
+
+OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
+	dict_simple.o dict_synonym.o dict_thesaurus.o \
+	dict_ispell.o regis.o spell.o \
+	to_tsany.o ts_utils.o
+
+all: SUBSYS.o
+
+SUBSYS.o: $(OBJS)
+	$(LD) $(LDREL) $(LDOUT) SUBSYS.o $^
+
+depend dep:
+	$(CC) -MM $(CFLAGS) *.c >depend
+
+# "|| exit" makes the recipe fail as soon as one file cannot be installed;
+# without it the shell loop reports only the status of the last iteration.
+.PHONY: install-data
+install-data: $(DICTFILES) installdirs
+	for i in $(DICTFILES); \
+		do $(INSTALL_DATA) $$i '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i || exit; \
+	done
+
+installdirs:
+	$(mkinstalldirs) '$(DESTDIR)$(datadir)' '$(DESTDIR)$(datadir)/$(DICTDIR)'
+
+# Only regular files are installed, so plain "rm -f" is enough; "-r" would
+# recursively delete a directory that happened to carry a sample's name.
+.PHONY: uninstall-data
+uninstall-data:
+	for i in $(DICTFILES); \
+		do rm -f '$(DESTDIR)$(datadir)/$(DICTDIR)/'$$i ; \
+	done
+
+clean distclean maintainer-clean:
+	rm -f SUBSYS.o $(OBJS)
+
+ifeq (depend,$(wildcard depend))
+include depend
+endif
--- a/src/backend/tsearch/dict.c
+++ b/src/backend/tsearch/dict.c
@@ -0,0 +1,131 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict.c
+ *		Standard interface to dictionary
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/dict.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/skey.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_ts_dict.h"
+#include "catalog/pg_type.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_utils.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/*
+ * ts_lexize_workhorse
+ *		Pass one word through a dictionary and return its lexemes as a
+ *		text array.  Mostly a debugging aid.
+ *
+ * The dictionary's lexize function is called once; if it sets
+ * dstate.getnext it is called a second time with dstate.isend = true, and a
+ * non-NULL second answer replaces the first.  Returns NULL when the
+ * dictionary returned NULL.
+ */
+static ArrayType *
+ts_lexize_workhorse(Oid dictId, text *in)
+{
+	TSDictionaryCacheEntry *dict;
+	DictSubState dstate = {false, false, NULL};
+	TSLexeme   *lexemes;
+	Datum	   *elems;
+	ArrayType  *result;
+	int			nlexemes;
+	int			i;
+
+	dict = lookup_ts_dictionary_cache(dictId);
+
+	lexemes = (TSLexeme *) DatumGetPointer(FunctionCall4(&dict->lexize,
+											 PointerGetDatum(dict->dictData),
+												PointerGetDatum(VARDATA(in)),
+									   Int32GetDatum(VARSIZE(in) - VARHDRSZ),
+												 PointerGetDatum(&dstate)));
+
+	if (dstate.getnext)
+	{
+		TSLexeme   *final;
+
+		/* the dictionary wants more input: tell it there is none */
+		dstate.isend = true;
+		final = (TSLexeme *) DatumGetPointer(FunctionCall4(&dict->lexize,
+											 PointerGetDatum(dict->dictData),
+												PointerGetDatum(VARDATA(in)),
+									   Int32GetDatum(VARSIZE(in) - VARHDRSZ),
+												 PointerGetDatum(&dstate)));
+		if (final != NULL)
+			lexemes = final;
+	}
+
+	if (lexemes == NULL)
+		return NULL;
+
+	/* the lexeme list is terminated by an entry whose lexeme is NULL */
+	for (nlexemes = 0; lexemes[nlexemes].lexeme; nlexemes++)
+		;
+
+	elems = (Datum *) palloc(sizeof(Datum) * (nlexemes + 1));
+	for (i = 0; i < nlexemes; i++)
+		elems[i] = DirectFunctionCall1(textin,
+									   CStringGetDatum(lexemes[i].lexeme));
+
+	result = construct_array(elems,
+							 nlexemes,
+							 TEXTOID,
+							 -1,
+							 false,
+							 'i');
+
+	/* release the per-lexeme text datums and strings, then the containers */
+	for (i = 0; i < nlexemes; i++)
+	{
+		pfree(DatumGetPointer(elems[i]));
+		pfree(lexemes[i].lexeme);
+	}
+	pfree(lexemes);
+	pfree(elems);
+
+	return result;
+}
+
+/*
+ * ts_lexize_byid
+ *		SQL-callable wrapper: lexize argument 1 with the dictionary whose
+ *		OID is argument 0.  Yields SQL NULL when the workhorse returns NULL.
+ */
+Datum
+ts_lexize_byid(PG_FUNCTION_ARGS)
+{
+	Oid			dictId = PG_GETARG_OID(0);
+	text	   *in = PG_GETARG_TEXT_P(1);
+	ArrayType  *lexemes = ts_lexize_workhorse(dictId, in);
+
+	if (lexemes == NULL)
+		PG_RETURN_NULL();
+
+	PG_RETURN_POINTER(lexemes);
+}
+
+/*
+ * ts_lexize_byname
+ *		SQL-callable wrapper: lexize argument 1 with the dictionary whose
+ *		(possibly qualified) name is argument 0.  Yields SQL NULL when the
+ *		workhorse returns NULL.
+ */
+Datum
+ts_lexize_byname(PG_FUNCTION_ARGS)
+{
+	text	   *dictname = PG_GETARG_TEXT_P(0);
+	text	   *in = PG_GETARG_TEXT_P(1);
+	Oid			dictId = TSDictionaryGetDictid(textToQualifiedNameList(dictname), false);
+	ArrayType  *lexemes = ts_lexize_workhorse(dictId, in);
+
+	if (lexemes == NULL)
+		PG_RETURN_NULL();
+
+	PG_RETURN_POINTER(lexemes);
+}
--- a/src/backend/tsearch/dict_ispell.c
+++ b/src/backend/tsearch/dict_ispell.c
@@ -0,0 +1,164 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_ispell.c
+ *		Ispell dictionary interface
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "tsearch/dicts/spell.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+
+
+typedef struct
+{
+	StopList	stoplist;		/* filled by dispell_init's StopWords option, consulted by dispell_lexize */
+	IspellDict	obj;			/* filled by NIImportDictionary/NIImportAffixes, used by NINormalizeWord */
+} DictISpell;
+
+/*
+ * dispell_init
+ *		Build an ISpell dictionary from a key/value configuration string.
+ *
+ * Recognized keys (case-insensitive): DictFile, AffFile, StopWords.  Each
+ * may appear at most once; DictFile and AffFile are both required.  Any
+ * other key is an error.
+ */
+Datum
+dispell_init(PG_FUNCTION_ARGS)
+{
+	DictISpell *d;
+	Map		   *cfg;
+	Map		   *item;
+	text	   *in;
+	bool		have_dict = false;
+	bool		have_aff = false;
+	bool		have_stop = false;
+
+	/* init functions must defend against NULLs for themselves */
+	if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("NULL config not allowed for ISpell")));
+	in = PG_GETARG_TEXT_P(0);
+
+	parse_keyvalpairs(in, &cfg);
+	PG_FREE_IF_COPY(in, 0);
+
+	d = (DictISpell *) palloc0(sizeof(DictISpell));
+	d->stoplist.wordop = recode_and_lowerstr;
+
+	/* the option array is terminated by an entry with a NULL key */
+	for (item = cfg; item->key; item++)
+	{
+		if (pg_strcasecmp("DictFile", item->key) == 0)
+		{
+			if (have_dict)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple DictFile parameters")));
+			NIImportDictionary(&(d->obj),
+							   get_tsearch_config_filename(item->value,
+														   "dict"));
+			have_dict = true;
+		}
+		else if (pg_strcasecmp("AffFile", item->key) == 0)
+		{
+			if (have_aff)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple AffFile parameters")));
+			NIImportAffixes(&(d->obj),
+							get_tsearch_config_filename(item->value,
+														"affix"));
+			have_aff = true;
+		}
+		else if (pg_strcasecmp("StopWords", item->key) == 0)
+		{
+			if (have_stop)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple StopWords parameters")));
+			readstoplist(item->value, &(d->stoplist));
+			sortstoplist(&(d->stoplist));
+			have_stop = true;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized ISpell parameter: \"%s\"",
+							item->key)));
+
+		/* this option is consumed; release its strings */
+		pfree(item->key);
+		pfree(item->value);
+	}
+	pfree(cfg);
+
+	/* a missing AffFile is reported in preference to a missing DictFile */
+	if (!have_aff)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing AffFile parameter")));
+	if (!have_dict)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing DictFile parameter")));
+
+	NISortDictionary(&(d->obj));
+	NISortAffixes(&(d->obj));
+
+	MemoryContextDeleteChildren(CurrentMemoryContext);
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * dispell_lexize
+ *		Normalize one word with the ISpell dictionary and drop stop words.
+ *
+ * Arguments: 0 = DictISpell built by dispell_init, 1 = pointer to the word
+ * (not necessarily NUL-terminated), 2 = its length in bytes.
+ *
+ * Returns NULL for an empty word or when NINormalizeWord() returns NULL;
+ * otherwise the lexeme array from NINormalizeWord(), compacted in place so
+ * that stop words are removed, terminated by an entry with lexeme == NULL.
+ */
+Datum
+dispell_lexize(PG_FUNCTION_ARGS)
+{
+	DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
+	char	   *in = (char *) PG_GETARG_POINTER(1);
+	int32	   len = PG_GETARG_INT32(2);
+	char	   *txt;
+	TSLexeme   *res;
+	TSLexeme   *ptr,
+			   *cptr;
+
+	if (len <= 0)
+		PG_RETURN_POINTER(NULL);
+
+	/*
+	 * NOTE(review): txt is never freed here; confirm whether
+	 * NINormalizeWord() keeps a reference before adding a pfree().
+	 */
+	txt = lowerstr_with_len(in, len);
+	res = NINormalizeWord(&(d->obj), txt);
+
+	if (res == NULL)
+		PG_RETURN_POINTER(NULL);
+
+	/* ptr scans every lexeme; cptr is where the next kept one goes */
+	ptr = cptr = res;
+	while (ptr->lexeme)
+	{
+		if (searchstoplist(&(d->stoplist), ptr->lexeme))
+		{
+			pfree(ptr->lexeme);
+			ptr->lexeme = NULL;
+		}
+		else
+		{
+			/*
+			 * Until the first stop word is dropped cptr == ptr; copying an
+			 * object onto itself with memcpy() is undefined, so skip the
+			 * copy in that case and use plain assignment otherwise.
+			 */
+			if (cptr != ptr)
+				*cptr = *ptr;
+			cptr++;
+		}
+		ptr++;
+	}
+	cptr->lexeme = NULL;
+
+	PG_RETURN_POINTER(res);
+}
--- a/src/backend/tsearch/dict_simple.c
+++ b/src/backend/tsearch/dict_simple.c
@@ -0,0 +1,65 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_simple.c
+ *		Simple dictionary: just lowercase and check for stopword
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+typedef struct
+{
+	StopList	stoplist;		/* optional stop words loaded by dsimple_init, checked by dsimple_lexize */
+} DictExample;
+
+
+/*
+ * dsimple_init
+ *		Create the "simple" dictionary.  Argument 0, when given, names a
+ *		stop-word list to load; without it the stop list stays empty.
+ */
+Datum
+dsimple_init(PG_FUNCTION_ARGS)
+{
+	DictExample *d;
+	text	   *in;
+	char	   *filename;
+
+	d = (DictExample *) palloc0(sizeof(DictExample));
+	d->stoplist.wordop = recode_and_lowerstr;
+
+	/* no stop-word argument: nothing more to set up */
+	if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
+		PG_RETURN_POINTER(d);
+
+	in = PG_GETARG_TEXT_P(0);
+	filename = TextPGetCString(in);
+
+	readstoplist(filename, &d->stoplist);
+	sortstoplist(&d->stoplist);
+	pfree(filename);
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * dsimple_lexize
+ *		Lowercase the word and return it as the only lexeme, unless it is
+ *		empty or a stop word, in which case an empty lexeme list (first
+ *		entry has lexeme == NULL) is returned.
+ */
+Datum
+dsimple_lexize(PG_FUNCTION_ARGS)
+{
+	DictExample *dict = (DictExample *) PG_GETARG_POINTER(0);
+	char	   *word = (char *) PG_GETARG_POINTER(1);
+	int32		wordlen = PG_GETARG_INT32(2);
+	char	   *lowered;
+	TSLexeme   *result;
+
+	lowered = lowerstr_with_len(word, wordlen);
+
+	/* two zeroed slots: at most one lexeme plus the terminator */
+	result = palloc0(sizeof(TSLexeme) * 2);
+
+	if (*lowered != '\0' && !searchstoplist(&(dict->stoplist), lowered))
+		result[0].lexeme = lowered;
+	else
+		pfree(lowered);
+
+	PG_RETURN_POINTER(result);
+}
--- a/src/backend/tsearch/dict_synonym.c
+++ b/src/backend/tsearch/dict_synonym.c
@@ -0,0 +1,176 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_synonym.c
+ *		Synonym dictionary: replace word by its synonym
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/fd.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+#define SYNBUFLEN	4096		/* size of the line buffer used by dsynonym_init */
+typedef struct
+{
+	char	   *in;				/* word to look up; sort/search key in compareSyn */
+	char	   *out;			/* replacement returned by dsynonym_lexize */
+} Syn;
+
+typedef struct
+{
+	int			len;			/* number of entries in syn once dsynonym_init finishes */
+	Syn		   *syn;			/* entries, sorted with compareSyn */
+} DictSyn;
+
+/*
+ * findwrd
+ *		Locate the next whitespace-delimited word in "in".
+ *
+ * Returns a pointer to the first character of the word and stores in *end
+ * the position just past it.  When only whitespace (or nothing) remains,
+ * returns NULL and leaves *end set to NULL.
+ */
+static char *
+findwrd(char *in, char **end)
+{
+	char	   *cur = in;
+	char	   *wordstart;
+
+	*end = NULL;
+
+	/* skip leading whitespace, one (possibly multibyte) character at a time */
+	for (; *cur != '\0' && t_isspace(cur); cur += pg_mblen(cur))
+		;
+
+	if (*cur == '\0')
+		return NULL;
+
+	wordstart = cur;
+
+	/* advance to the next whitespace character or the end of the string */
+	for (; *cur != '\0' && !t_isspace(cur); cur += pg_mblen(cur))
+		;
+
+	*end = cur;
+	return wordstart;
+}
+
+/* qsort/bsearch comparator: order Syn entries by their "in" string */
+static int
+compareSyn(const void *a, const void *b)
+{
+	const Syn  *left = (const Syn *) a;
+	const Syn  *right = (const Syn *) b;
+
+	return strcmp(left->in, right->in);
+}
+
+
+/*
+ * dsynonym_init
+ *		Build a synonym dictionary from the "syn" configuration file named
+ *		by argument 0.
+ *
+ * Each line supplies a word and its replacement separated by whitespace;
+ * lines with fewer than two words are skipped.  Both words are passed
+ * through recode_and_lowerstr(), and the table is sorted with compareSyn()
+ * so that dsynonym_lexize() can bsearch() it.
+ */
+Datum
+dsynonym_init(PG_FUNCTION_ARGS)
+{
+	DictSyn    *d;
+	text	   *in;
+	char	   *filename;
+	FILE	   *fin;
+	char		buf[SYNBUFLEN];
+	int			nsyn = 0;
+
+	/* init functions must defend against NULLs for themselves */
+	if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("NULL config not allowed for Synonym")));
+	in = PG_GETARG_TEXT_P(0);
+
+	filename = get_tsearch_config_filename(TextPGetCString(in), "syn");
+
+	PG_FREE_IF_COPY(in, 0);
+
+	fin = AllocateFile(filename, "r");
+	if (fin == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open synonym file \"%s\": %m",
+						filename)));
+
+	d = (DictSyn *) palloc0(sizeof(DictSyn));
+
+	while (fgets(buf, SYNBUFLEN, fin) != NULL)
+	{
+		char	   *word;
+		char	   *repl;
+		char	   *wordend = NULL;
+		int			linelen = strlen(buf);
+
+		pg_verifymbstr(buf, linelen, false);
+
+		/* while loading, d->len holds the allocated size of d->syn */
+		if (nsyn == d->len)
+		{
+			if (d->len != 0)
+			{
+				d->len *= 2;
+				d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
+			}
+			else
+			{
+				d->len = 16;
+				d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
+			}
+		}
+
+		word = findwrd(buf, &wordend);
+		if (word == NULL)
+			continue;			/* blank line */
+		*wordend = '\0';
+		if (wordend >= buf + linelen)
+			continue;			/* the word ran to end of line: no replacement */
+
+		repl = findwrd(wordend + 1, &wordend);
+		if (repl == NULL)
+			continue;
+		*wordend = '\0';
+
+		d->syn[nsyn].in = recode_and_lowerstr(word);
+		d->syn[nsyn].out = recode_and_lowerstr(repl);
+		if (d->syn[nsyn].in == NULL || d->syn[nsyn].out == NULL)
+		{
+			FreeFile(fin);
+			ereport(ERROR,
+					(errcode(ERRCODE_OUT_OF_MEMORY),
+					 errmsg("out of memory")));
+		}
+
+		nsyn++;
+	}
+
+	FreeFile(fin);
+
+	/* from here on d->len is the number of valid entries */
+	d->len = nsyn;
+	if (nsyn > 1)
+		qsort(d->syn, d->len, sizeof(Syn), compareSyn);
+
+	pfree(filename);
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * dsynonym_lexize
+ *		Look the lowercased word up in the synonym table.  Returns NULL for
+ *		an empty or unknown word; otherwise a one-lexeme list holding a copy
+ *		of the replacement.
+ */
+Datum
+dsynonym_lexize(PG_FUNCTION_ARGS)
+{
+	DictSyn    *dict = (DictSyn *) PG_GETARG_POINTER(0);
+	char	   *word = (char *) PG_GETARG_POINTER(1);
+	int32		wordlen = PG_GETARG_INT32(2);
+	Syn			probe;
+	Syn		   *match;
+	TSLexeme   *result;
+
+	if (wordlen <= 0)
+		PG_RETURN_POINTER(NULL);
+
+	probe.in = lowerstr_with_len(word, wordlen);
+	probe.out = NULL;
+
+	match = (Syn *) bsearch(&probe, dict->syn, dict->len, sizeof(Syn), compareSyn);
+	pfree(probe.in);
+
+	if (match == NULL)
+		PG_RETURN_POINTER(NULL);
+
+	/* zeroed pair: the replacement followed by the list terminator */
+	result = palloc0(sizeof(TSLexeme) * 2);
+	result[0].lexeme = pstrdup(match->out);
+
+	PG_RETURN_POINTER(result);
+}
--- a/src/backend/tsearch/dict_thesaurus.c
+++ b/src/backend/tsearch/dict_thesaurus.c
@@ -0,0 +1,887 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_thesaurus.c
+ *		Thesaurus dictionary: phrase to phrase substitution
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "storage/fd.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+/*
+ * Temporarily, we use TSLexeme.flags for internal purposes...
+ */
+#define DT_USEASIS		0x1000
+
+typedef struct LexemeInfo
+{
+	uint16		idsubst;		/* entry's number in DictThesaurus->subst */
+	uint16		posinsubst;		/* pos info in entry */
+	uint16		tnvariant;		/* total num lexemes in one variant */
+	struct LexemeInfo *nextentry;	/* next entry for the same lexeme (chained in compileTheLexeme) */
+	struct LexemeInfo *nextvariant;	/* next matched candidate (chained in findVariant) */
+} LexemeInfo;
+
+typedef struct
+{
+	char	   *lexeme;			/* word; NULL is stored for a stop word (see addCompiledLexeme) */
+	LexemeInfo *entries;		/* list of places where the word occurs, linked by nextentry */
+} TheLexeme;
+
+typedef struct
+{
+	uint16		lastlexeme;		/* number lexemes to substitute */
+	uint16		reslen;			/* number of lexemes in res, set by compileTheSubstitute */
+	TSLexeme   *res;			/* prepared substituted result */
+} TheSubstitute;
+
+typedef struct
+{
+	/* subdictionary to normalize lexemes */
+	Oid			subdictOid;
+	TSDictionaryCacheEntry *subdict;
+
+	/* Array to search lexeme by exact match */
+	TheLexeme  *wrds;
+	int			nwrds;			/* number of used elements in wrds */
+	int			ntwrds;			/* number of allocated elements in wrds */
+
+	/*
+	 * Storage of substituted result, n-th element is for n-th expression
+	 */
+	TheSubstitute *subst;
+	int			nsubst;			/* allocated size while reading; rule count after thesaurusRead */
+} DictThesaurus;
+
+
+/*
+ * newLexeme
+ *		Append the sample word [b, e) to d->wrds, growing the array as
+ *		needed, and attach a single LexemeInfo recording the rule number
+ *		(idsubst) and the word's position within the rule (posinsubst).
+ */
+static void
+newLexeme(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 posinsubst)
+{
+	TheLexeme  *slot;
+	LexemeInfo *entry;
+
+	/* start with 16 slots, then double whenever the array is full */
+	if (d->nwrds >= d->ntwrds)
+	{
+		if (d->ntwrds != 0)
+		{
+			d->ntwrds *= 2;
+			d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
+		}
+		else
+		{
+			d->ntwrds = 16;
+			d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
+		}
+	}
+
+	slot = &d->wrds[d->nwrds];
+	d->nwrds++;
+
+	/* NUL-terminated copy of the word */
+	slot->lexeme = palloc(e - b + 1);
+	memcpy(slot->lexeme, b, e - b);
+	slot->lexeme[e - b] = '\0';
+
+	entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
+	entry->nextentry = NULL;
+	entry->idsubst = idsubst;
+	entry->posinsubst = posinsubst;
+	slot->entries = entry;
+}
+
+/*
+ * addWrd
+ *		Append the substitution word [b, e) to rule idsubst's result list.
+ *
+ * nwrd is the word's ordinal within the substitution; nwrd == 0 starts a
+ * new rule, which resets the function-static fill/allocation counters and
+ * grows d->subst when necessary.  Because of those static counters, all
+ * words of one rule must be added before the next rule is started.
+ * posinsubst is the number of sample words in the rule, and useasis marks
+ * the word with DT_USEASIS.  The list is kept NULL-terminated.
+ */
+static void
+addWrd(DictThesaurus * d, char *b, char *e, uint16 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
+{
+	static int	nused = 0;		/* filled entries in the current rule's res */
+	static int	nalloc = 0;		/* allocated entries in the current rule's res */
+	TheSubstitute *subst;
+	TSLexeme   *slot;
+
+	if (nwrd == 0)
+	{
+		nused = nalloc = 0;
+
+		if (idsubst >= d->nsubst)
+		{
+			if (d->nsubst != 0)
+			{
+				d->nsubst *= 2;
+				d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
+			}
+			else
+			{
+				d->nsubst = 16;
+				d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
+			}
+		}
+	}
+
+	subst = &d->subst[idsubst];
+	subst->lastlexeme = posinsubst - 1;
+
+	/* keep room for the new word plus the terminating entry */
+	if (nused + 1 >= nalloc)
+	{
+		if (nalloc != 0)
+		{
+			nalloc *= 2;
+			subst->res = (TSLexeme *) repalloc(subst->res, sizeof(TSLexeme) * nalloc);
+		}
+		else
+		{
+			nalloc = 2;
+			subst->res = (TSLexeme *) palloc(sizeof(TSLexeme) * nalloc);
+		}
+	}
+
+	slot = &subst->res[nused];
+	slot->lexeme = palloc(e - b + 1);
+	memcpy(slot->lexeme, b, e - b);
+	slot->lexeme[e - b] = '\0';
+
+	slot->nvariant = nwrd;
+	slot->flags = useasis ? DT_USEASIS : 0;
+
+	nused++;
+	subst->res[nused].lexeme = NULL;
+}
+
+#define TR_WAITLEX	1
+#define TR_INLEX	2
+#define TR_WAITSUBS 3
+#define TR_INSUBS	4
+
+/*
+ * thesaurusRead
+ *		Parse the thesaurus file "filename" (resolved with extension "ths")
+ *		into d->wrds (sample words) and d->subst (substitutions).
+ *
+ * Each rule occupies one line of the form
+ *		sample word(s) : substitution word(s)
+ * A substitution word prefixed with '*' is flagged to be used as is; a
+ * '\' prefix is stripped without setting that flag.  Lines whose first
+ * non-blank character is '#', and lines containing only whitespace, are
+ * ignored.  On return d->nsubst is the number of rules read.
+ */
+static void
+thesaurusRead(char *filename, DictThesaurus * d)
+{
+	FILE	   *fh;
+	char		str[BUFSIZ];
+	int			lineno = 0;
+	uint16		idsubst = 0;
+	bool		useasis = false;
+
+	filename = get_tsearch_config_filename(filename, "ths");
+	fh = AllocateFile(filename, "r");
+	if (!fh)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIG_FILE_ERROR),
+				 errmsg("could not open thesaurus file \"%s\": %m",
+						filename)));
+
+	while (fgets(str, sizeof(str), fh))
+	{
+		char	   *ptr,
+				   *recoded;
+		int			state = TR_WAITLEX;
+		char	   *beginwrd = NULL;
+		uint16		posinsubst = 0;
+		uint16		nwrd = 0;
+
+		/*
+		 * NOTE(review): the conversion is requested from the database
+		 * encoding to UTF8.  If thesaurus files are stored as UTF8 the two
+		 * encoding arguments may be swapped -- confirm against the
+		 * parameter order of pg_do_encoding_conversion().
+		 */
+		ptr = recoded = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
+											 GetDatabaseEncoding(), PG_UTF8);
+		if (recoded == NULL)
+			elog(ERROR, "encoding conversion failed");
+
+		lineno++;
+
+		/*
+		 * Is it a comment or an empty line?  Test the first non-blank
+		 * character (ptr), not the start of the line, so that indented
+		 * comments and whitespace-only lines are skipped too.
+		 */
+		while (*ptr && t_isspace(ptr))
+			ptr += pg_mblen(ptr);
+		if (t_iseq(ptr, '#') || *ptr == '\0' || t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
+		{
+			/* the converted copy, if one was made, is ours to release */
+			if (recoded != str)
+				pfree(recoded);
+			continue;
+		}
+
+		/* state machine over the characters of the line */
+		while (*ptr)
+		{
+			if (state == TR_WAITLEX)
+			{
+				/* between sample words */
+				if (t_iseq(ptr, ':'))
+				{
+					if (posinsubst == 0)
+					{
+						FreeFile(fh);
+						ereport(ERROR,
+								(errcode(ERRCODE_CONFIG_FILE_ERROR),
+								 errmsg("unexpected delimiter at line %d of thesaurus file \"%s\"",
+										lineno, filename)));
+					}
+					state = TR_WAITSUBS;
+				}
+				else if (!t_isspace(ptr))
+				{
+					beginwrd = ptr;
+					state = TR_INLEX;
+				}
+			}
+			else if (state == TR_INLEX)
+			{
+				/* inside a sample word: ':' or whitespace ends it */
+				if (t_iseq(ptr, ':'))
+				{
+					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
+					state = TR_WAITSUBS;
+				}
+				else if (t_isspace(ptr))
+				{
+					newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
+					state = TR_WAITLEX;
+				}
+			}
+			else if (state == TR_WAITSUBS)
+			{
+				/* between substitution words */
+				if (t_iseq(ptr, '*'))
+				{
+					useasis = true;
+					state = TR_INSUBS;
+					beginwrd = ptr + pg_mblen(ptr);
+				}
+				else if (t_iseq(ptr, '\\'))
+				{
+					useasis = false;
+					state = TR_INSUBS;
+					beginwrd = ptr + pg_mblen(ptr);
+				}
+				else if (!t_isspace(ptr))
+				{
+					useasis = false;
+					beginwrd = ptr;
+					state = TR_INSUBS;
+				}
+			}
+			else if (state == TR_INSUBS)
+			{
+				/* inside a substitution word: whitespace ends it */
+				if (t_isspace(ptr))
+				{
+					/* a bare '*' or '\' prefix with no word after it */
+					if (ptr == beginwrd)
+						ereport(ERROR,
+								(errcode(ERRCODE_CONFIG_FILE_ERROR),
+								 errmsg("unexpected end of line or lexeme at line %d of thesaurus file \"%s\"",
+										lineno, filename)));
+					addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
+					state = TR_WAITSUBS;
+				}
+			}
+			else
+				elog(ERROR, "unrecognized thesaurus state: %d", state);
+
+			ptr += pg_mblen(ptr);
+		}
+
+		/* the line ended in the middle of a substitution word */
+		if (state == TR_INSUBS)
+		{
+			if (ptr == beginwrd)
+				ereport(ERROR,
+						(errcode(ERRCODE_CONFIG_FILE_ERROR),
+						 errmsg("unexpected end of line or lexeme at line %d of thesaurus file \"%s\"",
+								lineno, filename)));
+			addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
+		}
+
+		idsubst++;
+
+		/* a rule needs at least one sample word and one substitution word */
+		if (!(nwrd && posinsubst))
+		{
+			FreeFile(fh);
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("unexpected end of line at line %d of thesaurus file \"%s\"",
+							lineno, filename)));
+		}
+
+		if (recoded != str)
+			pfree(recoded);
+	}
+
+	d->nsubst = idsubst;
+
+	FreeFile(fh);
+}
+
+/*
+ * addCompiledLexeme
+ *		Append one normalized lexeme to the array "newwrds" (*nnw used,
+ *		*tnm allocated), doubling the array when it is full.
+ *
+ * A NULL "lexeme", or one whose string is NULL, is stored with a NULL
+ * string and tnvariant 1.  Rule number and position are copied from "src".
+ * Returns the array, which may have been moved by repalloc.
+ */
+static TheLexeme *
+addCompiledLexeme(TheLexeme * newwrds, int *nnw, int *tnm, TSLexeme * lexeme, LexemeInfo * src, uint16 tnvariant)
+{
+	TheLexeme  *slot;
+	LexemeInfo *entry;
+
+	if (*nnw >= *tnm)
+	{
+		*tnm *= 2;
+		newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
+	}
+
+	slot = &newwrds[*nnw];
+	entry = (LexemeInfo *) palloc(sizeof(LexemeInfo));
+
+	if (lexeme != NULL && lexeme->lexeme != NULL)
+	{
+		slot->lexeme = pstrdup(lexeme->lexeme);
+		entry->tnvariant = tnvariant;
+	}
+	else
+	{
+		slot->lexeme = NULL;
+		entry->tnvariant = 1;
+	}
+
+	entry->idsubst = src->idsubst;
+	entry->posinsubst = src->posinsubst;
+	entry->nextentry = NULL;
+	slot->entries = entry;
+
+	(*nnw)++;
+	return newwrds;
+}
+
+/*
+ * cmpLexemeInfo
+ *		Three-way comparison by idsubst, then posinsubst, then tnvariant.
+ *		Returns 0 if either argument is NULL.
+ */
+static int
+cmpLexemeInfo(LexemeInfo * a, LexemeInfo * b)
+{
+	if (a == NULL || b == NULL)
+		return 0;
+
+	if (a->idsubst != b->idsubst)
+		return (a->idsubst > b->idsubst) ? 1 : -1;
+
+	if (a->posinsubst != b->posinsubst)
+		return (a->posinsubst > b->posinsubst) ? 1 : -1;
+
+	if (a->tnvariant != b->tnvariant)
+		return (a->tnvariant > b->tnvariant) ? 1 : -1;
+
+	return 0;
+}
+
+/*
+ * cmpLexeme
+ *		Compare two entries by lexeme string.  A NULL lexeme sorts after
+ *		every non-NULL one; two NULL lexemes compare equal.
+ */
+static int
+cmpLexeme(TheLexeme * a, TheLexeme * b)
+{
+	if (a->lexeme == NULL || b->lexeme == NULL)
+	{
+		if (a->lexeme == b->lexeme)
+			return 0;			/* both NULL */
+		return (a->lexeme == NULL) ? 1 : -1;
+	}
+
+	return strcmp(a->lexeme, b->lexeme);
+}
+
+/* bsearch-compatible wrapper around cmpLexeme */
+static int
+cmpLexemeQ(const void *a, const void *b)
+{
+	TheLexeme  *left = (TheLexeme *) a;
+	TheLexeme  *right = (TheLexeme *) b;
+
+	return cmpLexeme(left, right);
+}
+
+/*
+ * cmpTheLexeme
+ *		qsort comparator: order by lexeme (cmpLexeme); ties are broken by
+ *		the first entry's LexemeInfo in reverse cmpLexemeInfo order.
+ */
+static int
+cmpTheLexeme(const void *a, const void *b)
+{
+	TheLexeme  *left = (TheLexeme *) a;
+	TheLexeme  *right = (TheLexeme *) b;
+	int			res = cmpLexeme(left, right);
+
+	if (res == 0)
+		res = -cmpLexemeInfo(left->entries, right->entries);
+
+	return res;
+}
+
+/*
+ * compileTheLexeme
+ *		Replace the raw sample words in d->wrds with their normalized forms
+ *		as produced by the subdictionary, then sort the result and merge
+ *		duplicates.
+ *
+ * A sample word the subdictionary does not recognize (NULL result) is an
+ * error; one it reports as a stop word (empty result) is stored with a NULL
+ * lexeme after a NOTICE.  After sorting, entries with equal lexemes are
+ * collapsed into one TheLexeme whose LexemeInfo records are chained through
+ * nextentry.
+ */
+static void
+compileTheLexeme(DictThesaurus * d)
+{
+	int			i,
+				nnw = 0,
+				tnm = 16;
+	TheLexeme  *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
+			   *ptrwrds;
+
+	for (i = 0; i < d->nwrds; i++)
+	{
+		TSLexeme   *ptr;
+
+		ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
+									   PointerGetDatum(d->subdict->dictData),
+										  PointerGetDatum(d->wrds[i].lexeme),
+									Int32GetDatum(strlen(d->wrds[i].lexeme)),
+													 PointerGetDatum(NULL)));
+
+		if (!(ptr && ptr->lexeme))
+		{
+			if (!ptr)
+				elog(ERROR, "thesaurus word-sample \"%s\" isn't recognized by subdictionary (rule %d)",
+					 d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1);
+			else
+				elog(NOTICE, "thesaurus word-sample \"%s\" is recognized as stop-word, assign any stop-word (rule %d)",
+					 d->wrds[i].lexeme, d->wrds[i].entries->idsubst + 1);
+
+			newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
+		}
+		else
+		{
+			/* walk the result one variant (run of equal nvariant) at a time */
+			while (ptr->lexeme)
+			{
+				TSLexeme   *remptr = ptr + 1;
+				int			tnvar = 1;
+				int			curvar = ptr->nvariant;
+
+				/* compute n words in one variant */
+				while (remptr->lexeme)
+				{
+					if (remptr->nvariant != (remptr - 1)->nvariant)
+						break;
+					tnvar++;
+					remptr++;
+				}
+
+				/* store every lexeme of this variant, tagged with its size */
+				remptr = ptr;
+				while (remptr->lexeme && remptr->nvariant == curvar)
+				{
+					newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
+					remptr++;
+				}
+
+				ptr = remptr;
+			}
+		}
+
+		pfree(d->wrds[i].lexeme);
+		pfree(d->wrds[i].entries);
+	}
+
+	/*
+	 * d->wrds is still NULL when the thesaurus file held no rules (for
+	 * example only comments), and pfree() must not be handed NULL.
+	 */
+	if (d->wrds)
+		pfree(d->wrds);
+	d->wrds = newwrds;
+	d->nwrds = nnw;
+	d->ntwrds = tnm;
+
+	if (d->nwrds > 1)
+	{
+		qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
+
+		/* uniq: newwrds is the last kept element, ptrwrds the one examined */
+		newwrds = d->wrds;
+		ptrwrds = d->wrds + 1;
+		while (ptrwrds - d->wrds < d->nwrds)
+		{
+			if (cmpLexeme(ptrwrds, newwrds) == 0)
+			{
+				if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
+				{
+					/* same word, different place: push onto the kept list */
+					ptrwrds->entries->nextentry = newwrds->entries;
+					newwrds->entries = ptrwrds->entries;
+				}
+				else
+					pfree(ptrwrds->entries);	/* exact duplicate */
+
+				if (ptrwrds->lexeme)
+					pfree(ptrwrds->lexeme);
+			}
+			else
+			{
+				newwrds++;
+				*newwrds = *ptrwrds;
+			}
+
+			ptrwrds++;
+		}
+
+		d->nwrds = newwrds - d->wrds + 1;
+		d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
+	}
+}
+
+/*
+ * compileTheSubstitute
+ *		For every rule, rebuild d->subst[i].res from the raw substitution
+ *		words collected by addWrd(): words flagged DT_USEASIS are copied
+ *		unchanged, all others are replaced by the subdictionary's output.
+ *
+ * A substitution word that the subdictionary reports as a stop word is
+ * dropped with a NOTICE; an unrecognized word, or a rule whose words are
+ * all dropped, is an error.  reslen is set to the number of lexemes kept.
+ */
+static void
+compileTheSubstitute(DictThesaurus * d)
+{
+	int			i;
+
+	for (i = 0; i < d->nsubst; i++)
+	{
+		TSLexeme   *rem = d->subst[i].res,	/* raw list, freed at the end */
+				   *outptr,
+				   *inptr;
+		int			n = 2;		/* allocated size of the new result list */
+
+		outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
+		outptr->lexeme = NULL;
+		inptr = rem;
+
+		while (inptr && inptr->lexeme)
+		{
+			TSLexeme   *lexized,
+						tmplex[2];
+
+			if (inptr->flags & DT_USEASIS)
+			{					/* do not lexize */
+				tmplex[0] = *inptr;
+				tmplex[0].flags = 0;
+				tmplex[1].lexeme = NULL;
+				lexized = tmplex;
+			}
+			else
+			{
+				lexized = (TSLexeme *) DatumGetPointer(
+													   FunctionCall4(
+													   &(d->subdict->lexize),
+									   PointerGetDatum(d->subdict->dictData),
+											  PointerGetDatum(inptr->lexeme),
+										Int32GetDatum(strlen(inptr->lexeme)),
+														PointerGetDatum(NULL)
+																	 )
+					);
+			}
+
+			if (lexized && lexized->lexeme)
+			{
+				/* index of the first lexeme added for this word, or -1 if it starts the list */
+				int			toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
+
+				while (lexized->lexeme)
+				{
+					/* grow the list, keeping one slot free past outptr */
+					if (outptr - d->subst[i].res + 1 >= n)
+					{
+						int			diff = outptr - d->subst[i].res;
+
+						n *= 2;
+						d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
+						outptr = d->subst[i].res + diff;
+					}
+
+					*outptr = *lexized;
+					outptr->lexeme = pstrdup(lexized->lexeme);
+
+					outptr++;
+					lexized++;
+				}
+
+				/* flag the first lexeme of every word after the first with TSL_ADDPOS */
+				if (toset > 0)
+					d->subst[i].res[toset].flags |= TSL_ADDPOS;
+			}
+			else if (lexized)
+			{
+				elog(NOTICE, "thesaurus word \"%s\" in substitution is a stop-word, ignored (rule %d)", inptr->lexeme, i + 1);
+			}
+			else
+			{
+				elog(ERROR, "thesaurus word \"%s\" in substitution isn't recognized (rule %d)", inptr->lexeme, i + 1);
+			}
+
+			if (inptr->lexeme)
+				pfree(inptr->lexeme);
+			inptr++;
+		}
+
+		if (outptr == d->subst[i].res)
+			elog(ERROR, "all words in thesaurus substitution are stop words (rule %d)", i + 1);
+
+		d->subst[i].reslen = outptr - d->subst[i].res;
+
+		pfree(rem);
+	}
+}
+
+/*
+ * thesaurus_init
+ *		Build a thesaurus dictionary from a key/value configuration string.
+ *
+ * Recognized keys (case-insensitive): DictFile (the thesaurus file) and
+ * Dictionary (name of the subdictionary used to normalize words).  Both
+ * are required and each may appear only once; any other key is an error.
+ */
+Datum
+thesaurus_init(PG_FUNCTION_ARGS)
+{
+	DictThesaurus *d;
+	Map		   *cfg;
+	Map		   *item;
+	text	   *in;
+	char	   *subdictname = NULL;
+	bool		have_file = false;
+
+	/* init functions must defend against NULLs for themselves */
+	if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("NULL config not allowed for Thesaurus")));
+	in = PG_GETARG_TEXT_P(0);
+
+	parse_keyvalpairs(in, &cfg);
+	PG_FREE_IF_COPY(in, 0);
+
+	d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
+
+	/* the option array is terminated by an entry with a NULL key */
+	for (item = cfg; item->key; item++)
+	{
+		if (pg_strcasecmp("DictFile", item->key) == 0)
+		{
+			if (have_file)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple DictFile parameters")));
+			thesaurusRead(item->value, d);
+			have_file = true;
+		}
+		else if (pg_strcasecmp("Dictionary", item->key) == 0)
+		{
+			if (subdictname != NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("multiple Dictionary parameters")));
+			/* keep a private copy: the option strings are freed below */
+			subdictname = pstrdup(item->value);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unrecognized Thesaurus parameter: \"%s\"",
+							item->key)));
+
+		pfree(item->key);
+		pfree(item->value);
+	}
+	pfree(cfg);
+
+	if (!have_file)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing DictFile parameter")));
+	if (subdictname == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("missing Dictionary parameter")));
+
+	d->subdictOid = TSDictionaryGetDictid(stringToQualifiedNameList(subdictname), false);
+	d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
+
+	/* normalize sample words and substitutions through the subdictionary */
+	compileTheLexeme(d);
+	compileTheSubstitute(d);
+
+	PG_RETURN_POINTER(d);
+}
+
+/*
+ * findTheLexeme
+ *		Binary-search d->wrds for "lexeme" (NULL is accepted as a key) and
+ *		return its entry list, or NULL if the word is absent or the table
+ *		is empty.
+ */
+static LexemeInfo *
+findTheLexeme(DictThesaurus * d, char *lexeme)
+{
+	TheLexeme	key;
+	TheLexeme  *hit;
+
+	if (d->nwrds == 0)
+		return NULL;
+
+	key.lexeme = lexeme;
+	key.entries = NULL;
+
+	hit = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
+
+	return (hit != NULL) ? hit->entries : NULL;
+}
+
+/*
+ * matchIdSubst
+ *		Does the nextvariant chain starting at "stored" contain an entry
+ *		for rule idsubst?  An empty chain (NULL) counts as a match.
+ */
+static bool
+matchIdSubst(LexemeInfo * stored, uint16 idsubst)
+{
+	LexemeInfo *cur;
+
+	if (stored == NULL)
+		return true;
+
+	for (cur = stored; cur != NULL; cur = cur->nextvariant)
+	{
+		if (cur->idsubst == idsubst)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * findVariant
+ *		Search the newn entry lists in newin[] (one per lexeme of a
+ *		variant, each walked through nextentry) for rules in which every
+ *		lexeme occurs at position curpos with tnvariant == newn.
+ *
+ * Each such rule that also passes matchIdSubst(stored, ...) and is not
+ * already present in the "in" chain is pushed onto the front of "in" via
+ * nextvariant.  The function returns the (possibly extended) chain as soon
+ * as any list is exhausted.  The newin[] pointers are advanced in place.
+ */
+static LexemeInfo *
+findVariant(LexemeInfo * in, LexemeInfo * stored, uint16 curpos, LexemeInfo ** newin, int newn)
+{
+	for (;;)
+	{
+		int			i;
+		LexemeInfo *ptr = newin[0];	/* current candidate rule */
+
+		for (i = 0; i < newn; i++)
+		{
+			/* skip entries of list i that belong to rules before the candidate */
+			while (newin[i] && newin[i]->idsubst < ptr->idsubst)
+				newin[i] = newin[i]->nextentry;
+
+			/* list exhausted (also covers newin[0] == NULL): no more matches */
+			if (newin[i] == NULL)
+				return in;
+
+			/* list i is already past the candidate: adopt its rule and rescan all lists */
+			if (newin[i]->idsubst > ptr->idsubst)
+			{
+				ptr = newin[i];
+				i = -1;
+				continue;
+			}
+
+			/* same rule: look for an entry with the wanted position and variant size */
+			while (newin[i]->idsubst == ptr->idsubst)
+			{
+				if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
+				{
+					ptr = newin[i];
+					break;
+				}
+
+				newin[i] = newin[i]->nextentry;
+				if (newin[i] == NULL)
+					return in;
+			}
+
+			/* ran off the candidate's rule without a hit: restart with the new rule */
+			if (newin[i]->idsubst != ptr->idsubst)
+			{
+				ptr = newin[i];
+				i = -1;
+				continue;
+			}
+		}
+
+		if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
+		{						/* found */
+
+			ptr->nextvariant = in;
+			in = ptr;
+		}
+
+		/* step forward */
+		for (i = 0; i < newn; i++)
+			newin[i] = newin[i]->nextentry;
+	}
+
+	return NULL;
+}
+
+/*
+ * copyTSLexeme
+ *		Return a freshly allocated, NULL-terminated deep copy of the
+ *		prepared result list of substitution "ts".
+ */
+static TSLexeme *
+copyTSLexeme(TheSubstitute * ts)
+{
+	uint16		nlex = ts->reslen;
+	TSLexeme   *copy;
+	uint16		i;
+
+	copy = (TSLexeme *) palloc(sizeof(TSLexeme) * (nlex + 1));
+
+	for (i = 0; i < nlex; i++)
+	{
+		/* copy all fields, then give the copy its own string */
+		copy[i] = ts->res[i];
+		copy[i].lexeme = pstrdup(ts->res[i].lexeme);
+	}
+
+	copy[nlex].lexeme = NULL;
+
+	return copy;
+}
+
+/*
+ * checkMatch
+ *		Walk the nextvariant chain "info" looking for a rule whose last
+ *		sample position equals curpos; return a copy of that rule's
+ *		substitution, or NULL if none is complete yet.
+ *
+ * *moreres is set to true if any chain element visited has a successor,
+ * false otherwise.
+ */
+static TSLexeme *
+checkMatch(DictThesaurus * d, LexemeInfo * info, uint16 curpos, bool *moreres)
+{
+	LexemeInfo *cur;
+
+	*moreres = false;
+
+	for (cur = info; cur != NULL; cur = cur->nextvariant)
+	{
+		Assert(cur->idsubst < d->nsubst);
+
+		if (cur->nextvariant != NULL)
+			*moreres = true;
+
+		if (d->subst[cur->idsubst].lastlexeme == curpos)
+			return copyTSLexeme(&d->subst[cur->idsubst]);
+	}
+
+	return NULL;
+}
+
+/*
+ * thesaurus_lexize
+ *		Feed one word of a phrase to the thesaurus.
+ *
+ * Arguments: 0 = DictThesaurus, 1 and 2 = word and length (passed on to the
+ * subdictionary unchanged), 3 = DictSubState carrying state between calls.
+ * dstate->private holds the candidate chain left by the previous call; the
+ * position expected for this word is one past the position stored there.
+ *
+ * Returns a copy of a rule's substitution when a rule is completed at this
+ * word, otherwise NULL.  dstate->getnext tells the caller whether further
+ * words may still extend or complete a match.
+ */
+Datum
+thesaurus_lexize(PG_FUNCTION_ARGS)
+{
+	DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
+	DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
+	TSLexeme   *res = NULL;
+	LexemeInfo *stored,
+			   *info = NULL;
+	uint16		curpos = 0;
+	bool		moreres = false;
+
+	if (PG_NARGS() < 4 || dstate == NULL)
+		elog(ERROR, "forbidden call of thesaurus or nested call");
+
+	/* end-of-input notification: nothing to return */
+	if (dstate->isend)
+		PG_RETURN_POINTER(NULL);
+	stored = (LexemeInfo *) dstate->private;
+
+	if (stored)
+		curpos = stored->posinsubst + 1;
+
+	/* refresh the cached subdictionary entry if it has been invalidated */
+	if (!d->subdict->isvalid)
+		d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
+
+	res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
+									   PointerGetDatum(d->subdict->dictData),
+													 PG_GETARG_DATUM(1),
+													 PG_GETARG_DATUM(2),
+													 PointerGetDatum(NULL)));
+
+	if (res && res->lexeme)
+	{
+		TSLexeme   *ptr = res,
+				   *basevar;
+
+		/* process the subdictionary output one variant at a time */
+		while (ptr->lexeme)
+		{
+			uint16		nv = ptr->nvariant;
+			uint16		i,
+						nlex = 0;
+			LexemeInfo **infos;
+
+			/* basevar..ptr-1 are the nlex lexemes sharing nvariant nv */
+			basevar = ptr;
+			while (ptr->lexeme && nv == ptr->nvariant)
+			{
+				nlex++;
+				ptr++;
+			}
+
+			/* every lexeme of the variant must be known to the thesaurus */
+			infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
+			for (i = 0; i < nlex; i++)
+				if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
+					break;
+
+			if (i < nlex)
+			{
+				/* no chance to find */
+				pfree(infos);
+				continue;
+			}
+
+			info = findVariant(info, stored, curpos, infos, nlex);
+		}
+	}
+	else if (res)
+	{							/* stop-word */
+		/* stop words are stored under a NULL lexeme key */
+		LexemeInfo *infos = findTheLexeme(d, NULL);
+
+		info = findVariant(NULL, stored, curpos, &infos, 1);
+	}
+	else
+	{
+		info = NULL;			/* word isn't recognized */
+	}
+
+	/* remember the surviving candidates for the next call */
+	dstate->private = (void *) info;
+
+	if (!info)
+	{
+		dstate->getnext = false;
+		PG_RETURN_POINTER(NULL);
+	}
+
+	/* a rule ends at this word: return it, asking for more only if other candidates remain */
+	if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
+	{
+		dstate->getnext = moreres;
+		PG_RETURN_POINTER(res);
+	}
+
+	/* candidates exist but none is complete yet: ask for the next word */
+	dstate->getnext = true;
+
+	PG_RETURN_POINTER(NULL);
+}
--- a/src/backend/tsearch/regis.c
+++ b/src/backend/tsearch/regis.c
@@ -0,0 +1,236 @@
+/*-------------------------------------------------------------------------
+ *
+ * regis.c
+ *		Fast regex subset
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/regis.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/dicts/regis.h"
+#include "tsearch/ts_locale.h"
+
+/*
+ * RS_isRegis
+ *		Report whether str is made up only of alphabetic characters and the
+ *		metacharacters '[', ']' and '^'.  A NULL or empty string is accepted.
+ */
+bool
+RS_isRegis(const char *str)
+{
+	const char *cur;
+
+	if (str == NULL)
+		return true;
+
+	for (cur = str; *cur; cur += pg_mblen(cur))
+	{
+		if (t_isalpha(cur))
+			continue;
+		if (t_iseq(cur, '[') || t_iseq(cur, ']') || t_iseq(cur, '^'))
+			continue;
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Scanner states used by RS_compile.
+ */
+/* just after '[': expect '^' or the first member of the set */
+#define RS_IN_ONEOF 1
+/* inside "[...]" after its first member: expect more members or ']' */
+#define RS_IN_ONEOF_IN	2
+/* inside "[^...]": expect members or ']' */
+#define RS_IN_NONEOF	3
+/* outside brackets (initial state): expect a letter or '[' */
+#define RS_IN_WAIT	4
+
+/*
+ * Allocate a zero-filled RegisNode with room for len bytes of character
+ * data plus one extra byte, and link it after prev when prev is given.
+ */
+static RegisNode *
+newRegisNode(RegisNode * prev, int len)
+{
+	RegisNode  *node = (RegisNode *) palloc0(RNHDRSZ + len + 1);
+
+	if (prev != NULL)
+		prev->next = node;
+
+	return node;
+}
+
+/*
+ * RS_compile
+ *		Compile pattern str into a linked list of RegisNodes stored in *r.
+ *
+ * Each node matches exactly one character position: a bare letter or a
+ * "[...]" set yields an RSF_ONEOF node, a "[^...]" set yields an RSF_NONEOF
+ * node.  issuffix is recorded in r->issuffix, and r->nchar is set to the
+ * number of nodes.  Any character that is not valid in the current state
+ * raises ERRCODE_INVALID_REGULAR_EXPRESSION.
+ *
+ * NOTE(review): the final state is not checked, so a pattern that ends
+ * inside an unterminated '[' is accepted silently.
+ */
+void
+RS_compile(Regis * r, bool issuffix, char *str)
+{
+	/* byte length of the whole pattern; used as per-node data capacity */
+	int			len = strlen(str);
+	int			state = RS_IN_WAIT;
+	char	   *c = (char *) str;
+	RegisNode  *ptr = NULL;
+
+	memset(r, 0, sizeof(Regis));
+	r->issuffix = (issuffix) ? 1 : 0;
+
+	while (*c)
+	{
+		if (state == RS_IN_WAIT)
+		{
+			if (t_isalpha(c))
+			{
+				/* bare letter: a one-member RSF_ONEOF node */
+				if (ptr)
+					ptr = newRegisNode(ptr, len);
+				else
+					ptr = r->node = newRegisNode(NULL, len);
+				COPYCHAR(ptr->data, c);
+				ptr->type = RSF_ONEOF;
+				ptr->len = pg_mblen(c);
+			}
+			else if (t_iseq(c, '['))
+			{
+				/* start of a set; its members are collected below */
+				if (ptr)
+					ptr = newRegisNode(ptr, len);
+				else
+					ptr = r->node = newRegisNode(NULL, len);
+				ptr->type = RSF_ONEOF;
+				state = RS_IN_ONEOF;
+			}
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+						 errmsg("invalid regis pattern: \"%s\"",
+								str)));
+		}
+		else if (state == RS_IN_ONEOF)
+		{
+			if (t_iseq(c, '^'))
+			{
+				/* "[^": turn the node into a negated set */
+				ptr->type = RSF_NONEOF;
+				state = RS_IN_NONEOF;
+			}
+			else if (t_isalpha(c))
+			{
+				/* first member of a positive set */
+				COPYCHAR(ptr->data, c);
+				ptr->len = pg_mblen(c);
+				state = RS_IN_ONEOF_IN;
+			}
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+						 errmsg("invalid regis pattern: \"%s\"",
+								str)));
+		}
+		else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
+		{
+			if (t_isalpha(c))
+			{
+				/* append another member to the current set */
+				COPYCHAR(ptr->data + ptr->len, c);
+				ptr->len += pg_mblen(c);
+			}
+			else if (t_iseq(c, ']'))
+				state = RS_IN_WAIT;
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+						 errmsg("invalid regis pattern: \"%s\"",
+								str)));
+		}
+		else
+			elog(ERROR, "internal error in RS_compile: state %d", state);
+		c += pg_mblen(c);
+	}
+
+	/* count the nodes, i.e. the number of characters the pattern matches */
+	ptr = r->node;
+	while (ptr)
+	{
+		r->nchar++;
+		ptr = ptr->next;
+	}
+}
+
+/*
+ * RS_free
+ *		Release every node of a compiled pattern and leave r->node NULL.
+ */
+void
+RS_free(Regis * r)
+{
+	RegisNode  *node = r->node;
+
+	while (node != NULL)
+	{
+		RegisNode  *following = node->next;
+
+		pfree(node);
+		node = following;
+	}
+
+	r->node = NULL;
+}
+
+#ifdef TS_USE_WIDE
+/*
+ * mb_strchr
+ *		Multibyte counterpart of strchr: report whether the (possibly
+ *		multibyte) character that starts at c occurs in the NUL-terminated
+ *		string str.  Characters are compared byte-wise and only when their
+ *		byte lengths agree.
+ */
+static bool
+mb_strchr(char *str, char *c)
+{
+	int			clen = pg_mblen(c);
+	int			plen;
+	char	   *ptr;
+
+	for (ptr = str; *ptr; ptr += plen)
+	{
+		plen = pg_mblen(ptr);
+		if (plen == clen && memcmp(ptr, c, clen) == 0)
+			return true;
+	}
+
+	return false;
+}
+#else
+#define mb_strchr(s,c)	( (strchr((s),*(c)) == NULL) ? false : true )
+#endif
+
+
+/*
+ * RS_execute
+ *		Test str against the compiled pattern r.
+ *
+ * The string must contain at least r->nchar characters.  For a suffix
+ * pattern the last r->nchar characters are examined, otherwise the first
+ * r->nchar.  Each node is checked against one character in turn.
+ */
+bool
+RS_execute(Regis * r, char *str)
+{
+	RegisNode  *node;
+	char	   *pos;
+	int			nchars = 0;
+
+	/* count characters (not bytes) in str */
+	for (pos = str; *pos; pos += pg_mblen(pos))
+		nchars++;
+
+	if (nchars < r->nchar)
+		return false;
+
+	pos = str;
+	if (r->issuffix)
+	{
+		/* skip ahead so that exactly r->nchar characters remain */
+		int			skip = nchars - r->nchar;
+
+		while (skip-- > 0)
+			pos += pg_mblen(pos);
+	}
+
+	for (node = r->node; node != NULL; node = node->next)
+	{
+		switch (node->type)
+		{
+			case RSF_ONEOF:
+				if (!mb_strchr((char *) node->data, pos))
+					return false;
+				break;
+			case RSF_NONEOF:
+				if (mb_strchr((char *) node->data, pos))
+					return false;
+				break;
+			default:
+				elog(ERROR, "unrecognized regis node type: %d", node->type);
+		}
+		pos += pg_mblen(pos);
+	}
+
+	return true;
+}
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
--- a/src/backend/tsearch/synonym.syn.sample
+++ b/src/backend/tsearch/synonym.syn.sample
@@ -0,0 +1,3 @@
+skies	sky
+booking	book
+bookings	book
--- a/src/backend/tsearch/thesaurus.ths.sample
+++ b/src/backend/tsearch/thesaurus.ths.sample
@@ -0,0 +1,20 @@
+#
+# Thesaurus config file. The ':' character separates the sample words from their replacement, e.g.
+# sample-words : substitute-words
+#
+# Any substitute-word can be marked by preceding '*' character,
+# which means do not lexize this word
+# Docs: http://www.sai.msu.su/~megera/oddmuse/index.cgi/Thesaurus_dictionary
+
+one two three : *123
+one two : *12
+one : *1
+two : *2
+
+#foo bar : blah blah
+#f   bar : fbar
+#e   bar : ebar
+#g   bar bar : gbarbar
+#asd:sdffff
+#qwerty:qwer wert erty
+
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -0,0 +1,363 @@
+/*-------------------------------------------------------------------------
+ *
+ * to_tsany.c
+ *		to_ts* function definitions
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/syscache.h"
+
+
+/*
+ * SQL-callable: return the OID obtained from getTSCurrentConfig(true).
+ */
+Datum
+get_current_ts_config(PG_FUNCTION_ARGS)
+{
+	Oid			cfgId = getTSCurrentConfig(true);
+
+	PG_RETURN_OID(cfgId);
+}
+
+/*
+ * to_tsvector
+ */
+/*
+ * compareWORD
+ *		qsort comparator for ParsedWord entries.
+ *
+ * Orders by length first, then by the word bytes, then by position.
+ * Entries that agree on all three compare equal (0): qsort requires a
+ * consistent ordering, and answering "less than" for both (a,b) and (b,a)
+ * would violate that.
+ */
+static int
+compareWORD(const void *a, const void *b)
+{
+	const ParsedWord *wa = (const ParsedWord *) a;
+	const ParsedWord *wb = (const ParsedWord *) b;
+	int			res;
+
+	if (wa->len != wb->len)
+		return (wa->len > wb->len) ? 1 : -1;
+
+	res = strncmp(wa->word, wb->word, wb->len);
+	if (res != 0)
+		return res;
+
+	if (wa->pos.pos == wb->pos.pos)
+		return 0;
+
+	return (wa->pos.pos > wb->pos.pos) ? 1 : -1;
+}
+
+/*
+ * uniqueWORD
+ *		Sort the l entries of array a and collapse equal words in place.
+ *
+ * Every surviving entry gets a palloc'd position array pos.apos, where
+ * apos[0] holds the number of positions and apos[1..] the positions
+ * themselves; alen is the allocated length of that array.  The word
+ * strings of dropped duplicates are pfree'd.  Returns the number of
+ * unique entries, which occupy the front of a.
+ *
+ * NOTE(review): pos.pos is always read into a temporary before pos.apos is
+ * assigned, which suggests the two members share storage -- confirm
+ * against the ParsedWord declaration before reordering anything here.
+ */
+static int
+uniqueWORD(ParsedWord * a, int4 l)
+{
+	ParsedWord *ptr,
+			   *res;
+	int			tmppos;
+
+	if (l == 1)
+	{
+		/* single word: nothing to sort, just build its position array */
+		tmppos = LIMITPOS(a->pos.pos);
+		a->alen = 2;
+		a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
+		a->pos.apos[0] = 1;
+		a->pos.apos[1] = tmppos;
+		return l;
+	}
+
+	/* res is the last unique entry written, ptr the next entry to examine */
+	res = a;
+	ptr = a + 1;
+
+	qsort((void *) a, l, sizeof(ParsedWord), compareWORD);
+	tmppos = LIMITPOS(a->pos.pos);
+	a->alen = 2;
+	a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
+	a->pos.apos[0] = 1;
+	a->pos.apos[1] = tmppos;
+
+	while (ptr - a < l)
+	{
+		if (!(ptr->len == res->len &&
+			  strncmp(ptr->word, res->word, res->len) == 0))
+		{
+			/* a new word: move it into the next output slot */
+			res++;
+			res->len = ptr->len;
+			res->word = ptr->word;
+			tmppos = LIMITPOS(ptr->pos.pos);
+			res->alen = 2;
+			res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
+			res->pos.apos[0] = 1;
+			res->pos.apos[1] = tmppos;
+		}
+		else
+		{
+			/* same word as *res: drop the string, maybe keep the position */
+			pfree(ptr->word);
+			/* stop adding once the count or the last position hits its cap */
+			if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
+			{
+				/* double the array when the next slot would not fit */
+				if (res->pos.apos[0] + 1 >= res->alen)
+				{
+					res->alen *= 2;
+					res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
+				}
+				/* skip a position equal to the last one stored */
+				if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
+				{
+					res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
+					res->pos.apos[0]++;
+				}
+			}
+		}
+		ptr++;
+	}
+
+	return res + 1 - a;
+}
+
+/*
+ * make_tsvector
+ *		Build a TSVector value from parsed text.
+ *
+ * The words in prs are first de-duplicated with uniqueWORD.  The result is
+ * a single palloc0'd chunk holding the WordEntry array followed by the
+ * string area; each word's bytes are followed (after SHORTALIGN padding)
+ * by a uint16 position count and that many WordEntryPos items, all with
+ * weight 0.  prs->words, each word string and each position array are
+ * pfree'd, so prs must not be used for its words afterwards.
+ *
+ * Raises an error if a word's offset in the string area exceeds MAXSTRPOS.
+ */
+TSVector
+make_tsvector(ParsedText *prs)
+{
+	int4		i,
+				j,
+				lenstr = 0,
+				totallen;
+	TSVector	in;
+	WordEntry  *ptr;
+	char	   *str,
+			   *cur;
+
+	/* first pass: de-duplicate and compute the size of the string area */
+	prs->curwords = uniqueWORD(prs->words, prs->curwords);
+	for (i = 0; i < prs->curwords; i++)
+	{
+		lenstr += SHORTALIGN(prs->words[i].len);
+
+		if (prs->words[i].alen)
+			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+	}
+
+	totallen = CALCDATASIZE(prs->curwords, lenstr);
+	in = (TSVector) palloc0(totallen);
+	SET_VARSIZE(in, totallen);
+	in->size = prs->curwords;
+
+	/* second pass: fill the entry array and the string area in step */
+	ptr = ARRPTR(in);
+	cur = str = STRPTR(in);
+	for (i = 0; i < prs->curwords; i++)
+	{
+		ptr->len = prs->words[i].len;
+		if (cur - str > MAXSTRPOS)
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("string is too long for tsvector")));
+		ptr->pos = cur - str;
+		memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
+		pfree(prs->words[i].word);
+		cur += SHORTALIGN(prs->words[i].len);
+		if (prs->words[i].alen)
+		{
+			WordEntryPos *wptr;
+
+			ptr->haspos = 1;
+			/* position count, then the positions themselves */
+			*(uint16 *) cur = prs->words[i].pos.apos[0];
+			wptr = POSDATAPTR(in, ptr);
+			for (j = 0; j < *(uint16 *) cur; j++)
+			{
+				WEP_SETWEIGHT(wptr[j], 0);
+				WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
+			}
+			cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
+			pfree(prs->words[i].pos.apos);
+		}
+		else
+			ptr->haspos = 0;
+		ptr++;
+	}
+	pfree(prs->words);
+	return in;
+}
+
+/*
+ * to_tsvector_byid
+ *		SQL-callable: parse the text argument with the given configuration
+ *		and return the resulting tsvector (an empty one if no words
+ *		were produced).
+ */
+Datum
+to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid			cfgId = PG_GETARG_OID(0);
+	text	   *in = PG_GETARG_TEXT_P(1);
+	ParsedText	prs;
+	TSVector	out;
+
+	/* rough guess at the word count: one word per six bytes of input */
+	prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6;
+	if (prs.lenwords == 0)
+		prs.lenwords = 2;
+	prs.curwords = 0;
+	prs.pos = 0;
+	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
+
+	parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
+	PG_FREE_IF_COPY(in, 1);
+
+	if (prs.curwords == 0)
+	{
+		/* no words at all: hand back an empty vector */
+		pfree(prs.words);
+		out = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(out, CALCDATASIZE(0, 0));
+		out->size = 0;
+		PG_RETURN_POINTER(out);
+	}
+
+	out = make_tsvector(&prs);
+	PG_RETURN_POINTER(out);
+}
+
+/*
+ * to_tsvector
+ *		SQL-callable: to_tsvector_byid using the configuration returned by
+ *		getTSCurrentConfig(true).
+ */
+Datum
+to_tsvector(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_P(0);
+	Oid			cfgId = getTSCurrentConfig(true);
+	Datum		result;
+
+	result = DirectFunctionCall2(to_tsvector_byid,
+								 ObjectIdGetDatum(cfgId),
+								 PointerGetDatum(in));
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * to_tsquery
+ */
+
+
+/*
+ * pushval_morph
+ *		Value-pushing callback handed to parse_tsquery: runs one query
+ *		operand through parsetext() and pushes the resulting lexemes.
+ *
+ * The parsed words are consumed in order and grouped first by position and
+ * then by variant number.  Lexemes of one variant are joined with '&',
+ * variants at the same position with '|', and successive positions with
+ * '&'.  If parsing yields no words at all, a VALSTOP placeholder is pushed
+ * instead.  typeval is not used here.
+ */
+static void
+pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight)
+{
+	int4		count = 0;
+	ParsedText	prs;
+	uint32		variant,
+				pos,
+				cntvar = 0,
+				cntpos = 0,
+				cnt = 0;
+
+	prs.lenwords = 4;
+	prs.curwords = 0;
+	prs.pos = 0;
+	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
+
+	parsetext(state->cfg_id, &prs, strval, lenval);
+
+	if (prs.curwords > 0)
+	{
+
+		/* one iteration per distinct position */
+		while (count < prs.curwords)
+		{
+			pos = prs.words[count].pos.pos;
+			cntvar = 0;
+			/* one iteration per variant at this position */
+			while (count < prs.curwords && pos == prs.words[count].pos.pos)
+			{
+				variant = prs.words[count].nvariant;
+
+				cnt = 0;
+				/* one iteration per lexeme of this variant */
+				while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
+				{
+
+					pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+					pfree(prs.words[count].word);
+					/* AND this lexeme with the previous ones of the variant */
+					if (cnt)
+						pushquery(state, OPR, (int4) '&', 0, 0, 0);
+					cnt++;
+					count++;
+				}
+
+				/* OR this variant with the previous ones at the position */
+				if (cntvar)
+					pushquery(state, OPR, (int4) '|', 0, 0, 0);
+				cntvar++;
+			}
+
+			/* AND this position with the previous ones */
+			if (cntpos)
+				pushquery(state, OPR, (int4) '&', 0, 0, 0);
+
+			cntpos++;
+		}
+
+		pfree(prs.words);
+
+	}
+	else
+		pushval_asis(state, VALSTOP, NULL, 0, 0);
+}
+
+/*
+ * to_tsquery_byid
+ *		SQL-callable: parse the text argument into a tsquery, normalizing
+ *		operands with the given configuration via pushval_morph, then strip
+ *		placeholder items with clean_fakeval.
+ */
+Datum
+to_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	Oid			cfgid = PG_GETARG_OID(0);
+	text	   *in = PG_GETARG_TEXT_P(1);
+	TSQuery		query;
+	QueryItem  *cleaned;
+	int4		nitems;
+
+	query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false);
+
+	/* nothing parsed: return the empty query as is */
+	if (query->size == 0)
+		PG_RETURN_TSQUERY(query);
+
+	cleaned = clean_fakeval(GETQUERY(query), &nitems);
+	if (cleaned == NULL)
+	{
+		/* cleaning removed everything: shrink to a bare header */
+		SET_VARSIZE(query, HDRSIZETQ);
+		query->size = 0;
+		PG_RETURN_POINTER(query);
+	}
+
+	/* copy the cleaned items back over the original item array */
+	memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(QueryItem));
+	pfree(cleaned);
+	PG_RETURN_TSQUERY(query);
+}
+
+/*
+ * to_tsquery
+ *		SQL-callable: to_tsquery_byid using the configuration returned by
+ *		getTSCurrentConfig(true).
+ */
+Datum
+to_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_P(0);
+	Oid			cfgId = getTSCurrentConfig(true);
+	Datum		result;
+
+	result = DirectFunctionCall2(to_tsquery_byid,
+								 ObjectIdGetDatum(cfgId),
+								 PointerGetDatum(in));
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * plainto_tsquery_byid
+ *		SQL-callable: like to_tsquery_byid, but calls parse_tsquery with its
+ *		last argument set to true.
+ */
+Datum
+plainto_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	Oid			cfgid = PG_GETARG_OID(0);
+	text	   *in = PG_GETARG_TEXT_P(1);
+	TSQuery		query;
+	QueryItem  *cleaned;
+	int4		nitems;
+
+	query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true);
+
+	/* nothing parsed: return the empty query as is */
+	if (query->size == 0)
+		PG_RETURN_TSQUERY(query);
+
+	cleaned = clean_fakeval(GETQUERY(query), &nitems);
+	if (cleaned == NULL)
+	{
+		/* cleaning removed everything: shrink to a bare header */
+		SET_VARSIZE(query, HDRSIZETQ);
+		query->size = 0;
+		PG_RETURN_POINTER(query);
+	}
+
+	/* copy the cleaned items back over the original item array */
+	memcpy((void *) GETQUERY(query), (void *) cleaned, nitems * sizeof(QueryItem));
+	pfree(cleaned);
+	PG_RETURN_POINTER(query);
+}
+
+/*
+ * plainto_tsquery
+ *		SQL-callable: plainto_tsquery_byid using the configuration returned
+ *		by getTSCurrentConfig(true).
+ */
+Datum
+plainto_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_P(0);
+	Oid			cfgId = getTSCurrentConfig(true);
+	Datum		result;
+
+	result = DirectFunctionCall2(plainto_tsquery_byid,
+								 ObjectIdGetDatum(cfgId),
+								 PointerGetDatum(in));
+	PG_RETURN_DATUM(result);
+}
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -0,0 +1,241 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_locale.c
+ *		locale compatibility layer for tsearch
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+
+#ifdef TS_USE_WIDE
+
+#ifdef WIN32
+
+/*
+ * wchar2char
+ *		Convert the NUL-terminated wide string "from" into a multibyte
+ *		string stored in "to", writing at most len bytes.
+ *
+ * Returns the number of bytes produced, not counting a terminating NUL.
+ * For a UTF8 database the conversion is done with WideCharToMultiByte and
+ * a failure is reported with ereport(ERROR); otherwise the result of
+ * wcstombs() is returned unchanged.
+ */
+size_t
+wchar2char(char *to, const wchar_t *from, size_t len)
+{
+	int			r;
+
+	if (len == 0)
+		return 0;
+
+	if (GetDatabaseEncoding() != PG_UTF8)
+		return wcstombs(to, from, len);
+
+	r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len,
+							NULL, NULL);
+
+	if (r == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
+						GetLastError())));
+	Assert(r <= len);
+
+	/*
+	 * Because the input length was given as -1, the count returned by
+	 * WideCharToMultiByte includes the terminating NUL.  Exclude it so the
+	 * result has the same meaning as in the wcstombs() path.
+	 */
+	return r - 1;
+}
+#endif   /* WIN32 */
+
+/*
+ * char2wchar
+ *		Convert the first len bytes of the multibyte string "from" into
+ *		wide characters stored in "to".
+ *
+ * len is used both as the input byte count and as the limit on the number
+ * of wide characters written.  Three paths exist:
+ *	- WIN32 with a UTF8 database: MultiByteToWideChar; a failure raises an
+ *	  error after pg_verifymbstr() has had a chance to report the input.
+ *	- C locale: pg_mb2wchar_with_len, which appends a trailing zero.
+ *	- otherwise: mbstowcs() on a NUL-terminated copy of the input; its
+ *	  return value is passed back unchanged.
+ */
+size_t
+char2wchar(wchar_t *to, const char *from, size_t len)
+{
+	if (len == 0)
+		return 0;
+
+#ifdef WIN32
+	if (GetDatabaseEncoding() == PG_UTF8)
+	{
+		int			r;
+
+		r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len);
+
+		if (!r)
+		{
+			pg_verifymbstr(from, len, false);
+			ereport(ERROR,
+					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+					 errmsg("invalid multibyte character for locale"),
+					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+		}
+
+		Assert(r <= len);
+
+		return r;
+	}
+	else
+#endif   /* WIN32 */
+	/* on WIN32 the "if" below is the else-branch of the UTF8 test above */
+	if (lc_ctype_is_c())
+	{
+		/*
+		 * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
+		 * allocated with sufficient space
+		 */
+		return pg_mb2wchar_with_len(from, (pg_wchar *) to, len);
+	}
+	else
+	{
+		/*
+		 * mbstowcs requires a terminating '\0', so work on a copy
+		 */
+		char	   *str = pnstrdup(from, len);
+		size_t		tolen;
+
+		tolen = mbstowcs(to, str, len);
+		pfree(str);
+
+		return tolen;
+	}
+}
+
+/*
+ * _t_isalpha
+ *		Locale-aware isalpha() for the (possibly multibyte) character that
+ *		starts at ptr.
+ *
+ * In the C locale only the first byte is examined.  Otherwise the whole
+ * character -- pg_mblen(ptr) bytes, not just one -- is converted to a wide
+ * character and tested with iswalpha().
+ */
+int
+_t_isalpha(const char *ptr)
+{
+	wchar_t		character[2];
+
+	if (lc_ctype_is_c())
+		return isalpha(TOUCHAR(ptr));
+
+	/* defined value in case the conversion stores nothing */
+	character[0] = 0;
+	char2wchar(character, ptr, pg_mblen(ptr));
+
+	return iswalpha((wint_t) character[0]);
+}
+
+/*
+ * _t_isprint
+ *		Locale-aware isprint() for the (possibly multibyte) character that
+ *		starts at ptr.
+ *
+ * In the C locale only the first byte is examined.  Otherwise the whole
+ * character -- pg_mblen(ptr) bytes, not just one -- is converted to a wide
+ * character and tested with iswprint().
+ */
+int
+_t_isprint(const char *ptr)
+{
+	wchar_t		character[2];
+
+	if (lc_ctype_is_c())
+		return isprint(TOUCHAR(ptr));
+
+	/* defined value in case the conversion stores nothing */
+	character[0] = 0;
+	char2wchar(character, ptr, pg_mblen(ptr));
+
+	return iswprint((wint_t) character[0]);
+}
+#endif   /* TS_USE_WIDE */
+
+/*
+ * recode_and_lowerstr
+ *		Convert a C string from UTF8 to the database encoding and return a
+ *		lowercased copy of the converted text.
+ */
+char *
+recode_and_lowerstr(char *str)
+{
+	char	   *converted;
+	char	   *lowered;
+
+	converted = (char *) pg_do_encoding_conversion((unsigned char *) str,
+												   strlen(str),
+												   PG_UTF8,
+												   GetDatabaseEncoding());
+	if (!converted)
+		elog(ERROR, "encoding conversion failed");
+
+	lowered = lowerstr(converted);
+
+	/* free the converted buffer unless the input itself was returned */
+	if (converted != str)
+		pfree(converted);
+
+	return lowered;
+}
+
+/*
+ * lowerstr
+ *		Lowercase a NUL-terminated string; see lowerstr_with_len.
+ */
+char *
+lowerstr(char *str)
+{
+	int			nbytes = strlen(str);
+
+	return lowerstr_with_len(str, nbytes);
+}
+
+/*
+ * lowerstr_with_len
+ *		Return a palloc'd, NUL-terminated, lowercased copy of the first len
+ *		bytes of str.  A zero len yields an empty string.
+ *
+ * With TS_USE_WIDE, a multibyte database encoding and a non-C locale, the
+ * text is converted to wide characters, lowered with towlower() and
+ * converted back.  Otherwise each byte is lowered with tolower(), stopping
+ * early at a NUL byte.
+ */
+char *
+lowerstr_with_len(char *str, int len)
+{
+	char	   *ptr = str;
+	char	   *out;
+
+	if (len == 0)
+		return pstrdup("");
+
+#ifdef TS_USE_WIDE
+
+	/*
+	 * Use wide char code only when max encoding length > 1 and ctype != C.
+	 * Some operating systems fail with multi-byte encodings and a C locale.
+	 * Also, for a C locale there is no need to process as multibyte. From
+	 * backend/utils/adt/oracle_compat.c Teodor
+	 */
+	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
+	{
+		wchar_t    *wstr,
+				   *wptr;
+		int			wlen;
+
+		/*
+		 * Allocate wchar_t's for the worst case: len is the number of bytes,
+		 * which is >= the number of characters.  One more wchar_t is
+		 * allocated for the terminating zero, because wchar2char (really
+		 * wcstombs) wants a zero-terminated string.
+		 */
+		wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
+
+		/*
+		 * str SHOULD be a cstring, so wlen is the number of converted
+		 * characters
+		 */
+		wlen = char2wchar(wstr, str, len);
+		if (wlen < 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+			  errmsg("translation failed from server encoding to wchar_t")));
+
+		Assert(wlen <= len);
+		wstr[wlen] = 0;
+
+		while (*wptr)
+		{
+			*wptr = towlower((wint_t) *wptr);
+			wptr++;
+		}
+
+		/*
+		 * Allocate the result string for the worst case + '\0'
+		 */
+		len = sizeof(char) * pg_database_encoding_max_length() *(wlen + 1);
+		out = (char *) palloc(len);
+
+		/*
+		 * wlen is now a number of bytes, which is always >= the number of
+		 * characters
+		 */
+		wlen = wchar2char(out, wstr, len);
+		pfree(wstr);
+
+		if (wlen < 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+					 errmsg("translation failed from wchar_t to server encoding %d", errno)));
+		Assert(wlen <= len);
+		out[wlen] = '\0';
+	}
+	else
+#endif
+	{
+		char	   *outptr;
+
+		/* single-byte path: lower byte by byte, stopping at NUL or len */
+		outptr = out = (char *) palloc(sizeof(char) * (len + 1));
+		while (*ptr && ptr - str < len)
+		{
+			*outptr++ = tolower(*(unsigned char *) ptr);
+			ptr++;
+		}
+		*outptr = '\0';
+	}
+
+	return out;
+}
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -0,0 +1,626 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_parse.c
+ *		main parse functions for tsearch
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_utils.h"
+
+#define IGNORE_LONGLEXEME	1
+
+/*
+ * Lexize subsystem
+ */
+
+/*
+ * One token produced by the parser, kept in a singly linked list while it
+ * waits to be lexized.
+ */
+typedef struct ParsedLex
+{
+	int			type;			/* token type reported by the parser */
+	char	   *lemm;			/* token text (pointer is stored, not copied) */
+	int			lenlemm;		/* length of lemm */
+	bool		resfollow;		/* not referenced in this part of the file */
+	struct ParsedLex *next;		/* next token in the list, or NULL */
+} ParsedLex;
+
+/*
+ * Singly linked list of ParsedLex items; both pointers are NULL when the
+ * list is empty.
+ */
+typedef struct ListParsedLex
+{
+	ParsedLex  *head;			/* first item */
+	ParsedLex  *tail;			/* last item */
+} ListParsedLex;
+
+/*
+ * Working state of the lexize subsystem for one parse run.
+ */
+typedef struct
+{
+	TSConfigCacheEntry *cfg;	/* configuration supplying the dictionary map */
+	Oid			curDictId;		/* dictionary that asked for more words, or
+								 * InvalidOid in single-word mode */
+	int			posDict;		/* index in the map's dictionary list at
+								 * which the single-word scan starts */
+	DictSubState dictState;		/* state passed to the dictionary's lexize
+								 * function */
+	ParsedLex  *curSub;			/* next token to feed in multiword mode */
+	ListParsedLex towork;		/* current list to work */
+	ListParsedLex waste;		/* list of lexemes that already lexized */
+
+	/*
+	 * Fields storing the last result of a dictionary that wants several
+	 * lexemes (basically a thesaurus or something similar): tmpRes is the
+	 * result kept so far and lastRes the token at which it was returned.
+	 */
+
+	ParsedLex  *lastRes;
+	TSLexeme   *tmpRes;
+} LexizeData;
+
+/*
+ * Reset *ld for a new parse run that uses configuration cfg.
+ */
+static void
+LexizeInit(LexizeData * ld, TSConfigCacheEntry * cfg)
+{
+	ld->cfg = cfg;
+
+	/* start in single-word mode at the first dictionary */
+	ld->curDictId = InvalidOid;
+	ld->posDict = 0;
+
+	/* both token lists start out empty */
+	ld->towork.head = NULL;
+	ld->towork.tail = NULL;
+	ld->curSub = NULL;
+	ld->waste.head = NULL;
+	ld->waste.tail = NULL;
+
+	/* no stored multiword result yet */
+	ld->lastRes = NULL;
+	ld->tmpRes = NULL;
+}
+
+/*
+ * Append newpl to the end of list and terminate its next link.
+ */
+static void
+LPLAddTail(ListParsedLex * list, ParsedLex * newpl)
+{
+	if (list->tail == NULL)
+		list->head = newpl;		/* first item of an empty list */
+	else
+		list->tail->next = newpl;
+
+	list->tail = newpl;
+	newpl->next = NULL;
+}
+
+/*
+ * Detach and return the first item of list, or NULL if the list is empty.
+ * The tail pointer is cleared once the list has no items left.
+ */
+static ParsedLex *
+LPLRemoveHead(ListParsedLex * list)
+{
+	ParsedLex  *first = list->head;
+
+	if (first != NULL)
+		list->head = first->next;
+
+	if (list->head == NULL)
+		list->tail = NULL;
+
+	return first;
+}
+
+/*
+ * LexizeAddLemm
+ *		Queue one parser token for lexizing.
+ *
+ * A new ParsedLex holding type, lemm and lenlemm is appended to the
+ * towork list, and curSub is pointed at it.  The lemm pointer is stored as
+ * given; the text is not copied.  Only one ParsedLex is allocated per
+ * token.
+ */
+static void
+LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm)
+{
+	ParsedLex  *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
+
+	newpl->type = type;
+	newpl->lemm = lemm;
+	newpl->lenlemm = lenlemm;
+	LPLAddTail(&ld->towork, newpl);
+	ld->curSub = ld->towork.tail;
+}
+
+/*
+ * Move the first towork token to the waste list and restart the
+ * dictionary scan position.
+ */
+static void
+RemoveHead(LexizeData * ld)
+{
+	ParsedLex  *done = LPLRemoveHead(&ld->towork);
+
+	LPLAddTail(&ld->waste, done);
+	ld->posDict = 0;
+}
+
+/*
+ * Dispose of the waste list: hand its head to the caller through
+ * *correspondLexem when a destination is given, otherwise free every item.
+ * Either way the waste list is left empty.
+ */
+static void
+setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem)
+{
+	if (correspondLexem != NULL)
+		*correspondLexem = ld->waste.head;
+	else
+	{
+		ParsedLex  *cur = ld->waste.head;
+
+		while (cur != NULL)
+		{
+			ParsedLex  *following = cur->next;
+
+			pfree(cur);
+			cur = following;
+		}
+	}
+
+	ld->waste.head = NULL;
+	ld->waste.tail = NULL;
+}
+
+/*
+ * Move tokens from the front of towork to the waste list, up to and
+ * including stop.  When stop is reached, curSub is set to the token that
+ * followed it.  If stop is not in the list, every token is moved.
+ */
+static void
+moveToWaste(LexizeData * ld, ParsedLex * stop)
+{
+	while (ld->towork.head != NULL)
+	{
+		bool		reached = (ld->towork.head == stop);
+
+		/* read stop->next before the move resets the link */
+		if (reached)
+			ld->curSub = stop->next;
+
+		RemoveHead(ld);
+
+		if (reached)
+			break;
+	}
+}
+
+/*
+ * Replace the stored multiword result with res, produced at token lex.
+ * A previously stored result is freed together with its lexeme strings.
+ */
+static void
+setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res)
+{
+	TSLexeme   *old = ld->tmpRes;
+
+	if (old != NULL)
+	{
+		TSLexeme   *item = old;
+
+		while (item->lexeme)
+		{
+			pfree(item->lexeme);
+			item++;
+		}
+		pfree(old);
+	}
+
+	ld->tmpRes = res;
+	ld->lastRes = lex;
+}
+
+/*
+ * LexizeExec
+ *		Run queued tokens through the dictionaries and return the next
+ *		array of normalized lexemes, or NULL when nothing more can be
+ *		produced from the tokens queued so far.
+ *
+ * Two modes exist, selected by ld->curDictId:
+ *	- InvalidOid (single-word mode): the head of towork is offered to the
+ *	  dictionaries mapped to its token type, starting at index posDict;
+ *	  the first non-NULL answer wins.
+ *	- valid OID (multiword mode): a dictionary set getnext and is fed the
+ *	  following tokens through curSub until it either delivers a result
+ *	  or gives up.
+ *
+ * Before every non-recursive return, setCorrLex() disposes of the waste
+ * list: it is handed to the caller through *correspondLexem, or freed when
+ * correspondLexem is NULL.
+ */
+static TSLexeme *
+LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
+{
+	int			i;
+	ListDictionary *map;
+	TSDictionaryCacheEntry *dict;
+	TSLexeme   *res;
+
+	if (ld->curDictId == InvalidOid)
+	{
+		/*
+		 * usual mode: dictionary wants only one word, but we should keep in
+		 * mind that we should go through all stack
+		 */
+
+		while (ld->towork.head)
+		{
+			ParsedLex  *curVal = ld->towork.head;
+
+			/* computed up front; only dereferenced after the range check */
+			map = ld->cfg->map + curVal->type;
+
+			if (curVal->type == 0 || curVal->type >= ld->cfg->lenmap || map->len == 0)
+			{
+				/* skip this type of lexeme */
+				RemoveHead(ld);
+				continue;
+			}
+
+			for (i = ld->posDict; i < map->len; i++)
+			{
+				dict = lookup_ts_dictionary_cache(map->dictIds[i]);
+
+				ld->dictState.isend = ld->dictState.getnext = false;
+				ld->dictState.private = NULL;
+				res = (TSLexeme *) DatumGetPointer(FunctionCall4(
+															 &(dict->lexize),
+											 PointerGetDatum(dict->dictData),
+											   PointerGetDatum(curVal->lemm),
+											  Int32GetDatum(curVal->lenlemm),
+											  PointerGetDatum(&ld->dictState)
+																 ));
+
+				if (ld->dictState.getnext)
+				{
+					/*
+					 * dictionary wants next word, so setup and store current
+					 * position and go to multiword mode
+					 */
+
+					ld->curDictId = DatumGetObjectId(map->dictIds[i]);
+					ld->posDict = i + 1;
+					ld->curSub = curVal->next;
+					if (res)
+						setNewTmpRes(ld, curVal, res);
+					return LexizeExec(ld, correspondLexem);
+				}
+
+				if (!res)		/* dictionary doesn't know this lexeme */
+					continue;
+
+				RemoveHead(ld);
+				setCorrLex(ld, correspondLexem);
+				return res;
+			}
+
+			/* no dictionary recognized the token: discard it */
+			RemoveHead(ld);
+		}
+	}
+	else
+	{							/* curDictId is valid */
+		dict = lookup_ts_dictionary_cache(ld->curDictId);
+
+		/*
+		 * Dictionary ld->curDictId asks us about following words
+		 */
+
+		while (ld->curSub)
+		{
+			ParsedLex  *curVal = ld->curSub;
+
+			/* computed up front; only dereferenced after the range check */
+			map = ld->cfg->map + curVal->type;
+
+			if (curVal->type != 0)
+			{
+				bool		dictExists = false;
+
+				if (curVal->type >= ld->cfg->lenmap || map->len == 0)
+				{
+					/* skip this type of lexeme */
+					ld->curSub = curVal->next;
+					continue;
+				}
+
+				/*
+				 * We should be sure that the current type of lexeme is
+				 * recognized by our dictionary: we just check whether it
+				 * exists in the list of dictionaries
+				 */
+				for (i = 0; i < map->len && !dictExists; i++)
+					if (ld->curDictId == DatumGetObjectId(map->dictIds[i]))
+						dictExists = true;
+
+				if (!dictExists)
+				{
+					/*
+					 * Dictionary can't work with current type of lexeme,
+					 * return to basic mode and redo all stored lexemes
+					 */
+					ld->curDictId = InvalidOid;
+					return LexizeExec(ld, correspondLexem);
+				}
+			}
+
+			/* a token of type 0 is passed to the dictionary with isend set */
+			ld->dictState.isend = (curVal->type == 0) ? true : false;
+			ld->dictState.getnext = false;
+
+			res = (TSLexeme *) DatumGetPointer(FunctionCall4(
+															 &(dict->lexize),
+											 PointerGetDatum(dict->dictData),
+											   PointerGetDatum(curVal->lemm),
+											  Int32GetDatum(curVal->lenlemm),
+											  PointerGetDatum(&ld->dictState)
+															 ));
+
+			if (ld->dictState.getnext)
+			{
+				/* Dictionary wants one more */
+				ld->curSub = curVal->next;
+				if (res)
+					setNewTmpRes(ld, curVal, res);
+				continue;
+			}
+
+			if (res || ld->tmpRes)
+			{
+				/*
+				 * Dictionary normalizes lexemes, so we remove from stack all
+				 * used lexemes , return to basic mode and redo end of stack
+				 * (if it exists)
+				 */
+				if (res)
+				{
+					moveToWaste(ld, ld->curSub);
+				}
+				else
+				{
+					/* fall back to the result stored earlier */
+					res = ld->tmpRes;
+					moveToWaste(ld, ld->lastRes);
+				}
+
+				/* reset to initial state */
+				ld->curDictId = InvalidOid;
+				ld->posDict = 0;
+				ld->lastRes = NULL;
+				ld->tmpRes = NULL;
+				setCorrLex(ld, correspondLexem);
+				return res;
+			}
+
+			/*
+			 * Dict doesn't want the next lexeme and didn't recognize
+			 * anything, redo from ld->towork.head
+			 */
+			ld->curDictId = InvalidOid;
+			return LexizeExec(ld, correspondLexem);
+		}
+	}
+
+	setCorrLex(ld, correspondLexem);
+	return NULL;
+}
+
+/*
+ * parsetext
+ *		Parse buf (buflen bytes) with the parser of configuration cfgId,
+ *		lexize every token and append the resulting words to prs.
+ *
+ * prs->words is doubled whenever it is full.  prs->pos advances once per
+ * lexeme array returned by LexizeExec and once more for each lexeme
+ * flagged TSL_ADDPOS.  The lexeme strings are stored in prs->words without
+ * copying; only the array that held them is freed.  Tokens whose length is
+ * MAXSTRLEN or more are skipped with a NOTICE when IGNORE_LONGLEXEME is
+ * defined, and raise an error otherwise.
+ */
+void
+parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
+{
+	int			type,
+				lenlemm;
+	char	   *lemm = NULL;
+	LexizeData	ldata;
+	TSLexeme   *norms;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	void	   *prsdata;
+
+	cfg = lookup_ts_config_cache(cfgId);
+	prsobj = lookup_ts_parser_cache(cfg->prsId);
+
+	prsdata = (void *) DatumGetPointer(FunctionCall2(&prsobj->prsstart,
+													 PointerGetDatum(buf),
+													 Int32GetDatum(buflen)));
+
+	LexizeInit(&ldata, cfg);
+
+	/* loop until the parser reports a token type <= 0 */
+	do
+	{
+		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
+										   PointerGetDatum(prsdata),
+										   PointerGetDatum(&lemm),
+										   PointerGetDatum(&lenlemm)));
+
+		if (type > 0 && lenlemm >= MAXSTRLEN)
+		{
+#ifdef IGNORE_LONGLEXEME
+			ereport(NOTICE,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("word is too long to be indexed"),
+					 errdetail("Words longer than %d characters are ignored.",
+							   MAXSTRLEN)));
+			continue;
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("word is too long to be indexed")));
+#endif
+		}
+
+		/* the final token (type <= 0) is queued as well */
+		LexizeAddLemm(&ldata, type, lemm, lenlemm);
+
+		while ((norms = LexizeExec(&ldata, NULL)) != NULL)
+		{
+			TSLexeme   *ptr = norms;
+
+			prs->pos++;			/* set pos */
+
+			while (ptr->lexeme)
+			{
+				if (prs->curwords == prs->lenwords)
+				{
+					prs->lenwords *= 2;
+					prs->words = (ParsedWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(ParsedWord));
+				}
+
+				if (ptr->flags & TSL_ADDPOS)
+					prs->pos++;
+				prs->words[prs->curwords].len = strlen(ptr->lexeme);
+				prs->words[prs->curwords].word = ptr->lexeme;
+				prs->words[prs->curwords].nvariant = ptr->nvariant;
+				prs->words[prs->curwords].alen = 0;
+				prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
+				ptr++;
+				prs->curwords++;
+			}
+			/* the lexeme strings now belong to prs->words; free the array only */
+			pfree(norms);
+		}
+	} while (type > 0);
+
+	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
+}
+
+/*
+ * Headline framework
+ */
+/*
+ * Append a copy of the token text buf (buflen bytes) of the given type to
+ * the headline word array, growing the array as needed.  All other fields
+ * of the new entry are zeroed.
+ */
+static void
+hladdword(HeadlineText * prs, char *buf, int4 buflen, int type)
+{
+	HeadlineWord *slot;
+
+	while (prs->curwords >= prs->lenwords)
+	{
+		prs->lenwords *= 2;
+		prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord));
+	}
+
+	slot = &(prs->words[prs->curwords]);
+	memset(slot, 0, sizeof(HeadlineWord));
+	slot->type = (uint8) type;
+	slot->len = buflen;
+	slot->word = palloc(buflen);
+	memcpy(slot->word, buf, buflen);
+
+	prs->curwords++;
+}
+
+/*
+ * hlfinditem
+ *		Link the most recently added headline word to every query operand
+ *		equal to the lexeme buf (buflen bytes).
+ *
+ * The first matching operand is stored in the word's item field.  For each
+ * further match the word entry is duplicated at the end of the array with
+ * item set to that operand and repeated set to 1.  The array is grown
+ * beforehand so that up to query->size duplicates fit.
+ *
+ * NOTE(review): the word examined is words[curwords - 1], so this relies
+ * on at least one word having been added before -- confirm for callers.
+ */
+static void
+hlfinditem(HeadlineText * prs, TSQuery query, char *buf, int buflen)
+{
+	int			i;
+	QueryItem  *item = GETQUERY(query);
+	HeadlineWord *word;
+
+	while (prs->curwords + query->size >= prs->lenwords)
+	{
+		prs->lenwords *= 2;
+		prs->words = (HeadlineWord *) repalloc((void *) prs->words, prs->lenwords * sizeof(HeadlineWord));
+	}
+
+	/* taken after the repalloc above, which may have moved the array */
+	word = &(prs->words[prs->curwords - 1]);
+	for (i = 0; i < query->size; i++)
+	{
+		if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
+		{
+			if (word->item)
+			{
+				/* word already linked: add a duplicate for this operand */
+				memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWord));
+				prs->words[prs->curwords].item = item;
+				prs->words[prs->curwords].repeated = 1;
+				prs->curwords++;
+			}
+			else
+				word->item = item;
+		}
+		item++;
+	}
+}
+
+/*
+ * addHLParsedLex
+ *		Add the tokens in list lexs to the headline and match the
+ *		normalized lexemes norms (may be NULL) against the query after each
+ *		token.  Both the token list and norms, including its lexeme
+ *		strings, are freed.
+ */
+static void
+addHLParsedLex(HeadlineText * prs, TSQuery query, ParsedLex * lexs, TSLexeme * norms)
+{
+	TSLexeme   *norm;
+
+	while (lexs != NULL)
+	{
+		ParsedLex  *following = lexs->next;
+
+		if (lexs->type > 0)
+			hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
+
+		if (norms != NULL)
+		{
+			for (norm = norms; norm->lexeme; norm++)
+				hlfinditem(prs, query, norm->lexeme, strlen(norm->lexeme));
+		}
+
+		pfree(lexs);
+		lexs = following;
+	}
+
+	if (norms != NULL)
+	{
+		for (norm = norms; norm->lexeme; norm++)
+			pfree(norm->lexeme);
+		pfree(norms);
+	}
+}
+
+/*
+ * hlparsetext
+ *		Headline counterpart of parsetext: parse buf (buflen bytes) with the
+ *		parser of configuration cfgId, lexize every token, and record both
+ *		the original tokens and their matches against query in prs.
+ *
+ * Unlike parsetext, LexizeExec is asked to hand back the consumed tokens
+ * (lexs) so that addHLParsedLex can add them to the headline; this also
+ * happens on the final call that returns no lexemes.  Tokens whose length
+ * is MAXSTRLEN or more are skipped with a NOTICE when IGNORE_LONGLEXEME is
+ * defined, and raise an error otherwise.
+ */
+void
+hlparsetext(Oid cfgId, HeadlineText * prs, TSQuery query, char *buf, int4 buflen)
+{
+	int			type,
+				lenlemm;
+	char	   *lemm = NULL;
+	LexizeData	ldata;
+	TSLexeme   *norms;
+	ParsedLex  *lexs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	void	   *prsdata;
+
+	cfg = lookup_ts_config_cache(cfgId);
+	prsobj = lookup_ts_parser_cache(cfg->prsId);
+
+	prsdata = (void *) DatumGetPointer(FunctionCall2(&(prsobj->prsstart),
+													 PointerGetDatum(buf),
+													 Int32GetDatum(buflen)));
+
+	LexizeInit(&ldata, cfg);
+
+	/* loop until the parser reports a token type <= 0 */
+	do
+	{
+		type = DatumGetInt32(FunctionCall3(&(prsobj->prstoken),
+										   PointerGetDatum(prsdata),
+										   PointerGetDatum(&lemm),
+										   PointerGetDatum(&lenlemm)));
+
+		if (type > 0 && lenlemm >= MAXSTRLEN)
+		{
+#ifdef IGNORE_LONGLEXEME
+			ereport(NOTICE,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("word is too long to be indexed"),
+					 errdetail("Words longer than %d characters are ignored.",
+							   MAXSTRLEN)));
+			continue;
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("word is too long to be indexed")));
+#endif
+		}
+
+		LexizeAddLemm(&ldata, type, lemm, lenlemm);
+
+		/* drain results; the last pass (norms == NULL) flushes waste tokens */
+		do
+		{
+			if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
+				addHLParsedLex(prs, query, lexs, norms);
+			else
+				addHLParsedLex(prs, query, lexs, NULL);
+		} while (norms);
+
+	} while (type > 0);
+
+	FunctionCall1(&(prsobj->prsend), PointerGetDatum(prsdata));
+}
+
+/*
+ * generatHeadline
+ *		Assemble the headline text from the words collected in prs.
+ *
+ * Words flagged "in" and not "repeated" are emitted: a word flagged
+ * "replace" becomes a single space, any other word is copied and, when
+ * "selected", wrapped in prs->startsel / prs->stopsel.  Words that are
+ * neither "in" nor "repeated" have their text freed.  The output buffer
+ * starts at 128 bytes and is doubled as needed.
+ */
+text *
+generatHeadline(HeadlineText * prs)
+{
+	text	   *out;
+	int			len = 128;
+	char	   *ptr;
+	HeadlineWord *wrd = prs->words;
+
+	out = (text *) palloc(len);
+	/* payload is written after the varlena header */
+	ptr = ((char *) out) + VARHDRSZ;
+
+	while (wrd - prs->words < prs->curwords)
+	{
+		/* make room for the word plus both selection markers */
+		while (wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char *) out)) >= len)
+		{
+			int			dist = ptr - ((char *) out);
+
+			len *= 2;
+			out = (text *) repalloc(out, len);
+			ptr = ((char *) out) + dist;
+		}
+
+		if (wrd->in && !wrd->repeated)
+		{
+			if (wrd->replace)
+			{
+				*ptr = ' ';
+				ptr++;
+			}
+			else
+			{
+				if (wrd->selected)
+				{
+					memcpy(ptr, prs->startsel, prs->startsellen);
+					ptr += prs->startsellen;
+				}
+				memcpy(ptr, wrd->word, wrd->len);
+				ptr += wrd->len;
+				if (wrd->selected)
+				{
+					memcpy(ptr, prs->stopsel, prs->stopsellen);
+					ptr += prs->stopsellen;
+				}
+			}
+		}
+		else if (!wrd->repeated)
+			pfree(wrd->word);
+
+		wrd++;
+	}
+
+	/* total size includes the header */
+	SET_VARSIZE(out, ptr - ((char *) out));
+	return out;
+}
--- a/src/backend/tsearch/ts_utils.c
+++ b/src/backend/tsearch/ts_utils.c
@@ -0,0 +1,330 @@
+/*-------------------------------------------------------------------------
+ *
+ * ts_utils.c
+ *		various support functions
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "miscadmin.h"
+#include "storage/fd.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+/*
+ * States of the parse_keyvalpairs() scanner:
+ *
+ * CS_WAITKEY	 skipping whitespace until a key starts
+ * CS_INKEY		 inside a key
+ * CS_WAITEQ	 key finished, skipping whitespace until '='
+ * CS_WAITVALUE	 '=' seen, skipping whitespace until a value starts
+ * CS_INVALUE	 inside a double-quoted value
+ * CS_IN2VALUE	 inside an unquoted value
+ * CS_WAITDELIM	 value finished, skipping whitespace until ','
+ * CS_INESC		 next character is taken literally, then back to CS_INVALUE
+ * CS_IN2ESC	 next character is taken literally, then back to CS_IN2VALUE
+ */
+#define CS_WAITKEY	0
+#define CS_INKEY	1
+#define CS_WAITEQ	2
+#define CS_WAITVALUE	3
+#define CS_INVALUE	4
+#define CS_IN2VALUE 5
+#define CS_WAITDELIM	6
+#define CS_INESC	7
+#define CS_IN2ESC	8
+
+/*
+ * Copy the first len bytes of ptr into a freshly palloc'd, NUL-terminated
+ * string and strip backslash escapes from it: every backslash is dropped
+ * and the character following it is kept literally.
+ *
+ * The un-escaping is done in place inside the copy, so once a backslash has
+ * been removed the source and destination of each character overlap.  The
+ * character length is therefore measured exactly once, before any bytes are
+ * moved, and memmove() is used for the transfer.  A backslash that is the
+ * last character has nothing to escape; it is dropped and the scan stops
+ * there rather than stepping over the terminator.
+ */
+static char *
+nstrdup(char *ptr, int len)
+{
+	char	   *res = palloc(len + 1),
+			   *cptr;
+
+	memcpy(res, ptr, len);
+	res[len] = '\0';
+	cptr = ptr = res;
+	while (*ptr)
+	{
+		int			clen;
+
+		if (t_iseq(ptr, '\\'))
+		{
+			ptr++;
+			/* dangling backslash at end of string */
+			if (*ptr == '\0')
+				break;
+		}
+
+		/* measure before moving: the move may overwrite the bytes at ptr */
+		clen = pg_mblen(ptr);
+		memmove(cptr, ptr, clen);
+		cptr += clen;
+		ptr += clen;
+	}
+	*cptr = '\0';
+
+	return res;
+}
+
+/*
+ * Parse a parameter string consisting of key = value clauses
+ *
+ * Clauses are separated by commas.  A key consists of alphabetic characters.
+ * A value is either enclosed in double quotes or runs up to the next
+ * whitespace or comma; in both forms a backslash makes the following
+ * character part of the value.
+ *
+ * *m is set to a palloc'd, zero-filled array of Map entries that receives
+ * the parsed pairs in order; it is sized from the number of commas in the
+ * input plus two, so at least one all-zero entry follows the last pair.
+ * Malformed input is reported with ereport(ERROR).
+ */
+void
+parse_keyvalpairs(text *in, Map ** m)
+{
+	Map		   *mptr;
+	char	   *ptr = VARDATA(in),
+			   *begin = NULL;
+
+	/*
+	 * Number of commas seen; bounds the number of pairs.  This has to be an
+	 * int: a char counter wraps around on long lists and the array below
+	 * would then be too small for the entries written into it.
+	 */
+	int			num = 0;
+	int			state = CS_WAITKEY;
+
+	/* first pass: count commas to size the result array */
+	while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
+	{
+		if (t_iseq(ptr, ','))
+			num++;
+		ptr += pg_mblen(ptr);
+	}
+
+	*m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
+	memset(mptr, 0, sizeof(Map) * (num + 2));
+
+	/* second pass: run the state machine over the input */
+	ptr = VARDATA(in);
+	while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
+	{
+		if (state == CS_WAITKEY)
+		{
+			if (t_isalpha(ptr))
+			{
+				begin = ptr;
+				state = CS_INKEY;
+			}
+			else if (!t_isspace(ptr))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid parameter list format: \"%s\"",
+								TextPGetCString(in))));
+		}
+		else if (state == CS_INKEY)
+		{
+			if (t_isspace(ptr))
+			{
+				mptr->key = nstrdup(begin, ptr - begin);
+				state = CS_WAITEQ;
+			}
+			else if (t_iseq(ptr, '='))
+			{
+				mptr->key = nstrdup(begin, ptr - begin);
+				state = CS_WAITVALUE;
+			}
+			else if (!t_isalpha(ptr))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid parameter list format: \"%s\"",
+								TextPGetCString(in))));
+		}
+		else if (state == CS_WAITEQ)
+		{
+			if (t_iseq(ptr, '='))
+				state = CS_WAITVALUE;
+			else if (!t_isspace(ptr))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid parameter list format: \"%s\"",
+								TextPGetCString(in))));
+		}
+		else if (state == CS_WAITVALUE)
+		{
+			if (t_iseq(ptr, '"'))
+			{
+				/* quoted value starts after the opening quote */
+				begin = ptr + 1;
+				state = CS_INVALUE;
+			}
+			else if (!t_isspace(ptr))
+			{
+				begin = ptr;
+				state = CS_IN2VALUE;
+			}
+		}
+		else if (state == CS_INVALUE)
+		{
+			if (t_iseq(ptr, '"'))
+			{
+				mptr->value = nstrdup(begin, ptr - begin);
+				mptr++;
+				state = CS_WAITDELIM;
+			}
+			else if (t_iseq(ptr, '\\'))
+				state = CS_INESC;
+		}
+		else if (state == CS_IN2VALUE)
+		{
+			if (t_isspace(ptr) || t_iseq(ptr, ','))
+			{
+				mptr->value = nstrdup(begin, ptr - begin);
+				mptr++;
+				state = (t_iseq(ptr, ',')) ? CS_WAITKEY : CS_WAITDELIM;
+			}
+			else if (t_iseq(ptr, '\\'))
+			{
+				/*
+				 * Use the escape state that returns to the unquoted-value
+				 * state; CS_INESC would resume in the quoted-value state.
+				 */
+				state = CS_IN2ESC;
+			}
+		}
+		else if (state == CS_WAITDELIM)
+		{
+			if (t_iseq(ptr, ','))
+				state = CS_WAITKEY;
+			else if (!t_isspace(ptr))
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid parameter list format: \"%s\"",
+								TextPGetCString(in))));
+		}
+		else if (state == CS_INESC)
+			state = CS_INVALUE;
+		else if (state == CS_IN2ESC)
+			state = CS_IN2VALUE;
+		else
+			elog(ERROR, "unrecognized parse_keyvalpairs state: %d", state);
+		ptr += pg_mblen(ptr);
+	}
+
+	/* an unquoted value may be ended by the end of the input */
+	if (state == CS_IN2VALUE)
+	{
+		mptr->value = nstrdup(begin, ptr - begin);
+		mptr++;
+	}
+	else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("invalid parameter list format: \"%s\"",
+						TextPGetCString(in))));
+}
+
+/*
+ * Build the full path name of a tsearch configuration file from its base
+ * name and extension.
+ *
+ * The base name is treated as user-supplied and is validated to block
+ * pathname attacks; the extension is trusted as given.
+ *
+ * Returns a palloc'd string of the form
+ * <sharedir>/tsearch_data/<basename>.<extension>.
+ */
+char *
+get_tsearch_config_filename(const char *basename,
+							const char *extension)
+{
+	char		sharedir[MAXPGPATH];
+	char	   *path;
+	const char *c = basename;
+
+	/*
+	 * Only alphabetic characters are accepted in the base name.  That is
+	 * possibly stricter than necessary, but nothing outside the
+	 * tsearch_data directory may become reachable, so in particular '/'
+	 * has to be refused.  Timezonesets names are checked the same way.
+	 */
+	while (*c != '\0')
+	{
+		if (!isalpha((unsigned char) *c))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid text search configuration file name \"%s\"",
+							basename)));
+		c++;
+	}
+
+	get_share_path(my_exec_path, sharedir);
+
+	path = palloc(MAXPGPATH);
+	snprintf(path, MAXPGPATH, "%s/tsearch_data/%s.%s",
+			 sharedir, basename, extension);
+
+	return path;
+}
+
+#define STOPBUFLEN	4096
+
+/*
+ * Load the stop-word file with base name "in" (extension "stop") into *s.
+ *
+ * Every line read contributes at most one word: the text up to the first
+ * whitespace character.  Lines that yield an empty word are skipped.  Each
+ * word is checked with pg_verifymbstr(); an invalid one closes the file and
+ * raises an error naming the line.  If s->wordop is set it is applied to the
+ * word and its result is stored, otherwise a pstrdup'd copy is stored.
+ *
+ * On return s->len is the number of words and s->stop the palloc'd array
+ * holding them (NULL when no word was read, or when "in" is NULL or empty).
+ */
+void
+readstoplist(char *in, StopList * s)
+{
+	char	  **stop = NULL;
+
+	s->len = 0;
+	if (in && *in)
+	{
+		char	   *filename = get_tsearch_config_filename(in, "stop");
+		FILE	   *hin;
+		char		buf[STOPBUFLEN];
+		int			reallen = 0;
+		int			line = 0;
+
+		if ((hin = AllocateFile(filename, "r")) == NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_CONFIG_FILE_ERROR),
+					 errmsg("could not open stopword file \"%s\": %m",
+							filename)));
+
+		while (fgets(buf, STOPBUFLEN, hin))
+		{
+			char	   *pbuf = buf;
+
+			line++;
+
+			/*
+			 * Cut the line at the first whitespace.  The cast is required:
+			 * bytes of multibyte characters have the high bit set, and
+			 * passing a negative char value to isspace() is undefined.
+			 */
+			while (*pbuf && !isspace((unsigned char) *pbuf))
+				pbuf++;
+			*pbuf = '\0';
+
+			if (*buf == '\0')
+				continue;
+
+			if (!pg_verifymbstr(buf, strlen(buf), true))
+			{
+				FreeFile(hin);
+				ereport(ERROR,
+						(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+						 errmsg("invalid multibyte encoding at line %d in file \"%s\"",
+								line, filename)));
+			}
+
+			/* grow the array: 16 slots at first, doubled afterwards */
+			if (s->len >= reallen)
+			{
+				if (reallen == 0)
+				{
+					reallen = 16;
+					stop = (char **) palloc(sizeof(char *) * reallen);
+				}
+				else
+				{
+					reallen *= 2;
+					stop = (char **) repalloc((void *) stop, sizeof(char *) * reallen);
+				}
+			}
+
+
+			if (s->wordop)
+				stop[s->len] = s->wordop(buf);
+			else
+				stop[s->len] = pstrdup(buf);
+
+			(s->len)++;
+		}
+		FreeFile(hin);
+		pfree(filename);
+	}
+
+	s->stop = stop;
+}
+
+/*
+ * qsort/bsearch comparator for arrays of C strings: both arguments point at
+ * a char * element, and the strings are ordered with strcmp().
+ */
+static int
+comparestr(const void *a, const void *b)
+{
+	char	   *left = *(char **) a;
+	char	   *right = *(char **) b;
+
+	return strcmp(left, right);
+}
+
+/*
+ * Sort the words of a stop list with comparestr; a list without words is
+ * left alone.
+ */
+void
+sortstoplist(StopList * s)
+{
+	if (s->stop == NULL || s->len <= 0)
+		return;
+
+	qsort(s->stop, s->len, sizeof(char *), comparestr);
+}
+
+/*
+ * Report whether key is present in the stop list.  The lookup is a binary
+ * search using comparestr; a list without words never matches.
+ */
+bool
+searchstoplist(StopList * s, char *key)
+{
+	if (s->stop == NULL || s->len <= 0)
+		return false;
+
+	if (bsearch(&key, s->stop, s->len, sizeof(char *), comparestr))
+		return true;
+
+	return false;
+}
+
+/*
+ * Return a palloc'd, NUL-terminated copy of the first len bytes of in.
+ */
+char *
+pnstrdup(const char *in, int len)
+{
+	char	   *copy;
+
+	copy = palloc(len + 1);
+	copy[len] = '\0';
+	memcpy(copy, in, len);
+
+	return copy;
+}
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -0,0 +1,360 @@
+/*-------------------------------------------------------------------------
+ *
+ * wparser.c
+ *		Standard interface to word parser
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  $PostgreSQL: pgsql/src/backend/tsearch/wparser.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/skey.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_ts_parser.h"
+#include "catalog/pg_type.h"
+#include "tsearch/ts_cache.h"
+#include "tsearch/ts_public.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/******sql-level interface******/
+
+/*
+ * State kept between calls of the ts_token_type_* set-returning functions:
+ * the token-type list obtained from the parser's lextype method and the
+ * position of the next entry to return.
+ */
+typedef struct
+{
+	int			cur;			/* index into list of the next entry to emit */
+	LexDescr   *list;			/* entries to emit; iteration stops at the
+								 * first entry whose lexid is zero */
+} TSTokenTypeStorage;
+
+/*
+ * First-call setup for the ts_token_type_* functions.
+ *
+ * Looks up parser prsid, fails if it has no lextype method, and then, in
+ * the multi-call memory context, calls that method to obtain the token-type
+ * list, stores the iteration state in funcctx->user_fctx and prepares the
+ * (tokid int4, alias text, description text) result tuple metadata.
+ */
+static void
+tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
+{
+	TSParserCacheEntry *parser;
+	TSTokenTypeStorage *state;
+	TupleDesc	desc;
+	MemoryContext callerctx;
+
+	parser = lookup_ts_parser_cache(prsid);
+	if (!OidIsValid(parser->lextypeOid))
+		elog(ERROR, "method lextype isn't defined for text search parser %u",
+			 prsid);
+
+	callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+	state = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
+	/* there is no OidFunctionCall0, so pass one dummy argument */
+	state->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(parser->lextypeOid,
+																(Datum) 0));
+	state->cur = 0;
+	funcctx->user_fctx = (void *) state;
+
+	desc = CreateTemplateTupleDesc(3, false);
+	TupleDescInitEntry(desc, (AttrNumber) 1, "tokid",
+					   INT4OID, -1, 0);
+	TupleDescInitEntry(desc, (AttrNumber) 2, "alias",
+					   TEXTOID, -1, 0);
+	TupleDescInitEntry(desc, (AttrNumber) 3, "description",
+					   TEXTOID, -1, 0);
+	funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+	MemoryContextSwitchTo(callerctx);
+}
+
+/*
+ * Per-call worker for the ts_token_type_* functions.
+ *
+ * Returns the next token type as a (tokid, alias, description) tuple datum
+ * and frees that entry's alias and descr strings.  When the list is missing
+ * or its current entry has lexid zero, the stored state is released and
+ * (Datum) 0 is returned.
+ */
+static Datum
+tt_process_call(FuncCallContext *funcctx)
+{
+	TSTokenTypeStorage *state = (TSTokenTypeStorage *) funcctx->user_fctx;
+	LexDescr   *entry;
+	char	   *fields[3];
+	char		idbuf[16];
+	HeapTuple	tup;
+	Datum		res;
+
+	if (state->list == NULL || state->list[state->cur].lexid == 0)
+	{
+		/* nothing left to return: drop the iteration state */
+		if (state->list)
+			pfree(state->list);
+		pfree(state);
+		return (Datum) 0;
+	}
+
+	entry = &state->list[state->cur];
+
+	sprintf(idbuf, "%d", entry->lexid);
+	fields[0] = idbuf;
+	fields[1] = entry->alias;
+	fields[2] = entry->descr;
+
+	tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+	res = HeapTupleGetDatum(tup);
+
+	pfree(entry->alias);
+	pfree(entry->descr);
+	state->cur++;
+
+	return res;
+}
+
+/*
+ * Set-returning function: list the token types of the parser whose OID is
+ * given as argument 0.
+ */
+Datum
+ts_token_type_byid(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Datum		row;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		funcctx = SRF_FIRSTCALL_INIT();
+		tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+
+	row = tt_process_call(funcctx);
+	if (row == (Datum) 0)
+		SRF_RETURN_DONE(funcctx);
+
+	SRF_RETURN_NEXT(funcctx, row);
+}
+
+/*
+ * Set-returning function: list the token types of the parser whose
+ * (possibly qualified) name is given as text argument 0.
+ */
+Datum
+ts_token_type_byname(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Datum		row;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		text	   *prsname = PG_GETARG_TEXT_P(0);
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		tt_setup_firstcall(funcctx,
+						   TSParserGetPrsid(textToQualifiedNameList(prsname),
+											false));
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+
+	row = tt_process_call(funcctx);
+	if (row == (Datum) 0)
+		SRF_RETURN_DONE(funcctx);
+
+	SRF_RETURN_NEXT(funcctx, row);
+}
+
+/* One token produced by the parser, as collected by prs_setup_firstcall() */
+typedef struct
+{
+	int			type;			/* token type returned by the prstoken method */
+	char	   *lexeme;			/* palloc'd, NUL-terminated copy of the token */
+} LexemeEntry;
+
+/*
+ * State kept between calls of the ts_parse_* set-returning functions.
+ * While the list is being filled, cur counts the stored entries and len is
+ * the allocated size; afterwards len is the entry count and cur the index
+ * of the next entry to return.
+ */
+typedef struct
+{
+	int			cur;
+	int			len;
+	LexemeEntry *list;			/* palloc'd array of collected tokens */
+} PrsStorage;
+
+
+/*
+ * First-call setup for the ts_parse_* functions.
+ *
+ * Runs parser prsid over txt in the multi-call memory context, storing a
+ * copy of every token and its type until the prstoken method returns zero.
+ * The collected list is saved in funcctx->user_fctx and the
+ * (tokid int4, token text) result tuple metadata is prepared.
+ */
+static void
+prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
+{
+	TSParserCacheEntry *parser = lookup_ts_parser_cache(prsid);
+	PrsStorage *state;
+	TupleDesc	desc;
+	MemoryContext callerctx;
+	void	   *prsdata;
+	char	   *tok = NULL;
+	int			toklen = 0;
+	int			toktype = 0;
+
+	callerctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+	state = (PrsStorage *) palloc(sizeof(PrsStorage));
+	state->cur = 0;
+	state->len = 16;
+	state->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * state->len);
+
+	prsdata = (void *) DatumGetPointer(FunctionCall2(&parser->prsstart,
+											   PointerGetDatum(VARDATA(txt)),
+									   Int32GetDatum(VARSIZE(txt) - VARHDRSZ)));
+
+	for (;;)
+	{
+		LexemeEntry *slot;
+
+		toktype = DatumGetInt32(FunctionCall3(&parser->prstoken,
+											  PointerGetDatum(prsdata),
+											  PointerGetDatum(&tok),
+											  PointerGetDatum(&toklen)));
+		if (toktype == 0)
+			break;
+
+		/* double the array when it is full */
+		if (state->cur >= state->len)
+		{
+			state->len = 2 * state->len;
+			state->list = (LexemeEntry *) repalloc(state->list,
+											sizeof(LexemeEntry) * state->len);
+		}
+
+		slot = &state->list[state->cur];
+		slot->lexeme = palloc(toklen + 1);
+		memcpy(slot->lexeme, tok, toklen);
+		slot->lexeme[toklen] = '\0';
+		slot->type = toktype;
+		state->cur++;
+	}
+
+	FunctionCall1(&parser->prsend, PointerGetDatum(prsdata));
+
+	/* switch from filling to reading: len = entry count, cur = read index */
+	state->len = state->cur;
+	state->cur = 0;
+	funcctx->user_fctx = (void *) state;
+
+	desc = CreateTemplateTupleDesc(2, false);
+	TupleDescInitEntry(desc, (AttrNumber) 1, "tokid",
+					   INT4OID, -1, 0);
+	TupleDescInitEntry(desc, (AttrNumber) 2, "token",
+					   TEXTOID, -1, 0);
+	funcctx->attinmeta = TupleDescGetAttInMetadata(desc);
+
+	MemoryContextSwitchTo(callerctx);
+}
+
+/*
+ * Per-call worker for the ts_parse_* functions.
+ *
+ * Returns the next collected token as a (tokid, token) tuple datum and
+ * frees that token's string.  Once all tokens have been returned, the
+ * stored state is released and (Datum) 0 is returned.
+ */
+static Datum
+prs_process_call(FuncCallContext *funcctx)
+{
+	PrsStorage *state = (PrsStorage *) funcctx->user_fctx;
+	LexemeEntry *entry;
+	char	   *fields[2];
+	char		typebuf[16];
+	HeapTuple	tup;
+	Datum		res;
+
+	if (state->cur >= state->len)
+	{
+		/* list exhausted: drop the iteration state */
+		if (state->list)
+			pfree(state->list);
+		pfree(state);
+		return (Datum) 0;
+	}
+
+	entry = &state->list[state->cur];
+
+	sprintf(typebuf, "%d", entry->type);
+	fields[0] = typebuf;
+	fields[1] = entry->lexeme;
+
+	tup = BuildTupleFromCStrings(funcctx->attinmeta, fields);
+	res = HeapTupleGetDatum(tup);
+
+	pfree(entry->lexeme);
+	state->cur++;
+
+	return res;
+}
+
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose OID is given as argument 0, returning one row per token.
+ */
+Datum
+ts_parse_byid(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Datum		row;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		text	   *txt = PG_GETARG_TEXT_P(1);
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
+		PG_FREE_IF_COPY(txt, 1);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+
+	row = prs_process_call(funcctx);
+	if (row == (Datum) 0)
+		SRF_RETURN_DONE(funcctx);
+
+	SRF_RETURN_NEXT(funcctx, row);
+}
+
+/*
+ * Set-returning function: parse the text in argument 1 with the parser
+ * whose (possibly qualified) name is given as text argument 0, returning
+ * one row per token.
+ */
+Datum
+ts_parse_byname(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Datum		row;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		text	   *prsname = PG_GETARG_TEXT_P(0);
+		text	   *txt = PG_GETARG_TEXT_P(1);
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		prs_setup_firstcall(funcctx,
+							TSParserGetPrsid(textToQualifiedNameList(prsname),
+											 false),
+							txt);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+
+	row = prs_process_call(funcctx);
+	if (row == (Datum) 0)
+		SRF_RETURN_DONE(funcctx);
+
+	SRF_RETURN_NEXT(funcctx, row);
+}
+
+/*
+ * Build a headline for a document.
+ *
+ * Arguments: 0 = OID of the text search configuration, 1 = document text,
+ * 2 = tsquery, 3 = options text (used only when the call passes more than
+ * three arguments and the pointer is not null).
+ *
+ * The document is parsed with hlparsetext(), the parser's prsheadline
+ * method is then called on the result, and generatHeadline() produces the
+ * returned text.
+ */
+Datum
+ts_headline_byid_opt(PG_FUNCTION_ARGS)
+{
+	text	   *doc = PG_GETARG_TEXT_P(1);
+	TSQuery		tsq = PG_GETARG_TSQUERY(2);
+	text	   *options = NULL;
+	HeadlineText hl;
+	text	   *headline;
+	TSConfigCacheEntry *config;
+	TSParserCacheEntry *parser;
+
+	if (PG_NARGS() > 3 && PG_GETARG_POINTER(3))
+		options = PG_GETARG_TEXT_P(3);
+
+	config = lookup_ts_config_cache(PG_GETARG_OID(0));
+	parser = lookup_ts_parser_cache(config->prsId);
+
+	memset(&hl, 0, sizeof(HeadlineText));
+	hl.lenwords = 32;
+	hl.words = (HeadlineWord *) palloc(sizeof(HeadlineWord) * hl.lenwords);
+
+	hlparsetext(config->cfgId, &hl, tsq, VARDATA(doc), VARSIZE(doc) - VARHDRSZ);
+
+	FunctionCall3(&(parser->prsheadline),
+				  PointerGetDatum(&hl),
+				  PointerGetDatum(options),
+				  PointerGetDatum(tsq));
+
+	headline = generatHeadline(&hl);
+
+	/* release detoasted argument copies and the working storage */
+	PG_FREE_IF_COPY(doc, 1);
+	PG_FREE_IF_COPY(tsq, 2);
+	if (options)
+		PG_FREE_IF_COPY(options, 3);
+	pfree(hl.words);
+	pfree(hl.startsel);
+	pfree(hl.stopsel);
+
+	PG_RETURN_POINTER(headline);
+}
+
+/*
+ * Headline with an explicit configuration OID and no options: forwards
+ * arguments 0..2 to ts_headline_byid_opt.
+ */
+Datum
+ts_headline_byid(PG_FUNCTION_ARGS)
+{
+	Datum		headline;
+
+	headline = DirectFunctionCall3(ts_headline_byid_opt,
+								   PG_GETARG_DATUM(0),
+								   PG_GETARG_DATUM(1),
+								   PG_GETARG_DATUM(2));
+
+	PG_RETURN_DATUM(headline);
+}
+
+/*
+ * Headline using the current text search configuration and no options:
+ * calls ts_headline_byid_opt with the configuration from
+ * getTSCurrentConfig(true) followed by arguments 0 and 1.
+ */
+Datum
+ts_headline(PG_FUNCTION_ARGS)
+{
+	Datum		cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
+	Datum		headline;
+
+	headline = DirectFunctionCall3(ts_headline_byid_opt,
+								   cfg,
+								   PG_GETARG_DATUM(0),
+								   PG_GETARG_DATUM(1));
+
+	PG_RETURN_DATUM(headline);
+}
+
+/*
+ * Headline using the current text search configuration, with options:
+ * calls ts_headline_byid_opt with the configuration from
+ * getTSCurrentConfig(true) followed by arguments 0, 1 and 2.
+ */
+Datum
+ts_headline_opt(PG_FUNCTION_ARGS)
+{
+	Datum		cfg = ObjectIdGetDatum(getTSCurrentConfig(true));
+	Datum		headline;
+
+	headline = DirectFunctionCall4(ts_headline_byid_opt,
+								   cfg,
+								   PG_GETARG_DATUM(0),
+								   PG_GETARG_DATUM(1),
+								   PG_GETARG_DATUM(2));
+
+	PG_RETURN_DATUM(headline);
+}
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c