1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-02 04:21:28 +03:00

Simplify the syntax of CREATE/ALTER TEXT SEARCH DICTIONARY by treating the
init options of the template as top-level options in the syntax.  This also
makes ALTER a bit easier to use, since options can be replaced individually.
I also made these statements verify that the tmplinit method will accept
the new settings before they get stored; in the original coding you didn't
find out about mistakes until the dictionary got invoked.

Under the hood, init methods now get options as a List of DefElem instead
of a raw text string --- that lets tsearch use existing options-pushing code
instead of duplicating functionality.
This commit is contained in:
Tom Lane
2007-08-22 01:39:46 +00:00
parent fd33d90a23
commit d321421d0a
17 changed files with 618 additions and 417 deletions

View File

@@ -7,12 +7,13 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "commands/defrem.h"
#include "tsearch/dicts/spell.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
@@ -30,59 +31,49 @@ typedef struct
Datum
dispell_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictISpell *d;
Map *cfg,
*pcfg;
bool affloaded = false,
dictloaded = false,
stoploaded = false;
text *in;
/* init functions must defend against NULLs for themselves */
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("NULL config not allowed for ISpell")));
in = PG_GETARG_TEXT_P(0);
parse_keyvalpairs(in, &cfg);
PG_FREE_IF_COPY(in, 0);
ListCell *l;
d = (DictISpell *) palloc0(sizeof(DictISpell));
d->stoplist.wordop = recode_and_lowerstr;
pcfg = cfg;
while (pcfg->key)
foreach(l, dictoptions)
{
if (pg_strcasecmp("DictFile", pcfg->key) == 0)
DefElem *defel = (DefElem *) lfirst(l);
if (pg_strcasecmp(defel->defname, "DictFile") == 0)
{
if (dictloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple DictFile parameters")));
NIImportDictionary(&(d->obj),
get_tsearch_config_filename(pcfg->value,
get_tsearch_config_filename(defGetString(defel),
"dict"));
dictloaded = true;
}
else if (pg_strcasecmp("AffFile", pcfg->key) == 0)
else if (pg_strcasecmp(defel->defname, "AffFile") == 0)
{
if (affloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple AffFile parameters")));
NIImportAffixes(&(d->obj),
get_tsearch_config_filename(pcfg->value,
get_tsearch_config_filename(defGetString(defel),
"affix"));
affloaded = true;
}
else if (pg_strcasecmp("StopWords", pcfg->key) == 0)
else if (pg_strcasecmp(defel->defname, "StopWords") == 0)
{
if (stoploaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
readstoplist(pcfg->value, &(d->stoplist));
readstoplist(defGetString(defel), &(d->stoplist));
sortstoplist(&(d->stoplist));
stoploaded = true;
}
@@ -91,13 +82,9 @@ dispell_init(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized ISpell parameter: \"%s\"",
pcfg->key)));
defel->defname)));
}
pfree(pcfg->key);
pfree(pcfg->value);
pcfg++;
}
pfree(cfg);
if (affloaded && dictloaded)
{

View File

@@ -7,12 +7,13 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
@@ -28,18 +29,34 @@ typedef struct
Datum
dsimple_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictExample *d = (DictExample *) palloc0(sizeof(DictExample));
bool stoploaded = false;
ListCell *l;
d->stoplist.wordop = recode_and_lowerstr;
if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
foreach(l, dictoptions)
{
text *in = PG_GETARG_TEXT_P(0);
char *filename = TextPGetCString(in);
DefElem *defel = (DefElem *) lfirst(l);
readstoplist(filename, &d->stoplist);
sortstoplist(&d->stoplist);
pfree(filename);
if (pg_strcasecmp("StopWords", defel->defname) == 0)
{
if (stoploaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple StopWords parameters")));
readstoplist(defGetString(defel), &d->stoplist);
sortstoplist(&d->stoplist);
stoploaded = true;
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized simple dictionary parameter: \"%s\"",
defel->defname)));
}
}
PG_RETURN_POINTER(d);

View File

@@ -7,13 +7,14 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "storage/fd.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_locale.h"
@@ -593,57 +594,43 @@ compileTheSubstitute(DictThesaurus * d)
Datum
thesaurus_init(PG_FUNCTION_ARGS)
{
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictThesaurus *d;
Map *cfg,
*pcfg;
text *in;
char *subdictname = NULL;
bool fileloaded = false;
/* init functions must defend against NULLs for themselves */
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("NULL config not allowed for Thesaurus")));
in = PG_GETARG_TEXT_P(0);
parse_keyvalpairs(in, &cfg);
PG_FREE_IF_COPY(in, 0);
ListCell *l;
d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
pcfg = cfg;
while (pcfg->key)
foreach(l, dictoptions)
{
if (pg_strcasecmp("DictFile", pcfg->key) == 0)
DefElem *defel = (DefElem *) lfirst(l);
if (pg_strcasecmp("DictFile", defel->defname) == 0)
{
if (fileloaded)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple DictFile parameters")));
thesaurusRead(pcfg->value, d);
thesaurusRead(defGetString(defel), d);
fileloaded = true;
}
else if (pg_strcasecmp("Dictionary", pcfg->key) == 0)
else if (pg_strcasecmp("Dictionary", defel->defname) == 0)
{
if (subdictname)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("multiple Dictionary parameters")));
subdictname = pstrdup(pcfg->value);
subdictname = pstrdup(defGetString(defel));
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized Thesaurus parameter: \"%s\"",
pcfg->key)));
defel->defname)));
}
pfree(pcfg->key);
pfree(pcfg->value);
pcfg++;
}
pfree(cfg);
if (!fileloaded)
ereport(ERROR,

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -24,169 +24,6 @@
#include "utils/builtins.h"
/*
 * Scanner states used by parse_keyvalpairs() while walking a
 * "key = value, key = value" parameter string.
 */
#define CS_WAITKEY 0		/* skipping whitespace, expecting first letter of a key */
#define CS_INKEY 1			/* inside a key (alphabetic characters only) */
#define CS_WAITEQ 2			/* key ended by whitespace, expecting '=' */
#define CS_WAITVALUE 3		/* after '=', expecting the start of a value */
#define CS_INVALUE 4		/* inside a double-quoted value */
#define CS_IN2VALUE 5		/* inside an unquoted value */
#define CS_WAITDELIM 6		/* value complete, expecting ',' */
#define CS_INESC 7			/* character following a backslash; parser resumes in CS_INVALUE */
#define CS_IN2ESC 8			/* character following a backslash; parser resumes in CS_IN2VALUE */
/*
 * Duplicate the first len bytes of ptr into a newly palloc'd,
 * NUL-terminated string and remove backslash escapes from the copy:
 * a backslash is dropped and the character after it is kept as-is.
 *
 * A backslash that is the very last character has nothing to escape, so it
 * is kept literally.  (Previously the loop stepped over the terminating NUL
 * in that case and went on reading past the end of the allocated buffer.)
 *
 * The result is owned by the caller and may be released with pfree().
 */
static char *
nstrdup(char *ptr, int len)
{
	char	   *res = palloc(len + 1),
			   *cptr;

	memcpy(res, ptr, len);
	res[len] = '\0';

	/* De-escape in place: cptr is the write position, ptr the read position */
	cptr = ptr = res;
	while (*ptr)
	{
		/* Skip the backslash only when there is a character for it to escape */
		if (t_iseq(ptr, '\\') && ptr[1] != '\0')
			ptr++;
		COPYCHAR(cptr, ptr);
		cptr += pg_mblen(ptr);
		ptr += pg_mblen(ptr);
	}
	*cptr = '\0';

	return res;
}
/*
 * Parse a parameter string consisting of key = value clauses
 *
 * The input has the form "key = value [, key = value ...]".  Keys consist of
 * alphabetic characters.  A value is either enclosed in double quotes or is
 * an unquoted run of characters ended by whitespace or a comma; in both
 * forms a backslash makes the following character part of the value.
 *
 * On return *m points to a palloc'd array of Map entries, one per clause,
 * followed by an all-zero entry (key == NULL) that marks the end.  Each key
 * and value is a separately palloc'd string.
 *
 * Malformed input is reported with ERRCODE_SYNTAX_ERROR.
 */
void
parse_keyvalpairs(text *in, Map ** m)
{
	Map		   *mptr;
	char	   *ptr = VARDATA(in),
			   *begin = NULL;
	int			num = 0;	/* int, not char: a long input must not wrap the count */
	int			state = CS_WAITKEY;

	/*
	 * Every clause after the first needs a comma in front of it, so the
	 * number of commas bounds the number of clauses.
	 */
	while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
	{
		if (t_iseq(ptr, ','))
			num++;
		ptr += pg_mblen(ptr);
	}

	/* num + 1 possible clauses, plus the zeroed terminator entry */
	*m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
	memset(mptr, 0, sizeof(Map) * (num + 2));

	ptr = VARDATA(in);
	while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
	{
		if (state == CS_WAITKEY)
		{
			if (t_isalpha(ptr))
			{
				begin = ptr;
				state = CS_INKEY;
			}
			else if (!t_isspace(ptr))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("invalid parameter list format: \"%s\"",
								TextPGetCString(in))));
		}
		else if (state == CS_INKEY)
		{
			if (t_isspace(ptr))
			{
				mptr->key = nstrdup(begin, ptr - begin);
				state = CS_WAITEQ;
			}
			else if (t_iseq(ptr, '='))
			{
				mptr->key = nstrdup(begin, ptr - begin);
				state = CS_WAITVALUE;
			}
			else if (!t_isalpha(ptr))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("invalid parameter list format: \"%s\"",
								TextPGetCString(in))));
		}
		else if (state == CS_WAITEQ)
		{
			if (t_iseq(ptr, '='))
				state = CS_WAITVALUE;
			else if (!t_isspace(ptr))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("invalid parameter list format: \"%s\"",
								TextPGetCString(in))));
		}
		else if (state == CS_WAITVALUE)
		{
			if (t_iseq(ptr, '"'))
			{
				/* quoted value: it starts after the opening quote */
				begin = ptr + 1;
				state = CS_INVALUE;
			}
			else if (!t_isspace(ptr))
			{
				/* unquoted value: it starts right here */
				begin = ptr;
				state = CS_IN2VALUE;
			}
		}
		else if (state == CS_INVALUE)
		{
			if (t_iseq(ptr, '"'))
			{
				mptr->value = nstrdup(begin, ptr - begin);
				mptr++;
				state = CS_WAITDELIM;
			}
			else if (t_iseq(ptr, '\\'))
				state = CS_INESC;
		}
		else if (state == CS_IN2VALUE)
		{
			if (t_isspace(ptr) || t_iseq(ptr, ','))
			{
				mptr->value = nstrdup(begin, ptr - begin);
				mptr++;
				state = (t_iseq(ptr, ',')) ? CS_WAITKEY : CS_WAITDELIM;
			}
			else if (t_iseq(ptr, '\\'))
			{
				/*
				 * Use the escape state that returns to the unquoted-value
				 * state.  Going through CS_INESC here would resume in
				 * CS_INVALUE and then demand a closing double quote.
				 */
				state = CS_IN2ESC;
			}
		}
		else if (state == CS_WAITDELIM)
		{
			if (t_iseq(ptr, ','))
				state = CS_WAITKEY;
			else if (!t_isspace(ptr))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("invalid parameter list format: \"%s\"",
								TextPGetCString(in))));
		}
		else if (state == CS_INESC)
			state = CS_INVALUE;		/* escaped character consumed */
		else if (state == CS_IN2ESC)
			state = CS_IN2VALUE;	/* escaped character consumed */
		else
			elog(ERROR, "unrecognized parse_keyvalpairs state: %d", state);

		ptr += pg_mblen(ptr);
	}

	if (state == CS_IN2VALUE)
	{
		/* input ended inside an unquoted value: that value is complete */
		mptr->value = nstrdup(begin, ptr - begin);
		mptr++;
	}
	else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("invalid parameter list format: \"%s\"",
						TextPGetCString(in))));
}
/*
* Given the base name and extension of a tsearch config file, return
* its full path name. The base name is assumed to be user-supplied,

View File

@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/wparser.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -21,6 +21,7 @@
#include "catalog/namespace.h"
#include "catalog/pg_ts_parser.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
@@ -300,6 +301,7 @@ ts_headline_byid_opt(PG_FUNCTION_ARGS)
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
HeadlineText prs;
List *prsoptions;
text *out;
TSConfigCacheEntry *cfg;
TSParserCacheEntry *prsobj;
@@ -313,9 +315,14 @@ ts_headline_byid_opt(PG_FUNCTION_ARGS)
hlparsetext(cfg->cfgId, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
if (opt)
prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
prsoptions = NIL;
FunctionCall3(&(prsobj->prsheadline),
PointerGetDatum(&prs),
PointerGetDatum(opt),
PointerGetDatum(prsoptions),
PointerGetDatum(query));
out = generatHeadline(&prs);

View File

@@ -7,13 +7,14 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_type.h"
@@ -1662,7 +1663,7 @@ Datum
prsd_headline(PG_FUNCTION_ARGS)
{
HeadlineParsedText *prs = (HeadlineParsedText *) PG_GETARG_POINTER(0);
text *opt = (text *) PG_GETARG_POINTER(1); /* can't be toasted */
List *prsoptions = (List *) PG_GETARG_POINTER(1);
TSQuery query = PG_GETARG_TSQUERY(2);
/* from opt + start and end tag */
@@ -1682,66 +1683,55 @@ prsd_headline(PG_FUNCTION_ARGS)
int i;
int highlight = 0;
ListCell *l;
/* config */
prs->startsel = NULL;
prs->stopsel = NULL;
if (opt)
foreach(l, prsoptions)
{
Map *map,
*mptr;
DefElem *defel = (DefElem *) lfirst(l);
char *val = defGetString(defel);
parse_keyvalpairs(opt, &map);
mptr = map;
while (mptr && mptr->key)
{
if (pg_strcasecmp(mptr->key, "MaxWords") == 0)
max_words = pg_atoi(mptr->value, 4, 1);
else if (pg_strcasecmp(mptr->key, "MinWords") == 0)
min_words = pg_atoi(mptr->value, 4, 1);
else if (pg_strcasecmp(mptr->key, "ShortWord") == 0)
shortword = pg_atoi(mptr->value, 4, 1);
else if (pg_strcasecmp(mptr->key, "StartSel") == 0)
prs->startsel = pstrdup(mptr->value);
else if (pg_strcasecmp(mptr->key, "StopSel") == 0)
prs->stopsel = pstrdup(mptr->value);
else if (pg_strcasecmp(mptr->key, "HighlightAll") == 0)
highlight = (
pg_strcasecmp(mptr->value, "1") == 0 ||
pg_strcasecmp(mptr->value, "on") == 0 ||
pg_strcasecmp(mptr->value, "true") == 0 ||
pg_strcasecmp(mptr->value, "t") == 0 ||
pg_strcasecmp(mptr->value, "y") == 0 ||
pg_strcasecmp(mptr->value, "yes") == 0) ?
1 : 0;
pfree(mptr->key);
pfree(mptr->value);
mptr++;
}
pfree(map);
if (highlight == 0)
{
if (min_words >= max_words)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("MinWords should be less than MaxWords")));
if (min_words <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("MinWords should be positive")));
if (shortword < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ShortWord should be >= 0")));
}
if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
max_words = pg_atoi(val, sizeof(int32), 0);
else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
min_words = pg_atoi(val, sizeof(int32), 0);
else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
shortword = pg_atoi(val, sizeof(int32), 0);
else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
prs->startsel = pstrdup(val);
else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
prs->stopsel = pstrdup(val);
else if (pg_strcasecmp(defel->defname, "HighlightAll") == 0)
highlight = (pg_strcasecmp(val, "1") == 0 ||
pg_strcasecmp(val, "on") == 0 ||
pg_strcasecmp(val, "true") == 0 ||
pg_strcasecmp(val, "t") == 0 ||
pg_strcasecmp(val, "y") == 0 ||
pg_strcasecmp(val, "yes") == 0);
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized headline parameter: \"%s\"",
defel->defname)));
}
if (highlight == 0)
{
if (min_words >= max_words)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("MinWords should be less than MaxWords")));
if (min_words <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("MinWords should be positive")));
if (shortword < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ShortWord should be >= 0")));
while (hlCover(prs, query, &p, &q))
{
/* find cover len in words */