1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-21 02:52:47 +03:00

Tsearch2 functionality migrates to core. The bulk of this work is by

Oleg Bartunov and Teodor Sigaev, but I did a lot of editorializing,
so anything that's broken is probably my fault.

Documentation is nonexistent as yet, but let's land the patch so we can
get some portability testing done.
This commit is contained in:
Tom Lane
2007-08-21 01:11:32 +00:00
parent 4e94d1f952
commit 140d4ebcb4
200 changed files with 54388 additions and 147 deletions

View File

@@ -0,0 +1,330 @@
/*-------------------------------------------------------------------------
*
* ts_utils.c
* various support functions
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "miscadmin.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#define CS_WAITKEY 0
#define CS_INKEY 1
#define CS_WAITEQ 2
#define CS_WAITVALUE 3
#define CS_INVALUE 4
#define CS_IN2VALUE 5
#define CS_WAITDELIM 6
#define CS_INESC 7
#define CS_IN2ESC 8
static char *
nstrdup(char *ptr, int len)
{
char *res = palloc(len + 1),
*cptr;
memcpy(res, ptr, len);
res[len] = '\0';
cptr = ptr = res;
while (*ptr)
{
if (t_iseq(ptr, '\\'))
ptr++;
COPYCHAR(cptr, ptr);
cptr += pg_mblen(ptr);
ptr += pg_mblen(ptr);
}
*cptr = '\0';
return res;
}
/*
* Parse a parameter string consisting of key = value clauses
*/
void
parse_keyvalpairs(text *in, Map ** m)
{
Map *mptr;
char *ptr = VARDATA(in),
*begin = NULL;
char num = 0;
int state = CS_WAITKEY;
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
{
if (t_iseq(ptr, ','))
num++;
ptr += pg_mblen(ptr);
}
*m = mptr = (Map *) palloc(sizeof(Map) * (num + 2));
memset(mptr, 0, sizeof(Map) * (num + 2));
ptr = VARDATA(in);
while (ptr - VARDATA(in) < VARSIZE(in) - VARHDRSZ)
{
if (state == CS_WAITKEY)
{
if (t_isalpha(ptr))
{
begin = ptr;
state = CS_INKEY;
}
else if (!t_isspace(ptr))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid parameter list format: \"%s\"",
TextPGetCString(in))));
}
else if (state == CS_INKEY)
{
if (t_isspace(ptr))
{
mptr->key = nstrdup(begin, ptr - begin);
state = CS_WAITEQ;
}
else if (t_iseq(ptr, '='))
{
mptr->key = nstrdup(begin, ptr - begin);
state = CS_WAITVALUE;
}
else if (!t_isalpha(ptr))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid parameter list format: \"%s\"",
TextPGetCString(in))));
}
else if (state == CS_WAITEQ)
{
if (t_iseq(ptr, '='))
state = CS_WAITVALUE;
else if (!t_isspace(ptr))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid parameter list format: \"%s\"",
TextPGetCString(in))));
}
else if (state == CS_WAITVALUE)
{
if (t_iseq(ptr, '"'))
{
begin = ptr + 1;
state = CS_INVALUE;
}
else if (!t_isspace(ptr))
{
begin = ptr;
state = CS_IN2VALUE;
}
}
else if (state == CS_INVALUE)
{
if (t_iseq(ptr, '"'))
{
mptr->value = nstrdup(begin, ptr - begin);
mptr++;
state = CS_WAITDELIM;
}
else if (t_iseq(ptr, '\\'))
state = CS_INESC;
}
else if (state == CS_IN2VALUE)
{
if (t_isspace(ptr) || t_iseq(ptr, ','))
{
mptr->value = nstrdup(begin, ptr - begin);
mptr++;
state = (t_iseq(ptr, ',')) ? CS_WAITKEY : CS_WAITDELIM;
}
else if (t_iseq(ptr, '\\'))
state = CS_INESC;
}
else if (state == CS_WAITDELIM)
{
if (t_iseq(ptr, ','))
state = CS_WAITKEY;
else if (!t_isspace(ptr))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid parameter list format: \"%s\"",
TextPGetCString(in))));
}
else if (state == CS_INESC)
state = CS_INVALUE;
else if (state == CS_IN2ESC)
state = CS_IN2VALUE;
else
elog(ERROR, "unrecognized parse_keyvalpairs state: %d", state);
ptr += pg_mblen(ptr);
}
if (state == CS_IN2VALUE)
{
mptr->value = nstrdup(begin, ptr - begin);
mptr++;
}
else if (!(state == CS_WAITDELIM || state == CS_WAITKEY))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid parameter list format: \"%s\"",
TextPGetCString(in))));
}
/*
* Given the base name and extension of a tsearch config file, return
* its full path name. The base name is assumed to be user-supplied,
* and is checked to prevent pathname attacks. The extension is assumed
* to be safe.
*
* The result is a palloc'd string.
*/
char *
get_tsearch_config_filename(const char *basename,
const char *extension)
{
char sharepath[MAXPGPATH];
char *result;
const char *p;
/*
* We enforce that the basename is all alpha characters. This may be
* overly restrictive, but we don't want to allow access to anything
* outside the tsearch_data directory, so for instance '/' *must* be
* rejected. This is the same test used for timezonesets names.
*/
for (p = basename; *p; p++)
{
if (!isalpha((unsigned char) *p))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid text search configuration file name \"%s\"",
basename)));
}
get_share_path(my_exec_path, sharepath);
result = palloc(MAXPGPATH);
snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
sharepath, basename, extension);
return result;
}
#define STOPBUFLEN 4096
void
readstoplist(char *in, StopList * s)
{
char **stop = NULL;
s->len = 0;
if (in && *in)
{
char *filename = get_tsearch_config_filename(in, "stop");
FILE *hin;
char buf[STOPBUFLEN];
int reallen = 0;
int line = 0;
if ((hin = AllocateFile(filename, "r")) == NULL)
ereport(ERROR,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not open stopword file \"%s\": %m",
filename)));
while (fgets(buf, STOPBUFLEN, hin))
{
char *pbuf = buf;
line++;
while (*pbuf && !isspace(*pbuf))
pbuf++;
*pbuf = '\0';
if (*buf == '\0')
continue;
if (!pg_verifymbstr(buf, strlen(buf), true))
{
FreeFile(hin);
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("invalid multibyte encoding at line %d in file \"%s\"",
line, filename)));
}
if (s->len >= reallen)
{
if (reallen == 0)
{
reallen = 16;
stop = (char **) palloc(sizeof(char *) * reallen);
}
else
{
reallen *= 2;
stop = (char **) repalloc((void *) stop, sizeof(char *) * reallen);
}
}
if (s->wordop)
stop[s->len] = s->wordop(buf);
else
stop[s->len] = pstrdup(buf);
(s->len)++;
}
FreeFile(hin);
pfree(filename);
}
s->stop = stop;
}
static int
comparestr(const void *a, const void *b)
{
return strcmp(*(char **) a, *(char **) b);
}
void
sortstoplist(StopList * s)
{
if (s->stop && s->len > 0)
qsort(s->stop, s->len, sizeof(char *), comparestr);
}
bool
searchstoplist(StopList * s, char *key)
{
return (s->stop && s->len > 0 &&
bsearch(&key, s->stop, s->len,
sizeof(char *), comparestr)) ? true : false;
}
char *
pnstrdup(const char *in, int len)
{
char *out = palloc(len + 1);
memcpy(out, in, len);
out[len] = '\0';
return out;
}