mirror of
https://github.com/postgres/postgres.git
synced 2025-12-07 12:02:30 +03:00
The idea is to encourage wider use of these new routines across the tree, as they offer stronger type-safety guarantees than palloc(). In an ideal world, palloc() would then act as an internal routine of these flavors, with a minimal footprint in the tree. The patch sent by the author is very large, and this chunk of changes represents something like 10% of the overall patch submitted. The compiled code is the same before and after this commit, as validated by diffing objdump output taken before and after. There are some diffs, caused by changes in line numbers because some of the new allocation formulas are shorter, in the following files: trgm_regexp.c, xpath.c and pg_walinspect.c. Author: David Geier <geidav.pg@gmail.com> Discussion: https://postgr.es/m/ad0748d4-3080-436e-b0bc-ac8f86a3466a@gmail.com
265 lines
5.2 KiB
C
265 lines
5.2 KiB
C
/*-------------------------------------------------------------------------
 *
 * dict_xsyn.c
 *	  Extended synonym dictionary
 *
 * Copyright (c) 2007-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/dict_xsyn/dict_xsyn.c
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include <ctype.h>
|
|
|
|
#include "catalog/pg_collation_d.h"
|
|
#include "commands/defrem.h"
|
|
#include "tsearch/ts_locale.h"
|
|
#include "tsearch/ts_public.h"
|
|
#include "utils/formatting.h"
|
|
|
|
PG_MODULE_MAGIC_EXT(
|
|
.name = "dict_xsyn",
|
|
.version = PG_VERSION
|
|
);
|
|
|
|
/*
 * One entry of the synonym table: a lookup key paired with the complete
 * synonym line that the key was taken from.
 */
typedef struct
{
	char	   *key;			/* Word used for lookup */
	char	   *value;			/* Unparsed list of synonyms, including the
								 * word itself */
} Syn;
|
|
|
|
typedef struct
|
|
{
|
|
int len;
|
|
Syn *syn;
|
|
|
|
bool matchorig;
|
|
bool keeporig;
|
|
bool matchsynonyms;
|
|
bool keepsynonyms;
|
|
} DictSyn;
|
|
|
|
|
|
/* Entry points of this dictionary, declared with the version-1 convention */
PG_FUNCTION_INFO_V1(dxsyn_init);
PG_FUNCTION_INFO_V1(dxsyn_lexize);
|
|
|
|
static char *
|
|
find_word(char *in, char **end)
|
|
{
|
|
char *start;
|
|
|
|
*end = NULL;
|
|
while (*in && isspace((unsigned char) *in))
|
|
in += pg_mblen(in);
|
|
|
|
if (!*in || *in == '#')
|
|
return NULL;
|
|
start = in;
|
|
|
|
while (*in && !isspace((unsigned char) *in))
|
|
in += pg_mblen(in);
|
|
|
|
*end = in;
|
|
|
|
return start;
|
|
}
|
|
|
|
static int
|
|
compare_syn(const void *a, const void *b)
|
|
{
|
|
return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
|
|
}
|
|
|
|
static void
|
|
read_dictionary(DictSyn *d, const char *filename)
|
|
{
|
|
char *real_filename = get_tsearch_config_filename(filename, "rules");
|
|
tsearch_readline_state trst;
|
|
char *line;
|
|
int cur = 0;
|
|
|
|
if (!tsearch_readline_begin(&trst, real_filename))
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_CONFIG_FILE_ERROR),
|
|
errmsg("could not open synonym file \"%s\": %m",
|
|
real_filename)));
|
|
|
|
while ((line = tsearch_readline(&trst)) != NULL)
|
|
{
|
|
char *value;
|
|
char *key;
|
|
char *pos;
|
|
char *end;
|
|
|
|
if (*line == '\0')
|
|
continue;
|
|
|
|
value = str_tolower(line, strlen(line), DEFAULT_COLLATION_OID);
|
|
pfree(line);
|
|
|
|
pos = value;
|
|
while ((key = find_word(pos, &end)) != NULL)
|
|
{
|
|
/* Enlarge syn structure if full */
|
|
if (cur == d->len)
|
|
{
|
|
d->len = (d->len > 0) ? 2 * d->len : 16;
|
|
if (d->syn)
|
|
d->syn = repalloc_array(d->syn, Syn, d->len);
|
|
else
|
|
d->syn = palloc_array(Syn, d->len);
|
|
}
|
|
|
|
/* Save first word only if we will match it */
|
|
if (pos != value || d->matchorig)
|
|
{
|
|
d->syn[cur].key = pnstrdup(key, end - key);
|
|
d->syn[cur].value = pstrdup(value);
|
|
|
|
cur++;
|
|
}
|
|
|
|
pos = end;
|
|
|
|
/* Don't bother scanning synonyms if we will not match them */
|
|
if (!d->matchsynonyms)
|
|
break;
|
|
}
|
|
|
|
pfree(value);
|
|
}
|
|
|
|
tsearch_readline_end(&trst);
|
|
|
|
d->len = cur;
|
|
if (cur > 1)
|
|
qsort(d->syn, d->len, sizeof(Syn), compare_syn);
|
|
|
|
pfree(real_filename);
|
|
}
|
|
|
|
Datum
|
|
dxsyn_init(PG_FUNCTION_ARGS)
|
|
{
|
|
List *dictoptions = (List *) PG_GETARG_POINTER(0);
|
|
DictSyn *d;
|
|
ListCell *l;
|
|
char *filename = NULL;
|
|
|
|
d = palloc0_object(DictSyn);
|
|
d->len = 0;
|
|
d->syn = NULL;
|
|
d->matchorig = true;
|
|
d->keeporig = true;
|
|
d->matchsynonyms = false;
|
|
d->keepsynonyms = true;
|
|
|
|
foreach(l, dictoptions)
|
|
{
|
|
DefElem *defel = (DefElem *) lfirst(l);
|
|
|
|
if (strcmp(defel->defname, "matchorig") == 0)
|
|
{
|
|
d->matchorig = defGetBoolean(defel);
|
|
}
|
|
else if (strcmp(defel->defname, "keeporig") == 0)
|
|
{
|
|
d->keeporig = defGetBoolean(defel);
|
|
}
|
|
else if (strcmp(defel->defname, "matchsynonyms") == 0)
|
|
{
|
|
d->matchsynonyms = defGetBoolean(defel);
|
|
}
|
|
else if (strcmp(defel->defname, "keepsynonyms") == 0)
|
|
{
|
|
d->keepsynonyms = defGetBoolean(defel);
|
|
}
|
|
else if (strcmp(defel->defname, "rules") == 0)
|
|
{
|
|
/* we can't read the rules before parsing all options! */
|
|
filename = defGetString(defel);
|
|
}
|
|
else
|
|
{
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("unrecognized xsyn parameter: \"%s\"",
|
|
defel->defname)));
|
|
}
|
|
}
|
|
|
|
if (filename)
|
|
read_dictionary(d, filename);
|
|
|
|
PG_RETURN_POINTER(d);
|
|
}
|
|
|
|
Datum
|
|
dxsyn_lexize(PG_FUNCTION_ARGS)
|
|
{
|
|
DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
|
|
char *in = (char *) PG_GETARG_POINTER(1);
|
|
int length = PG_GETARG_INT32(2);
|
|
Syn word;
|
|
Syn *found;
|
|
TSLexeme *res = NULL;
|
|
|
|
if (!length || d->len == 0)
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
/* Create search pattern */
|
|
{
|
|
char *temp = pnstrdup(in, length);
|
|
|
|
word.key = str_tolower(temp, length, DEFAULT_COLLATION_OID);
|
|
pfree(temp);
|
|
word.value = NULL;
|
|
}
|
|
|
|
/* Look for matching syn */
|
|
found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
|
|
pfree(word.key);
|
|
|
|
if (!found)
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
/* Parse string of synonyms and return array of words */
|
|
{
|
|
char *value = found->value;
|
|
char *syn;
|
|
char *pos;
|
|
char *end;
|
|
int nsyns = 0;
|
|
|
|
res = palloc_object(TSLexeme);
|
|
|
|
pos = value;
|
|
while ((syn = find_word(pos, &end)) != NULL)
|
|
{
|
|
res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
|
|
|
|
/* The first word is output only if keeporig=true */
|
|
if (pos != value || d->keeporig)
|
|
{
|
|
res[nsyns].lexeme = pnstrdup(syn, end - syn);
|
|
res[nsyns].nvariant = 0;
|
|
res[nsyns].flags = 0;
|
|
nsyns++;
|
|
}
|
|
|
|
pos = end;
|
|
|
|
/* Stop if we are not to output the synonyms */
|
|
if (!d->keepsynonyms)
|
|
break;
|
|
}
|
|
res[nsyns].lexeme = NULL;
|
|
}
|
|
|
|
PG_RETURN_POINTER(res);
|
|
}
|