1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-07 12:02:30 +03:00
Files
postgres/contrib/dict_xsyn/dict_xsyn.c
Michael Paquier 31d3847a37 Use more palloc_object() and palloc_array() in contrib/
The idea is to further encourage the use of these new routines across the
tree, as they offer stronger type-safety guarantees than palloc().  In
an ideal world, palloc() would then act as an internal routine of these
flavors, with a minimal footprint in the tree.

The patch sent by the author is very large, and this chunk of changes
represents something like 10% of the overall patch submitted.

The compiled code is the same before and after this commit; this was
validated by diffing objdump output taken before and after the change.  There
are some diffs, which are caused by changes in line numbers because some
of the new allocation formulas are shorter, in the following files:
trgm_regexp.c, xpath.c and pg_walinspect.c.

Author: David Geier <geidav.pg@gmail.com>
Discussion: https://postgr.es/m/ad0748d4-3080-436e-b0bc-ac8f86a3466a@gmail.com
2025-12-05 16:40:26 +09:00

265 lines
5.2 KiB
C

/*-------------------------------------------------------------------------
*
* dict_xsyn.c
* Extended synonym dictionary
*
* Copyright (c) 2007-2025, PostgreSQL Global Development Group
*
* IDENTIFICATION
* contrib/dict_xsyn/dict_xsyn.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "catalog/pg_collation_d.h"
#include "commands/defrem.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_public.h"
#include "utils/formatting.h"
/*
 * Module magic block, giving this module's name ("dict_xsyn") and reporting
 * PG_VERSION as its version.
 */
PG_MODULE_MAGIC_EXT(
.name = "dict_xsyn",
.version = PG_VERSION
);
/*
 * One dictionary entry.  read_dictionary() creates one of these for each
 * word it decides to match; the array of entries is ordered and searched
 * by key (see compare_syn).
 */
typedef struct
{
char *key; /* Lower-cased word that input is matched against */
char *value; /* Unparsed list of synonyms, including the
* word itself: a copy of the whole lower-cased rule line */
} Syn;
/*
 * Per-dictionary state.  dxsyn_init() allocates and returns one of these;
 * dxsyn_lexize() receives a pointer to one as its argument 0.
 *
 * len: number of entries in syn (read_dictionary() also uses it as the
 * allocated capacity while it is still loading).
 * syn: the entries, sorted by key once loading is done; NULL if no entry
 * was ever stored.
 * matchorig: enter the first word of each rule line as a key (default true).
 * keeporig: output the first word of the matched rule line (default true).
 * matchsynonyms: enter the words after the first as keys (default false).
 * keepsynonyms: output the words after the first (default true).
 */
typedef struct
{
int len;
Syn *syn;
bool matchorig;
bool keeporig;
bool matchsynonyms;
bool keepsynonyms;
} DictSyn;
/* Function-info declarations for the two entry points defined in this file */
PG_FUNCTION_INFO_V1(dxsyn_init);
PG_FUNCTION_INFO_V1(dxsyn_lexize);
/*
 * find_word
 *		Pick out the next whitespace-delimited word of "in".
 *
 * Whitespace in front of the word is skipped.  On success the start of the
 * word is returned and *end is set to the first character past it.  NULL is
 * returned, with *end set to NULL, when only whitespace remains or when the
 * next word begins with '#'.
 */
static char *
find_word(char *in, char **end)
{
	char	   *word;
	char	   *scan = in;

	*end = NULL;

	/* Move past any leading whitespace, one pg_mblen() step at a time */
	for (; *scan != '\0' && isspace((unsigned char) *scan);
		 scan += pg_mblen(scan))
		;

	/* Nothing left, or the rest of the string starts with '#' */
	if (*scan == '\0' || *scan == '#')
		return NULL;

	/* The word runs up to the next whitespace character or end of string */
	word = scan;
	for (; *scan != '\0' && !isspace((unsigned char) *scan);
		 scan += pg_mblen(scan))
		;

	*end = scan;
	return word;
}
static int
compare_syn(const void *a, const void *b)
{
return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
}
/*
 * read_dictionary
 *		Load the "rules" file named by filename into d->syn.
 *
 * Each non-empty line is lower-cased and split into whitespace-separated
 * words with find_word().  For every word that is to be matched, an entry
 * is stored whose key is that word and whose value is a copy of the whole
 * lower-cased line.  The first word of a line is stored only if
 * d->matchorig is set; the words after it are scanned only if
 * d->matchsynonyms is set.
 *
 * While loading, d->len holds the allocated capacity of d->syn.  On return
 * it holds the number of stored entries, and d->syn is sorted with
 * compare_syn so that it can be searched with bsearch().
 *
 * Raises an ERROR if the file cannot be opened.
 */
static void
read_dictionary(DictSyn *d, const char *filename)
{
	char	   *real_filename = get_tsearch_config_filename(filename, "rules");
	tsearch_readline_state trst;
	char	   *line;
	int			cur = 0;

	if (!tsearch_readline_begin(&trst, real_filename))
		ereport(ERROR,
				(errcode(ERRCODE_CONFIG_FILE_ERROR),
				 errmsg("could not open synonym file \"%s\": %m",
						real_filename)));

	while ((line = tsearch_readline(&trst)) != NULL)
	{
		char	   *value;
		char	   *key;
		char	   *pos;
		char	   *end;

		if (*line == '\0')
		{
			/*
			 * Nothing to parse, but the line is still ours to release, just
			 * as on the normal path below.
			 */
			pfree(line);
			continue;
		}

		value = str_tolower(line, strlen(line), DEFAULT_COLLATION_OID);
		pfree(line);

		pos = value;
		while ((key = find_word(pos, &end)) != NULL)
		{
			/* Enlarge syn structure if full */
			if (cur == d->len)
			{
				d->len = (d->len > 0) ? 2 * d->len : 16;
				if (d->syn)
					d->syn = repalloc_array(d->syn, Syn, d->len);
				else
					d->syn = palloc_array(Syn, d->len);
			}

			/*
			 * Save first word only if we will match it.  pos still equals
			 * value exactly while we are looking at the first word.
			 */
			if (pos != value || d->matchorig)
			{
				d->syn[cur].key = pnstrdup(key, end - key);
				d->syn[cur].value = pstrdup(value);
				cur++;
			}

			pos = end;

			/* Don't bother scanning synonyms if we will not match them */
			if (!d->matchsynonyms)
				break;
		}

		pfree(value);
	}

	tsearch_readline_end(&trst);

	/* From here on len is the entry count, not the allocated capacity */
	d->len = cur;
	if (cur > 1)
		qsort(d->syn, d->len, sizeof(Syn), compare_syn);

	pfree(real_filename);
}
/*
 * dxsyn_init
 *		Build a DictSyn from the option list passed as argument 0.
 *
 * Recognized options are the booleans "matchorig", "keeporig",
 * "matchsynonyms" and "keepsynonyms", plus "rules", which names the file
 * handed to read_dictionary().  Any other option name raises an ERROR.
 * Returns a pointer to the newly allocated DictSyn.
 */
Datum
dxsyn_init(PG_FUNCTION_ARGS)
{
	List	   *dictoptions = (List *) PG_GETARG_POINTER(0);
	DictSyn    *dict = palloc0_object(DictSyn);
	char	   *rules_file = NULL;
	ListCell   *cell;

	/* Settings in effect for any option that is not given */
	dict->len = 0;
	dict->syn = NULL;
	dict->matchorig = true;
	dict->keeporig = true;
	dict->matchsynonyms = false;
	dict->keepsynonyms = true;

	foreach(cell, dictoptions)
	{
		DefElem    *option = (DefElem *) lfirst(cell);
		const char *optname = option->defname;

		if (strcmp(optname, "matchorig") == 0)
			dict->matchorig = defGetBoolean(option);
		else if (strcmp(optname, "keeporig") == 0)
			dict->keeporig = defGetBoolean(option);
		else if (strcmp(optname, "matchsynonyms") == 0)
			dict->matchsynonyms = defGetBoolean(option);
		else if (strcmp(optname, "keepsynonyms") == 0)
			dict->keepsynonyms = defGetBoolean(option);
		else if (strcmp(optname, "rules") == 0)
		{
			/*
			 * Only remember the name for now: read_dictionary() consults
			 * the match* flags, so every option must be parsed first.
			 */
			rules_file = defGetString(option);
		}
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("unrecognized xsyn parameter: \"%s\"",
							option->defname)));
	}

	if (rules_file != NULL)
		read_dictionary(dict, rules_file);

	PG_RETURN_POINTER(dict);
}
/*
 * dxsyn_lexize
 *		Look up one input word and return the words of its rule line.
 *
 * Arguments: 0 = pointer to the DictSyn, 1 = pointer to the input word,
 * 2 = length of the input word.
 *
 * Returns a NULL pointer if the length is zero, the dictionary holds no
 * entries, or the lower-cased word is not found among the keys.  Otherwise
 * returns a palloc'd TSLexeme array holding one element per output word
 * (nvariant and flags set to 0), followed by a terminating element whose
 * lexeme is NULL.  The first word of the rule line is output only if
 * d->keeporig is set, and the remaining words only if d->keepsynonyms is
 * set, so the array may consist of the terminator alone.
 */
Datum
dxsyn_lexize(PG_FUNCTION_ARGS)
{
	DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
	char	   *in = (char *) PG_GETARG_POINTER(1);
	int			length = PG_GETARG_INT32(2);
	Syn			word;
	Syn		   *found;
	TSLexeme   *res = NULL;

	if (!length || d->len == 0)
		PG_RETURN_POINTER(NULL);

	/* Create search pattern */
	{
		char	   *temp = pnstrdup(in, length);

		word.key = str_tolower(temp, length, DEFAULT_COLLATION_OID);
		pfree(temp);
		/* compare_syn only looks at the key */
		word.value = NULL;
	}

	/* Look for matching syn */
	found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
	pfree(word.key);

	if (!found)
		PG_RETURN_POINTER(NULL);

	/* Parse string of synonyms and return array of words */
	{
		char	   *value = found->value;
		char	   *syn;
		char	   *pos;
		char	   *end;
		int			nsyns = 0;

		res = palloc_object(TSLexeme);

		pos = value;
		while ((syn = find_word(pos, &end)) != NULL)
		{
			/* Keep room for this word plus the terminating element */
			res = repalloc_array(res, TSLexeme, nsyns + 2);

			/* The first word is output only if keeporig=true */
			if (pos != value || d->keeporig)
			{
				res[nsyns].lexeme = pnstrdup(syn, end - syn);
				res[nsyns].nvariant = 0;
				res[nsyns].flags = 0;
				nsyns++;
			}

			pos = end;

			/* Stop if we are not to output the synonyms */
			if (!d->keepsynonyms)
				break;
		}

		/* Terminate the array; only the lexeme field is set here */
		res[nsyns].lexeme = NULL;
	}

	PG_RETURN_POINTER(res);
}