mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
Improve make_tsvector() to handle empty input, and simplify its callers.
It seemed a bit silly that each caller of make_tsvector() was laboriously special-casing the situation where no lexemes were found, when it would be easy and much more bulletproof to have make_tsvector() handle the empty case itself.
This commit is contained in:
parent
b4c6d31c0b
commit
04a2c7f412
@ -149,6 +149,8 @@ uniqueWORD(ParsedWord *a, int32 l)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* make value of tsvector, given parsed text
|
* make value of tsvector, given parsed text
|
||||||
|
*
|
||||||
|
* Note: frees prs->words and subsidiary data.
|
||||||
*/
|
*/
|
||||||
TSVector
|
TSVector
|
||||||
make_tsvector(ParsedText *prs)
|
make_tsvector(ParsedText *prs)
|
||||||
@ -162,7 +164,11 @@ make_tsvector(ParsedText *prs)
|
|||||||
char *str;
|
char *str;
|
||||||
int stroff;
|
int stroff;
|
||||||
|
|
||||||
|
/* Merge duplicate words */
|
||||||
|
if (prs->curwords > 0)
|
||||||
prs->curwords = uniqueWORD(prs->words, prs->curwords);
|
prs->curwords = uniqueWORD(prs->words, prs->curwords);
|
||||||
|
|
||||||
|
/* Determine space needed */
|
||||||
for (i = 0; i < prs->curwords; i++)
|
for (i = 0; i < prs->curwords; i++)
|
||||||
{
|
{
|
||||||
lenstr += prs->words[i].len;
|
lenstr += prs->words[i].len;
|
||||||
@ -217,7 +223,10 @@ make_tsvector(ParsedText *prs)
|
|||||||
ptr->haspos = 0;
|
ptr->haspos = 0;
|
||||||
ptr++;
|
ptr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (prs->words)
|
||||||
pfree(prs->words);
|
pfree(prs->words);
|
||||||
|
|
||||||
return in;
|
return in;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -231,26 +240,19 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6; /* just estimation of word's
|
prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6; /* just estimation of word's
|
||||||
* number */
|
* number */
|
||||||
if (prs.lenwords == 0)
|
if (prs.lenwords < 2)
|
||||||
prs.lenwords = 2;
|
prs.lenwords = 2;
|
||||||
prs.curwords = 0;
|
prs.curwords = 0;
|
||||||
prs.pos = 0;
|
prs.pos = 0;
|
||||||
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
|
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
|
||||||
|
|
||||||
parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
|
parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
|
||||||
|
|
||||||
PG_FREE_IF_COPY(in, 1);
|
PG_FREE_IF_COPY(in, 1);
|
||||||
|
|
||||||
if (prs.curwords)
|
|
||||||
out = make_tsvector(&prs);
|
out = make_tsvector(&prs);
|
||||||
else
|
|
||||||
{
|
|
||||||
pfree(prs.words);
|
|
||||||
out = palloc(CALCDATASIZE(0, 0));
|
|
||||||
SET_VARSIZE(out, CALCDATASIZE(0, 0));
|
|
||||||
out->size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
PG_RETURN_POINTER(out);
|
PG_RETURN_TSVECTOR(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
@ -281,21 +283,10 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
iterate_jsonb_string_values(jb, &state, add_to_tsvector);
|
iterate_jsonb_string_values(jb, &state, add_to_tsvector);
|
||||||
|
|
||||||
if (prs.curwords > 0)
|
|
||||||
result = make_tsvector(&prs);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* There weren't any string elements in jsonb, so we need to return an
|
|
||||||
* empty vector
|
|
||||||
*/
|
|
||||||
result = palloc(CALCDATASIZE(0, 0));
|
|
||||||
SET_VARSIZE(result, CALCDATASIZE(0, 0));
|
|
||||||
result->size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
PG_FREE_IF_COPY(jb, 1);
|
PG_FREE_IF_COPY(jb, 1);
|
||||||
|
|
||||||
|
result = make_tsvector(&prs);
|
||||||
|
|
||||||
PG_RETURN_TSVECTOR(result);
|
PG_RETURN_TSVECTOR(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -327,21 +318,10 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
iterate_json_string_values(json, &state, add_to_tsvector);
|
iterate_json_string_values(json, &state, add_to_tsvector);
|
||||||
|
|
||||||
if (prs.curwords > 0)
|
|
||||||
result = make_tsvector(&prs);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* There weren't any string elements in json, so we need to return an
|
|
||||||
* empty vector
|
|
||||||
*/
|
|
||||||
result = palloc(CALCDATASIZE(0, 0));
|
|
||||||
SET_VARSIZE(result, CALCDATASIZE(0, 0));
|
|
||||||
result->size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
PG_FREE_IF_COPY(json, 1);
|
PG_FREE_IF_COPY(json, 1);
|
||||||
|
|
||||||
|
result = make_tsvector(&prs);
|
||||||
|
|
||||||
PG_RETURN_TSVECTOR(result);
|
PG_RETURN_TSVECTOR(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2579,28 +2579,15 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* make tsvector value */
|
/* make tsvector value */
|
||||||
if (prs.curwords)
|
datum = TSVectorGetDatum(make_tsvector(&prs));
|
||||||
{
|
|
||||||
datum = PointerGetDatum(make_tsvector(&prs));
|
|
||||||
isnull = false;
|
isnull = false;
|
||||||
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
|
|
||||||
1, &tsvector_attr_num,
|
|
||||||
&datum, &isnull);
|
|
||||||
pfree(DatumGetPointer(datum));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
TSVector out = palloc(CALCDATASIZE(0, 0));
|
|
||||||
|
|
||||||
SET_VARSIZE(out, CALCDATASIZE(0, 0));
|
/* and insert it into tuple */
|
||||||
out->size = 0;
|
|
||||||
datum = PointerGetDatum(out);
|
|
||||||
isnull = false;
|
|
||||||
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
|
rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
|
||||||
1, &tsvector_attr_num,
|
1, &tsvector_attr_num,
|
||||||
&datum, &isnull);
|
&datum, &isnull);
|
||||||
pfree(prs.words);
|
|
||||||
}
|
pfree(DatumGetPointer(datum));
|
||||||
|
|
||||||
return PointerGetDatum(rettuple);
|
return PointerGetDatum(rettuple);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user