Mirror of https://github.com/postgres/postgres.git (synced 2025-07-28 23:42:10 +03:00)
Make array_to_tsvector() sort and de-duplicate the given strings.
This is required for the result to be a legal tsvector value. Noted while fooling with Andreas Seltenreich's ts_delete() crash.

Discussion: <87invhoj6e.fsf@credativ.de>
This commit is contained in:
@ -9294,7 +9294,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
|
|||||||
<entry><type>tsvector</type></entry>
|
<entry><type>tsvector</type></entry>
|
||||||
<entry>convert array of lexemes to <type>tsvector</type></entry>
|
<entry>convert array of lexemes to <type>tsvector</type></entry>
|
||||||
<entry><literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal></entry>
|
<entry><literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal></entry>
|
||||||
<entry><literal>'fat' 'cat' 'rat'</literal></entry>
|
<entry><literal>'cat' 'fat' 'rat'</literal></entry>
|
||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>
|
<entry>
|
||||||
|
@ -416,17 +416,34 @@ tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* qsort comparator functions
|
||||||
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
compareint(const void *va, const void *vb)
|
compare_int(const void *va, const void *vb)
|
||||||
{
|
{
|
||||||
int32 a = *((const int32 *) va);
|
int a = *((const int *) va);
|
||||||
int32 b = *((const int32 *) vb);
|
int b = *((const int *) vb);
|
||||||
|
|
||||||
if (a == b)
|
if (a == b)
|
||||||
return 0;
|
return 0;
|
||||||
return (a > b) ? 1 : -1;
|
return (a > b) ? 1 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
compare_text_lexemes(const void *va, const void *vb)
|
||||||
|
{
|
||||||
|
Datum a = *((const Datum *) va);
|
||||||
|
Datum b = *((const Datum *) vb);
|
||||||
|
char *alex = VARDATA_ANY(a);
|
||||||
|
int alex_len = VARSIZE_ANY_EXHDR(a);
|
||||||
|
char *blex = VARDATA_ANY(b);
|
||||||
|
int blex_len = VARSIZE_ANY_EXHDR(b);
|
||||||
|
|
||||||
|
return tsCompareString(alex, alex_len, blex, blex_len, false);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Internal routine to delete lexemes from TSVector by array of offsets.
|
* Internal routine to delete lexemes from TSVector by array of offsets.
|
||||||
*
|
*
|
||||||
@ -459,7 +476,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
|
|||||||
{
|
{
|
||||||
int kp;
|
int kp;
|
||||||
|
|
||||||
qsort(indices_to_delete, indices_count, sizeof(int), compareint);
|
qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
|
||||||
kp = 0;
|
kp = 0;
|
||||||
for (k = 1; k < indices_count; k++)
|
for (k = 1; k < indices_count; k++)
|
||||||
{
|
{
|
||||||
@ -743,32 +760,50 @@ array_to_tsvector(PG_FUNCTION_ARGS)
|
|||||||
bool *nulls;
|
bool *nulls;
|
||||||
int nitems,
|
int nitems,
|
||||||
i,
|
i,
|
||||||
|
j,
|
||||||
tslen,
|
tslen,
|
||||||
datalen = 0;
|
datalen = 0;
|
||||||
char *cur;
|
char *cur;
|
||||||
|
|
||||||
deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
|
deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
|
||||||
|
|
||||||
|
/* Reject nulls (maybe we should just ignore them, instead?) */
|
||||||
for (i = 0; i < nitems; i++)
|
for (i = 0; i < nitems; i++)
|
||||||
{
|
{
|
||||||
if (nulls[i])
|
if (nulls[i])
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
||||||
errmsg("lexeme array may not contain nulls")));
|
errmsg("lexeme array may not contain nulls")));
|
||||||
|
|
||||||
datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Sort and de-dup, because this is required for a valid tsvector. */
|
||||||
|
if (nitems > 1)
|
||||||
|
{
|
||||||
|
qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
|
||||||
|
j = 0;
|
||||||
|
for (i = 1; i < nitems; i++)
|
||||||
|
{
|
||||||
|
if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
|
||||||
|
dlexemes[++j] = dlexemes[i];
|
||||||
|
}
|
||||||
|
nitems = ++j;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Calculate space needed for surviving lexemes. */
|
||||||
|
for (i = 0; i < nitems; i++)
|
||||||
|
datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
|
||||||
tslen = CALCDATASIZE(nitems, datalen);
|
tslen = CALCDATASIZE(nitems, datalen);
|
||||||
|
|
||||||
|
/* Allocate and fill tsvector. */
|
||||||
tsout = (TSVector) palloc0(tslen);
|
tsout = (TSVector) palloc0(tslen);
|
||||||
SET_VARSIZE(tsout, tslen);
|
SET_VARSIZE(tsout, tslen);
|
||||||
tsout->size = nitems;
|
tsout->size = nitems;
|
||||||
|
|
||||||
arrout = ARRPTR(tsout);
|
arrout = ARRPTR(tsout);
|
||||||
cur = STRPTR(tsout);
|
cur = STRPTR(tsout);
|
||||||
|
|
||||||
for (i = 0; i < nitems; i++)
|
for (i = 0; i < nitems; i++)
|
||||||
{
|
{
|
||||||
char *lex = VARDATA(dlexemes[i]);
|
char *lex = VARDATA_ANY(dlexemes[i]);
|
||||||
int lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
|
int lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
|
||||||
|
|
||||||
memcpy(cur, lex, lex_len);
|
memcpy(cur, lex, lex_len);
|
||||||
|
@ -1165,6 +1165,13 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
|
|||||||
|
|
||||||
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
|
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
|
||||||
ERROR: lexeme array may not contain nulls
|
ERROR: lexeme array may not contain nulls
|
||||||
|
-- array_to_tsvector must sort and de-dup
|
||||||
|
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
|
||||||
|
array_to_tsvector
|
||||||
|
-------------------
|
||||||
|
'bar' 'baz' 'foo'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
||||||
setweight
|
setweight
|
||||||
----------------------------------------------------------
|
----------------------------------------------------------
|
||||||
|
@ -226,6 +226,8 @@ SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
|
|||||||
|
|
||||||
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
|
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
|
||||||
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
|
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
|
||||||
|
-- array_to_tsvector must sort and de-dup
|
||||||
|
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
|
||||||
|
|
||||||
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
|
||||||
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
|
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
|
||||||
|
Reference in New Issue
Block a user