mirror of
https://github.com/postgres/postgres.git
synced 2025-07-31 22:04:40 +03:00
Improve support for agglutinative languages (queries with compound words).
regression=# select to_tsquery( '\'fotballklubber\''); to_tsquery ------------------------------------------------ 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb' (1 row) So, the interface to dictionaries has changed: the lexize method of a dictionary should return a pointer to an array of TSLexeme structs instead of char**. The last element should have TSLexeme->lexeme == NULL. typedef struct { /* number of the variant of a split word; for example, the word 'fotballklubber' (Norwegian) has two variants of splitting: ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary should return: nvariant lexeme 1 fotball 1 klubb 2 fot 2 ball 2 klubb */ uint16 nvariant; /* currently unused */ uint16 flags; /* C-string */ char *lexeme; } TSLexeme;
This commit is contained in:
@ -265,6 +265,7 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
|
||||
{
|
||||
int4 count = 0;
|
||||
PRSTEXT prs;
|
||||
uint32 variant, pos, cntvar=0, cntpos=0, cnt=0;
|
||||
|
||||
prs.lenwords = 32;
|
||||
prs.curwords = 0;
|
||||
@ -273,17 +274,39 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
|
||||
|
||||
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
|
||||
|
||||
for (count = 0; count < prs.curwords; count++)
|
||||
{
|
||||
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
|
||||
pfree(prs.words[count].word);
|
||||
if (count)
|
||||
pushquery(state, OPR, (int4) '&', 0, 0, 0);
|
||||
}
|
||||
pfree(prs.words);
|
||||
if ( prs.curwords>0 ) {
|
||||
|
||||
/* XXX */
|
||||
if (prs.curwords == 0)
|
||||
while (count < prs.curwords) {
|
||||
pos = prs.words[count].pos.pos;
|
||||
cntvar=0;
|
||||
while(count < prs.curwords && pos==prs.words[count].pos.pos) {
|
||||
variant = prs.words[count].nvariant;
|
||||
|
||||
cnt=0;
|
||||
while(count < prs.curwords && pos==prs.words[count].pos.pos && variant==prs.words[count].nvariant) {
|
||||
|
||||
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
|
||||
pfree(prs.words[count].word);
|
||||
if ( cnt )
|
||||
pushquery(state, OPR, (int4) '&', 0, 0, 0);
|
||||
cnt++;
|
||||
count++;
|
||||
}
|
||||
|
||||
if ( cntvar )
|
||||
pushquery(state, OPR, (int4) '|', 0, 0, 0);
|
||||
cntvar++;
|
||||
}
|
||||
|
||||
if (cntpos)
|
||||
pushquery(state, OPR, (int4) '&', 0, 0, 0);
|
||||
|
||||
cntpos++;
|
||||
}
|
||||
|
||||
pfree(prs.words);
|
||||
|
||||
} else
|
||||
pushval_asis(state, VALSTOP, NULL, 0, 0);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user