1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-31 22:04:40 +03:00

improve support of agglutinative languages (query with compound words).

regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, changed interface to dictionaries, lexize method of dictionary shoud return
pointer to aray of TSLexeme structs instead of char**. Last element should
have TSLexeme->lexeme == NULL.

typedef struct {
        /* number of variant of split word , for example
                Word 'fotballklubber' (norwegian) has two varian to split:
                ( fotball, klubb ) and ( fot, ball, klubb ). So, dictionary
                should return:
                nvariant        lexeme
                1               fotball
                1               klubb
                2               fot
                2               ball
                2               klubb

        */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /* C-string */
        char    *lexeme;
} TSLexeme;
This commit is contained in:
Teodor Sigaev
2005-01-25 15:24:38 +00:00
parent d314616d12
commit 324300bc7c
12 changed files with 146 additions and 85 deletions

View File

@ -265,6 +265,7 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
{
int4 count = 0;
PRSTEXT prs;
uint32 variant, pos, cntvar=0, cntpos=0, cnt=0;
prs.lenwords = 32;
prs.curwords = 0;
@ -273,17 +274,39 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
for (count = 0; count < prs.curwords; count++)
{
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
pfree(prs.words[count].word);
if (count)
pushquery(state, OPR, (int4) '&', 0, 0, 0);
}
pfree(prs.words);
if ( prs.curwords>0 ) {
/* XXX */
if (prs.curwords == 0)
while (count < prs.curwords) {
pos = prs.words[count].pos.pos;
cntvar=0;
while(count < prs.curwords && pos==prs.words[count].pos.pos) {
variant = prs.words[count].nvariant;
cnt=0;
while(count < prs.curwords && pos==prs.words[count].pos.pos && variant==prs.words[count].nvariant) {
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
pfree(prs.words[count].word);
if ( cnt )
pushquery(state, OPR, (int4) '&', 0, 0, 0);
cnt++;
count++;
}
if ( cntvar )
pushquery(state, OPR, (int4) '|', 0, 0, 0);
cntvar++;
}
if (cntpos)
pushquery(state, OPR, (int4) '&', 0, 0, 0);
cntpos++;
}
pfree(prs.words);
} else
pushval_asis(state, VALSTOP, NULL, 0, 0);
}