1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Improve support for agglutinative languages (queries with compound words).

regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, the interface to dictionaries has changed: the lexize method of a dictionary
should return a pointer to an array of TSLexeme structs instead of char**. The
last element should have TSLexeme->lexeme == NULL.

typedef struct {
        /*
         * Number of the split variant this lexeme belongs to. For example,
         * the Norwegian word 'fotballklubber' can be split in two ways:
         * ( fotball, klubb ) and ( fot, ball, klubb ). So the dictionary
         * should return:
         *
         *      nvariant        lexeme
         *      1               fotball
         *      1               klubb
         *      2               fot
         *      2               ball
         *      2               klubb
         */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /*
         * C-string. A NULL value marks the last (terminating) element of
         * the TSLexeme array returned by a dictionary's lexize method.
         */
        char    *lexeme;
} TSLexeme;
This commit is contained in:
Teodor Sigaev
2005-01-25 15:24:38 +00:00
parent d314616d12
commit 324300bc7c
12 changed files with 146 additions and 85 deletions

View File

@ -159,14 +159,13 @@ spell_lexize(PG_FUNCTION_ARGS)
DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
char *in = (char *) PG_GETARG_POINTER(1);
char *txt;
char **res;
char **ptr,
**cptr;
TSLexeme *res;
TSLexeme *ptr,
*cptr;
if (!PG_GETARG_INT32(2))
PG_RETURN_POINTER(NULL);
res = palloc(sizeof(char *) * 2);
txt = pnstrdup(in, PG_GETARG_INT32(2));
res = NINormalizeWord(&(d->obj), txt);
pfree(txt);
@ -175,22 +174,22 @@ spell_lexize(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(NULL);
ptr = cptr = res;
while (*ptr)
while (ptr->lexeme)
{
if (searchstoplist(&(d->stoplist), *ptr))
if (searchstoplist(&(d->stoplist), ptr->lexeme))
{
pfree(*ptr);
*ptr = NULL;
pfree(ptr->lexeme);
ptr->lexeme = NULL;
ptr++;
}
else
{
*cptr = *ptr;
memcpy(cptr, ptr, sizeof(TSLexeme));
cptr++;
ptr++;
}
}
*cptr = NULL;
cptr->lexeme = NULL;
PG_RETURN_POINTER(res);
}