1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-31 22:04:40 +03:00

Improve support for agglutinative languages (queries with compound words).

regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, the interface to dictionaries has changed: the lexize method of a dictionary
should return a pointer to an array of TSLexeme structs instead of char**. The
last element should have TSLexeme->lexeme == NULL.

typedef struct {
        /*
         * Number of the split variant this lexeme belongs to. For example,
         * the Norwegian word 'fotballklubber' can be split in two ways:
         * ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary
         * should return:
         *
         *      nvariant        lexeme
         *      1               fotball
         *      1               klubb
         *      2               fot
         *      2               ball
         *      2               klubb
         */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /* C-string; NULL in the last element of the returned array */
        char    *lexeme;
} TSLexeme;
This commit is contained in:
Teodor Sigaev
2005-01-25 15:24:38 +00:00
parent d314616d12
commit 324300bc7c
12 changed files with 146 additions and 85 deletions

View File

@ -183,15 +183,15 @@ lexize(PG_FUNCTION_ARGS)
{
text *in = PG_GETARG_TEXT_P(1);
DictInfo *dict;
char **res,
**ptr;
TSLexeme *res,
*ptr;
Datum *da;
ArrayType *a;
SET_FUNCOID();
dict = finddict(PG_GETARG_OID(0));
ptr = res = (char **) DatumGetPointer(
ptr = res = (TSLexeme *) DatumGetPointer(
FunctionCall3(&(dict->lexize_info),
PointerGetDatum(dict->dictionary),
PointerGetDatum(VARDATA(in)),
@ -207,13 +207,13 @@ lexize(PG_FUNCTION_ARGS)
PG_RETURN_NULL();
}
while (*ptr)
while (ptr->lexeme)
ptr++;
da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1));
ptr = res;
while (*ptr)
while (ptr->lexeme)
{
da[ptr - res] = PointerGetDatum(char2text(*ptr));
da[ptr - res] = PointerGetDatum(char2text(ptr->lexeme));
ptr++;
}
@ -227,10 +227,10 @@ lexize(PG_FUNCTION_ARGS)
);
ptr = res;
while (*ptr)
while (ptr->lexeme)
{
pfree(DatumGetPointer(da[ptr - res]));
pfree(*ptr);
pfree(ptr->lexeme);
ptr++;
}
pfree(res);