1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-31 22:04:40 +03:00

Improve support of agglutinative languages (queries with compound words).

regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

To support this, the dictionary interface has changed: a dictionary's lexize
method should now return a pointer to an array of TSLexeme structs instead of
char**. The last element of the array should have TSLexeme->lexeme == NULL.

/*
 * One lexeme produced by a dictionary's lexize method. The method returns
 * an array of these; the last element of the array has lexeme == NULL.
 */
typedef struct {
        /*
         * Number of the split variant this lexeme belongs to. For example,
         * the word 'fotballklubber' (Norwegian) has two variants of
         * splitting: ( fotball, klubb ) and ( fot, ball, klubb ). So, the
         * dictionary should return:
         *
         *      nvariant        lexeme
         *      1               fotball
         *      1               klubb
         *      2               fot
         *      2               ball
         *      2               klubb
         */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /* the lexeme text, as a C-string */
        char    *lexeme;
} TSLexeme;
This commit is contained in:
Teodor Sigaev
2005-01-25 15:24:38 +00:00
parent d314616d12
commit 324300bc7c
12 changed files with 146 additions and 85 deletions

View File

@ -162,7 +162,7 @@ syn_lexize(PG_FUNCTION_ARGS)
char *in = (char *) PG_GETARG_POINTER(1);
Syn key,
*found;
char **res = NULL;
TSLexeme *res = NULL;
if (!PG_GETARG_INT32(2))
PG_RETURN_POINTER(NULL);
@ -176,10 +176,9 @@ syn_lexize(PG_FUNCTION_ARGS)
if (!found)
PG_RETURN_POINTER(NULL);
res = palloc(sizeof(char *) * 2);
res[0] = pstrdup(found->out);
res[1] = NULL;
res = palloc(sizeof(TSLexeme) * 2);
memset(res,0,sizeof(TSLexeme) * 2);
res[0].lexeme = pstrdup(found->out);
PG_RETURN_POINTER(res);
}