mirror of
https://github.com/postgres/postgres.git
synced 2025-12-24 06:01:07 +03:00
Improve support for agglutinative languages (queries with compound words).
regression=# select to_tsquery( '\'fotballklubber\'');
to_tsquery
------------------------------------------------
'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)
So, the interface to dictionaries has changed: the lexize method of a dictionary
should return a pointer to an array of TSLexeme structs instead of char**. The
last element of the array should have TSLexeme->lexeme == NULL.
/*
 * One lexeme returned by a dictionary's lexize method. The method returns
 * an array of these; the last element of the array has lexeme == NULL.
 */
typedef struct {
/*
 * Number of the split variant this lexeme belongs to. For example, the
 * Norwegian word 'fotballklubber' can be split in two ways:
 * ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary
 * should return:
 *   nvariant  lexeme
 *   1         fotball
 *   1         klubb
 *   2         fot
 *   2         ball
 *   2         klubb
 */
uint16 nvariant;
/* currently unused */
uint16 flags;
/* the lexeme itself, as a C-string; NULL marks the end of the array */
char *lexeme;
} TSLexeme;
This commit is contained in:
@@ -105,12 +105,12 @@ snb_lexize(PG_FUNCTION_ARGS)
|
||||
DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
char **res = palloc(sizeof(char *) * 2);
|
||||
TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
|
||||
|
||||
memset(res, 0, sizeof(TSLexeme) * 2);
|
||||
if (*txt == '\0' || searchstoplist(&(d->stoplist), txt))
|
||||
{
|
||||
pfree(txt);
|
||||
res[0] = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -122,10 +122,8 @@ snb_lexize(PG_FUNCTION_ARGS)
|
||||
memcpy(txt, d->z->p, d->z->l);
|
||||
txt[d->z->l] = '\0';
|
||||
}
|
||||
res[0] = txt;
|
||||
res->lexeme = txt;
|
||||
}
|
||||
res[1] = NULL;
|
||||
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user