mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
improve support of agglutinative languages (query with compound words).
regression=# select to_tsquery( '\'fotballklubber\''); to_tsquery ------------------------------------------------ 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb' (1 row) So, the dictionary interface has changed: the lexize method of a dictionary should return a pointer to an array of TSLexeme structs instead of char**. The last element should have TSLexeme->lexeme == NULL. typedef struct { /* number of the variant of the split word, for example: the word 'fotballklubber' (Norwegian) has two variants of splitting: ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary should return: nvariant lexeme 1 fotball 1 klubb 2 fot 2 ball 2 klubb */ uint16 nvariant; /* currently unused */ uint16 flags; /* C-string */ char *lexeme; } TSLexeme;
This commit is contained in:
@ -159,14 +159,13 @@ spell_lexize(PG_FUNCTION_ARGS)
|
||||
DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0);
|
||||
char *in = (char *) PG_GETARG_POINTER(1);
|
||||
char *txt;
|
||||
char **res;
|
||||
char **ptr,
|
||||
**cptr;
|
||||
TSLexeme *res;
|
||||
TSLexeme *ptr,
|
||||
*cptr;
|
||||
|
||||
if (!PG_GETARG_INT32(2))
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
res = palloc(sizeof(char *) * 2);
|
||||
txt = pnstrdup(in, PG_GETARG_INT32(2));
|
||||
res = NINormalizeWord(&(d->obj), txt);
|
||||
pfree(txt);
|
||||
@ -175,22 +174,22 @@ spell_lexize(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(NULL);
|
||||
|
||||
ptr = cptr = res;
|
||||
while (*ptr)
|
||||
while (ptr->lexeme)
|
||||
{
|
||||
if (searchstoplist(&(d->stoplist), *ptr))
|
||||
if (searchstoplist(&(d->stoplist), ptr->lexeme))
|
||||
{
|
||||
pfree(*ptr);
|
||||
*ptr = NULL;
|
||||
pfree(ptr->lexeme);
|
||||
ptr->lexeme = NULL;
|
||||
ptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*cptr = *ptr;
|
||||
memcpy(cptr, ptr, sizeof(TSLexeme));
|
||||
cptr++;
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
*cptr = NULL;
|
||||
cptr->lexeme = NULL;
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
|
Reference in New Issue
Block a user