mirror of
https://github.com/postgres/postgres.git
synced 2025-07-31 22:04:40 +03:00
improve support of agglutinative languages (query with compound words).
regression=# select to_tsquery( '\'fotballklubber\''); to_tsquery ------------------------------------------------ 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb' (1 row) So, the interface to dictionaries has changed: the lexize method of a dictionary should return a pointer to an array of TSLexeme structs instead of char**. The last element should have TSLexeme->lexeme == NULL. typedef struct { /* number of the split variant of the word; for example, the word 'fotballklubber' (Norwegian) has two variants of splitting: ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary should return: nvariant lexeme 1 fotball 1 klubb 2 fot 2 ball 2 klubb */ uint16 nvariant; /* currently unused */ uint16 flags; /* C-string */ char *lexeme; } TSLexeme;
This commit is contained in:
@ -38,4 +38,27 @@ typedef struct
|
||||
|
||||
void parse_cfgdict(text *in, Map ** m);
|
||||
|
||||
/* Element type returned by any dictionary lexize function: lexize returns a pointer to an array of TSLexeme, and the last element of that array has lexeme == NULL. */
|
||||
typedef struct {
|
||||
/* Number of the split variant this lexeme belongs to. For example,
|
||||
the word 'fotballklubber' (Norwegian) has two variants of splitting:
|
||||
( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary
|
||||
should return:
|
||||
nvariant lexeme
|
||||
1 fotball
|
||||
1 klubb
|
||||
2 fot
|
||||
2 ball
|
||||
2 klubb
|
||||
|
||||
*/
|
||||
uint16 nvariant;
|
||||
|
||||
/* currently unused */
|
||||
|
||||
uint16 flags;
|
||||
|
||||
/* lexeme text as a C-string; NULL in the last (terminating) element of the returned array */
|
||||
char *lexeme;
|
||||
} TSLexeme;
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user