1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Improve support for agglutinative languages (queries with compound words).

regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, the interface to dictionaries has changed: the lexize method of a dictionary
should return a pointer to an array of TSLexeme structs instead of char**. The
last element should have TSLexeme->lexeme == NULL.

/*
 * One lexeme returned by a dictionary's lexize method.  lexize returns an
 * array of these; the last element of the array has lexeme == NULL.
 */
typedef struct {
        /*
         * Number of the split variant this lexeme belongs to.  For example,
         * the Norwegian word 'fotballklubber' can be split in two ways:
         * ( fotball, klubb ) and ( fot, ball, klubb ).  So the dictionary
         * should return:
         *
         *      nvariant        lexeme
         *      1               fotball
         *      1               klubb
         *      2               fot
         *      2               ball
         *      2               klubb
         */
        uint16  nvariant;

        /* currently unused */
        uint16  flags;

        /* C-string */
        char    *lexeme;
} TSLexeme;
This commit is contained in:
Teodor Sigaev
2005-01-25 15:24:38 +00:00
parent d314616d12
commit 324300bc7c
12 changed files with 146 additions and 85 deletions

View File

@ -1119,17 +1119,32 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
return var;
}
char **
TSLexeme *
NINormalizeWord(IspellDict * Conf, char *word)
{
char **res = NormalizeSubWord(Conf, word, 0);
TSLexeme *lcur=NULL, *lres=NULL;
u_int16_t NVariant=1;
if (res) {
char **ptr = res;
lcur = lres = (TSLexeme*)palloc( MAX_NORM * sizeof(TSLexeme) );
while(*ptr) {
lcur->lexeme=*ptr;
lcur->flags=0;
lcur->nvariant = NVariant++;
lcur++;
ptr++;
}
lcur->lexeme=NULL;
pfree(res);
}
if (Conf->compoundcontrol != '\t')
{
int wordlen = strlen(word);
SplitVar *ptr,
*var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
char **cur = res;
int i;
while (var)
@ -1140,30 +1155,31 @@ NINormalizeWord(IspellDict * Conf, char *word)
if (subres)
{
char **ptr = subres;
char **subptr = subres;
if (cur)
{
while (*cur)
cur++;
}
else
res = cur = (char **) palloc(MAX_NORM * sizeof(char *));
if ( !lcur )
lcur = lres = (TSLexeme*)palloc( MAX_NORM * sizeof(TSLexeme) );
while(*subptr) {
for(i=0;i<var->nstem-1;i++) {
lcur->lexeme=(subptr==subres) ? var->stem[ i ] : pstrdup(var->stem[ i ]);
lcur->flags=0;
lcur->nvariant = NVariant;
lcur++;
}
for (i = 0; i < var->nstem - 1; i++)
{
*cur = var->stem[i];
cur++;
}
while (*ptr)
{
*cur = *ptr;
cur++;
ptr++;
}
*cur = NULL;
lcur->lexeme=*subptr;
lcur->flags=0;
lcur->nvariant = NVariant;
lcur++;
subptr++;
NVariant++;
}
lcur->lexeme=NULL;
pfree(subres);
var->stem[0] = NULL;
pfree( var->stem[ var->nstem-1 ] );
}
}
@ -1175,7 +1191,7 @@ NINormalizeWord(IspellDict * Conf, char *word)
var = ptr;
}
}
return res;
return lres;
}

View File

@ -3,10 +3,11 @@
#include <sys/types.h>
#include "regex/regex.h"
#include "regis.h"
#include "c.h"
#include "regis.h"
#include "dict.h"
struct SPNode;
@ -116,7 +117,7 @@ typedef struct
} IspellDict;
char **NINormalizeWord(IspellDict * Conf, char *word);
TSLexeme *NINormalizeWord(IspellDict * Conf, char *word);
int NIImportAffixes(IspellDict * Conf, const char *filename);
int NIImportDictionary(IspellDict * Conf, const char *filename);