mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Improve support for agglutinative languages (queries with compound words).
regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)
So, the interface to dictionaries has changed: the lexize method of a dictionary should return
a pointer to an array of TSLexeme structs instead of char**. The last element should
have TSLexeme->lexeme == NULL.
/* return struct for any lexize function */
typedef struct {
        /* Number of the split variant this lexeme belongs to. For example,
                the word 'fotballklubber' (Norwegian) can be split in two ways:
                ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary
                should return:
                nvariant        lexeme
                1               fotball
                1               klubb
                2               fot
                2               ball
                2               klubb
        */
        uint16  nvariant;
        /* currently unused */
        uint16  flags;
        /* C-string */
        char    *lexeme;
} TSLexeme;
			
			
This commit is contained in:
		| @@ -183,15 +183,15 @@ lexize(PG_FUNCTION_ARGS) | |||||||
| { | { | ||||||
| 	text	   *in = PG_GETARG_TEXT_P(1); | 	text	   *in = PG_GETARG_TEXT_P(1); | ||||||
| 	DictInfo   *dict; | 	DictInfo   *dict; | ||||||
| 	char	  **res, | 	TSLexeme	  *res, | ||||||
| 			  **ptr; | 			  *ptr; | ||||||
| 	Datum	   *da; | 	Datum	   *da; | ||||||
| 	ArrayType  *a; | 	ArrayType  *a; | ||||||
|  |  | ||||||
| 	SET_FUNCOID(); | 	SET_FUNCOID(); | ||||||
| 	dict = finddict(PG_GETARG_OID(0)); | 	dict = finddict(PG_GETARG_OID(0)); | ||||||
|  |  | ||||||
| 	ptr = res = (char **) DatumGetPointer( | 	ptr = res = (TSLexeme *) DatumGetPointer( | ||||||
| 									  FunctionCall3(&(dict->lexize_info), | 									  FunctionCall3(&(dict->lexize_info), | ||||||
| 									   PointerGetDatum(dict->dictionary), | 									   PointerGetDatum(dict->dictionary), | ||||||
| 											PointerGetDatum(VARDATA(in)), | 											PointerGetDatum(VARDATA(in)), | ||||||
| @@ -207,13 +207,13 @@ lexize(PG_FUNCTION_ARGS) | |||||||
| 			PG_RETURN_NULL(); | 			PG_RETURN_NULL(); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	while (*ptr) | 	while (ptr->lexeme) | ||||||
| 		ptr++; | 		ptr++; | ||||||
| 	da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1)); | 	da = (Datum *) palloc(sizeof(Datum) * (ptr - res + 1)); | ||||||
| 	ptr = res; | 	ptr = res; | ||||||
| 	while (*ptr) | 	while (ptr->lexeme) | ||||||
| 	{ | 	{ | ||||||
| 		da[ptr - res] = PointerGetDatum(char2text(*ptr)); | 		da[ptr - res] = PointerGetDatum(char2text(ptr->lexeme)); | ||||||
| 		ptr++; | 		ptr++; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -227,10 +227,10 @@ lexize(PG_FUNCTION_ARGS) | |||||||
| 		); | 		); | ||||||
|  |  | ||||||
| 	ptr = res; | 	ptr = res; | ||||||
| 	while (*ptr) | 	while (ptr->lexeme) | ||||||
| 	{ | 	{ | ||||||
| 		pfree(DatumGetPointer(da[ptr - res])); | 		pfree(DatumGetPointer(da[ptr - res])); | ||||||
| 		pfree(*ptr); | 		pfree(ptr->lexeme); | ||||||
| 		ptr++; | 		ptr++; | ||||||
| 	} | 	} | ||||||
| 	pfree(res); | 	pfree(res); | ||||||
|   | |||||||
| @@ -38,4 +38,27 @@ typedef struct | |||||||
|  |  | ||||||
| void		parse_cfgdict(text *in, Map ** m); | void		parse_cfgdict(text *in, Map ** m); | ||||||
|  |  | ||||||
|  | /* return struct for any lexize function */ | ||||||
|  | typedef struct { | ||||||
|  | 	/* number of variant of split word , for example | ||||||
|  | 		Word 'fotballklubber' (norwegian) has two varian to split: | ||||||
|  | 		( fotball, klubb ) and ( fot, ball, klubb ). So, dictionary | ||||||
|  | 		should return: | ||||||
|  | 		nvariant	lexeme | ||||||
|  | 		1		fotball | ||||||
|  | 		1		klubb | ||||||
|  | 		2		fot | ||||||
|  | 		2		ball | ||||||
|  | 		2		klubb | ||||||
|  |  | ||||||
|  | 	*/ | ||||||
|  | 	uint16	nvariant; | ||||||
|  |  | ||||||
|  | 	/* currently unused */ | ||||||
|  | 	uint16	flags; | ||||||
|  |  | ||||||
|  | 	/* C-string */ | ||||||
|  | 	char	*lexeme; | ||||||
|  | } TSLexeme; | ||||||
|  |  | ||||||
| #endif | #endif | ||||||
|   | |||||||
| @@ -54,16 +54,16 @@ dex_lexize(PG_FUNCTION_ARGS) | |||||||
| 	DictExample *d = (DictExample *) PG_GETARG_POINTER(0); | 	DictExample *d = (DictExample *) PG_GETARG_POINTER(0); | ||||||
| 	char	   *in = (char *) PG_GETARG_POINTER(1); | 	char	   *in = (char *) PG_GETARG_POINTER(1); | ||||||
| 	char	   *txt = pnstrdup(in, PG_GETARG_INT32(2)); | 	char	   *txt = pnstrdup(in, PG_GETARG_INT32(2)); | ||||||
| 	char	  **res = palloc(sizeof(char *) * 2); | 	TSLexeme   *res = palloc(sizeof(TSLexeme) * 2); | ||||||
|  |  | ||||||
|  | 	memset(res,0,sizeof(TSLexeme) * 2); | ||||||
|  |  | ||||||
| 	if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) | 	if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) | ||||||
| 	{ | 	{ | ||||||
| 		pfree(txt); | 		pfree(txt); | ||||||
| 		res[0] = NULL; |  | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
| 		res[0] = txt; | 		res[0].lexeme = txt; | ||||||
| 	res[1] = NULL; |  | ||||||
|  |  | ||||||
| 	PG_RETURN_POINTER(res); | 	PG_RETURN_POINTER(res); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -159,14 +159,13 @@ spell_lexize(PG_FUNCTION_ARGS) | |||||||
| 	DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); | 	DictISpell *d = (DictISpell *) PG_GETARG_POINTER(0); | ||||||
| 	char	   *in = (char *) PG_GETARG_POINTER(1); | 	char	   *in = (char *) PG_GETARG_POINTER(1); | ||||||
| 	char	   *txt; | 	char	   *txt; | ||||||
| 	char	  **res; | 	TSLexeme	  *res; | ||||||
| 	char	  **ptr, | 	TSLexeme	  *ptr, | ||||||
| 			  **cptr; | 			  *cptr; | ||||||
|  |  | ||||||
| 	if (!PG_GETARG_INT32(2)) | 	if (!PG_GETARG_INT32(2)) | ||||||
| 		PG_RETURN_POINTER(NULL); | 		PG_RETURN_POINTER(NULL); | ||||||
|  |  | ||||||
| 	res = palloc(sizeof(char *) * 2); |  | ||||||
| 	txt = pnstrdup(in, PG_GETARG_INT32(2)); | 	txt = pnstrdup(in, PG_GETARG_INT32(2)); | ||||||
| 	res = NINormalizeWord(&(d->obj), txt); | 	res = NINormalizeWord(&(d->obj), txt); | ||||||
| 	pfree(txt); | 	pfree(txt); | ||||||
| @@ -175,22 +174,22 @@ spell_lexize(PG_FUNCTION_ARGS) | |||||||
| 		PG_RETURN_POINTER(NULL); | 		PG_RETURN_POINTER(NULL); | ||||||
|  |  | ||||||
| 	ptr = cptr = res; | 	ptr = cptr = res; | ||||||
| 	while (*ptr) | 	while (ptr->lexeme) | ||||||
| 	{ | 	{ | ||||||
| 		if (searchstoplist(&(d->stoplist), *ptr)) | 		if (searchstoplist(&(d->stoplist), ptr->lexeme)) | ||||||
| 		{ | 		{ | ||||||
| 			pfree(*ptr); | 			pfree(ptr->lexeme); | ||||||
| 			*ptr = NULL; | 			ptr->lexeme = NULL; | ||||||
| 			ptr++; | 			ptr++; | ||||||
| 		} | 		} | ||||||
| 		else | 		else | ||||||
| 		{ | 		{ | ||||||
| 			*cptr = *ptr; | 			memcpy(cptr, ptr, sizeof(TSLexeme)); | ||||||
| 			cptr++; | 			cptr++; | ||||||
| 			ptr++; | 			ptr++; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	*cptr = NULL; | 	cptr->lexeme = NULL; | ||||||
|  |  | ||||||
| 	PG_RETURN_POINTER(res); | 	PG_RETURN_POINTER(res); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -105,12 +105,12 @@ snb_lexize(PG_FUNCTION_ARGS) | |||||||
| 	DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0); | 	DictSnowball *d = (DictSnowball *) PG_GETARG_POINTER(0); | ||||||
| 	char	   *in = (char *) PG_GETARG_POINTER(1); | 	char	   *in = (char *) PG_GETARG_POINTER(1); | ||||||
| 	char	   *txt = pnstrdup(in, PG_GETARG_INT32(2)); | 	char	   *txt = pnstrdup(in, PG_GETARG_INT32(2)); | ||||||
| 	char	  **res = palloc(sizeof(char *) * 2); | 	TSLexeme	  *res = palloc(sizeof(TSLexeme) * 2); | ||||||
|  |  | ||||||
|  | 	memset(res, 0, sizeof(TSLexeme) * 2); | ||||||
| 	if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) | 	if (*txt == '\0' || searchstoplist(&(d->stoplist), txt)) | ||||||
| 	{ | 	{ | ||||||
| 		pfree(txt); | 		pfree(txt); | ||||||
| 		res[0] = NULL; |  | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
| 	{ | 	{ | ||||||
| @@ -122,10 +122,8 @@ snb_lexize(PG_FUNCTION_ARGS) | |||||||
| 			memcpy(txt, d->z->p, d->z->l); | 			memcpy(txt, d->z->p, d->z->l); | ||||||
| 			txt[d->z->l] = '\0'; | 			txt[d->z->l] = '\0'; | ||||||
| 		} | 		} | ||||||
| 		res[0] = txt; | 		res->lexeme = txt; | ||||||
| 	} | 	} | ||||||
| 	res[1] = NULL; |  | ||||||
|  |  | ||||||
|  |  | ||||||
| 	PG_RETURN_POINTER(res); | 	PG_RETURN_POINTER(res); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -162,7 +162,7 @@ syn_lexize(PG_FUNCTION_ARGS) | |||||||
| 	char	   *in = (char *) PG_GETARG_POINTER(1); | 	char	   *in = (char *) PG_GETARG_POINTER(1); | ||||||
| 	Syn			key, | 	Syn			key, | ||||||
| 			   *found; | 			   *found; | ||||||
| 	char	  **res = NULL; | 	TSLexeme	  *res = NULL; | ||||||
|  |  | ||||||
| 	if (!PG_GETARG_INT32(2)) | 	if (!PG_GETARG_INT32(2)) | ||||||
| 		PG_RETURN_POINTER(NULL); | 		PG_RETURN_POINTER(NULL); | ||||||
| @@ -176,10 +176,9 @@ syn_lexize(PG_FUNCTION_ARGS) | |||||||
| 	if (!found) | 	if (!found) | ||||||
| 		PG_RETURN_POINTER(NULL); | 		PG_RETURN_POINTER(NULL); | ||||||
|  |  | ||||||
| 	res = palloc(sizeof(char *) * 2); | 	res = palloc(sizeof(TSLexeme) * 2); | ||||||
|  | 	memset(res,0,sizeof(TSLexeme) * 2); | ||||||
| 	res[0] = pstrdup(found->out); | 	res[0].lexeme = pstrdup(found->out); | ||||||
| 	res[1] = NULL; |  | ||||||
|  |  | ||||||
| 	PG_RETURN_POINTER(res); | 	PG_RETURN_POINTER(res); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -52,15 +52,15 @@ dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) { | |||||||
| HASINIT 	DictExample *d = (DictExample*)PG_GETARG_POINTER(0); | HASINIT 	DictExample *d = (DictExample*)PG_GETARG_POINTER(0); | ||||||
| 	char       *in = (char*)PG_GETARG_POINTER(1); | 	char       *in = (char*)PG_GETARG_POINTER(1); | ||||||
| 	char *txt = pnstrdup(in, PG_GETARG_INT32(2)); | 	char *txt = pnstrdup(in, PG_GETARG_INT32(2)); | ||||||
| 	char	**res=palloc(sizeof(char*)*2); | 	TSLexeme	*res=palloc(sizeof(TSLexeme*)*2); | ||||||
|  |  | ||||||
| 	/* Your INIT dictionary code */ | 	/* Your LEXIZE dictionary code */ | ||||||
| HASINIT 	if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) { | HASINIT 	if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) { | ||||||
| HASINIT 		pfree(txt); | HASINIT 		pfree(txt); | ||||||
| HASINIT 		res[0]=NULL; | HASINIT 		res[0].lexeme=NULL; | ||||||
| HASINIT 	} else  | HASINIT 	} else  | ||||||
| 		res[0]=txt; | 		res[0].lexeme=txt; | ||||||
| 	res[1]=NULL; | 	res[1].lexeme=NULL; | ||||||
|  |  | ||||||
| 	PG_RETURN_POINTER(res); | 	PG_RETURN_POINTER(res); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1119,17 +1119,32 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word, | |||||||
| 	return var; | 	return var; | ||||||
| } | } | ||||||
|  |  | ||||||
| char	  ** | TSLexeme * | ||||||
| NINormalizeWord(IspellDict * Conf, char *word) | NINormalizeWord(IspellDict * Conf, char *word) | ||||||
| { | { | ||||||
| 	char	  **res = NormalizeSubWord(Conf, word, 0); | 	char	  **res = NormalizeSubWord(Conf, word, 0); | ||||||
|  | 	TSLexeme *lcur=NULL, *lres=NULL; | ||||||
|  | 	u_int16_t NVariant=1; | ||||||
|  |  | ||||||
|  | 	if (res) { | ||||||
|  | 		char **ptr = res; | ||||||
|  | 		lcur = lres = (TSLexeme*)palloc( MAX_NORM * sizeof(TSLexeme) ); | ||||||
|  | 		while(*ptr) { | ||||||
|  | 			lcur->lexeme=*ptr; | ||||||
|  | 			lcur->flags=0; | ||||||
|  | 			lcur->nvariant = NVariant++; | ||||||
|  | 			lcur++; | ||||||
|  | 			ptr++; | ||||||
|  | 		} | ||||||
|  | 		lcur->lexeme=NULL; | ||||||
|  | 		pfree(res); | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	if (Conf->compoundcontrol != '\t') | 	if (Conf->compoundcontrol != '\t') | ||||||
| 	{ | 	{ | ||||||
| 		int			wordlen = strlen(word); | 		int			wordlen = strlen(word); | ||||||
| 		SplitVar   *ptr, | 		SplitVar   *ptr, | ||||||
| 				   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1); | 				   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1); | ||||||
| 		char	  **cur = res; |  | ||||||
| 		int			i; | 		int			i; | ||||||
|  |  | ||||||
| 		while (var) | 		while (var) | ||||||
| @@ -1140,30 +1155,31 @@ NINormalizeWord(IspellDict * Conf, char *word) | |||||||
|  |  | ||||||
| 				if (subres) | 				if (subres) | ||||||
| 				{ | 				{ | ||||||
| 					char	  **ptr = subres; | 					char	  **subptr = subres; | ||||||
|  |  | ||||||
| 					if (cur) | 					if ( !lcur ) | ||||||
| 					{ | 						lcur = lres = (TSLexeme*)palloc( MAX_NORM * sizeof(TSLexeme) ); | ||||||
| 						while (*cur) |  | ||||||
| 							cur++; |  | ||||||
| 					} |  | ||||||
| 					else |  | ||||||
| 						res = cur = (char **) palloc(MAX_NORM * sizeof(char *)); |  | ||||||
| 		 | 		 | ||||||
| 					for (i = 0; i < var->nstem - 1; i++) | 					while(*subptr) { | ||||||
| 					{ | 						for(i=0;i<var->nstem-1;i++) { | ||||||
| 						*cur = var->stem[i]; | 							lcur->lexeme=(subptr==subres) ? var->stem[ i ] : pstrdup(var->stem[ i ]); | ||||||
| 						cur++; | 							lcur->flags=0; | ||||||
|  | 							lcur->nvariant = NVariant; | ||||||
|  | 							lcur++; | ||||||
| 						} | 						} | ||||||
| 					while (*ptr) |  | ||||||
| 					{ | 						lcur->lexeme=*subptr; | ||||||
| 						*cur = *ptr; | 						lcur->flags=0; | ||||||
| 						cur++; | 						lcur->nvariant = NVariant; | ||||||
| 						ptr++; | 						lcur++; | ||||||
|  | 						subptr++; | ||||||
|  | 						NVariant++; | ||||||
| 					}	 | 					}	 | ||||||
| 					*cur = NULL; |  | ||||||
|  | 					lcur->lexeme=NULL; | ||||||
| 					pfree(subres); | 					pfree(subres); | ||||||
| 					var->stem[0] = NULL; | 					var->stem[0] = NULL; | ||||||
|  | 					pfree( var->stem[ var->nstem-1 ] );	 | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| @@ -1175,7 +1191,7 @@ NINormalizeWord(IspellDict * Conf, char *word) | |||||||
| 			var = ptr; | 			var = ptr; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	return res; | 	return lres; | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -3,9 +3,10 @@ | |||||||
|  |  | ||||||
| #include <sys/types.h> | #include <sys/types.h> | ||||||
| #include "regex/regex.h" | #include "regex/regex.h" | ||||||
| #include "regis.h" |  | ||||||
| #include "c.h" | #include "c.h" | ||||||
|  |  | ||||||
|  | #include "regis.h" | ||||||
|  | #include "dict.h" | ||||||
|   |   | ||||||
| struct SPNode; | struct SPNode; | ||||||
|  |  | ||||||
| @@ -116,7 +117,7 @@ typedef struct | |||||||
|  |  | ||||||
| }	IspellDict; | }	IspellDict; | ||||||
|  |  | ||||||
| char	  **NINormalizeWord(IspellDict * Conf, char *word); | TSLexeme	  *NINormalizeWord(IspellDict * Conf, char *word); | ||||||
| int			NIImportAffixes(IspellDict * Conf, const char *filename); | int			NIImportAffixes(IspellDict * Conf, const char *filename); | ||||||
| int			NIImportDictionary(IspellDict * Conf, const char *filename); | int			NIImportDictionary(IspellDict * Conf, const char *filename); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -265,6 +265,7 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we | |||||||
| { | { | ||||||
| 	int4		count = 0; | 	int4		count = 0; | ||||||
| 	PRSTEXT		prs; | 	PRSTEXT		prs; | ||||||
|  | 	uint32		variant, pos, cntvar=0, cntpos=0, cnt=0; | ||||||
|  |  | ||||||
| 	prs.lenwords = 32; | 	prs.lenwords = 32; | ||||||
| 	prs.curwords = 0; | 	prs.curwords = 0; | ||||||
| @@ -273,17 +274,39 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we | |||||||
|  |  | ||||||
| 	parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval); | 	parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval); | ||||||
|  |  | ||||||
| 	for (count = 0; count < prs.curwords; count++) | 	if ( prs.curwords>0 ) { | ||||||
| 	{ |  | ||||||
|  | 		while (count < prs.curwords) { | ||||||
|  | 			pos = prs.words[count].pos.pos; | ||||||
|  | 			cntvar=0; | ||||||
|  | 			while(count < prs.curwords && pos==prs.words[count].pos.pos) { | ||||||
|  | 				variant = prs.words[count].nvariant; | ||||||
|  |  | ||||||
|  | 				cnt=0; | ||||||
|  | 				while(count < prs.curwords && pos==prs.words[count].pos.pos && variant==prs.words[count].nvariant)	{ | ||||||
|  | 					 | ||||||
| 					pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight); | 					pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight); | ||||||
| 					pfree(prs.words[count].word); | 					pfree(prs.words[count].word); | ||||||
| 		if (count) | 					if ( cnt )  | ||||||
| 						pushquery(state, OPR, (int4) '&', 0, 0, 0); | 						pushquery(state, OPR, (int4) '&', 0, 0, 0); | ||||||
|  | 					cnt++; | ||||||
|  | 					count++; | ||||||
| 				} | 				} | ||||||
|  |  | ||||||
|  | 				if ( cntvar )  | ||||||
|  | 					pushquery(state, OPR, (int4) '|', 0, 0, 0); | ||||||
|  | 				cntvar++; | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			if (cntpos)  | ||||||
|  | 				pushquery(state, OPR, (int4) '&', 0, 0, 0); | ||||||
|  | 		 | ||||||
|  | 			cntpos++; | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		pfree(prs.words); | 		pfree(prs.words); | ||||||
|  |  | ||||||
| 	/* XXX */ | 	} else | ||||||
| 	if (prs.curwords == 0) |  | ||||||
| 		pushval_asis(state, VALSTOP, NULL, 0, 0); | 		pushval_asis(state, VALSTOP, NULL, 0, 0); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -321,10 +321,10 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen) | |||||||
| 		for (i = 0; i < cfg->map[type].len; i++) | 		for (i = 0; i < cfg->map[type].len; i++) | ||||||
| 		{ | 		{ | ||||||
| 			DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i])); | 			DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i])); | ||||||
| 			char	  **norms, | 			TSLexeme	  *norms, | ||||||
| 					  **ptr; | 					  *ptr; | ||||||
|  |  | ||||||
| 			norms = ptr = (char **) DatumGetPointer( | 			norms = ptr = (TSLexeme *) DatumGetPointer( | ||||||
| 													FunctionCall3( | 													FunctionCall3( | ||||||
| 													&(dict->lexize_info), | 													&(dict->lexize_info), | ||||||
| 									   PointerGetDatum(dict->dictionary), | 									   PointerGetDatum(dict->dictionary), | ||||||
| @@ -337,7 +337,7 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen) | |||||||
|  |  | ||||||
| 			prs->pos++;			/* set pos */ | 			prs->pos++;			/* set pos */ | ||||||
|  |  | ||||||
| 			while (*ptr) | 			while (ptr->lexeme) | ||||||
| 			{ | 			{ | ||||||
| 				if (prs->curwords == prs->lenwords) | 				if (prs->curwords == prs->lenwords) | ||||||
| 				{ | 				{ | ||||||
| @@ -345,8 +345,9 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen) | |||||||
| 					prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD)); | 					prs->words = (TSWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(TSWORD)); | ||||||
| 				} | 				} | ||||||
|  |  | ||||||
| 				prs->words[prs->curwords].len = strlen(*ptr); | 				prs->words[prs->curwords].len = strlen(ptr->lexeme); | ||||||
| 				prs->words[prs->curwords].word = *ptr; | 				prs->words[prs->curwords].word = ptr->lexeme; | ||||||
|  | 				prs->words[prs->curwords].nvariant = ptr->nvariant; | ||||||
| 				prs->words[prs->curwords].alen = 0; | 				prs->words[prs->curwords].alen = 0; | ||||||
| 				prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos); | 				prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos); | ||||||
| 				ptr++; | 				ptr++; | ||||||
| @@ -458,10 +459,10 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 | |||||||
| 		for (i = 0; i < cfg->map[type].len; i++) | 		for (i = 0; i < cfg->map[type].len; i++) | ||||||
| 		{ | 		{ | ||||||
| 			DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i])); | 			DictInfo   *dict = finddict(DatumGetObjectId(cfg->map[type].dict_id[i])); | ||||||
| 			char	  **norms, | 			TSLexeme	  *norms, | ||||||
| 					  **ptr; | 					  *ptr; | ||||||
|  |  | ||||||
| 			norms = ptr = (char **) DatumGetPointer( | 			norms = ptr = (TSLexeme *) DatumGetPointer( | ||||||
| 													FunctionCall3( | 													FunctionCall3( | ||||||
| 													&(dict->lexize_info), | 													&(dict->lexize_info), | ||||||
| 									   PointerGetDatum(dict->dictionary), | 									   PointerGetDatum(dict->dictionary), | ||||||
| @@ -472,10 +473,10 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4 | |||||||
| 			if (!norms)			/* dictionary doesn't know this lexem */ | 			if (!norms)			/* dictionary doesn't know this lexem */ | ||||||
| 				continue; | 				continue; | ||||||
|  |  | ||||||
| 			while (*ptr) | 			while (ptr->lexeme) | ||||||
| 			{ | 			{ | ||||||
| 				hlfinditem(prs, query, *ptr, strlen(*ptr)); | 				hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme)); | ||||||
| 				pfree(*ptr); | 				pfree(ptr->lexeme); | ||||||
| 				ptr++; | 				ptr++; | ||||||
| 			} | 			} | ||||||
| 			pfree(norms); | 			pfree(norms); | ||||||
|   | |||||||
| @@ -27,6 +27,7 @@ void		reset_cfg(void); | |||||||
| typedef struct | typedef struct | ||||||
| { | { | ||||||
| 	uint16		len; | 	uint16		len; | ||||||
|  | 	uint16		nvariant; | ||||||
| 	union | 	union | ||||||
| 	{ | 	{ | ||||||
| 		uint16		pos; | 		uint16		pos; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user