Improve support for agglutinative languages (queries with compound words).

regression=# select to_tsquery( '\'fotballklubber\''); to_tsquery ------------------------------------------------ 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb' (1 row) So, the interface to dictionaries has changed: the lexize method of a dictionary should return a pointer to an array of TSLexeme structs instead of char**. The last element should have TSLexeme->lexeme == NULL. typedef struct { /* number of the split variant of the word; for example, the word 'fotballklubber' (Norwegian) has two variants of splitting: ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary should return: nvariant lexeme 1 fotball 1 klubb 2 fot 2 ball 2 klubb */ uint16 nvariant; /* currently unused */ uint16 flags; /* C-string */ char *lexeme; } TSLexeme;
2025-07-30 11:03:19 +03:00 · 2005-01-25 15:24:38 +00:00
parent d314616d12
commit 324300bc7c
12 changed files with 146 additions and 85 deletions
--- a/contrib/tsearch2/ispell/spell.c
+++ b/contrib/tsearch2/ispell/spell.c
@ -1119,17 +1119,32 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
 	return var;
 }

-char	  **
+TSLexeme *
 NINormalizeWord(IspellDict * Conf, char *word)
 {
 	char	  **res = NormalizeSubWord(Conf, word, 0);
+	TSLexeme *lcur=NULL, *lres=NULL;
+	u_int16_t NVariant=1;
+
+	if (res) {
+		char **ptr = res;
+		lcur = lres = (TSLexeme*)palloc( MAX_NORM * sizeof(TSLexeme) );
+		while(*ptr) {
+			lcur->lexeme=*ptr;
+			lcur->flags=0;
+			lcur->nvariant = NVariant++;
+			lcur++;
+			ptr++;
+		}
+		lcur->lexeme=NULL;
+		pfree(res);
+	}

 	if (Conf->compoundcontrol != '\t')
 	{
 		int			wordlen = strlen(word);
 		SplitVar   *ptr,
 				   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
-		char	  **cur = res;
 		int			i;

 		while (var)
@ -1140,30 +1155,31 @@ NINormalizeWord(IspellDict * Conf, char *word)

 				if (subres)
 				{
-					char	  **ptr = subres;
+					char	  **subptr = subres;

-					if (cur)
-					{
-						while (*cur)
-							cur++;
-					}
-					else
-						res = cur = (char **) palloc(MAX_NORM * sizeof(char *));
+					if ( !lcur )
+						lcur = lres = (TSLexeme*)palloc( MAX_NORM * sizeof(TSLexeme) );
+		
+					while(*subptr) {
+						for(i=0;i<var->nstem-1;i++) {
+							lcur->lexeme=(subptr==subres) ? var->stem[ i ] : pstrdup(var->stem[ i ]);
+							lcur->flags=0;
+							lcur->nvariant = NVariant;
+							lcur++;
+						}

-					for (i = 0; i < var->nstem - 1; i++)
-					{
-						*cur = var->stem[i];
-						cur++;
-					}
-					while (*ptr)
-					{
-						*cur = *ptr;
-						cur++;
-						ptr++;
-					}
-					*cur = NULL;
+						lcur->lexeme=*subptr;
+						lcur->flags=0;
+						lcur->nvariant = NVariant;
+						lcur++;
+						subptr++;
+						NVariant++;
+					}	
+
+					lcur->lexeme=NULL;
 					pfree(subres);
 					var->stem[0] = NULL;
+					pfree( var->stem[ var->nstem-1 ] );	
 				}
 			}

@ -1175,7 +1191,7 @@ NINormalizeWord(IspellDict * Conf, char *word)
 			var = ptr;
 		}
 	}
-	return res;
+	return lres;
 }


--- a/contrib/tsearch2/ispell/spell.h
+++ b/contrib/tsearch2/ispell/spell.h
@ -3,10 +3,11 @@

 #include <sys/types.h>
 #include "regex/regex.h"
-#include "regis.h"
 #include "c.h"

-
+#include "regis.h"
+#include "dict.h"
+ 
 struct SPNode;


@ -116,7 +117,7 @@ typedef struct

 }	IspellDict;

-char	  **NINormalizeWord(IspellDict * Conf, char *word);
+TSLexeme	  *NINormalizeWord(IspellDict * Conf, char *word);
 int			NIImportAffixes(IspellDict * Conf, const char *filename);
 int			NIImportDictionary(IspellDict * Conf, const char *filename);