mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	It required some changes to the lexize algorithm, but the interface with dictionaries stays compatible with old dictionaries. Funded by Georgia Public Library Service and LibLime, Inc.
		
			
				
	
	
		
			112 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			112 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* $PostgreSQL: pgsql/contrib/tsearch2/dict.h,v 1.7 2006/05/31 14:05:31 teodor Exp $ */
 | 
						|
 | 
						|
#ifndef __DICT_H__
 | 
						|
#define __DICT_H__
 | 
						|
#include "postgres.h"
 | 
						|
#include "fmgr.h"
 | 
						|
#include "ts_cfg.h"
 | 
						|
 | 
						|
/*
 * StopList: the state handed to the stop-list routines declared after it
 * (sortstoplist, freestoplist, readstoplist, searchstoplist).
 */
typedef struct
{
	/* presumably the number of entries reachable through stop -- TODO confirm */
	int			len;

	/* pointer to char * entries; presumably an array of len C strings */
	char	  **stop;

	/*
	 * Function pointer taking a char * and returning a char *.  When and how
	 * it is applied is not visible in this header -- verify against the
	 * stop-list implementation.
	 */
	char	   *(*wordop) (char *);
}	StopList;
 | 
						|
 | 
						|
void		sortstoplist(StopList * s);
 | 
						|
void		freestoplist(StopList * s);
 | 
						|
void		readstoplist(text *in, StopList * s);
 | 
						|
bool		searchstoplist(StopList * s, char *key);
 | 
						|
 | 
						|
typedef struct
 | 
						|
{
 | 
						|
	Oid			dict_id;
 | 
						|
	FmgrInfo	lexize_info;
 | 
						|
	void	   *dictionary;
 | 
						|
}	DictInfo;
 | 
						|
 | 
						|
void		init_dict(Oid id, DictInfo * dict);
 | 
						|
DictInfo   *finddict(Oid id);
 | 
						|
Oid			name2id_dict(text *name);
 | 
						|
void		reset_dict(void);
 | 
						|
 | 
						|
/*
 * DictSubState: state exchanged with a dictionary across successive lexize
 * calls.  Fields marked "in" are set for the dictionary, fields marked
 * "out" are set by it.
 */
typedef struct
{
	/* in: signals to lexize_info that the end of the text has been reached */
	bool		isend;

	/* out: dict wants next lexeme */
	bool		getnext;

	/* internal dict state between calls with getnext == true */
	void	   *private;
} DictSubState;
 | 
						|
 | 
						|
/* simple parser of cfg string */

/*
 * Map: one key/value pair, both held as char pointers.
 */
typedef struct
{
	char	   *key;
	char	   *value;
}	Map;
 | 
						|
 | 
						|
/*
 * Declared under the "simple parser of cfg string" heading together with
 * Map.  Presumably parses the text value in and hands the resulting Map
 * entries back through *m -- TODO confirm against the implementation.
 */
void		parse_cfgdict(text *in, Map ** m);
 | 
						|
 | 
						|
/* return struct for any lexize function */
 | 
						|
typedef struct
 | 
						|
{
 | 
						|
	/*
 | 
						|
	 * number of variant of split word , for example Word 'fotballklubber'
 | 
						|
	 * (norwegian) has two varian to split: ( fotball, klubb ) and ( fot,
 | 
						|
	 * ball, klubb ). So, dictionary should return: 
 | 
						|
	 * nvariant	lexeme 
 | 
						|
	 *   1 		fotball 
 | 
						|
	 *   1	   	klubb 
 | 
						|
	 *	 2		fot 
 | 
						|
	 *	 2		ball 
 | 
						|
	 *   2		klubb
 | 
						|
	 */
 | 
						|
	uint16		nvariant;
 | 
						|
 | 
						|
	uint16		flags;
 | 
						|
 | 
						|
	/* C-string */
 | 
						|
	char	   *lexeme;
 | 
						|
}	TSLexeme;
 | 
						|
 | 
						|
/* presumably a bit for TSLexeme.flags -- TODO confirm against the lexize code */
#define TSL_ADDPOS		0x01
 | 
						|
 | 
						|
 | 
						|
/*
 * Lexize subsystem
 */
 | 
						|
 | 
						|
/*
 * ParsedLex: one node of a singly linked chain (see next).  The type, lemm
 * and lenlemm fields mirror the parameters of LexizeAddLemm().
 */
typedef struct ParsedLex
{
	/* integer type code; presumably assigned by the parser -- TODO confirm */
	int			type;

	/* pointer to the lemm's characters */
	char	   *lemm;

	/* presumably the length of lemm -- TODO confirm */
	int			lenlemm;

	/* NOTE(review): meaning is not visible in this header -- confirm in the lexize code */
	bool		resfollow;

	/* following node in the chain */
	struct ParsedLex *next;
} ParsedLex;
 | 
						|
 | 
						|
typedef struct ListParsedLex {
 | 
						|
	ParsedLex	*head;
 | 
						|
	ParsedLex	*tail;
 | 
						|
} ListParsedLex;
 | 
						|
 | 
						|
typedef struct {
 | 
						|
    TSCfgInfo       *cfg;
 | 
						|
    Oid             curDictId;
 | 
						|
    int             posDict;
 | 
						|
    DictSubState    dictState;
 | 
						|
    ParsedLex       *curSub;
 | 
						|
	ListParsedLex	towork;   /* current list to work */
 | 
						|
	ListParsedLex	waste;    /* list of lexemes that already lexized */
 | 
						|
 | 
						|
	/* fields to store last variant to lexize (basically, thesaurus 
 | 
						|
	   or similar to, which wants  several lexemes */	
 | 
						|
	   
 | 
						|
	ParsedLex		*lastRes;
 | 
						|
	TSLexeme		*tmpRes;
 | 
						|
} LexizeData;
 | 
						|
 | 
						|
 | 
						|
void LexizeInit(LexizeData *ld, TSCfgInfo *cfg);
 | 
						|
void LexizeAddLemm(LexizeData *ld, int type, char *lemm, int lenlemm);
 | 
						|
TSLexeme* LexizeExec(LexizeData *ld, ParsedLex **correspondLexem);
 | 
						|
 | 
						|
#endif
 |