mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Text parser rewritten:
- supports multibyte encodings
- stricter rules for lexemes
- flex isn't used
Add:
- tsquery plainto_tsquery(text): function that makes a tsquery from plain text.
- &&, || and !! operations for tsquery, for combining a tsquery from its parts: 'foo & bar' || 'asd' => 'foo & bar | asd'
This commit is contained in:
@ -1,10 +1,147 @@
|
||||
#ifndef __PARSER_H__
|
||||
#define __PARSER_H__
|
||||
|
||||
/*
 * Lexer-style interface: a current-token buffer plus start/step/end calls.
 *
 * NOTE(review): the commit description attached to this change says flex is
 * no longer used, and these declarations look like the earlier lexer-based
 * interface.  The diff view this text was taken from has lost its +/-
 * markers, so it cannot be told from here whether the commit removed them --
 * confirm before relying on any of these symbols.
 */
extern char *token;
extern int	tokenlen;
int			tsearch2_yylex(void);
void		tsearch2_start_parse_str(char *, int);
void		tsearch2_end_parse(void);
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include "ts_locale.h"
|
||||
|
||||
/*
 * TParserState - states of the text parser's state machine.
 *
 * A value of this type is kept in TParserPosition.state, names the target
 * of a transition in TParserStateActionItem.tostate, and identifies the
 * owner of an action list in TParserStateAction.state.
 *
 * Only TPS_Base has an explicit value (0); every other enumerator takes the
 * next consecutive integer, so the declaration order below is significant
 * and must not be changed.  TPS_Null is the original "last state (fake
 * value)" marker and has to stay last; presumably it doubles as the number
 * of real states -- confirm against the transition tables, which are not
 * part of this header.
 *
 * Blank lines group enumerators by shared name prefix purely for
 * readability; the grouping is read off the identifiers, not the tables.
 */
typedef enum
{
	TPS_Base = 0,

	TPS_InUWord,
	TPS_InLatWord,
	TPS_InCyrWord,

	TPS_InUnsignedInt,
	TPS_InSignedIntFirst,
	TPS_InSignedInt,

	TPS_InSpace,

	TPS_InUDecimalFirst,
	TPS_InUDecimal,
	TPS_InDecimalFirst,
	TPS_InDecimal,

	TPS_InVersionFirst,
	TPS_InVersion,

	TPS_InMantissaFirst,
	TPS_InMantissaSign,
	TPS_InMantissa,

	TPS_InHTMLEntityFirst,
	TPS_InHTMLEntity,
	TPS_InHTMLEntityNumFirst,
	TPS_InHTMLEntityNum,
	TPS_InHTMLEntityEnd,

	TPS_InTagFirst,
	TPS_InTagCloseFirst,
	TPS_InTag,
	TPS_InTagEscapeK,
	TPS_InTagEscapeKK,
	TPS_InTagBackSleshed,
	TPS_InTagEnd,

	TPS_InCommentFirst,
	TPS_InCommentLast,
	TPS_InComment,
	TPS_InCloseCommentFirst,
	TPS_InCloseCommentLast,
	TPS_InCommentEnd,

	TPS_InHostFirstDomen,
	TPS_InHostDomenSecond,
	TPS_InHostDomen,
	TPS_InPortFirst,
	TPS_InPort,
	TPS_InHostFirstAN,
	TPS_InHost,

	TPS_InEmail,

	TPS_InFileFirst,
	TPS_InFile,
	TPS_InFileNext,

	TPS_InURIFirst,
	TPS_InURIStart,
	TPS_InURI,
	TPS_InFURL,

	TPS_InProtocolFirst,
	TPS_InProtocolSecond,
	TPS_InProtocolEnd,

	TPS_InHyphenLatWordFirst,
	TPS_InHyphenLatWord,
	TPS_InHyphenCyrWordFirst,
	TPS_InHyphenCyrWord,
	TPS_InHyphenUWordFirst,
	TPS_InHyphenUWord,
	TPS_InHyphenValueFirst,
	TPS_InHyphenValue,
	TPS_InHyphenValueExact,

	TPS_InParseHyphen,
	TPS_InParseHyphenHyphen,
	TPS_InHyphenCyrWordPart,
	TPS_InHyphenLatWordPart,
	TPS_InHyphenUWordPart,
	TPS_InHyphenUnsignedInt,
	TPS_InHDecimalPartFirst,
	TPS_InHDecimalPart,
	TPS_InHVersionPartFirst,
	TPS_InHVersionPart,

	TPS_Null					/* last state (fake value) */
} TParserState;
|
||||
|
||||
/*
 * Forward declaration: the callback typedefs below take a struct TParser *
 * before the full structure is defined further down in this header.
 */
struct TParser;

/*
 * Character test callback; stored in TParserStateActionItem.isclass.
 * Per the original note, any of the p_is* functions except p_iseq fits
 * here (those functions are defined outside this header).
 */
typedef int (*TParserCharTest) (struct TParser *);

/*
 * Special handler for special cases; stored in
 * TParserStateActionItem.special.
 */
typedef void (*TParserSpecial) (struct TParser *);
|
||||
|
||||
typedef struct {
|
||||
TParserCharTest isclass;
|
||||
char c;
|
||||
uint16 flags;
|
||||
TParserState tostate;
|
||||
int type;
|
||||
TParserSpecial special;
|
||||
} TParserStateActionItem;
|
||||
|
||||
typedef struct {
|
||||
TParserState state;
|
||||
TParserStateActionItem *action;
|
||||
} TParserStateAction;
|
||||
|
||||
typedef struct TParserPosition {
|
||||
int posbyte; /* position of parser in bytes */
|
||||
int poschar; /* osition of parser in characters */
|
||||
int charlen; /* length of current char */
|
||||
int lenbytelexeme;
|
||||
int lencharlexeme;
|
||||
TParserState state;
|
||||
struct TParserPosition *prev;
|
||||
int flags;
|
||||
TParserStateActionItem *pushedAtAction;
|
||||
} TParserPosition;
|
||||
|
||||
typedef struct TParser {
|
||||
/* string and position information */
|
||||
char *str; /* multibyte string */
|
||||
int lenstr; /* length of mbstring */
|
||||
wchar_t *wstr; /* wide character string */
|
||||
int lenwstr; /* length of wsting */
|
||||
|
||||
/* State of parse */
|
||||
int charmaxlen;
|
||||
bool usewide;
|
||||
TParserPosition *state;
|
||||
bool ignore;
|
||||
bool wanthost;
|
||||
|
||||
/* silly char */
|
||||
char c;
|
||||
|
||||
/* out */
|
||||
char *lexeme;
|
||||
int lenbytelexeme;
|
||||
int lencharlexeme;
|
||||
int type;
|
||||
|
||||
} TParser;
|
||||
|
||||
|
||||
/*
 * Parser entry points.  The definitions are not part of this header, so
 * the notes below are read off the signatures and the TParser fields and
 * should be confirmed against the implementation.
 *
 * TParserInit  - returns a TParser for the given text; the arguments
 *                presumably correspond to TParser.str and TParser.lenstr
 *                (string and its length).
 * TParserGet   - returns bool; presumably true when another lexeme was
 *                stored in the "out" fields of *prs.
 * TParserClose - presumably releases what TParserInit set up.
 */
TParser    *TParserInit(char *str, int len);
bool		TParserGet(TParser *prs);
void		TParserClose(TParser *prs);
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user