mirror of
https://github.com/postgres/postgres.git
synced 2025-07-31 22:04:40 +03:00
Support indexing of regular-expression searches in contrib/pg_trgm.
This works by extracting trigrams from the given regular expression, in generally the same spirit as the previously-existing support for LIKE searches, though of course the details are far more complicated. Currently, only GIN indexes are supported. We might be able to make it work with GiST indexes later. The implementation includes adding API functions to backend/regex/ to provide a view of the search NFA created from a regular expression. These functions are meant to be generic enough to be supportable in a standalone version of the regex library, should that ever happen. Alexander Korotkov, reviewed by Heikki Linnakangas and Tom Lane
This commit is contained in:
@ -7,18 +7,20 @@
|
||||
#include "access/gist.h"
|
||||
#include "access/itup.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "utils/builtins.h"
|
||||
|
||||
/* options */
|
||||
/*
|
||||
* Options ... but note that trgm_regexp.c effectively assumes these values
|
||||
* of LPADDING and RPADDING.
|
||||
*/
|
||||
#define LPADDING 2
|
||||
#define RPADDING 1
|
||||
#define KEEPONLYALNUM
|
||||
/*
|
||||
* Caution: IGNORECASE macro means that trigrams are case-insensitive.
|
||||
* If this macro is disabled, the ~~* operator must be removed from the
|
||||
* operator classes, because we can't handle case-insensitive wildcard search
|
||||
* with case-sensitive trigrams. Failure to do this will result in "cannot
|
||||
* handle ~~* with case-sensitive trigrams" errors.
|
||||
* If this macro is disabled, the ~* and ~~* operators must be removed from
|
||||
* the operator classes, because we can't handle case-insensitive wildcard
|
||||
* search with case-sensitive trigrams. Failure to do this will result in
|
||||
* "cannot handle ~*(~~*) with case-sensitive trigrams" errors.
|
||||
*/
|
||||
#define IGNORECASE
|
||||
#define DIVUNION
|
||||
@ -28,6 +30,8 @@
|
||||
#define DistanceStrategyNumber 2
|
||||
#define LikeStrategyNumber 3
|
||||
#define ILikeStrategyNumber 4
|
||||
#define RegExpStrategyNumber 5
|
||||
#define RegExpICaseStrategyNumber 6
|
||||
|
||||
|
||||
typedef char trgm[3];
|
||||
@ -42,11 +46,11 @@ typedef char trgm[3];
|
||||
*(((char*)(a))+2) = *(((char*)(b))+2); \
|
||||
} while(0);
|
||||
|
||||
uint32 trgm2int(trgm *ptr);
|
||||
|
||||
#ifdef KEEPONLYALNUM
|
||||
#define ISWORDCHR(c) (t_isalpha(c) || t_isdigit(c))
|
||||
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
|
||||
#else
|
||||
#define ISWORDCHR(c) (!t_isspace(c))
|
||||
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
|
||||
#endif
|
||||
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
|
||||
@ -99,11 +103,18 @@ typedef char *BITVECP;
|
||||
#define GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) )
|
||||
#define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
|
||||
|
||||
typedef struct TrgmPackedGraph TrgmPackedGraph;
|
||||
|
||||
extern float4 trgm_limit;
|
||||
|
||||
TRGM *generate_trgm(char *str, int slen);
|
||||
TRGM *generate_wildcard_trgm(const char *str, int slen);
|
||||
float4 cnt_sml(TRGM *trg1, TRGM *trg2);
|
||||
bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
|
||||
extern uint32 trgm2int(trgm *ptr);
|
||||
extern void compact_trigram(trgm *tptr, char *str, int bytelen);
|
||||
extern TRGM *generate_trgm(char *str, int slen);
|
||||
extern TRGM *generate_wildcard_trgm(const char *str, int slen);
|
||||
extern float4 cnt_sml(TRGM *trg1, TRGM *trg2);
|
||||
extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
|
||||
extern TRGM *createTrgmNFA(text *text_re, TrgmPackedGraph **graph,
|
||||
Oid collation);
|
||||
extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check);
|
||||
|
||||
#endif /* __TRGM_H__ */
|
||||
|
Reference in New Issue
Block a user