mirror of
https://github.com/postgres/postgres.git
synced 2025-07-31 22:04:40 +03:00
Add word_similarity to pg_trgm contrib module.
This patch introduces the concept of similarity between a string and just a word from another string. The version of the extension is not changed because 1.2 was already introduced in the 9.6 release cycle, so there has not yet been a public release of that version. Author: Alexander Korotkov, Artur Zakirov
This commit is contained in:
@ -26,13 +26,14 @@
|
||||
/* When defined, CALCSML uses the count / (len1 + len2 - count) formula. */
#define DIVUNION
|
||||
|
||||
/*
 * Operator strategy numbers.
 *
 * Each constant is defined exactly once.  Values 1-6 are unchanged;
 * 7 and 8 are the word-similarity and word-distance entries.
 */
#define SimilarityStrategyNumber		1
#define DistanceStrategyNumber			2
#define LikeStrategyNumber				3
#define ILikeStrategyNumber				4
#define RegExpStrategyNumber			5
#define RegExpICaseStrategyNumber		6
#define WordSimilarityStrategyNumber	7
#define WordDistanceStrategyNumber		8
|
||||
|
||||
/* A trgm is a fixed-size array of three chars. */
typedef char trgm[3];
|
||||
|
||||
@ -103,15 +104,28 @@ typedef char *BITVECP;
|
||||
/*
 * GETARR(x)   - pointer to the trgm array that starts TRGMHDRSIZE bytes
 *               past the address x.
 * ARRNELEM(x) - number of trgm entries in x: VARSIZE(x) minus the header
 *               size, divided by sizeof(trgm).
 *
 * The argument of GETARR is parenthesized before the cast so that an
 * expression argument such as (p + 1) is converted as a whole, rather
 * than having the cast bind to its first operand only.
 */
#define GETARR(x)		( (trgm *) ( (char *) (x) + TRGMHDRSIZE ) )
#define ARRNELEM(x)		( ( VARSIZE(x) - TRGMHDRSIZE ) / sizeof(trgm) )
|
||||
|
||||
/*
 * Similarity formula, chosen at compile time:
 *
 *	 DIVUNION defined:		count / (len1 + len2 - count)
 *	 DIVUNION not defined:	count / max(len1, len2)
 *
 * The entire expansion is wrapped in parentheses so the macro acts as a
 * single operand inside a larger expression (e.g. "x / CALCSML(...)").
 * Arguments can be expanded more than once, so do not pass expressions
 * with side effects.
 */
#ifdef DIVUNION
#define CALCSML(count, len1, len2) \
	(((float4) (count)) / ((float4) ((len1) + (len2) - (count))))
#else
#define CALCSML(count, len1, len2) \
	(((float4) (count)) / ((float4) (((len1) > (len2)) ? (len1) : (len2))))
#endif
|
||||
|
||||
/* Forward declaration only; the struct's members are not declared here. */
typedef struct TrgmPackedGraph TrgmPackedGraph;
|
||||
|
||||
extern double similarity_threshold;
|
||||
extern double word_similarity_threshold;
|
||||
|
||||
extern uint32 trgm2int(trgm *ptr);
|
||||
extern void compact_trigram(trgm *tptr, char *str, int bytelen);
|
||||
extern TRGM *generate_trgm(char *str, int slen);
|
||||
extern TRGM *generate_wildcard_trgm(const char *str, int slen);
|
||||
extern float4 cnt_sml(TRGM *trg1, TRGM *trg2);
|
||||
extern float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact);
|
||||
extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
|
||||
extern bool *trgm_presence_map(TRGM *query, TRGM *key);
|
||||
extern TRGM *createTrgmNFA(text *text_re, Oid collation,
|
||||
|
Reference in New Issue
Block a user