mirror of
https://github.com/postgres/postgres.git
synced 2025-07-24 14:22:24 +03:00
GIN and GiST indexes utilizing pg_trgm's opclasses store sorted
trigrams within index tuples. When comparing and sorting each trigram,
pg_trgm treats each character as a 'char[3]' type in C. However, the
char type in C can be interpreted as either signed char or unsigned
char, depending on the platform, if the signedness is not explicitly
specified. Consequently, during replication between different CPU
architectures, there was an issue where index scans on standby servers
could not locate matching index tuples due to the differing treatment
of character signedness.
This change introduces comparison functions for trgm that explicitly
handle signed char and unsigned char. The appropriate comparison
function will be dynamically selected based on the character
signedness stored in the control file. Therefore, upgraded clusters
can utilize the indexes without rebuilding, provided the cluster
upgrade occurs on platforms with the same character signedness as the
original cluster initialization.
The default char signedness information was introduced in 44fe30fdab,
so no backpatch.
Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/CB11ADBC-0C3F-4FE0-A678-666EE80CBB07%40amazon.com
132 lines
4.5 KiB
C
132 lines
4.5 KiB
C
/*
 * contrib/pg_trgm/trgm.h
 */
|
|
#ifndef __TRGM_H__
|
|
#define __TRGM_H__
|
|
|
|
#include "access/gist.h"
|
|
#include "access/itup.h"
|
|
#include "access/stratnum.h"
|
|
#include "storage/bufpage.h"
|
|
|
|
/*
 * Options ... but note that trgm_regexp.c effectively assumes these values
 * of LPADDING and RPADDING.
 *
 * NOTE(review): presumably the number of padding characters added before
 * (LPADDING) and after (RPADDING) each word -- confirm against the users of
 * these macros.
 */
#define LPADDING		2
#define RPADDING		1
|
|
/*
 * Caution: IGNORECASE macro means that trigrams are case-insensitive.
 * If this macro is disabled, the ~* and ~~* operators must be removed from
 * the operator classes, because we can't handle case-insensitive wildcard
 * search with case-sensitive trigrams.  Failure to do this will result in
 * "cannot handle ~*(~~*) with case-sensitive trigrams" errors.
 */
#define IGNORECASE

/* Selects which CALCSML formula is compiled (see "#ifdef DIVUNION") */
#define DIVUNION
|
|
|
|
/* operator strategy numbers (consecutive values 1..11) */
#define SimilarityStrategyNumber			1
#define DistanceStrategyNumber				2
#define LikeStrategyNumber					3
#define ILikeStrategyNumber					4
#define RegExpStrategyNumber				5
#define RegExpICaseStrategyNumber			6
#define WordSimilarityStrategyNumber		7
#define WordDistanceStrategyNumber			8
#define StrictWordSimilarityStrategyNumber	9
#define StrictWordDistanceStrategyNumber	10
#define EqualStrategyNumber					11
|
|
|
|
/*
 * A trigram: three bytes held as plain "char".  The signedness of plain
 * char is platform-dependent, so trigram ordering is done through the
 * CMPTRGM function pointer rather than by comparing the bytes directly.
 */
typedef char trgm[3];
|
|
|
|
/*
 * Copy one trigram (exactly three bytes) from b to a.
 *
 * Each argument is evaluated exactly once, so an expression with side
 * effects such as "ptr++" can be passed safely.
 */
#define CPTRGM(a,b) do { \
	char	   *cptrgm_dst_ = (char *) (a); \
	const char *cptrgm_src_ = (const char *) (b); \
	cptrgm_dst_[0] = cptrgm_src_[0]; \
	cptrgm_dst_[1] = cptrgm_src_[1]; \
	cptrgm_dst_[2] = cptrgm_src_[2]; \
} while(0)
|
|
/*
 * Trigram comparison function.  This is a pointer so that the signed-char or
 * unsigned-char comparator can be selected at run time from the char
 * signedness recorded in the control file.
 * NOTE(review): the signature matches a qsort()-style comparator; confirm
 * the return-value convention at the definition.
 */
extern int (*CMPTRGM) (const void *a, const void *b);
|
|
|
|
/* Delegates to t_isalnum() */
#define ISWORDCHR(c)	(t_isalnum(c))

/*
 * True if the byte at "a" is 7-bit ASCII and is alphanumeric or a space.
 * The range check is spelled "< 0x80" instead of isascii(), which is not
 * part of ISO C.  Note that "a" is evaluated more than once.
 */
#define ISPRINTABLECHAR(a)	( (*(unsigned char*)(a) < 0x80) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )

/* True if all three bytes of trigram "t" satisfy ISPRINTABLECHAR */
#define ISPRINTABLETRGM(t)	( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
|
|
|
|
/* Both macros take a pointer and test the character it points to */
#define ISESCAPECHAR(x) (*(x) == '\\')	/* Wildcard escape character */
#define ISWILDCARDCHAR(x) (*(x) == '_' || *(x) == '%')	/* Wildcard
														 * meta-character */
|
|
|
|
typedef struct
|
|
{
|
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
|
uint8 flag;
|
|
char data[FLEXIBLE_ARRAY_MEMBER];
|
|
} TRGM;
|
|
|
|
/* Header size of a TRGM value: the varlena header plus the one-byte flag */
#define TRGMHDRSIZE (VARHDRSZ + sizeof(uint8))
|
|
|
|
/* gist */
#define SIGLEN_DEFAULT	(sizeof(int) * 3)	/* signature length, in bytes */
#define SIGLEN_MAX		GISTMaxIndexKeySize
#define BITBYTE 8							/* bits per signature byte */

/* One less than the number of bits in a siglen-byte signature */
#define SIGLENBIT(siglen) ((siglen) * BITBYTE - 1)	/* see makesign */
|
|
|
|
/* Byte pointer into a signature bit vector (see GETBYTE and GETSIGN) */
typedef char *BITVECP;
|
|
|
|
/*
 * Loop header visiting byte indexes 0 .. siglen-1.  The loop variable "i"
 * is not declared here; the caller must have one in scope.
 */
#define LOOPBYTE(siglen) \
			for (i = 0; i < (siglen); i++)
|
|
|
|
/*
 * Bit-vector helpers.  "x" points at the start of a signature and "i" is a
 * bit index: bit i lives in byte i / BITBYTE at position i % BITBYTE.
 *
 * CLRBIT and SETBIT are fully parenthesized so they behave as a single
 * expression wherever they are expanded.
 */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
/* Bit i of the byte value x; taken as unsigned so no negative value is shifted */
#define GETBITBYTE(x,i) ( (((unsigned char)(x)) >> (i)) & 0x01 )
#define CLRBIT(x,i)   ( GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) )
#define SETBIT(x,i)   ( GETBYTE(x,i) |=  ( 0x01 << ( (i) % BITBYTE ) ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
|
|
|
|
/* Map val onto a bit index in the range 0 .. SIGLENBIT(siglen)-1 */
#define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
/* Set the bit selected by HASHVAL(val, siglen) in signature "sign" */
#define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
|
|
|
|
/* Bits of TRGM.flag, tested by ISARRKEY(), ISSIGNKEY() and ISALLTRUE() */
#define ARRKEY			0x01
#define SIGNKEY			0x02
#define ALLISTRUE		0x04
|
|
|
|
/*
 * Flag tests on a pointer to a TRGM value.  Each yields the masked flag
 * bits (nonzero when set), not a normalized 0/1.  The argument is
 * parenthesized before the cast so that pointer expressions such as
 * "p + off" are converted as a whole.
 */
#define ISARRKEY(x) ( ((TRGM*)(x))->flag & ARRKEY )
#define ISSIGNKEY(x)	( ((TRGM*)(x))->flag & SIGNKEY )
#define ISALLTRUE(x)	( ((TRGM*)(x))->flag & ALLISTRUE )
|
|
|
|
/*
 * Total size of a TRGM value: the header, plus len trigrams when ARRKEY is
 * set, nothing when ALLISTRUE is set, and len bytes otherwise.
 */
#define CALCGTSIZE(flag, len) ( TRGMHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(trgm)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) )
/*
 * Payload accessors: the bytes following the header, viewed as a signature
 * or as a trigram array.  The argument is parenthesized before the cast so
 * that the byte offset is applied to the whole pointer expression.
 */
#define GETSIGN(x)		( (BITVECP)( (char*)(x)+TRGMHDRSIZE ) )
#define GETARR(x)		( (trgm*)( (char*)(x)+TRGMHDRSIZE ) )
/* Number of whole trigrams in the payload, derived from VARSIZE(x) */
#define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
|
|
|
|
/*
 * If DIVUNION is defined then similarity formula is:
 * count / (len1 + len2 - count)
 * else if DIVUNION is not defined then similarity formula is:
 * count / max(len1, len2)
 *
 * The expansion is wrapped in parentheses so that the macro is a single
 * operand in any surrounding expression (e.g. "x / CALCSML(...)").
 */
#ifdef DIVUNION
#define CALCSML(count, len1, len2) (((float4) (count)) / ((float4) ((len1) + (len2) - (count))))
#else
#define CALCSML(count, len1, len2) (((float4) (count)) / ((float4) (((len1) > (len2)) ? (len1) : (len2))))
#endif
|
|
|
|
/*
 * Opaque here: only the struct tag is declared, no members.  Handed back
 * through the "graph" argument of createTrgmNFA() and taken by
 * trigramsMatchGraph().
 */
typedef struct TrgmPackedGraph TrgmPackedGraph;
|
|
|
|
/* Threshold variables; declared here, defined in one of the .c files */
extern double similarity_threshold;
extern double word_similarity_threshold;
extern double strict_word_similarity_threshold;
|
|
|
|
extern double index_strategy_get_limit(StrategyNumber strategy);
|
|
extern uint32 trgm2int(trgm *ptr);
|
|
extern void compact_trigram(trgm *tptr, char *str, int bytelen);
|
|
extern TRGM *generate_trgm(char *str, int slen);
|
|
extern TRGM *generate_wildcard_trgm(const char *str, int slen);
|
|
extern float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact);
|
|
extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
|
|
extern bool *trgm_presence_map(TRGM *query, TRGM *key);
|
|
extern TRGM *createTrgmNFA(text *text_re, Oid collation,
|
|
TrgmPackedGraph **graph, MemoryContext rcontext);
|
|
extern bool trigramsMatchGraph(TrgmPackedGraph *graph, bool *check);
|
|
|
|
#endif /* __TRGM_H__ */
|