mirror of
https://github.com/postgres/postgres.git
synced 2025-06-13 07:41:39 +03:00
Support LIKE and ILIKE index searches via contrib/pg_trgm indexes.
Unlike Btree-based LIKE optimization, this works for non-left-anchored search patterns. The effectiveness of the search depends on how many trigrams can be extracted from the pattern. (The worst case, with no trigrams, degrades to a full-table scan, so this isn't a panacea. But it can be very useful.) Alexander Korotkov, reviewed by Jan Urbanski
This commit is contained in:
@ -3467,3 +3467,93 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
|
|||||||
qwertyu0988 | 0.333333
|
qwertyu0988 | 0.333333
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
create table test2(t text);
|
||||||
|
insert into test2 values ('abcdef');
|
||||||
|
insert into test2 values ('quark');
|
||||||
|
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
||||||
|
set enable_seqscan=off;
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t like '%BCD%';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------
|
||||||
|
Bitmap Heap Scan on test2
|
||||||
|
Recheck Cond: (t ~~ '%BCD%'::text)
|
||||||
|
-> Bitmap Index Scan on test2_idx_gin
|
||||||
|
Index Cond: (t ~~ '%BCD%'::text)
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t ilike '%BCD%';
|
||||||
|
QUERY PLAN
|
||||||
|
-------------------------------------------
|
||||||
|
Bitmap Heap Scan on test2
|
||||||
|
Recheck Cond: (t ~~* '%BCD%'::text)
|
||||||
|
-> Bitmap Index Scan on test2_idx_gin
|
||||||
|
Index Cond: (t ~~* '%BCD%'::text)
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
select * from test2 where t like '%BCD%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t like '%bcd%';
|
||||||
|
t
|
||||||
|
--------
|
||||||
|
abcdef
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t ilike '%BCD%';
|
||||||
|
t
|
||||||
|
--------
|
||||||
|
abcdef
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t ilike 'qua%';
|
||||||
|
t
|
||||||
|
-------
|
||||||
|
quark
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
drop index test2_idx_gin;
|
||||||
|
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
||||||
|
set enable_seqscan=off;
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t like '%BCD%';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------
|
||||||
|
Index Scan using test2_idx_gist on test2
|
||||||
|
Index Cond: (t ~~ '%BCD%'::text)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t ilike '%BCD%';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------
|
||||||
|
Index Scan using test2_idx_gist on test2
|
||||||
|
Index Cond: (t ~~* '%BCD%'::text)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
select * from test2 where t like '%BCD%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t like '%bcd%';
|
||||||
|
t
|
||||||
|
--------
|
||||||
|
abcdef
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t ilike '%BCD%';
|
||||||
|
t
|
||||||
|
--------
|
||||||
|
abcdef
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t ilike 'qua%';
|
||||||
|
t
|
||||||
|
-------
|
||||||
|
quark
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
@ -113,6 +113,8 @@ FOR TYPE text USING gist
|
|||||||
AS
|
AS
|
||||||
OPERATOR 1 % (text, text),
|
OPERATOR 1 % (text, text),
|
||||||
OPERATOR 2 <-> (text, text) FOR ORDER BY pg_catalog.float_ops,
|
OPERATOR 2 <-> (text, text) FOR ORDER BY pg_catalog.float_ops,
|
||||||
|
OPERATOR 3 pg_catalog.~~ (text, text),
|
||||||
|
OPERATOR 4 pg_catalog.~~* (text, text),
|
||||||
FUNCTION 1 gtrgm_consistent (internal, text, int, oid, internal),
|
FUNCTION 1 gtrgm_consistent (internal, text, int, oid, internal),
|
||||||
FUNCTION 2 gtrgm_union (bytea, internal),
|
FUNCTION 2 gtrgm_union (bytea, internal),
|
||||||
FUNCTION 3 gtrgm_compress (internal),
|
FUNCTION 3 gtrgm_compress (internal),
|
||||||
@ -124,17 +126,17 @@ AS
|
|||||||
STORAGE gtrgm;
|
STORAGE gtrgm;
|
||||||
|
|
||||||
-- support functions for gin
|
-- support functions for gin
|
||||||
CREATE OR REPLACE FUNCTION gin_extract_trgm(text, internal)
|
CREATE OR REPLACE FUNCTION gin_extract_value_trgm(text, internal)
|
||||||
RETURNS internal
|
RETURNS internal
|
||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
LANGUAGE C IMMUTABLE STRICT;
|
LANGUAGE C IMMUTABLE STRICT;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION gin_extract_trgm(text, internal, int2, internal, internal)
|
CREATE OR REPLACE FUNCTION gin_extract_query_trgm(text, internal, int2, internal, internal, internal, internal)
|
||||||
RETURNS internal
|
RETURNS internal
|
||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
LANGUAGE C IMMUTABLE STRICT;
|
LANGUAGE C IMMUTABLE STRICT;
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION gin_trgm_consistent(internal, int2, text, int4, internal, internal)
|
CREATE OR REPLACE FUNCTION gin_trgm_consistent(internal, int2, text, int4, internal, internal, internal, internal)
|
||||||
RETURNS bool
|
RETURNS bool
|
||||||
AS 'MODULE_PATHNAME'
|
AS 'MODULE_PATHNAME'
|
||||||
LANGUAGE C IMMUTABLE STRICT;
|
LANGUAGE C IMMUTABLE STRICT;
|
||||||
@ -144,8 +146,10 @@ CREATE OPERATOR CLASS gin_trgm_ops
|
|||||||
FOR TYPE text USING gin
|
FOR TYPE text USING gin
|
||||||
AS
|
AS
|
||||||
OPERATOR 1 % (text, text),
|
OPERATOR 1 % (text, text),
|
||||||
|
OPERATOR 3 pg_catalog.~~ (text, text),
|
||||||
|
OPERATOR 4 pg_catalog.~~* (text, text),
|
||||||
FUNCTION 1 btint4cmp (int4, int4),
|
FUNCTION 1 btint4cmp (int4, int4),
|
||||||
FUNCTION 2 gin_extract_trgm (text, internal),
|
FUNCTION 2 gin_extract_value_trgm (text, internal),
|
||||||
FUNCTION 3 gin_extract_trgm (text, internal, int2, internal, internal),
|
FUNCTION 3 gin_extract_query_trgm (text, internal, int2, internal, internal, internal, internal),
|
||||||
FUNCTION 4 gin_trgm_consistent (internal, int2, text, int4, internal, internal),
|
FUNCTION 4 gin_trgm_consistent (internal, int2, text, int4, internal, internal, internal, internal),
|
||||||
STORAGE int4;
|
STORAGE int4;
|
||||||
|
@ -45,3 +45,28 @@ set enable_seqscan=off;
|
|||||||
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
|
||||||
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu0988' order by sml desc, t;
|
||||||
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
|
||||||
|
|
||||||
|
create table test2(t text);
|
||||||
|
insert into test2 values ('abcdef');
|
||||||
|
insert into test2 values ('quark');
|
||||||
|
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
||||||
|
set enable_seqscan=off;
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t like '%BCD%';
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t ilike '%BCD%';
|
||||||
|
select * from test2 where t like '%BCD%';
|
||||||
|
select * from test2 where t like '%bcd%';
|
||||||
|
select * from test2 where t ilike '%BCD%';
|
||||||
|
select * from test2 where t ilike 'qua%';
|
||||||
|
drop index test2_idx_gin;
|
||||||
|
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
||||||
|
set enable_seqscan=off;
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t like '%BCD%';
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t ilike '%BCD%';
|
||||||
|
select * from test2 where t like '%BCD%';
|
||||||
|
select * from test2 where t like '%bcd%';
|
||||||
|
select * from test2 where t ilike '%BCD%';
|
||||||
|
select * from test2 where t ilike 'qua%';
|
||||||
|
@ -13,12 +13,21 @@
|
|||||||
#define LPADDING 2
|
#define LPADDING 2
|
||||||
#define RPADDING 1
|
#define RPADDING 1
|
||||||
#define KEEPONLYALNUM
|
#define KEEPONLYALNUM
|
||||||
|
/*
|
||||||
|
* Caution: IGNORECASE macro means that trigrams are case-insensitive.
|
||||||
|
* If this macro is disabled, the ~~* operator must be removed from the
|
||||||
|
* operator classes, because we can't handle case-insensitive wildcard search
|
||||||
|
* with case-sensitive trigrams. Failure to do this will result in "cannot
|
||||||
|
* handle ~~* with case-sensitive trigrams" errors.
|
||||||
|
*/
|
||||||
#define IGNORECASE
|
#define IGNORECASE
|
||||||
#define DIVUNION
|
#define DIVUNION
|
||||||
|
|
||||||
/* operator strategy numbers */
|
/* operator strategy numbers */
|
||||||
#define SimilarityStrategyNumber 1
|
#define SimilarityStrategyNumber 1
|
||||||
#define DistanceStrategyNumber 2
|
#define DistanceStrategyNumber 2
|
||||||
|
#define LikeStrategyNumber 3
|
||||||
|
#define ILikeStrategyNumber 4
|
||||||
|
|
||||||
|
|
||||||
typedef char trgm[3];
|
typedef char trgm[3];
|
||||||
@ -40,7 +49,10 @@ uint32 trgm2int(trgm *ptr);
|
|||||||
#else
|
#else
|
||||||
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
|
#define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
|
||||||
#endif
|
#endif
|
||||||
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)t) ) && ISPRINTABLECHAR( ((char*)t)+1 ) && ISPRINTABLECHAR( ((char*)t)+2 ) )
|
#define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
|
||||||
|
|
||||||
|
#define ISESCAPECHAR(x) (*(x) == '\\') /* Wildcard escape character */
|
||||||
|
#define ISWILDCARDCHAR(x) (*(x) == '_' || *(x) == '%') /* Wildcard meta-character */
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
@ -65,7 +77,7 @@ typedef char *BITVECP;
|
|||||||
for(i=0;i<SIGLEN;i++)
|
for(i=0;i<SIGLEN;i++)
|
||||||
|
|
||||||
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
|
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
|
||||||
#define GETBITBYTE(x,i) ( ((char)(x)) >> i & 0x01 )
|
#define GETBITBYTE(x,i) ( (((char)(x)) >> (i)) & 0x01 )
|
||||||
#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
|
#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
|
||||||
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
|
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
|
||||||
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
|
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
|
||||||
@ -89,6 +101,8 @@ typedef char *BITVECP;
|
|||||||
extern float4 trgm_limit;
|
extern float4 trgm_limit;
|
||||||
|
|
||||||
TRGM *generate_trgm(char *str, int slen);
|
TRGM *generate_trgm(char *str, int slen);
|
||||||
|
TRGM *generate_wildcard_trgm(const char *str, int slen);
|
||||||
float4 cnt_sml(TRGM *trg1, TRGM *trg2);
|
float4 cnt_sml(TRGM *trg1, TRGM *trg2);
|
||||||
|
bool trgm_contained_by(TRGM *trg1, TRGM *trg2);
|
||||||
|
|
||||||
#endif /* __TRGM_H__ */
|
#endif /* __TRGM_H__ */
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include "access/gin.h"
|
#include "access/gin.h"
|
||||||
#include "access/itup.h"
|
#include "access/itup.h"
|
||||||
|
#include "access/skey.h"
|
||||||
#include "access/tuptoaster.h"
|
#include "access/tuptoaster.h"
|
||||||
#include "storage/bufpage.h"
|
#include "storage/bufpage.h"
|
||||||
#include "utils/array.h"
|
#include "utils/array.h"
|
||||||
@ -16,14 +17,31 @@
|
|||||||
PG_FUNCTION_INFO_V1(gin_extract_trgm);
|
PG_FUNCTION_INFO_V1(gin_extract_trgm);
|
||||||
Datum gin_extract_trgm(PG_FUNCTION_ARGS);
|
Datum gin_extract_trgm(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
|
||||||
|
Datum gin_extract_value_trgm(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
|
||||||
|
Datum gin_extract_query_trgm(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
|
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
|
||||||
Datum gin_trgm_consistent(PG_FUNCTION_ARGS);
|
Datum gin_trgm_consistent(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function is used as both extractValue and extractQuery
|
* This function can only be called if a pre-9.1 version of the GIN operator
|
||||||
|
* class definition is present in the catalogs (probably as a consequence
|
||||||
|
* of upgrade-in-place). Complain.
|
||||||
*/
|
*/
|
||||||
Datum
|
Datum
|
||||||
gin_extract_trgm(PG_FUNCTION_ARGS)
|
gin_extract_trgm(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
ereport(ERROR,
|
||||||
|
(errmsg("GIN operator class for pg_trgm is out of date"),
|
||||||
|
errhint("Please drop and re-create the pg_trgm catalog entries.")));
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
}
|
||||||
|
|
||||||
|
Datum
|
||||||
|
gin_extract_value_trgm(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
text *val = (text *) PG_GETARG_TEXT_P(0);
|
text *val = (text *) PG_GETARG_TEXT_P(0);
|
||||||
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||||
@ -57,34 +75,124 @@ gin_extract_trgm(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_POINTER(entries);
|
PG_RETURN_POINTER(entries);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Datum
|
||||||
|
gin_extract_query_trgm(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *val = (text *) PG_GETARG_TEXT_P(0);
|
||||||
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||||
|
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
||||||
|
/* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
|
||||||
|
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
||||||
|
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
|
||||||
|
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
|
||||||
|
Datum *entries = NULL;
|
||||||
|
TRGM *trg;
|
||||||
|
int32 trglen;
|
||||||
|
trgm *ptr;
|
||||||
|
int32 i;
|
||||||
|
|
||||||
|
switch (strategy)
|
||||||
|
{
|
||||||
|
case SimilarityStrategyNumber:
|
||||||
|
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
||||||
|
break;
|
||||||
|
case ILikeStrategyNumber:
|
||||||
|
#ifndef IGNORECASE
|
||||||
|
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
||||||
|
#endif
|
||||||
|
/* FALL THRU */
|
||||||
|
case LikeStrategyNumber:
|
||||||
|
/*
|
||||||
|
* For wildcard search we extract all the trigrams that every
|
||||||
|
* potentially-matching string must include.
|
||||||
|
*/
|
||||||
|
trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
||||||
|
trg = NULL; /* keep compiler quiet */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
trglen = ARRNELEM(trg);
|
||||||
|
*nentries = trglen;
|
||||||
|
|
||||||
|
if (trglen > 0)
|
||||||
|
{
|
||||||
|
entries = (Datum *) palloc(sizeof(Datum) * trglen);
|
||||||
|
ptr = GETARR(trg);
|
||||||
|
for (i = 0; i < trglen; i++)
|
||||||
|
{
|
||||||
|
int32 item = trgm2int(ptr);
|
||||||
|
|
||||||
|
entries[i] = Int32GetDatum(item);
|
||||||
|
ptr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If no trigrams were extracted, then we have to scan the whole index.
|
||||||
|
*/
|
||||||
|
if (trglen == 0)
|
||||||
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(entries);
|
||||||
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
gin_trgm_consistent(PG_FUNCTION_ARGS)
|
gin_trgm_consistent(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
bool *check = (bool *) PG_GETARG_POINTER(0);
|
bool *check = (bool *) PG_GETARG_POINTER(0);
|
||||||
/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
|
StrategyNumber strategy = PG_GETARG_UINT16(1);
|
||||||
/* text *query = PG_GETARG_TEXT_P(2); */
|
/* text *query = PG_GETARG_TEXT_P(2); */
|
||||||
int32 nkeys = PG_GETARG_INT32(3);
|
int32 nkeys = PG_GETARG_INT32(3);
|
||||||
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
||||||
bool *recheck = (bool *) PG_GETARG_POINTER(5);
|
bool *recheck = (bool *) PG_GETARG_POINTER(5);
|
||||||
bool res = FALSE;
|
bool res;
|
||||||
int32 i,
|
int32 i,
|
||||||
ntrue = 0;
|
ntrue;
|
||||||
|
|
||||||
/* All cases served by this function are inexact */
|
/* All cases served by this function are inexact */
|
||||||
*recheck = true;
|
*recheck = true;
|
||||||
|
|
||||||
/* Count the matches */
|
switch (strategy)
|
||||||
for (i = 0; i < nkeys; i++)
|
|
||||||
{
|
{
|
||||||
if (check[i])
|
case SimilarityStrategyNumber:
|
||||||
ntrue++;
|
/* Count the matches */
|
||||||
}
|
ntrue = 0;
|
||||||
|
for (i = 0; i < nkeys; i++)
|
||||||
|
{
|
||||||
|
if (check[i])
|
||||||
|
ntrue++;
|
||||||
|
}
|
||||||
#ifdef DIVUNION
|
#ifdef DIVUNION
|
||||||
res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false);
|
res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false);
|
||||||
#else
|
#else
|
||||||
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false);
|
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false);
|
||||||
#endif
|
#endif
|
||||||
|
break;
|
||||||
|
case ILikeStrategyNumber:
|
||||||
|
#ifndef IGNORECASE
|
||||||
|
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
||||||
|
#endif
|
||||||
|
/* FALL THRU */
|
||||||
|
case LikeStrategyNumber:
|
||||||
|
/* Check if all extracted trigrams are present. */
|
||||||
|
res = true;
|
||||||
|
for (i = 0; i < nkeys; i++)
|
||||||
|
{
|
||||||
|
if (!check[i])
|
||||||
|
{
|
||||||
|
res = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
||||||
|
res = false; /* keep compiler quiet */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
PG_RETURN_BOOL(res);
|
PG_RETURN_BOOL(res);
|
||||||
}
|
}
|
||||||
|
@ -195,31 +195,61 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
|||||||
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
|
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
|
||||||
TRGM *qtrg;
|
TRGM *qtrg;
|
||||||
bool res;
|
bool res;
|
||||||
char *cache = (char *) fcinfo->flinfo->fn_extra;
|
char *cache = (char *) fcinfo->flinfo->fn_extra,
|
||||||
|
*cacheContents = cache + MAXALIGN(sizeof(StrategyNumber));
|
||||||
|
|
||||||
/* All cases served by this function are exact */
|
/*
|
||||||
*recheck = false;
|
* Store both the strategy number and extracted trigrams in cache, because
|
||||||
|
* trigram extraction is relatively CPU-expensive. We must include
|
||||||
if (cache == NULL || VARSIZE(cache) != VARSIZE(query) || memcmp(cache, query, VARSIZE(query)) != 0)
|
* strategy number because trigram extraction depends on strategy.
|
||||||
|
*/
|
||||||
|
if (cache == NULL || strategy != *((StrategyNumber *) cache) ||
|
||||||
|
VARSIZE(cacheContents) != VARSIZE(query) ||
|
||||||
|
memcmp(cacheContents, query, VARSIZE(query)) != 0)
|
||||||
{
|
{
|
||||||
qtrg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ);
|
switch (strategy)
|
||||||
|
{
|
||||||
|
case SimilarityStrategyNumber:
|
||||||
|
qtrg = generate_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ);
|
||||||
|
break;
|
||||||
|
case ILikeStrategyNumber:
|
||||||
|
#ifndef IGNORECASE
|
||||||
|
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
||||||
|
#endif
|
||||||
|
/* FALL THRU */
|
||||||
|
case LikeStrategyNumber:
|
||||||
|
qtrg = generate_wildcard_trgm(VARDATA(query), VARSIZE(query) - VARHDRSZ);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
||||||
|
qtrg = NULL; /* keep compiler quiet */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (cache)
|
if (cache)
|
||||||
pfree(cache);
|
pfree(cache);
|
||||||
|
|
||||||
fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
|
fcinfo->flinfo->fn_extra =
|
||||||
MAXALIGN(VARSIZE(query)) + VARSIZE(qtrg));
|
MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
|
||||||
|
MAXALIGN(sizeof(StrategyNumber)) +
|
||||||
|
MAXALIGN(VARSIZE(query)) +
|
||||||
|
VARSIZE(qtrg));
|
||||||
cache = (char *) fcinfo->flinfo->fn_extra;
|
cache = (char *) fcinfo->flinfo->fn_extra;
|
||||||
|
cacheContents = cache + MAXALIGN(sizeof(StrategyNumber));
|
||||||
|
|
||||||
memcpy(cache, query, VARSIZE(query));
|
*((StrategyNumber *) cache) = strategy;
|
||||||
memcpy(cache + MAXALIGN(VARSIZE(query)), qtrg, VARSIZE(qtrg));
|
memcpy(cacheContents, query, VARSIZE(query));
|
||||||
|
memcpy(cacheContents + MAXALIGN(VARSIZE(query)), qtrg, VARSIZE(qtrg));
|
||||||
}
|
}
|
||||||
|
|
||||||
qtrg = (TRGM *) (cache + MAXALIGN(VARSIZE(query)));
|
qtrg = (TRGM *) (cacheContents + MAXALIGN(VARSIZE(query)));
|
||||||
|
|
||||||
switch (strategy)
|
switch (strategy)
|
||||||
{
|
{
|
||||||
case SimilarityStrategyNumber:
|
case SimilarityStrategyNumber:
|
||||||
|
/* Similarity search is exact */
|
||||||
|
*recheck = false;
|
||||||
|
|
||||||
if (GIST_LEAF(entry))
|
if (GIST_LEAF(entry))
|
||||||
{ /* all leafs contains orig trgm */
|
{ /* all leafs contains orig trgm */
|
||||||
float4 tmpsml = cnt_sml(key, qtrg);
|
float4 tmpsml = cnt_sml(key, qtrg);
|
||||||
@ -242,6 +272,47 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
|||||||
res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false;
|
res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case ILikeStrategyNumber:
|
||||||
|
#ifndef IGNORECASE
|
||||||
|
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
||||||
|
#endif
|
||||||
|
/* FALL THRU */
|
||||||
|
case LikeStrategyNumber:
|
||||||
|
/* Wildcard search is inexact */
|
||||||
|
*recheck = true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if all the extracted trigrams can be present in child
|
||||||
|
* nodes.
|
||||||
|
*/
|
||||||
|
if (GIST_LEAF(entry))
|
||||||
|
{ /* all leafs contains orig trgm */
|
||||||
|
res = trgm_contained_by(qtrg, key);
|
||||||
|
}
|
||||||
|
else if (ISALLTRUE(key))
|
||||||
|
{ /* non-leaf contains signature */
|
||||||
|
res = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ /* non-leaf contains signature */
|
||||||
|
int32 k,
|
||||||
|
tmp = 0,
|
||||||
|
len = ARRNELEM(qtrg);
|
||||||
|
trgm *ptr = GETARR(qtrg);
|
||||||
|
BITVECP sign = GETSIGN(key);
|
||||||
|
|
||||||
|
res = true;
|
||||||
|
for (k = 0; k < len; k++)
|
||||||
|
{
|
||||||
|
CPTRGM(((char *) &tmp), ptr + k);
|
||||||
|
if (!GETBIT(sign, HASHVAL(tmp)))
|
||||||
|
{
|
||||||
|
res = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
||||||
res = false; /* keep compiler quiet */
|
res = false; /* keep compiler quiet */
|
||||||
|
@ -18,6 +18,23 @@ float4 trgm_limit = 0.3f;
|
|||||||
|
|
||||||
PG_FUNCTION_INFO_V1(set_limit);
|
PG_FUNCTION_INFO_V1(set_limit);
|
||||||
Datum set_limit(PG_FUNCTION_ARGS);
|
Datum set_limit(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(show_limit);
|
||||||
|
Datum show_limit(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(show_trgm);
|
||||||
|
Datum show_trgm(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(similarity);
|
||||||
|
Datum similarity(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(similarity_dist);
|
||||||
|
Datum similarity_dist(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(similarity_op);
|
||||||
|
Datum similarity_op(PG_FUNCTION_ARGS);
|
||||||
|
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
set_limit(PG_FUNCTION_ARGS)
|
set_limit(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -29,8 +46,6 @@ set_limit(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_FLOAT4(trgm_limit);
|
PG_RETURN_FLOAT4(trgm_limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(show_limit);
|
|
||||||
Datum show_limit(PG_FUNCTION_ARGS);
|
|
||||||
Datum
|
Datum
|
||||||
show_limit(PG_FUNCTION_ARGS)
|
show_limit(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -120,7 +135,7 @@ cnt_trigram(trgm *tptr, char *str, int bytelen)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adds trigramm from words (already padded).
|
* Adds trigrams from words (already padded).
|
||||||
*/
|
*/
|
||||||
static trgm *
|
static trgm *
|
||||||
make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
|
make_trigrams(trgm *tptr, char *str, int bytelen, int charlen)
|
||||||
@ -236,6 +251,225 @@ generate_trgm(char *str, int slen)
|
|||||||
return trg;
|
return trg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extract the next non-wildcard part of a search string, ie, a word bounded
|
||||||
|
* by '_' or '%' meta-characters, non-word characters or string end.
|
||||||
|
*
|
||||||
|
* str: source string, of length lenstr bytes (need not be null-terminated)
|
||||||
|
* buf: where to return the substring (must be long enough)
|
||||||
|
* *bytelen: receives byte length of the found substring
|
||||||
|
* *charlen: receives character length of the found substring
|
||||||
|
*
|
||||||
|
* Returns pointer to end+1 of the found substring in the source string.
|
||||||
|
* Returns NULL if no word found (in which case buf, bytelen, charlen not set)
|
||||||
|
*
|
||||||
|
* If the found word is bounded by non-word characters or string boundaries
|
||||||
|
* then this function will include corresponding padding spaces into buf.
|
||||||
|
*/
|
||||||
|
static const char *
|
||||||
|
get_wildcard_part(const char *str, int lenstr,
|
||||||
|
char *buf, int *bytelen, int *charlen)
|
||||||
|
{
|
||||||
|
const char *beginword = str;
|
||||||
|
const char *endword;
|
||||||
|
char *s = buf;
|
||||||
|
bool in_wildcard_meta = false;
|
||||||
|
bool in_escape = false;
|
||||||
|
int clen;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find the first word character remembering whether last character was
|
||||||
|
* wildcard meta-character.
|
||||||
|
*/
|
||||||
|
while (beginword - str < lenstr)
|
||||||
|
{
|
||||||
|
if (in_escape)
|
||||||
|
{
|
||||||
|
in_escape = false;
|
||||||
|
in_wildcard_meta = false;
|
||||||
|
if (iswordchr(beginword))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (ISESCAPECHAR(beginword))
|
||||||
|
in_escape = true;
|
||||||
|
else if (ISWILDCARDCHAR(beginword))
|
||||||
|
in_wildcard_meta = true;
|
||||||
|
else if (iswordchr(beginword))
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
in_wildcard_meta = false;
|
||||||
|
}
|
||||||
|
beginword += pg_mblen(beginword);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle string end.
|
||||||
|
*/
|
||||||
|
if (beginword - str >= lenstr)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Add left padding spaces if last character wasn't wildcard
|
||||||
|
* meta-character.
|
||||||
|
*/
|
||||||
|
*charlen = 0;
|
||||||
|
if (!in_wildcard_meta)
|
||||||
|
{
|
||||||
|
if (LPADDING > 0)
|
||||||
|
{
|
||||||
|
*s++ = ' ';
|
||||||
|
(*charlen)++;
|
||||||
|
if (LPADDING > 1)
|
||||||
|
{
|
||||||
|
*s++ = ' ';
|
||||||
|
(*charlen)++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy data into buf until wildcard meta-character, non-word character or
|
||||||
|
* string boundary. Strip escapes during copy.
|
||||||
|
*/
|
||||||
|
endword = beginword;
|
||||||
|
in_wildcard_meta = false;
|
||||||
|
in_escape = false;
|
||||||
|
while (endword - str < lenstr)
|
||||||
|
{
|
||||||
|
clen = pg_mblen(endword);
|
||||||
|
if (in_escape)
|
||||||
|
{
|
||||||
|
in_escape = false;
|
||||||
|
in_wildcard_meta = false;
|
||||||
|
if (iswordchr(endword))
|
||||||
|
{
|
||||||
|
memcpy(s, endword, clen);
|
||||||
|
(*charlen)++;
|
||||||
|
s += clen;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (ISESCAPECHAR(endword))
|
||||||
|
in_escape = true;
|
||||||
|
else if (ISWILDCARDCHAR(endword))
|
||||||
|
{
|
||||||
|
in_wildcard_meta = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (iswordchr(endword))
|
||||||
|
{
|
||||||
|
memcpy(s, endword, clen);
|
||||||
|
(*charlen)++;
|
||||||
|
s += clen;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
in_wildcard_meta = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
endword += clen;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Add right padding spaces if last character wasn't wildcard
|
||||||
|
* meta-character.
|
||||||
|
*/
|
||||||
|
if (!in_wildcard_meta)
|
||||||
|
{
|
||||||
|
if (RPADDING > 0)
|
||||||
|
{
|
||||||
|
*s++ = ' ';
|
||||||
|
(*charlen)++;
|
||||||
|
if (RPADDING > 1)
|
||||||
|
{
|
||||||
|
*s++ = ' ';
|
||||||
|
(*charlen)++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*bytelen = s - buf;
|
||||||
|
return endword;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Generates trigrams for wildcard search string.
|
||||||
|
*
|
||||||
|
* Returns array of trigrams that must occur in any string that matches the
|
||||||
|
* wildcard string. For example, given pattern "a%bcd%" the trigrams
|
||||||
|
* "  a", "bcd" would be extracted.
|
||||||
|
*/
|
||||||
|
TRGM *
|
||||||
|
generate_wildcard_trgm(const char *str, int slen)
|
||||||
|
{
|
||||||
|
TRGM *trg;
|
||||||
|
char *buf,
|
||||||
|
*buf2;
|
||||||
|
trgm *tptr;
|
||||||
|
int len,
|
||||||
|
charlen,
|
||||||
|
bytelen;
|
||||||
|
const char *eword;
|
||||||
|
|
||||||
|
trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3);
|
||||||
|
trg->flag = ARRKEY;
|
||||||
|
SET_VARSIZE(trg, TRGMHDRSIZE);
|
||||||
|
|
||||||
|
if (slen + LPADDING + RPADDING < 3 || slen == 0)
|
||||||
|
return trg;
|
||||||
|
|
||||||
|
tptr = GETARR(trg);
|
||||||
|
|
||||||
|
buf = palloc(sizeof(char) * (slen + 4));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extract trigrams from each substring extracted by get_wildcard_part.
|
||||||
|
*/
|
||||||
|
eword = str;
|
||||||
|
while ((eword = get_wildcard_part(eword, slen - (eword - str),
|
||||||
|
buf, &bytelen, &charlen)) != NULL)
|
||||||
|
{
|
||||||
|
#ifdef IGNORECASE
|
||||||
|
buf2 = lowerstr_with_len(buf, bytelen);
|
||||||
|
bytelen = strlen(buf2);
|
||||||
|
#else
|
||||||
|
buf2 = buf;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* count trigrams
|
||||||
|
*/
|
||||||
|
tptr = make_trigrams(tptr, buf2, bytelen, charlen);
|
||||||
|
#ifdef IGNORECASE
|
||||||
|
pfree(buf2);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
pfree(buf);
|
||||||
|
|
||||||
|
if ((len = tptr - GETARR(trg)) == 0)
|
||||||
|
return trg;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make trigrams unique.
|
||||||
|
*/
|
||||||
|
if (len > 0)
|
||||||
|
{
|
||||||
|
qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
|
||||||
|
len = unique_array(GETARR(trg), len);
|
||||||
|
}
|
||||||
|
|
||||||
|
SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
|
||||||
|
|
||||||
|
return trg;
|
||||||
|
}
|
||||||
|
|
||||||
uint32
|
uint32
|
||||||
trgm2int(trgm *ptr)
|
trgm2int(trgm *ptr)
|
||||||
{
|
{
|
||||||
@ -250,8 +484,6 @@ trgm2int(trgm *ptr)
|
|||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(show_trgm);
|
|
||||||
Datum show_trgm(PG_FUNCTION_ARGS);
|
|
||||||
Datum
|
Datum
|
||||||
show_trgm(PG_FUNCTION_ARGS)
|
show_trgm(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -340,8 +572,44 @@ cnt_sml(TRGM *trg1, TRGM *trg2)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(similarity);
|
/*
|
||||||
Datum similarity(PG_FUNCTION_ARGS);
|
* Returns whether trg2 contains all trigrams in trg1.
|
||||||
|
* This relies on the trigram arrays being sorted.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
trgm_contained_by(TRGM *trg1, TRGM *trg2)
|
||||||
|
{
|
||||||
|
trgm *ptr1,
|
||||||
|
*ptr2;
|
||||||
|
int len1,
|
||||||
|
len2;
|
||||||
|
|
||||||
|
ptr1 = GETARR(trg1);
|
||||||
|
ptr2 = GETARR(trg2);
|
||||||
|
|
||||||
|
len1 = ARRNELEM(trg1);
|
||||||
|
len2 = ARRNELEM(trg2);
|
||||||
|
|
||||||
|
while (ptr1 - GETARR(trg1) < len1 && ptr2 - GETARR(trg2) < len2)
|
||||||
|
{
|
||||||
|
int res = CMPTRGM(ptr1, ptr2);
|
||||||
|
|
||||||
|
if (res < 0)
|
||||||
|
return false;
|
||||||
|
else if (res > 0)
|
||||||
|
ptr2++;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ptr1++;
|
||||||
|
ptr2++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ptr1 - GETARR(trg1) < len1)
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
similarity(PG_FUNCTION_ARGS)
|
similarity(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -364,8 +632,6 @@ similarity(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_FLOAT4(res);
|
PG_RETURN_FLOAT4(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(similarity_dist);
|
|
||||||
Datum similarity_dist(PG_FUNCTION_ARGS);
|
|
||||||
Datum
|
Datum
|
||||||
similarity_dist(PG_FUNCTION_ARGS)
|
similarity_dist(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
@ -375,8 +641,6 @@ similarity_dist(PG_FUNCTION_ARGS)
|
|||||||
PG_RETURN_FLOAT4(1.0 - res);
|
PG_RETURN_FLOAT4(1.0 - res);
|
||||||
}
|
}
|
||||||
|
|
||||||
PG_FUNCTION_INFO_V1(similarity_op);
|
|
||||||
Datum similarity_op(PG_FUNCTION_ARGS);
|
|
||||||
Datum
|
Datum
|
||||||
similarity_op(PG_FUNCTION_ARGS)
|
similarity_op(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
|
@ -25,11 +25,11 @@ DROP TYPE gtrgm CASCADE;
|
|||||||
|
|
||||||
DROP OPERATOR CLASS gin_trgm_ops USING gin;
|
DROP OPERATOR CLASS gin_trgm_ops USING gin;
|
||||||
|
|
||||||
DROP FUNCTION gin_extract_trgm(text, internal);
|
DROP FUNCTION gin_extract_value_trgm(text, internal);
|
||||||
|
|
||||||
DROP FUNCTION gin_extract_trgm(text, internal, int2, internal, internal);
|
DROP FUNCTION gin_extract_query_trgm(text, internal, int2, internal, internal, internal, internal);
|
||||||
|
|
||||||
DROP FUNCTION gin_trgm_consistent(internal, int2, text, int4, internal, internal);
|
DROP FUNCTION gin_trgm_consistent(internal, int2, text, int4, internal, internal, internal, internal);
|
||||||
|
|
||||||
DROP OPERATOR % (text, text);
|
DROP OPERATOR % (text, text);
|
||||||
|
|
||||||
|
@ -137,8 +137,10 @@
|
|||||||
The <filename>pg_trgm</filename> module provides GiST and GIN index
|
The <filename>pg_trgm</filename> module provides GiST and GIN index
|
||||||
operator classes that allow you to create an index over a text column for
|
operator classes that allow you to create an index over a text column for
|
||||||
the purpose of very fast similarity searches. These index types support
|
the purpose of very fast similarity searches. These index types support
|
||||||
the above-described similarity operators (and no other operators, so you may
|
the above-described similarity operators, and additionally support
|
||||||
want a regular B-tree index too).
|
trigram-based index searches for <literal>LIKE</> and <literal>ILIKE</>
|
||||||
|
queries. (These indexes do not support equality or simple comparison
|
||||||
|
operators, so you may need a regular B-tree index too.)
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
@ -181,6 +183,18 @@ SELECT t, t <-> '<replaceable>word</>' AS dist
|
|||||||
a small number of the closest matches is wanted.
|
a small number of the closest matches is wanted.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Beginning in <productname>PostgreSQL</> 9.1, these index types also support
|
||||||
|
index searches for <literal>LIKE</> and <literal>ILIKE</>, for example
|
||||||
|
<programlisting>
|
||||||
|
SELECT * FROM test_trgm WHERE t LIKE '%foo%bar';
|
||||||
|
</programlisting>
|
||||||
|
The index search works by extracting trigrams from the search string
|
||||||
|
and then looking these up in the index. The more trigrams in the search
|
||||||
|
string, the more effective the index search is. Unlike B-tree based
|
||||||
|
searches, the search string need not be left-anchored.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The choice between GiST and GIN indexing depends on the relative
|
The choice between GiST and GIN indexing depends on the relative
|
||||||
performance characteristics of GiST and GIN, which are discussed elsewhere.
|
performance characteristics of GiST and GIN, which are discussed elsewhere.
|
||||||
|
Reference in New Issue
Block a user