mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Support LIKE and ILIKE index searches via contrib/pg_trgm indexes.
Unlike Btree-based LIKE optimization, this works for non-left-anchored search patterns. The effectiveness of the search depends on how many trigrams can be extracted from the pattern. (The worst case, with no trigrams, degrades to a full-table scan, so this isn't a panacea. But it can be very useful.)

Alexander Korotkov, reviewed by Jan Urbanski
This commit is contained in:
@ -7,6 +7,7 @@
|
||||
|
||||
#include "access/gin.h"
|
||||
#include "access/itup.h"
|
||||
#include "access/skey.h"
|
||||
#include "access/tuptoaster.h"
|
||||
#include "storage/bufpage.h"
|
||||
#include "utils/array.h"
|
||||
@ -16,14 +17,31 @@
|
||||
PG_FUNCTION_INFO_V1(gin_extract_trgm);
|
||||
Datum gin_extract_trgm(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
|
||||
Datum gin_extract_value_trgm(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
|
||||
Datum gin_extract_query_trgm(PG_FUNCTION_ARGS);
|
||||
|
||||
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
|
||||
Datum gin_trgm_consistent(PG_FUNCTION_ARGS);
|
||||
|
||||
/*
|
||||
* This function is used as both extractValue and extractQuery
|
||||
* This function can only be called if a pre-9.1 version of the GIN operator
|
||||
* class definition is present in the catalogs (probably as a consequence
|
||||
* of upgrade-in-place). Complain.
|
||||
*/
|
||||
Datum
|
||||
gin_extract_trgm(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errmsg("GIN operator class for pg_trgm is out of date"),
|
||||
errhint("Please drop and re-create the pg_trgm catalog entries.")));
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
Datum
|
||||
gin_extract_value_trgm(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *val = (text *) PG_GETARG_TEXT_P(0);
|
||||
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||
@ -57,34 +75,124 @@ gin_extract_trgm(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
Datum
|
||||
gin_extract_query_trgm(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *val = (text *) PG_GETARG_TEXT_P(0);
|
||||
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
||||
/* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
|
||||
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
||||
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
|
||||
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
|
||||
Datum *entries = NULL;
|
||||
TRGM *trg;
|
||||
int32 trglen;
|
||||
trgm *ptr;
|
||||
int32 i;
|
||||
|
||||
switch (strategy)
|
||||
{
|
||||
case SimilarityStrategyNumber:
|
||||
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
||||
break;
|
||||
case ILikeStrategyNumber:
|
||||
#ifndef IGNORECASE
|
||||
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
||||
#endif
|
||||
/* FALL THRU */
|
||||
case LikeStrategyNumber:
|
||||
/*
|
||||
* For wildcard search we extract all the trigrams that every
|
||||
* potentially-matching string must include.
|
||||
*/
|
||||
trg = generate_wildcard_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
||||
trg = NULL; /* keep compiler quiet */
|
||||
break;
|
||||
}
|
||||
|
||||
trglen = ARRNELEM(trg);
|
||||
*nentries = trglen;
|
||||
|
||||
if (trglen > 0)
|
||||
{
|
||||
entries = (Datum *) palloc(sizeof(Datum) * trglen);
|
||||
ptr = GETARR(trg);
|
||||
for (i = 0; i < trglen; i++)
|
||||
{
|
||||
int32 item = trgm2int(ptr);
|
||||
|
||||
entries[i] = Int32GetDatum(item);
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If no trigram was extracted then we have to scan all the index.
|
||||
*/
|
||||
if (trglen == 0)
|
||||
*searchMode = GIN_SEARCH_MODE_ALL;
|
||||
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
Datum
|
||||
gin_trgm_consistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bool *check = (bool *) PG_GETARG_POINTER(0);
|
||||
/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
|
||||
StrategyNumber strategy = PG_GETARG_UINT16(1);
|
||||
/* text *query = PG_GETARG_TEXT_P(2); */
|
||||
int32 nkeys = PG_GETARG_INT32(3);
|
||||
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
||||
bool *recheck = (bool *) PG_GETARG_POINTER(5);
|
||||
bool res = FALSE;
|
||||
bool res;
|
||||
int32 i,
|
||||
ntrue = 0;
|
||||
ntrue;
|
||||
|
||||
/* All cases served by this function are inexact */
|
||||
*recheck = true;
|
||||
|
||||
/* Count the matches */
|
||||
for (i = 0; i < nkeys; i++)
|
||||
switch (strategy)
|
||||
{
|
||||
if (check[i])
|
||||
ntrue++;
|
||||
}
|
||||
|
||||
case SimilarityStrategyNumber:
|
||||
/* Count the matches */
|
||||
ntrue = 0;
|
||||
for (i = 0; i < nkeys; i++)
|
||||
{
|
||||
if (check[i])
|
||||
ntrue++;
|
||||
}
|
||||
#ifdef DIVUNION
|
||||
res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false);
|
||||
res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false);
|
||||
#else
|
||||
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false);
|
||||
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false);
|
||||
#endif
|
||||
break;
|
||||
case ILikeStrategyNumber:
|
||||
#ifndef IGNORECASE
|
||||
elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
|
||||
#endif
|
||||
/* FALL THRU */
|
||||
case LikeStrategyNumber:
|
||||
/* Check if all extracted trigrams are presented. */
|
||||
res = true;
|
||||
for (i = 0; i < nkeys; i++)
|
||||
{
|
||||
if (!check[i])
|
||||
{
|
||||
res = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
||||
res = false; /* keep compiler quiet */
|
||||
break;
|
||||
}
|
||||
|
||||
PG_RETURN_BOOL(res);
|
||||
}
|
||||
|
Reference in New Issue
Block a user