1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Add word_similarity to pg_trgm contrib module.

This patch introduces the concept of similarity between a string and just a
single word of another string.

The extension version is not changed, because 1.2 was already introduced during
the 9.6 release cycle, so that version has not yet been publicly released.

Author: Alexander Korotkov, Artur Zakirov
This commit is contained in:
Teodor Sigaev
2016-03-16 18:59:21 +03:00
parent 1c4f001b79
commit f576b17cd6
10 changed files with 726 additions and 75 deletions

View File

@ -89,6 +89,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
switch (strategy)
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
trg = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
break;
case ILikeStrategyNumber:
@ -176,6 +177,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
bool res;
int32 i,
ntrue;
double nlimit;
/* All cases served by this function are inexact */
*recheck = true;
@ -183,6 +185,10 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
switch (strategy)
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
nlimit = (strategy == SimilarityStrategyNumber) ?
similarity_threshold : word_similarity_threshold;
/* Count the matches */
ntrue = 0;
for (i = 0; i < nkeys; i++)
@ -207,8 +213,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
* So, independently of DIVUNION, the upper bound formula is the same.
*/
res = (nkeys == 0) ? false :
((((((float4) ntrue) / ((float4) nkeys))) >= similarity_threshold)
? true : false);
(((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
break;
case ILikeStrategyNumber:
#ifndef IGNORECASE
@ -270,10 +275,15 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
int32 i,
ntrue;
bool *boolcheck;
double nlimit;
switch (strategy)
{
case SimilarityStrategyNumber:
case WordSimilarityStrategyNumber:
nlimit = (strategy == SimilarityStrategyNumber) ?
similarity_threshold : word_similarity_threshold;
/* Count the matches */
ntrue = 0;
for (i = 0; i < nkeys; i++)
@ -285,9 +295,9 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
/*
* See comment in gin_trgm_consistent() about the upper bound formula
*/
res = (nkeys == 0) ? GIN_FALSE :
(((((float4) ntrue) / ((float4) nkeys)) >= similarity_threshold)
? GIN_MAYBE : GIN_FALSE);
res = (nkeys == 0)
? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
? GIN_MAYBE : GIN_FALSE);
break;
case ILikeStrategyNumber:
#ifndef IGNORECASE