mirror of
https://github.com/postgres/postgres.git
synced 2025-06-13 07:41:39 +03:00
Add word_similarity to pg_trgm contrib module.
Patch introduces a concept of similarity over string and just a word from another string. Version of extension is not changed because 1.2 was already introduced in 9.6 release cycle, so, there wasn't a public version. Author: Alexander Korotkov, Artur Zakirov
This commit is contained in:
@ -191,6 +191,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
||||
bool res;
|
||||
Size querysize = VARSIZE(query);
|
||||
gtrgm_consistent_cache *cache;
|
||||
double nlimit;
|
||||
|
||||
/*
|
||||
* We keep the extracted trigrams in cache, because trigram extraction is
|
||||
@ -218,6 +219,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
||||
switch (strategy)
|
||||
{
|
||||
case SimilarityStrategyNumber:
|
||||
case WordSimilarityStrategyNumber:
|
||||
qtrg = generate_trgm(VARDATA(query),
|
||||
querysize - VARHDRSZ);
|
||||
break;
|
||||
@ -286,16 +288,23 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
||||
switch (strategy)
|
||||
{
|
||||
case SimilarityStrategyNumber:
|
||||
/* Similarity search is exact */
|
||||
*recheck = false;
|
||||
case WordSimilarityStrategyNumber:
|
||||
/* Similarity search is exact. Word similarity search is inexact */
|
||||
*recheck = (strategy == WordSimilarityStrategyNumber);
|
||||
nlimit = (strategy == SimilarityStrategyNumber) ?
|
||||
similarity_threshold : word_similarity_threshold;
|
||||
|
||||
if (GIST_LEAF(entry))
|
||||
{ /* all leafs contains orig trgm */
|
||||
float4 tmpsml = cnt_sml(key, qtrg);
|
||||
/*
|
||||
* Prevent gcc optimizing the tmpsml variable using volatile
|
||||
* keyword. Otherwise comparison of nlimit and tmpsml may give
|
||||
* wrong results.
|
||||
*/
|
||||
float4 volatile tmpsml = cnt_sml(qtrg, key, *recheck);
|
||||
|
||||
/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
|
||||
res = (*(int *) &tmpsml == *(int *) &similarity_threshold
|
||||
|| tmpsml > similarity_threshold) ? true : false;
|
||||
res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit);
|
||||
}
|
||||
else if (ISALLTRUE(key))
|
||||
{ /* non-leaf contains signature */
|
||||
@ -309,8 +318,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
||||
if (len == 0)
|
||||
res = false;
|
||||
else
|
||||
res = (((((float8) count) / ((float8) len))) >= similarity_threshold)
|
||||
? true : false;
|
||||
res = (((((float8) count) / ((float8) len))) >= nlimit);
|
||||
}
|
||||
break;
|
||||
case ILikeStrategyNumber:
|
||||
@ -428,6 +436,7 @@ gtrgm_distance(PG_FUNCTION_ARGS)
|
||||
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
||||
|
||||
/* Oid subtype = PG_GETARG_OID(3); */
|
||||
bool *recheck = (bool *) PG_GETARG_POINTER(4);
|
||||
TRGM *key = (TRGM *) DatumGetPointer(entry->key);
|
||||
TRGM *qtrg;
|
||||
float8 res;
|
||||
@ -463,9 +472,17 @@ gtrgm_distance(PG_FUNCTION_ARGS)
|
||||
switch (strategy)
|
||||
{
|
||||
case DistanceStrategyNumber:
|
||||
case WordDistanceStrategyNumber:
|
||||
*recheck = strategy == WordDistanceStrategyNumber;
|
||||
if (GIST_LEAF(entry))
|
||||
{ /* all leafs contains orig trgm */
|
||||
res = 1.0 - cnt_sml(key, qtrg);
|
||||
/*
|
||||
* Prevent gcc optimizing the sml variable using volatile
|
||||
* keyword. Otherwise res can differ from the
|
||||
* word_similarity_dist_op() function.
|
||||
*/
|
||||
float4 volatile sml = cnt_sml(qtrg, key, *recheck);
|
||||
res = 1.0 - sml;
|
||||
}
|
||||
else if (ISALLTRUE(key))
|
||||
{ /* all leafs contains orig trgm */
|
||||
|
Reference in New Issue
Block a user