mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	The initial coding just descended the index if any of the target trigrams were possibly present at the next level down. But actually we can apply trigramsMatchGraph() so as to take advantage of AND requirements when there are some. The input data might contain false positive matches, but that can only result in a false positive result, not false negative, so it's safe to do it this way. Alexander Korotkov
		
			
				
	
	
		
			959 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			959 lines
		
	
	
		
			22 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * contrib/pg_trgm/trgm_gist.c
 | 
						|
 */
 | 
						|
#include "postgres.h"
 | 
						|
 | 
						|
#include "trgm.h"
 | 
						|
 | 
						|
#include "access/skey.h"
 | 
						|
 | 
						|
 | 
						|
typedef struct
 | 
						|
{
 | 
						|
	/* most recent inputs to gtrgm_consistent */
 | 
						|
	StrategyNumber strategy;
 | 
						|
	text	   *query;
 | 
						|
	/* extracted trigrams for query */
 | 
						|
	TRGM	   *trigrams;
 | 
						|
	/* if a regex operator, the extracted graph */
 | 
						|
	TrgmPackedGraph *graph;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * The "query" and "trigrams" are stored in the same palloc block as this
 | 
						|
	 * cache struct, at MAXALIGN'ed offsets.  The graph however isn't.
 | 
						|
	 */
 | 
						|
} gtrgm_consistent_cache;
 | 
						|
 | 
						|
#define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
 | 
						|
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_in);
 | 
						|
Datum		gtrgm_in(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_out);
 | 
						|
Datum		gtrgm_out(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_compress);
 | 
						|
Datum		gtrgm_compress(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_decompress);
 | 
						|
Datum		gtrgm_decompress(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_consistent);
 | 
						|
Datum		gtrgm_consistent(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_distance);
 | 
						|
Datum		gtrgm_distance(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_union);
 | 
						|
Datum		gtrgm_union(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_same);
 | 
						|
Datum		gtrgm_same(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_penalty);
 | 
						|
Datum		gtrgm_penalty(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
PG_FUNCTION_INFO_V1(gtrgm_picksplit);
 | 
						|
Datum		gtrgm_picksplit(PG_FUNCTION_ARGS);
 | 
						|
 | 
						|
/* Number of one-bits in an unsigned byte */
 | 
						|
static const uint8 number_of_ones[256] = {
 | 
						|
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
 | 
						|
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 | 
						|
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 | 
						|
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 | 
						|
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 | 
						|
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 | 
						|
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 | 
						|
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 | 
						|
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 | 
						|
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 | 
						|
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 | 
						|
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 | 
						|
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 | 
						|
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 | 
						|
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 | 
						|
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_in(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	elog(ERROR, "not implemented");
 | 
						|
	PG_RETURN_DATUM(0);
 | 
						|
}
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_out(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	elog(ERROR, "not implemented");
 | 
						|
	PG_RETURN_DATUM(0);
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
makesign(BITVECP sign, TRGM *a)
 | 
						|
{
 | 
						|
	int32		k,
 | 
						|
				len = ARRNELEM(a);
 | 
						|
	trgm	   *ptr = GETARR(a);
 | 
						|
	int32		tmp = 0;
 | 
						|
 | 
						|
	MemSet((void *) sign, 0, sizeof(BITVEC));
 | 
						|
	SETBIT(sign, SIGLENBIT);	/* set last unused bit */
 | 
						|
	for (k = 0; k < len; k++)
 | 
						|
	{
 | 
						|
		CPTRGM(((char *) &tmp), ptr + k);
 | 
						|
		HASH(sign, tmp);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_compress(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
 | 
						|
	GISTENTRY  *retval = entry;
 | 
						|
 | 
						|
	if (entry->leafkey)
 | 
						|
	{							/* trgm */
 | 
						|
		TRGM	   *res;
 | 
						|
		text	   *val = DatumGetTextP(entry->key);
 | 
						|
 | 
						|
		res = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
 | 
						|
		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
 | 
						|
		gistentryinit(*retval, PointerGetDatum(res),
 | 
						|
					  entry->rel, entry->page,
 | 
						|
					  entry->offset, FALSE);
 | 
						|
	}
 | 
						|
	else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
 | 
						|
			 !ISALLTRUE(DatumGetPointer(entry->key)))
 | 
						|
	{
 | 
						|
		int32		i,
 | 
						|
					len;
 | 
						|
		TRGM	   *res;
 | 
						|
		BITVECP		sign = GETSIGN(DatumGetPointer(entry->key));
 | 
						|
 | 
						|
		LOOPBYTE
 | 
						|
		{
 | 
						|
			if ((sign[i] & 0xff) != 0xff)
 | 
						|
				PG_RETURN_POINTER(retval);
 | 
						|
		}
 | 
						|
 | 
						|
		len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
 | 
						|
		res = (TRGM *) palloc(len);
 | 
						|
		SET_VARSIZE(res, len);
 | 
						|
		res->flag = SIGNKEY | ALLISTRUE;
 | 
						|
 | 
						|
		retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
 | 
						|
		gistentryinit(*retval, PointerGetDatum(res),
 | 
						|
					  entry->rel, entry->page,
 | 
						|
					  entry->offset, FALSE);
 | 
						|
	}
 | 
						|
	PG_RETURN_POINTER(retval);
 | 
						|
}
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_decompress(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
 | 
						|
	GISTENTRY  *retval;
 | 
						|
	text	   *key;
 | 
						|
 | 
						|
	key = DatumGetTextP(entry->key);
 | 
						|
 | 
						|
	if (key != (text *) DatumGetPointer(entry->key))
 | 
						|
	{
 | 
						|
		/* need to pass back the decompressed item */
 | 
						|
		retval = palloc(sizeof(GISTENTRY));
 | 
						|
		gistentryinit(*retval, PointerGetDatum(key),
 | 
						|
					  entry->rel, entry->page, entry->offset, entry->leafkey);
 | 
						|
		PG_RETURN_POINTER(retval);
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		/* we can return the entry as-is */
 | 
						|
		PG_RETURN_POINTER(entry);
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
static int32
 | 
						|
cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
 | 
						|
{
 | 
						|
	int32		count = 0;
 | 
						|
	int32		k,
 | 
						|
				len = ARRNELEM(qtrg);
 | 
						|
	trgm	   *ptr = GETARR(qtrg);
 | 
						|
	int32		tmp = 0;
 | 
						|
 | 
						|
	for (k = 0; k < len; k++)
 | 
						|
	{
 | 
						|
		CPTRGM(((char *) &tmp), ptr + k);
 | 
						|
		count += GETBIT(sign, HASHVAL(tmp));
 | 
						|
	}
 | 
						|
 | 
						|
	return count;
 | 
						|
}
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_consistent(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
 | 
						|
	text	   *query = PG_GETARG_TEXT_P(1);
 | 
						|
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
 | 
						|
 | 
						|
	/* Oid		subtype = PG_GETARG_OID(3); */
 | 
						|
	bool	   *recheck = (bool *) PG_GETARG_POINTER(4);
 | 
						|
	TRGM	   *key = (TRGM *) DatumGetPointer(entry->key);
 | 
						|
	TRGM	   *qtrg;
 | 
						|
	bool		res;
 | 
						|
	Size		querysize = VARSIZE(query);
 | 
						|
	gtrgm_consistent_cache *cache;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * We keep the extracted trigrams in cache, because trigram extraction is
 | 
						|
	 * relatively CPU-expensive.  When trying to reuse a cached value, check
 | 
						|
	 * strategy number not just query itself, because trigram extraction
 | 
						|
	 * depends on strategy.
 | 
						|
	 *
 | 
						|
	 * The cached structure is a single palloc chunk containing the
 | 
						|
	 * gtrgm_consistent_cache header, then the input query (starting at a
 | 
						|
	 * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN
 | 
						|
	 * boundary).  However we don't try to include the regex graph (if any) in
 | 
						|
	 * that struct.  (XXX currently, this approach can leak regex graphs
 | 
						|
	 * across index rescans.  Not clear if that's worth fixing.)
 | 
						|
	 */
 | 
						|
	cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
 | 
						|
	if (cache == NULL ||
 | 
						|
		cache->strategy != strategy ||
 | 
						|
		VARSIZE(cache->query) != querysize ||
 | 
						|
		memcmp((char *) cache->query, (char *) query, querysize) != 0)
 | 
						|
	{
 | 
						|
		gtrgm_consistent_cache *newcache;
 | 
						|
		TrgmPackedGraph *graph = NULL;
 | 
						|
		Size		qtrgsize;
 | 
						|
 | 
						|
		switch (strategy)
 | 
						|
		{
 | 
						|
			case SimilarityStrategyNumber:
 | 
						|
				qtrg = generate_trgm(VARDATA(query),
 | 
						|
									 querysize - VARHDRSZ);
 | 
						|
				break;
 | 
						|
			case ILikeStrategyNumber:
 | 
						|
#ifndef IGNORECASE
 | 
						|
				elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
 | 
						|
#endif
 | 
						|
				/* FALL THRU */
 | 
						|
			case LikeStrategyNumber:
 | 
						|
				qtrg = generate_wildcard_trgm(VARDATA(query),
 | 
						|
											  querysize - VARHDRSZ);
 | 
						|
				break;
 | 
						|
			case RegExpICaseStrategyNumber:
 | 
						|
#ifndef IGNORECASE
 | 
						|
				elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
 | 
						|
#endif
 | 
						|
				/* FALL THRU */
 | 
						|
			case RegExpStrategyNumber:
 | 
						|
				qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
 | 
						|
									 &graph, fcinfo->flinfo->fn_mcxt);
 | 
						|
				/* just in case an empty array is returned ... */
 | 
						|
				if (qtrg && ARRNELEM(qtrg) <= 0)
 | 
						|
				{
 | 
						|
					pfree(qtrg);
 | 
						|
					qtrg = NULL;
 | 
						|
				}
 | 
						|
				break;
 | 
						|
			default:
 | 
						|
				elog(ERROR, "unrecognized strategy number: %d", strategy);
 | 
						|
				qtrg = NULL;	/* keep compiler quiet */
 | 
						|
				break;
 | 
						|
		}
 | 
						|
 | 
						|
		qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
 | 
						|
 | 
						|
		newcache = (gtrgm_consistent_cache *)
 | 
						|
			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
 | 
						|
							   MAXALIGN(sizeof(gtrgm_consistent_cache)) +
 | 
						|
							   MAXALIGN(querysize) +
 | 
						|
							   qtrgsize);
 | 
						|
 | 
						|
		newcache->strategy = strategy;
 | 
						|
		newcache->query = (text *)
 | 
						|
			((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
 | 
						|
		memcpy((char *) newcache->query, (char *) query, querysize);
 | 
						|
		if (qtrg)
 | 
						|
		{
 | 
						|
			newcache->trigrams = (TRGM *)
 | 
						|
				((char *) newcache->query + MAXALIGN(querysize));
 | 
						|
			memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
 | 
						|
			/* release qtrg in case it was made in fn_mcxt */
 | 
						|
			pfree(qtrg);
 | 
						|
		}
 | 
						|
		else
 | 
						|
			newcache->trigrams = NULL;
 | 
						|
		newcache->graph = graph;
 | 
						|
 | 
						|
		if (cache)
 | 
						|
			pfree(cache);
 | 
						|
		fcinfo->flinfo->fn_extra = (void *) newcache;
 | 
						|
		cache = newcache;
 | 
						|
	}
 | 
						|
 | 
						|
	qtrg = cache->trigrams;
 | 
						|
 | 
						|
	switch (strategy)
 | 
						|
	{
 | 
						|
		case SimilarityStrategyNumber:
 | 
						|
			/* Similarity search is exact */
 | 
						|
			*recheck = false;
 | 
						|
 | 
						|
			if (GIST_LEAF(entry))
 | 
						|
			{					/* all leafs contains orig trgm */
 | 
						|
				float4		tmpsml = cnt_sml(key, qtrg);
 | 
						|
 | 
						|
				/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
 | 
						|
				res = (*(int *) &tmpsml == *(int *) &trgm_limit || tmpsml > trgm_limit) ? true : false;
 | 
						|
			}
 | 
						|
			else if (ISALLTRUE(key))
 | 
						|
			{					/* non-leaf contains signature */
 | 
						|
				res = true;
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{					/* non-leaf contains signature */
 | 
						|
				int32		count = cnt_sml_sign_common(qtrg, GETSIGN(key));
 | 
						|
				int32		len = ARRNELEM(qtrg);
 | 
						|
 | 
						|
				if (len == 0)
 | 
						|
					res = false;
 | 
						|
				else
 | 
						|
					res = (((((float8) count) / ((float8) len))) >= trgm_limit) ? true : false;
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		case ILikeStrategyNumber:
 | 
						|
#ifndef IGNORECASE
 | 
						|
			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
 | 
						|
#endif
 | 
						|
			/* FALL THRU */
 | 
						|
		case LikeStrategyNumber:
 | 
						|
			/* Wildcard search is inexact */
 | 
						|
			*recheck = true;
 | 
						|
 | 
						|
			/*
 | 
						|
			 * Check if all the extracted trigrams can be present in child
 | 
						|
			 * nodes.
 | 
						|
			 */
 | 
						|
			if (GIST_LEAF(entry))
 | 
						|
			{					/* all leafs contains orig trgm */
 | 
						|
				res = trgm_contained_by(qtrg, key);
 | 
						|
			}
 | 
						|
			else if (ISALLTRUE(key))
 | 
						|
			{					/* non-leaf contains signature */
 | 
						|
				res = true;
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{					/* non-leaf contains signature */
 | 
						|
				int32		k,
 | 
						|
							tmp = 0,
 | 
						|
							len = ARRNELEM(qtrg);
 | 
						|
				trgm	   *ptr = GETARR(qtrg);
 | 
						|
				BITVECP		sign = GETSIGN(key);
 | 
						|
 | 
						|
				res = true;
 | 
						|
				for (k = 0; k < len; k++)
 | 
						|
				{
 | 
						|
					CPTRGM(((char *) &tmp), ptr + k);
 | 
						|
					if (!GETBIT(sign, HASHVAL(tmp)))
 | 
						|
					{
 | 
						|
						res = false;
 | 
						|
						break;
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		case RegExpICaseStrategyNumber:
 | 
						|
#ifndef IGNORECASE
 | 
						|
			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
 | 
						|
#endif
 | 
						|
			/* FALL THRU */
 | 
						|
		case RegExpStrategyNumber:
 | 
						|
			/* Regexp search is inexact */
 | 
						|
			*recheck = true;
 | 
						|
 | 
						|
			/* Check regex match as much as we can with available info */
 | 
						|
			if (qtrg)
 | 
						|
			{
 | 
						|
				if (GIST_LEAF(entry))
 | 
						|
				{				/* all leafs contains orig trgm */
 | 
						|
					bool	   *check;
 | 
						|
 | 
						|
					check = trgm_presence_map(qtrg, key);
 | 
						|
					res = trigramsMatchGraph(cache->graph, check);
 | 
						|
					pfree(check);
 | 
						|
				}
 | 
						|
				else if (ISALLTRUE(key))
 | 
						|
				{				/* non-leaf contains signature */
 | 
						|
					res = true;
 | 
						|
				}
 | 
						|
				else
 | 
						|
				{				/* non-leaf contains signature */
 | 
						|
					int32		k,
 | 
						|
								tmp = 0,
 | 
						|
								len = ARRNELEM(qtrg);
 | 
						|
					trgm	   *ptr = GETARR(qtrg);
 | 
						|
					BITVECP		sign = GETSIGN(key);
 | 
						|
					bool	   *check;
 | 
						|
 | 
						|
					/*
 | 
						|
					 * GETBIT() tests may give false positives, due to limited
 | 
						|
					 * size of the sign array.	But since trigramsMatchGraph()
 | 
						|
					 * implements a monotone boolean function, false positives
 | 
						|
					 * in the check array can't lead to false negative answer.
 | 
						|
					 * So we can apply trigramsMatchGraph despite uncertainty,
 | 
						|
					 * and that usefully improves the quality of the search.
 | 
						|
					 */
 | 
						|
					check = (bool *) palloc(len * sizeof(bool));
 | 
						|
					for (k = 0; k < len; k++)
 | 
						|
					{
 | 
						|
						CPTRGM(((char *) &tmp), ptr + k);
 | 
						|
						check[k] = GETBIT(sign, HASHVAL(tmp));
 | 
						|
					}
 | 
						|
					res = trigramsMatchGraph(cache->graph, check);
 | 
						|
					pfree(check);
 | 
						|
				}
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{
 | 
						|
				/* trigram-free query must be rechecked everywhere */
 | 
						|
				res = true;
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		default:
 | 
						|
			elog(ERROR, "unrecognized strategy number: %d", strategy);
 | 
						|
			res = false;		/* keep compiler quiet */
 | 
						|
			break;
 | 
						|
	}
 | 
						|
 | 
						|
	PG_RETURN_BOOL(res);
 | 
						|
}
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_distance(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	GISTENTRY  *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
 | 
						|
	text	   *query = PG_GETARG_TEXT_P(1);
 | 
						|
	StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
 | 
						|
 | 
						|
	/* Oid		subtype = PG_GETARG_OID(3); */
 | 
						|
	TRGM	   *key = (TRGM *) DatumGetPointer(entry->key);
 | 
						|
	TRGM	   *qtrg;
 | 
						|
	float8		res;
 | 
						|
	Size		querysize = VARSIZE(query);
 | 
						|
	char	   *cache = (char *) fcinfo->flinfo->fn_extra;
 | 
						|
 | 
						|
	/*
 | 
						|
	 * Cache the generated trigrams across multiple calls with the same query.
 | 
						|
	 */
 | 
						|
	if (cache == NULL ||
 | 
						|
		VARSIZE(cache) != querysize ||
 | 
						|
		memcmp(cache, query, querysize) != 0)
 | 
						|
	{
 | 
						|
		char	   *newcache;
 | 
						|
 | 
						|
		qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ);
 | 
						|
 | 
						|
		newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
 | 
						|
									  MAXALIGN(querysize) +
 | 
						|
									  VARSIZE(qtrg));
 | 
						|
 | 
						|
		memcpy(newcache, query, querysize);
 | 
						|
		memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
 | 
						|
 | 
						|
		if (cache)
 | 
						|
			pfree(cache);
 | 
						|
		fcinfo->flinfo->fn_extra = newcache;
 | 
						|
		cache = newcache;
 | 
						|
	}
 | 
						|
 | 
						|
	qtrg = (TRGM *) (cache + MAXALIGN(querysize));
 | 
						|
 | 
						|
	switch (strategy)
 | 
						|
	{
 | 
						|
		case DistanceStrategyNumber:
 | 
						|
			if (GIST_LEAF(entry))
 | 
						|
			{					/* all leafs contains orig trgm */
 | 
						|
				res = 1.0 - cnt_sml(key, qtrg);
 | 
						|
			}
 | 
						|
			else if (ISALLTRUE(key))
 | 
						|
			{					/* all leafs contains orig trgm */
 | 
						|
				res = 0.0;
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{					/* non-leaf contains signature */
 | 
						|
				int32		count = cnt_sml_sign_common(qtrg, GETSIGN(key));
 | 
						|
				int32		len = ARRNELEM(qtrg);
 | 
						|
 | 
						|
				res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
 | 
						|
			}
 | 
						|
			break;
 | 
						|
		default:
 | 
						|
			elog(ERROR, "unrecognized strategy number: %d", strategy);
 | 
						|
			res = 0;			/* keep compiler quiet */
 | 
						|
			break;
 | 
						|
	}
 | 
						|
 | 
						|
	PG_RETURN_FLOAT8(res);
 | 
						|
}
 | 
						|
 | 
						|
static int32
 | 
						|
unionkey(BITVECP sbase, TRGM *add)
 | 
						|
{
 | 
						|
	int32		i;
 | 
						|
 | 
						|
	if (ISSIGNKEY(add))
 | 
						|
	{
 | 
						|
		BITVECP		sadd = GETSIGN(add);
 | 
						|
 | 
						|
		if (ISALLTRUE(add))
 | 
						|
			return 1;
 | 
						|
 | 
						|
		LOOPBYTE
 | 
						|
			sbase[i] |= sadd[i];
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		trgm	   *ptr = GETARR(add);
 | 
						|
		int32		tmp = 0;
 | 
						|
 | 
						|
		for (i = 0; i < ARRNELEM(add); i++)
 | 
						|
		{
 | 
						|
			CPTRGM(((char *) &tmp), ptr + i);
 | 
						|
			HASH(sbase, tmp);
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_union(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
 | 
						|
	int32		len = entryvec->n;
 | 
						|
	int		   *size = (int *) PG_GETARG_POINTER(1);
 | 
						|
	BITVEC		base;
 | 
						|
	int32		i;
 | 
						|
	int32		flag = 0;
 | 
						|
	TRGM	   *result;
 | 
						|
 | 
						|
	MemSet((void *) base, 0, sizeof(BITVEC));
 | 
						|
	for (i = 0; i < len; i++)
 | 
						|
	{
 | 
						|
		if (unionkey(base, GETENTRY(entryvec, i)))
 | 
						|
		{
 | 
						|
			flag = ALLISTRUE;
 | 
						|
			break;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	flag |= SIGNKEY;
 | 
						|
	len = CALCGTSIZE(flag, 0);
 | 
						|
	result = (TRGM *) palloc(len);
 | 
						|
	SET_VARSIZE(result, len);
 | 
						|
	result->flag = flag;
 | 
						|
	if (!ISALLTRUE(result))
 | 
						|
		memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
 | 
						|
	*size = len;
 | 
						|
 | 
						|
	PG_RETURN_POINTER(result);
 | 
						|
}
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_same(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	TRGM	   *a = (TRGM *) PG_GETARG_POINTER(0);
 | 
						|
	TRGM	   *b = (TRGM *) PG_GETARG_POINTER(1);
 | 
						|
	bool	   *result = (bool *) PG_GETARG_POINTER(2);
 | 
						|
 | 
						|
	if (ISSIGNKEY(a))
 | 
						|
	{							/* then b also ISSIGNKEY */
 | 
						|
		if (ISALLTRUE(a) && ISALLTRUE(b))
 | 
						|
			*result = true;
 | 
						|
		else if (ISALLTRUE(a))
 | 
						|
			*result = false;
 | 
						|
		else if (ISALLTRUE(b))
 | 
						|
			*result = false;
 | 
						|
		else
 | 
						|
		{
 | 
						|
			int32		i;
 | 
						|
			BITVECP		sa = GETSIGN(a),
 | 
						|
						sb = GETSIGN(b);
 | 
						|
 | 
						|
			*result = true;
 | 
						|
			LOOPBYTE
 | 
						|
			{
 | 
						|
				if (sa[i] != sb[i])
 | 
						|
				{
 | 
						|
					*result = false;
 | 
						|
					break;
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{							/* a and b ISARRKEY */
 | 
						|
		int32		lena = ARRNELEM(a),
 | 
						|
					lenb = ARRNELEM(b);
 | 
						|
 | 
						|
		if (lena != lenb)
 | 
						|
			*result = false;
 | 
						|
		else
 | 
						|
		{
 | 
						|
			trgm	   *ptra = GETARR(a),
 | 
						|
					   *ptrb = GETARR(b);
 | 
						|
			int32		i;
 | 
						|
 | 
						|
			*result = true;
 | 
						|
			for (i = 0; i < lena; i++)
 | 
						|
				if (CMPTRGM(ptra + i, ptrb + i))
 | 
						|
				{
 | 
						|
					*result = false;
 | 
						|
					break;
 | 
						|
				}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	PG_RETURN_POINTER(result);
 | 
						|
}
 | 
						|
 | 
						|
static int32
 | 
						|
sizebitvec(BITVECP sign)
 | 
						|
{
 | 
						|
	int32		size = 0,
 | 
						|
				i;
 | 
						|
 | 
						|
	LOOPBYTE
 | 
						|
		size += number_of_ones[(unsigned char) sign[i]];
 | 
						|
	return size;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
hemdistsign(BITVECP a, BITVECP b)
 | 
						|
{
 | 
						|
	int			i,
 | 
						|
				diff,
 | 
						|
				dist = 0;
 | 
						|
 | 
						|
	LOOPBYTE
 | 
						|
	{
 | 
						|
		diff = (unsigned char) (a[i] ^ b[i]);
 | 
						|
		dist += number_of_ones[diff];
 | 
						|
	}
 | 
						|
	return dist;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
hemdist(TRGM *a, TRGM *b)
 | 
						|
{
 | 
						|
	if (ISALLTRUE(a))
 | 
						|
	{
 | 
						|
		if (ISALLTRUE(b))
 | 
						|
			return 0;
 | 
						|
		else
 | 
						|
			return SIGLENBIT - sizebitvec(GETSIGN(b));
 | 
						|
	}
 | 
						|
	else if (ISALLTRUE(b))
 | 
						|
		return SIGLENBIT - sizebitvec(GETSIGN(a));
 | 
						|
 | 
						|
	return hemdistsign(GETSIGN(a), GETSIGN(b));
 | 
						|
}
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_penalty(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
 | 
						|
	GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
 | 
						|
	float	   *penalty = (float *) PG_GETARG_POINTER(2);
 | 
						|
	TRGM	   *origval = (TRGM *) DatumGetPointer(origentry->key);
 | 
						|
	TRGM	   *newval = (TRGM *) DatumGetPointer(newentry->key);
 | 
						|
	BITVECP		orig = GETSIGN(origval);
 | 
						|
 | 
						|
	*penalty = 0.0;
 | 
						|
 | 
						|
	if (ISARRKEY(newval))
 | 
						|
	{
 | 
						|
		char	   *cache = (char *) fcinfo->flinfo->fn_extra;
 | 
						|
		TRGM	   *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC)));
 | 
						|
		Size		newvalsize = VARSIZE(newval);
 | 
						|
		BITVECP		sign;
 | 
						|
 | 
						|
		/*
 | 
						|
		 * Cache the sign data across multiple calls with the same newval.
 | 
						|
		 */
 | 
						|
		if (cache == NULL ||
 | 
						|
			VARSIZE(cachedVal) != newvalsize ||
 | 
						|
			memcmp(cachedVal, newval, newvalsize) != 0)
 | 
						|
		{
 | 
						|
			char	   *newcache;
 | 
						|
 | 
						|
			newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
 | 
						|
										  MAXALIGN(sizeof(BITVEC)) +
 | 
						|
										  newvalsize);
 | 
						|
 | 
						|
			makesign((BITVECP) newcache, newval);
 | 
						|
 | 
						|
			cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC)));
 | 
						|
			memcpy(cachedVal, newval, newvalsize);
 | 
						|
 | 
						|
			if (cache)
 | 
						|
				pfree(cache);
 | 
						|
			fcinfo->flinfo->fn_extra = newcache;
 | 
						|
			cache = newcache;
 | 
						|
		}
 | 
						|
 | 
						|
		sign = (BITVECP) cache;
 | 
						|
 | 
						|
		if (ISALLTRUE(origval))
 | 
						|
			*penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
 | 
						|
		else
 | 
						|
			*penalty = hemdistsign(sign, orig);
 | 
						|
	}
 | 
						|
	else
 | 
						|
		*penalty = hemdist(origval, newval);
 | 
						|
	PG_RETURN_POINTER(penalty);
 | 
						|
}
 | 
						|
 | 
						|
typedef struct
 | 
						|
{
 | 
						|
	bool		allistrue;
 | 
						|
	BITVEC		sign;
 | 
						|
} CACHESIGN;
 | 
						|
 | 
						|
static void
 | 
						|
fillcache(CACHESIGN *item, TRGM *key)
 | 
						|
{
 | 
						|
	item->allistrue = false;
 | 
						|
	if (ISARRKEY(key))
 | 
						|
		makesign(item->sign, key);
 | 
						|
	else if (ISALLTRUE(key))
 | 
						|
		item->allistrue = true;
 | 
						|
	else
 | 
						|
		memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
 | 
						|
}
 | 
						|
 | 
						|
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
 | 
						|
typedef struct
 | 
						|
{
 | 
						|
	OffsetNumber pos;
 | 
						|
	int32		cost;
 | 
						|
} SPLITCOST;
 | 
						|
 | 
						|
static int
 | 
						|
comparecost(const void *a, const void *b)
 | 
						|
{
 | 
						|
	if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
 | 
						|
		return 0;
 | 
						|
	else
 | 
						|
		return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static int
 | 
						|
hemdistcache(CACHESIGN *a, CACHESIGN *b)
 | 
						|
{
 | 
						|
	if (a->allistrue)
 | 
						|
	{
 | 
						|
		if (b->allistrue)
 | 
						|
			return 0;
 | 
						|
		else
 | 
						|
			return SIGLENBIT - sizebitvec(b->sign);
 | 
						|
	}
 | 
						|
	else if (b->allistrue)
 | 
						|
		return SIGLENBIT - sizebitvec(a->sign);
 | 
						|
 | 
						|
	return hemdistsign(a->sign, b->sign);
 | 
						|
}
 | 
						|
 | 
						|
Datum
 | 
						|
gtrgm_picksplit(PG_FUNCTION_ARGS)
 | 
						|
{
 | 
						|
	GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
 | 
						|
	OffsetNumber maxoff = entryvec->n - 2;
 | 
						|
	GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
 | 
						|
	OffsetNumber k,
 | 
						|
				j;
 | 
						|
	TRGM	   *datum_l,
 | 
						|
			   *datum_r;
 | 
						|
	BITVECP		union_l,
 | 
						|
				union_r;
 | 
						|
	int32		size_alpha,
 | 
						|
				size_beta;
 | 
						|
	int32		size_waste,
 | 
						|
				waste = -1;
 | 
						|
	int32		nbytes;
 | 
						|
	OffsetNumber seed_1 = 0,
 | 
						|
				seed_2 = 0;
 | 
						|
	OffsetNumber *left,
 | 
						|
			   *right;
 | 
						|
	BITVECP		ptr;
 | 
						|
	int			i;
 | 
						|
	CACHESIGN  *cache;
 | 
						|
	SPLITCOST  *costvector;
 | 
						|
 | 
						|
	/* cache the sign data for each existing item */
 | 
						|
	cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
 | 
						|
	for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
 | 
						|
		fillcache(&cache[k], GETENTRY(entryvec, k));
 | 
						|
 | 
						|
	/* now find the two furthest-apart items */
 | 
						|
	for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
 | 
						|
	{
 | 
						|
		for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
 | 
						|
		{
 | 
						|
			size_waste = hemdistcache(&(cache[j]), &(cache[k]));
 | 
						|
			if (size_waste > waste)
 | 
						|
			{
 | 
						|
				waste = size_waste;
 | 
						|
				seed_1 = k;
 | 
						|
				seed_2 = j;
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	/* just in case we didn't make a selection ... */
 | 
						|
	if (seed_1 == 0 || seed_2 == 0)
 | 
						|
	{
 | 
						|
		seed_1 = 1;
 | 
						|
		seed_2 = 2;
 | 
						|
	}
 | 
						|
 | 
						|
	/* initialize the result vectors */
 | 
						|
	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
 | 
						|
	v->spl_left = left = (OffsetNumber *) palloc(nbytes);
 | 
						|
	v->spl_right = right = (OffsetNumber *) palloc(nbytes);
 | 
						|
	v->spl_nleft = 0;
 | 
						|
	v->spl_nright = 0;
 | 
						|
 | 
						|
	/* form initial .. */
 | 
						|
	if (cache[seed_1].allistrue)
 | 
						|
	{
 | 
						|
		datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
 | 
						|
		SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
 | 
						|
		datum_l->flag = SIGNKEY | ALLISTRUE;
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
 | 
						|
		SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
 | 
						|
		datum_l->flag = SIGNKEY;
 | 
						|
		memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
 | 
						|
	}
 | 
						|
	if (cache[seed_2].allistrue)
 | 
						|
	{
 | 
						|
		datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
 | 
						|
		SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
 | 
						|
		datum_r->flag = SIGNKEY | ALLISTRUE;
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
 | 
						|
		SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
 | 
						|
		datum_r->flag = SIGNKEY;
 | 
						|
		memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
 | 
						|
	}
 | 
						|
 | 
						|
	union_l = GETSIGN(datum_l);
 | 
						|
	union_r = GETSIGN(datum_r);
 | 
						|
	maxoff = OffsetNumberNext(maxoff);
 | 
						|
	fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
 | 
						|
	/* sort before ... */
 | 
						|
	costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
 | 
						|
	for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
 | 
						|
	{
 | 
						|
		costvector[j - 1].pos = j;
 | 
						|
		size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
 | 
						|
		size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
 | 
						|
		costvector[j - 1].cost = abs(size_alpha - size_beta);
 | 
						|
	}
 | 
						|
	qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
 | 
						|
 | 
						|
	for (k = 0; k < maxoff; k++)
 | 
						|
	{
 | 
						|
		j = costvector[k].pos;
 | 
						|
		if (j == seed_1)
 | 
						|
		{
 | 
						|
			*left++ = j;
 | 
						|
			v->spl_nleft++;
 | 
						|
			continue;
 | 
						|
		}
 | 
						|
		else if (j == seed_2)
 | 
						|
		{
 | 
						|
			*right++ = j;
 | 
						|
			v->spl_nright++;
 | 
						|
			continue;
 | 
						|
		}
 | 
						|
 | 
						|
		if (ISALLTRUE(datum_l) || cache[j].allistrue)
 | 
						|
		{
 | 
						|
			if (ISALLTRUE(datum_l) && cache[j].allistrue)
 | 
						|
				size_alpha = 0;
 | 
						|
			else
 | 
						|
				size_alpha = SIGLENBIT - sizebitvec(
 | 
						|
													(cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
 | 
						|
					);
 | 
						|
		}
 | 
						|
		else
 | 
						|
			size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
 | 
						|
 | 
						|
		if (ISALLTRUE(datum_r) || cache[j].allistrue)
 | 
						|
		{
 | 
						|
			if (ISALLTRUE(datum_r) && cache[j].allistrue)
 | 
						|
				size_beta = 0;
 | 
						|
			else
 | 
						|
				size_beta = SIGLENBIT - sizebitvec(
 | 
						|
												   (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
 | 
						|
					);
 | 
						|
		}
 | 
						|
		else
 | 
						|
			size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
 | 
						|
 | 
						|
		if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
 | 
						|
		{
 | 
						|
			if (ISALLTRUE(datum_l) || cache[j].allistrue)
 | 
						|
			{
 | 
						|
				if (!ISALLTRUE(datum_l))
 | 
						|
					MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{
 | 
						|
				ptr = cache[j].sign;
 | 
						|
				LOOPBYTE
 | 
						|
					union_l[i] |= ptr[i];
 | 
						|
			}
 | 
						|
			*left++ = j;
 | 
						|
			v->spl_nleft++;
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			if (ISALLTRUE(datum_r) || cache[j].allistrue)
 | 
						|
			{
 | 
						|
				if (!ISALLTRUE(datum_r))
 | 
						|
					MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
 | 
						|
			}
 | 
						|
			else
 | 
						|
			{
 | 
						|
				ptr = cache[j].sign;
 | 
						|
				LOOPBYTE
 | 
						|
					union_r[i] |= ptr[i];
 | 
						|
			}
 | 
						|
			*right++ = j;
 | 
						|
			v->spl_nright++;
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	*right = *left = FirstOffsetNumber;
 | 
						|
	v->spl_ldatum = PointerGetDatum(datum_l);
 | 
						|
	v->spl_rdatum = PointerGetDatum(datum_r);
 | 
						|
 | 
						|
	PG_RETURN_POINTER(v);
 | 
						|
}
 |