mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-31 10:30:33 +03:00 
			
		
		
		
	This new header contains all the variable-length data types support (TOAST support) from postgres.h, which isn't needed by large parts of the backend code. Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://www.postgresql.org/message-id/flat/ddcce239-0f29-6e62-4b47-1f8ca742addf%40enterprisedb.com
		
			
				
	
	
		
			361 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			361 lines
		
	
	
		
			8.8 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * contrib/pg_trgm/trgm_gin.c
 | |
|  */
 | |
| #include "postgres.h"
 | |
| 
 | |
| #include "access/gin.h"
 | |
| #include "access/stratnum.h"
 | |
| #include "fmgr.h"
 | |
| #include "trgm.h"
 | |
| #include "varatt.h"
 | |
| 
 | |
| PG_FUNCTION_INFO_V1(gin_extract_trgm);
 | |
| PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
 | |
| PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
 | |
| PG_FUNCTION_INFO_V1(gin_trgm_consistent);
 | |
| PG_FUNCTION_INFO_V1(gin_trgm_triconsistent);
 | |
| 
 | |
| /*
 | |
|  * This function can only be called if a pre-9.1 version of the GIN operator
 | |
|  * class definition is present in the catalogs (probably as a consequence
 | |
|  * of upgrade-in-place).  Cope.
 | |
|  */
 | |
| Datum
 | |
| gin_extract_trgm(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	if (PG_NARGS() == 3)
 | |
| 		return gin_extract_value_trgm(fcinfo);
 | |
| 	if (PG_NARGS() == 7)
 | |
| 		return gin_extract_query_trgm(fcinfo);
 | |
| 	elog(ERROR, "unexpected number of arguments to gin_extract_trgm");
 | |
| 	PG_RETURN_NULL();
 | |
| }
 | |
| 
 | |
| Datum
 | |
| gin_extract_value_trgm(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
 | |
| 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 | |
| 	Datum	   *entries = NULL;
 | |
| 	TRGM	   *trg;
 | |
| 	int32		trglen;
 | |
| 
 | |
| 	*nentries = 0;
 | |
| 
 | |
| 	trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
 | |
| 	trglen = ARRNELEM(trg);
 | |
| 
 | |
| 	if (trglen > 0)
 | |
| 	{
 | |
| 		trgm	   *ptr;
 | |
| 		int32		i;
 | |
| 
 | |
| 		*nentries = trglen;
 | |
| 		entries = (Datum *) palloc(sizeof(Datum) * trglen);
 | |
| 
 | |
| 		ptr = GETARR(trg);
 | |
| 		for (i = 0; i < trglen; i++)
 | |
| 		{
 | |
| 			int32		item = trgm2int(ptr);
 | |
| 
 | |
| 			entries[i] = Int32GetDatum(item);
 | |
| 			ptr++;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	PG_RETURN_POINTER(entries);
 | |
| }
 | |
| 
 | |
| Datum
 | |
| gin_extract_query_trgm(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	text	   *val = (text *) PG_GETARG_TEXT_PP(0);
 | |
| 	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
 | |
| 	StrategyNumber strategy = PG_GETARG_UINT16(2);
 | |
| 
 | |
| 	/* bool   **pmatch = (bool **) PG_GETARG_POINTER(3); */
 | |
| 	Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
 | |
| 
 | |
| 	/* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
 | |
| 	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
 | |
| 	Datum	   *entries = NULL;
 | |
| 	TRGM	   *trg;
 | |
| 	int32		trglen;
 | |
| 	trgm	   *ptr;
 | |
| 	TrgmPackedGraph *graph;
 | |
| 	int32		i;
 | |
| 
 | |
| 	switch (strategy)
 | |
| 	{
 | |
| 		case SimilarityStrategyNumber:
 | |
| 		case WordSimilarityStrategyNumber:
 | |
| 		case StrictWordSimilarityStrategyNumber:
 | |
| 		case EqualStrategyNumber:
 | |
| 			trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
 | |
| 			break;
 | |
| 		case ILikeStrategyNumber:
 | |
| #ifndef IGNORECASE
 | |
| 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
 | |
| #endif
 | |
| 			/* FALL THRU */
 | |
| 		case LikeStrategyNumber:
 | |
| 
 | |
| 			/*
 | |
| 			 * For wildcard search we extract all the trigrams that every
 | |
| 			 * potentially-matching string must include.
 | |
| 			 */
 | |
| 			trg = generate_wildcard_trgm(VARDATA_ANY(val),
 | |
| 										 VARSIZE_ANY_EXHDR(val));
 | |
| 			break;
 | |
| 		case RegExpICaseStrategyNumber:
 | |
| #ifndef IGNORECASE
 | |
| 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
 | |
| #endif
 | |
| 			/* FALL THRU */
 | |
| 		case RegExpStrategyNumber:
 | |
| 			trg = createTrgmNFA(val, PG_GET_COLLATION(),
 | |
| 								&graph, CurrentMemoryContext);
 | |
| 			if (trg && ARRNELEM(trg) > 0)
 | |
| 			{
 | |
| 				/*
 | |
| 				 * Successful regex processing: store NFA-like graph as
 | |
| 				 * extra_data.  GIN API requires an array of nentries
 | |
| 				 * Pointers, but we just put the same value in each element.
 | |
| 				 */
 | |
| 				trglen = ARRNELEM(trg);
 | |
| 				*extra_data = (Pointer *) palloc(sizeof(Pointer) * trglen);
 | |
| 				for (i = 0; i < trglen; i++)
 | |
| 					(*extra_data)[i] = (Pointer) graph;
 | |
| 			}
 | |
| 			else
 | |
| 			{
 | |
| 				/* No result: have to do full index scan. */
 | |
| 				*nentries = 0;
 | |
| 				*searchMode = GIN_SEARCH_MODE_ALL;
 | |
| 				PG_RETURN_POINTER(entries);
 | |
| 			}
 | |
| 			break;
 | |
| 		default:
 | |
| 			elog(ERROR, "unrecognized strategy number: %d", strategy);
 | |
| 			trg = NULL;			/* keep compiler quiet */
 | |
| 			break;
 | |
| 	}
 | |
| 
 | |
| 	trglen = ARRNELEM(trg);
 | |
| 	*nentries = trglen;
 | |
| 
 | |
| 	if (trglen > 0)
 | |
| 	{
 | |
| 		entries = (Datum *) palloc(sizeof(Datum) * trglen);
 | |
| 		ptr = GETARR(trg);
 | |
| 		for (i = 0; i < trglen; i++)
 | |
| 		{
 | |
| 			int32		item = trgm2int(ptr);
 | |
| 
 | |
| 			entries[i] = Int32GetDatum(item);
 | |
| 			ptr++;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * If no trigram was extracted then we have to scan all the index.
 | |
| 	 */
 | |
| 	if (trglen == 0)
 | |
| 		*searchMode = GIN_SEARCH_MODE_ALL;
 | |
| 
 | |
| 	PG_RETURN_POINTER(entries);
 | |
| }
 | |
| 
 | |
| Datum
 | |
| gin_trgm_consistent(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	bool	   *check = (bool *) PG_GETARG_POINTER(0);
 | |
| 	StrategyNumber strategy = PG_GETARG_UINT16(1);
 | |
| 
 | |
| 	/* text    *query = PG_GETARG_TEXT_PP(2); */
 | |
| 	int32		nkeys = PG_GETARG_INT32(3);
 | |
| 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
 | |
| 	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
 | |
| 	bool		res;
 | |
| 	int32		i,
 | |
| 				ntrue;
 | |
| 	double		nlimit;
 | |
| 
 | |
| 	/* All cases served by this function are inexact */
 | |
| 	*recheck = true;
 | |
| 
 | |
| 	switch (strategy)
 | |
| 	{
 | |
| 		case SimilarityStrategyNumber:
 | |
| 		case WordSimilarityStrategyNumber:
 | |
| 		case StrictWordSimilarityStrategyNumber:
 | |
| 			nlimit = index_strategy_get_limit(strategy);
 | |
| 
 | |
| 			/* Count the matches */
 | |
| 			ntrue = 0;
 | |
| 			for (i = 0; i < nkeys; i++)
 | |
| 			{
 | |
| 				if (check[i])
 | |
| 					ntrue++;
 | |
| 			}
 | |
| 
 | |
| 			/*--------------------
 | |
| 			 * If DIVUNION is defined then similarity formula is:
 | |
| 			 * c / (len1 + len2 - c)
 | |
| 			 * where c is number of common trigrams and it stands as ntrue in
 | |
| 			 * this code.  Here we don't know value of len2 but we can assume
 | |
| 			 * that c (ntrue) is a lower bound of len2, so upper bound of
 | |
| 			 * similarity is:
 | |
| 			 * c / (len1 + c - c)  => c / len1
 | |
| 			 * If DIVUNION is not defined then similarity formula is:
 | |
| 			 * c / max(len1, len2)
 | |
| 			 * And again, c (ntrue) is a lower bound of len2, but c <= len1
 | |
| 			 * just by definition and, consequently, upper bound of
 | |
| 			 * similarity is just c / len1.
 | |
| 			 * So, independently on DIVUNION the upper bound formula is the same.
 | |
| 			 */
 | |
| 			res = (nkeys == 0) ? false :
 | |
| 				(((((float4) ntrue) / ((float4) nkeys))) >= nlimit);
 | |
| 			break;
 | |
| 		case ILikeStrategyNumber:
 | |
| #ifndef IGNORECASE
 | |
| 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
 | |
| #endif
 | |
| 			/* FALL THRU */
 | |
| 		case LikeStrategyNumber:
 | |
| 		case EqualStrategyNumber:
 | |
| 			/* Check if all extracted trigrams are presented. */
 | |
| 			res = true;
 | |
| 			for (i = 0; i < nkeys; i++)
 | |
| 			{
 | |
| 				if (!check[i])
 | |
| 				{
 | |
| 					res = false;
 | |
| 					break;
 | |
| 				}
 | |
| 			}
 | |
| 			break;
 | |
| 		case RegExpICaseStrategyNumber:
 | |
| #ifndef IGNORECASE
 | |
| 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
 | |
| #endif
 | |
| 			/* FALL THRU */
 | |
| 		case RegExpStrategyNumber:
 | |
| 			if (nkeys < 1)
 | |
| 			{
 | |
| 				/* Regex processing gave no result: do full index scan */
 | |
| 				res = true;
 | |
| 			}
 | |
| 			else
 | |
| 				res = trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
 | |
| 										 check);
 | |
| 			break;
 | |
| 		default:
 | |
| 			elog(ERROR, "unrecognized strategy number: %d", strategy);
 | |
| 			res = false;		/* keep compiler quiet */
 | |
| 			break;
 | |
| 	}
 | |
| 
 | |
| 	PG_RETURN_BOOL(res);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * In all cases, GIN_TRUE is at least as favorable to inclusion as
 | |
|  * GIN_MAYBE. If no better option is available, simply treat
 | |
|  * GIN_MAYBE as if it were GIN_TRUE and apply the same test as the binary
 | |
|  * consistent function.
 | |
|  */
 | |
| Datum
 | |
| gin_trgm_triconsistent(PG_FUNCTION_ARGS)
 | |
| {
 | |
| 	GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
 | |
| 	StrategyNumber strategy = PG_GETARG_UINT16(1);
 | |
| 
 | |
| 	/* text    *query = PG_GETARG_TEXT_PP(2); */
 | |
| 	int32		nkeys = PG_GETARG_INT32(3);
 | |
| 	Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
 | |
| 	GinTernaryValue res = GIN_MAYBE;
 | |
| 	int32		i,
 | |
| 				ntrue;
 | |
| 	bool	   *boolcheck;
 | |
| 	double		nlimit;
 | |
| 
 | |
| 	switch (strategy)
 | |
| 	{
 | |
| 		case SimilarityStrategyNumber:
 | |
| 		case WordSimilarityStrategyNumber:
 | |
| 		case StrictWordSimilarityStrategyNumber:
 | |
| 			nlimit = index_strategy_get_limit(strategy);
 | |
| 
 | |
| 			/* Count the matches */
 | |
| 			ntrue = 0;
 | |
| 			for (i = 0; i < nkeys; i++)
 | |
| 			{
 | |
| 				if (check[i] != GIN_FALSE)
 | |
| 					ntrue++;
 | |
| 			}
 | |
| 
 | |
| 			/*
 | |
| 			 * See comment in gin_trgm_consistent() about * upper bound
 | |
| 			 * formula
 | |
| 			 */
 | |
| 			res = (nkeys == 0)
 | |
| 				? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= nlimit)
 | |
| 							   ? GIN_MAYBE : GIN_FALSE);
 | |
| 			break;
 | |
| 		case ILikeStrategyNumber:
 | |
| #ifndef IGNORECASE
 | |
| 			elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
 | |
| #endif
 | |
| 			/* FALL THRU */
 | |
| 		case LikeStrategyNumber:
 | |
| 		case EqualStrategyNumber:
 | |
| 			/* Check if all extracted trigrams are presented. */
 | |
| 			res = GIN_MAYBE;
 | |
| 			for (i = 0; i < nkeys; i++)
 | |
| 			{
 | |
| 				if (check[i] == GIN_FALSE)
 | |
| 				{
 | |
| 					res = GIN_FALSE;
 | |
| 					break;
 | |
| 				}
 | |
| 			}
 | |
| 			break;
 | |
| 		case RegExpICaseStrategyNumber:
 | |
| #ifndef IGNORECASE
 | |
| 			elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
 | |
| #endif
 | |
| 			/* FALL THRU */
 | |
| 		case RegExpStrategyNumber:
 | |
| 			if (nkeys < 1)
 | |
| 			{
 | |
| 				/* Regex processing gave no result: do full index scan */
 | |
| 				res = GIN_MAYBE;
 | |
| 			}
 | |
| 			else
 | |
| 			{
 | |
| 				/*
 | |
| 				 * As trigramsMatchGraph implements a monotonic boolean
 | |
| 				 * function, promoting all GIN_MAYBE keys to GIN_TRUE will
 | |
| 				 * give a conservative result.
 | |
| 				 */
 | |
| 				boolcheck = (bool *) palloc(sizeof(bool) * nkeys);
 | |
| 				for (i = 0; i < nkeys; i++)
 | |
| 					boolcheck[i] = (check[i] != GIN_FALSE);
 | |
| 				if (!trigramsMatchGraph((TrgmPackedGraph *) extra_data[0],
 | |
| 										boolcheck))
 | |
| 					res = GIN_FALSE;
 | |
| 				pfree(boolcheck);
 | |
| 			}
 | |
| 			break;
 | |
| 		default:
 | |
| 			elog(ERROR, "unrecognized strategy number: %d", strategy);
 | |
| 			res = GIN_FALSE;	/* keep compiler quiet */
 | |
| 			break;
 | |
| 	}
 | |
| 
 | |
| 	/* All cases served by this function are inexact */
 | |
| 	Assert(res != GIN_TRUE);
 | |
| 	PG_RETURN_GIN_TERNARY_VALUE(res);
 | |
| }
 |