1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-09 06:21:09 +03:00

Add prefix operator for TEXT type.

The prefix operator, together with SP-GiST indexes, can be used as an
alternative to LIKE 'word%' queries, and it does not have the string/prefix
length limitation that B-Tree has.

Bump catalog version

Author: Ildus Kurbangaliev with some editorization by me
Review by: Arthur Zakirov, Alexander Korotkov, and me
Discussion: https://www.postgresql.org/message-id/flat/20180202180327.222b04b3@wp.localdomain
This commit is contained in:
Teodor Sigaev
2018-04-03 19:46:45 +03:00
parent 4ab2999815
commit 710d90da1f
14 changed files with 189 additions and 9 deletions

View File

@@ -67,6 +67,20 @@
*/
#define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
/*
 * The strategy number of a collation-aware operator on text is the
 * corresponding btree strategy number plus SPG_STRATEGY_ADDITION (10).
 *
 * Current collation-aware strategies and their corresponding btree strategies:
 * 11 BTLessStrategyNumber
 * 12 BTLessEqualStrategyNumber
 * 14 BTGreaterEqualStrategyNumber
 * 15 BTGreaterStrategyNumber
 *
 * SPG_IS_COLLATION_AWARE_STRATEGY(s) is true for every strategy number
 * greater than SPG_STRATEGY_ADDITION except RTPrefixStrategyNumber, which is
 * explicitly excluded.  NOTE(review): presumably RTPrefixStrategyNumber is
 * itself numerically above the offset while not being a "btree + 10"
 * strategy -- confirm against its definition.
 *
 * The macro evaluates its argument twice, so do not pass an expression with
 * side effects.
 */
#define SPG_STRATEGY_ADDITION (10)
#define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
&& (s) != RTPrefixStrategyNumber)
/* Struct for sorting values in picksplit */
typedef struct spgNodePtr
{
@@ -496,10 +510,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
* well end with a partial multibyte character, so that applying
* any encoding-sensitive test to it would be risky anyhow.)
*/
if (strategy > 10)
if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
{
if (collate_is_c)
strategy -= 10;
strategy -= SPG_STRATEGY_ADDITION;
else
continue;
}
@@ -526,6 +540,10 @@ spg_text_inner_consistent(PG_FUNCTION_ARGS)
if (r < 0)
res = false;
break;
case RTPrefixStrategyNumber:
if (r != 0)
res = false;
break;
default:
elog(ERROR, "unrecognized strategy number: %d",
in->scankeys[j].sk_strategy);
@@ -605,10 +623,27 @@ spg_text_leaf_consistent(PG_FUNCTION_ARGS)
int queryLen = VARSIZE_ANY_EXHDR(query);
int r;
if (strategy > 10)
if (strategy == RTPrefixStrategyNumber)
{
/*
* If level >= length of query, then reconstrValue begins with the
* query (prefix) string, so we don't need to check it again.
*/
res = (level >= queryLen) ||
DatumGetBool(DirectFunctionCall2(text_starts_with,
out->leafValue, PointerGetDatum(query)));
if (!res) /* no need to consider remaining conditions */
break;
continue;
}
if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
{
/* Collation-aware comparison */
strategy -= 10;
strategy -= SPG_STRATEGY_ADDITION;
/* If asserts enabled, verify encoding of reconstructed string */
Assert(pg_verifymbstr(fullValue, fullLen, false));

View File

@@ -1488,6 +1488,16 @@ likesel(PG_FUNCTION_ARGS)
}
/*
 * prefixsel - Restriction selectivity of the prefix operator.
 *
 * Delegates to patternsel() with Pattern_Type_Prefix and negate = false,
 * and returns its result as a float8 Datum.
 */
Datum
prefixsel(PG_FUNCTION_ARGS)
{
	double		selectivity;

	selectivity = patternsel(fcinfo, Pattern_Type_Prefix, false);

	PG_RETURN_FLOAT8(selectivity);
}
/*
*
* iclikesel - Selectivity of ILIKE pattern match.
*/
Datum
@@ -2906,6 +2916,15 @@ likejoinsel(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
}
/*
 * prefixjoinsel - Join selectivity of the prefix operator.
 *
 * Delegates to patternjoinsel() with Pattern_Type_Prefix and negate = false,
 * and returns its result as a float8 Datum.
 */
Datum
prefixjoinsel(PG_FUNCTION_ARGS)
{
	double		selectivity;

	selectivity = patternjoinsel(fcinfo, Pattern_Type_Prefix, false);

	PG_RETURN_FLOAT8(selectivity);
}
/*
* iclikejoinsel - Join selectivity of ILIKE pattern match.
*/
@@ -5947,6 +5966,20 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
result = regex_fixed_prefix(patt, true, collation,
prefix, rest_selec);
break;
case Pattern_Type_Prefix:
/* Prefix type work is trivial. */
result = Pattern_Prefix_Partial;
*rest_selec = 1.0; /* all */
*prefix = makeConst(patt->consttype,
patt->consttypmod,
patt->constcollid,
patt->constlen,
datumCopy(patt->constvalue,
patt->constbyval,
patt->constlen),
patt->constisnull,
patt->constbyval);
break;
default:
elog(ERROR, "unrecognized ptype: %d", (int) ptype);
result = Pattern_Prefix_None; /* keep compiler quiet */

View File

@@ -1761,6 +1761,34 @@ text_ge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
Datum
text_starts_with(PG_FUNCTION_ARGS)
{
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
bool result;
Size len1,
len2;
len1 = toast_raw_datum_size(arg1);
len2 = toast_raw_datum_size(arg2);
if (len2 > len1)
result = false;
else
{
text *targ1 = DatumGetTextPP(arg1);
text *targ2 = DatumGetTextPP(arg2);
result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
VARSIZE_ANY_EXHDR(targ2)) == 0);
PG_FREE_IF_COPY(targ1, 0);
PG_FREE_IF_COPY(targ2, 1);
}
PG_RETURN_BOOL(result);
}
Datum
bttextcmp(PG_FUNCTION_ARGS)
{