mirror of
https://github.com/postgres/postgres.git
synced 2025-06-16 06:01:02 +03:00
Allow extensions to generate lossy index conditions.
For a long time, indxpath.c has had the ability to extract derived (lossy) index conditions from certain operators such as LIKE. For just as long, it's been obvious that we really ought to make that capability available to extensions. This commit finally accomplishes that, by adding another API for planner support functions that lets them create derived index conditions for their functions. As proof of concept, the hardwired "special index operator" code formerly present in indxpath.c is pushed out to planner support functions attached to LIKE and other relevant operators. A weak spot in this design is that an extension needs to know OIDs for the operators, datatypes, and opfamilies involved in the transformation it wants to make. The core-code prototypes use hard-wired OID references but extensions don't have that option for their own operators etc. It's usually possible to look up the required info, but that may be slow and inconvenient. However, improving that situation is a separate task. I want to do some additional refactorization around selfuncs.c, but that also seems like a separate task. Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us
This commit is contained in:
313
src/backend/utils/adt/like_support.c
Normal file
313
src/backend/utils/adt/like_support.c
Normal file
@@ -0,0 +1,313 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* like_support.c
|
||||
* Planner support functions for LIKE, regex, and related operators.
|
||||
*
|
||||
* These routines handle special optimization of operators that can be
|
||||
* used with index scans even though they are not known to the executor's
|
||||
* indexscan machinery. The key idea is that these operators allow us
|
||||
* to derive approximate indexscan qual clauses, such that any tuples
|
||||
* that pass the operator clause itself must also satisfy the simpler
|
||||
* indexscan condition(s). Then we can use the indexscan machinery
|
||||
* to avoid scanning as much of the table as we'd otherwise have to,
|
||||
* while applying the original operator as a qpqual condition to ensure
|
||||
* we deliver only the tuples we want. (In essence, we're using a regular
|
||||
* index as if it were a lossy index.)
|
||||
*
|
||||
* An example of what we're doing is
|
||||
* textfield LIKE 'abc%def'
|
||||
* from which we can generate the indexscannable conditions
|
||||
* textfield >= 'abc' AND textfield < 'abd'
|
||||
* which allow efficient scanning of an index on textfield.
|
||||
* (In reality, character set and collation issues make the transformation
|
||||
* from LIKE to indexscan limits rather harder than one might think ...
|
||||
* but that's the basic idea.)
|
||||
*
|
||||
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/backend/utils/adt/like_support.c
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/stratnum.h"
|
||||
#include "catalog/pg_opfamily.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "nodes/supportnodes.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
#include "utils/pg_locale.h"
|
||||
#include "utils/selfuncs.h"
|
||||
|
||||
|
||||
static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
|
||||
static List *match_pattern_prefix(Node *leftop,
|
||||
Node *rightop,
|
||||
Pattern_Type ptype,
|
||||
Oid expr_coll,
|
||||
Oid opfamily,
|
||||
Oid indexcollation);
|
||||
|
||||
|
||||
/*
|
||||
* Planner support functions for LIKE, regex, and related operators
|
||||
*/
|
||||
Datum
|
||||
textlike_support(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||||
|
||||
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
|
||||
}
|
||||
|
||||
Datum
|
||||
texticlike_support(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||||
|
||||
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
|
||||
}
|
||||
|
||||
Datum
|
||||
textregexeq_support(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||||
|
||||
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
|
||||
}
|
||||
|
||||
Datum
|
||||
texticregexeq_support(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||||
|
||||
PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
|
||||
}
|
||||
|
||||
/* Common code for the above */
|
||||
static Node *
|
||||
like_regex_support(Node *rawreq, Pattern_Type ptype)
|
||||
{
|
||||
Node *ret = NULL;
|
||||
|
||||
if (IsA(rawreq, SupportRequestIndexCondition))
|
||||
{
|
||||
/* Try to convert operator/function call to index conditions */
|
||||
SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
|
||||
|
||||
/*
|
||||
* Currently we have no "reverse" match operators with the pattern on
|
||||
* the left, so we only need consider cases with the indexkey on the
|
||||
* left.
|
||||
*/
|
||||
if (req->indexarg != 0)
|
||||
return NULL;
|
||||
|
||||
if (is_opclause(req->node))
|
||||
{
|
||||
OpExpr *clause = (OpExpr *) req->node;
|
||||
|
||||
Assert(list_length(clause->args) == 2);
|
||||
ret = (Node *)
|
||||
match_pattern_prefix((Node *) linitial(clause->args),
|
||||
(Node *) lsecond(clause->args),
|
||||
ptype,
|
||||
clause->inputcollid,
|
||||
req->opfamily,
|
||||
req->indexcollation);
|
||||
}
|
||||
else if (is_funcclause(req->node)) /* be paranoid */
|
||||
{
|
||||
FuncExpr *clause = (FuncExpr *) req->node;
|
||||
|
||||
Assert(list_length(clause->args) == 2);
|
||||
ret = (Node *)
|
||||
match_pattern_prefix((Node *) linitial(clause->args),
|
||||
(Node *) lsecond(clause->args),
|
||||
ptype,
|
||||
clause->inputcollid,
|
||||
req->opfamily,
|
||||
req->indexcollation);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* match_pattern_prefix
|
||||
* Try to generate an indexqual for a LIKE or regex operator.
|
||||
*/
|
||||
static List *
|
||||
match_pattern_prefix(Node *leftop,
|
||||
Node *rightop,
|
||||
Pattern_Type ptype,
|
||||
Oid expr_coll,
|
||||
Oid opfamily,
|
||||
Oid indexcollation)
|
||||
{
|
||||
List *result;
|
||||
Const *patt;
|
||||
Const *prefix;
|
||||
Pattern_Prefix_Status pstatus;
|
||||
Oid ldatatype;
|
||||
Oid rdatatype;
|
||||
Oid oproid;
|
||||
Expr *expr;
|
||||
FmgrInfo ltproc;
|
||||
Const *greaterstr;
|
||||
|
||||
/*
|
||||
* Can't do anything with a non-constant or NULL pattern argument.
|
||||
*
|
||||
* Note that since we restrict ourselves to cases with a hard constant on
|
||||
* the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
|
||||
* about verifying that.
|
||||
*/
|
||||
if (!IsA(rightop, Const) ||
|
||||
((Const *) rightop)->constisnull)
|
||||
return NIL;
|
||||
patt = (Const *) rightop;
|
||||
|
||||
/*
|
||||
* Try to extract a fixed prefix from the pattern.
|
||||
*/
|
||||
pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
|
||||
&prefix, NULL);
|
||||
|
||||
/* fail if no fixed prefix */
|
||||
if (pstatus == Pattern_Prefix_None)
|
||||
return NIL;
|
||||
|
||||
/*
|
||||
* Must also check that index's opfamily supports the operators we will
|
||||
* want to apply. (A hash index, for example, will not support ">=".)
|
||||
* Currently, only btree and spgist support the operators we need.
|
||||
*
|
||||
* Note: actually, in the Pattern_Prefix_Exact case, we only need "=" so a
|
||||
* hash index would work. Currently it doesn't seem worth checking for
|
||||
* that, however.
|
||||
*
|
||||
* We insist on the opfamily being one of the specific ones we expect,
|
||||
* else we'd do the wrong thing if someone were to make a reverse-sort
|
||||
* opfamily with the same operators.
|
||||
*
|
||||
* The non-pattern opclasses will not sort the way we need in most non-C
|
||||
* locales. We can use such an index anyway for an exact match (simple
|
||||
* equality), but not for prefix-match cases. Note that here we are
|
||||
* looking at the index's collation, not the expression's collation --
|
||||
* this test is *not* dependent on the LIKE/regex operator's collation.
|
||||
*
|
||||
* While we're at it, identify the type the comparison constant(s) should
|
||||
* have, based on the opfamily.
|
||||
*/
|
||||
switch (opfamily)
|
||||
{
|
||||
case TEXT_BTREE_FAM_OID:
|
||||
if (!(pstatus == Pattern_Prefix_Exact ||
|
||||
lc_collate_is_c(indexcollation)))
|
||||
return NIL;
|
||||
rdatatype = TEXTOID;
|
||||
break;
|
||||
|
||||
case TEXT_PATTERN_BTREE_FAM_OID:
|
||||
case TEXT_SPGIST_FAM_OID:
|
||||
rdatatype = TEXTOID;
|
||||
break;
|
||||
|
||||
case BPCHAR_BTREE_FAM_OID:
|
||||
if (!(pstatus == Pattern_Prefix_Exact ||
|
||||
lc_collate_is_c(indexcollation)))
|
||||
return NIL;
|
||||
rdatatype = BPCHAROID;
|
||||
break;
|
||||
|
||||
case BPCHAR_PATTERN_BTREE_FAM_OID:
|
||||
rdatatype = BPCHAROID;
|
||||
break;
|
||||
|
||||
case BYTEA_BTREE_FAM_OID:
|
||||
rdatatype = BYTEAOID;
|
||||
break;
|
||||
|
||||
default:
|
||||
return NIL;
|
||||
}
|
||||
|
||||
/* OK, prepare to create the indexqual(s) */
|
||||
ldatatype = exprType(leftop);
|
||||
|
||||
/*
|
||||
* If necessary, coerce the prefix constant to the right type. The given
|
||||
* prefix constant is either text or bytea type, therefore the only case
|
||||
* where we need to do anything is when converting text to bpchar. Those
|
||||
* two types are binary-compatible, so relabeling the Const node is
|
||||
* sufficient.
|
||||
*/
|
||||
if (prefix->consttype != rdatatype)
|
||||
{
|
||||
Assert(prefix->consttype == TEXTOID &&
|
||||
rdatatype == BPCHAROID);
|
||||
prefix->consttype = rdatatype;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we found an exact-match pattern, generate an "=" indexqual.
|
||||
*/
|
||||
if (pstatus == Pattern_Prefix_Exact)
|
||||
{
|
||||
oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
|
||||
BTEqualStrategyNumber);
|
||||
if (oproid == InvalidOid)
|
||||
elog(ERROR, "no = operator for opfamily %u", opfamily);
|
||||
expr = make_opclause(oproid, BOOLOID, false,
|
||||
(Expr *) leftop, (Expr *) prefix,
|
||||
InvalidOid, indexcollation);
|
||||
result = list_make1(expr);
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Otherwise, we have a nonempty required prefix of the values.
|
||||
*
|
||||
* We can always say "x >= prefix".
|
||||
*/
|
||||
oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
|
||||
BTGreaterEqualStrategyNumber);
|
||||
if (oproid == InvalidOid)
|
||||
elog(ERROR, "no >= operator for opfamily %u", opfamily);
|
||||
expr = make_opclause(oproid, BOOLOID, false,
|
||||
(Expr *) leftop, (Expr *) prefix,
|
||||
InvalidOid, indexcollation);
|
||||
result = list_make1(expr);
|
||||
|
||||
/*-------
|
||||
* If we can create a string larger than the prefix, we can say
|
||||
* "x < greaterstr". NB: we rely on make_greater_string() to generate
|
||||
* a guaranteed-greater string, not just a probably-greater string.
|
||||
* In general this is only guaranteed in C locale, so we'd better be
|
||||
* using a C-locale index collation.
|
||||
*-------
|
||||
*/
|
||||
oproid = get_opfamily_member(opfamily, ldatatype, rdatatype,
|
||||
BTLessStrategyNumber);
|
||||
if (oproid == InvalidOid)
|
||||
elog(ERROR, "no < operator for opfamily %u", opfamily);
|
||||
fmgr_info(get_opcode(oproid), <proc);
|
||||
greaterstr = make_greater_string(prefix, <proc, indexcollation);
|
||||
if (greaterstr)
|
||||
{
|
||||
expr = make_opclause(oproid, BOOLOID, false,
|
||||
(Expr *) leftop, (Expr *) greaterstr,
|
||||
InvalidOid, indexcollation);
|
||||
result = lappend(result, expr);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
Reference in New Issue
Block a user