1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-10 17:42:29 +03:00

First cut at doing LIKE/regex indexing optimization in

optimizer rather than parser.  This has many advantages, such as not
getting fooled by chance uses of operator names ~ and ~~ (the operators
are identified by OID now), and not creating useless comparison operations
in contexts where the comparisons will not actually be used as indexquals.
The new code also recognizes exact-match LIKE and regex patterns, and
produces an = indexqual instead of >= and <=.

This change does NOT fix the problem with non-ASCII locales: the code
still doesn't know how to generate an upper bound indexqual for non-ASCII
collation order.  But it's no worse than before, just the same deficiency
in a different place...

Also, dike out loc_restrictinfo fields in Plan nodes.  These were doing
nothing useful in the absence of 'expensive functions' optimization,
and they took a considerable amount of processing to fill in.
This commit is contained in:
Tom Lane
1999-07-27 03:51:11 +00:00
parent 434df3fb7a
commit 9e7e29e6c9
12 changed files with 635 additions and 366 deletions

View File

@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.94 1999/07/20 00:18:01 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.95 1999/07/27 03:51:06 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -76,7 +76,6 @@ static void mapTargetColumns(List *source, List *target);
static List *makeConstantList( A_Const *node);
static char *FlattenStringList(List *list);
static char *fmtId(char *rawid);
static Node *makeIndexable(char *opname, Node *lexpr, Node *rexpr);
static void param_type_init(Oid *typev, int nargs);
static Node *doNegate(Node *n);
@@ -3724,9 +3723,9 @@ a_expr: attr
| '(' a_expr_or_null ')'
{ $$ = $2; }
| a_expr Op a_expr
{ $$ = makeIndexable($2,$1,$3); }
{ $$ = makeA_Expr(OP, $2, $1, $3); }
| a_expr LIKE a_expr
{ $$ = makeIndexable("~~", $1, $3); }
{ $$ = makeA_Expr(OP, "~~", $1, $3); }
| a_expr NOT LIKE a_expr
{ $$ = makeA_Expr(OP, "!~~", $1, $4); }
| Op a_expr
@@ -4376,7 +4375,7 @@ b_expr: attr
| '(' a_expr ')'
{ $$ = $2; }
| b_expr Op b_expr
{ $$ = makeIndexable($2,$1,$3); }
{ $$ = makeA_Expr(OP, $2,$1,$3); }
| Op b_expr
{ $$ = makeA_Expr(OP, $1, NULL, $2); }
| b_expr Op
@@ -5197,157 +5196,6 @@ mapTargetColumns(List *src, List *dst)
return;
} /* mapTargetColumns() */
static Node *makeIndexable(char *opname, Node *lexpr, Node *rexpr)
{
Node *result = NULL;
/* we do this so indexes can be used */
if (strcmp(opname,"~") == 0 ||
strcmp(opname,"~*") == 0)
{
if (nodeTag(rexpr) == T_A_Const &&
((A_Const *)rexpr)->val.type == T_String &&
((A_Const *)rexpr)->val.val.str[0] == '^')
{
A_Const *n = (A_Const *)rexpr;
char *match_least = palloc(strlen(n->val.val.str)+2);
char *match_most = palloc(strlen(n->val.val.str)+2);
int pos, match_pos=0;
bool found_special = false;
/* Cannot optimize if unquoted | { } is present in pattern */
for (pos = 1; n->val.val.str[pos]; pos++)
{
if (n->val.val.str[pos] == '|' ||
n->val.val.str[pos] == '{' ||
n->val.val.str[pos] == '}')
{
found_special = true;
break;
}
if (n->val.val.str[pos] == '\\')
{
pos++;
if (n->val.val.str[pos] == '\0')
break;
}
}
if (!found_special)
{
/* note start at pos 1 to skip leading ^ */
for (pos = 1; n->val.val.str[pos]; pos++)
{
if (n->val.val.str[pos] == '.' ||
n->val.val.str[pos] == '?' ||
n->val.val.str[pos] == '*' ||
n->val.val.str[pos] == '[' ||
n->val.val.str[pos] == '$' ||
(strcmp(opname,"~*") == 0 && isalpha(n->val.val.str[pos])))
break;
if (n->val.val.str[pos] == '\\')
{
pos++;
if (n->val.val.str[pos] == '\0')
break;
}
match_least[match_pos] = n->val.val.str[pos];
match_most[match_pos++] = n->val.val.str[pos];
}
if (match_pos != 0)
{
A_Const *least = makeNode(A_Const);
A_Const *most = makeNode(A_Const);
/* make strings to be used in index use */
match_least[match_pos] = '\0';
match_most[match_pos] = '\377';
match_most[match_pos+1] = '\0';
least->val.type = T_String;
least->val.val.str = match_least;
most->val.type = T_String;
most->val.val.str = match_most;
#ifdef USE_LOCALE
result = makeA_Expr(AND, NULL,
makeA_Expr(OP, "~", lexpr, rexpr),
makeA_Expr(OP, ">=", lexpr, (Node *)least));
#else
result = makeA_Expr(AND, NULL,
makeA_Expr(OP, "~", lexpr, rexpr),
makeA_Expr(AND, NULL,
makeA_Expr(OP, ">=", lexpr, (Node *)least),
makeA_Expr(OP, "<=", lexpr, (Node *)most)));
#endif
}
}
}
}
else if (strcmp(opname,"~~") == 0)
{
if (nodeTag(rexpr) == T_A_Const &&
((A_Const *)rexpr)->val.type == T_String)
{
A_Const *n = (A_Const *)rexpr;
char *match_least = palloc(strlen(n->val.val.str)+2);
char *match_most = palloc(strlen(n->val.val.str)+2);
int pos, match_pos=0;
for (pos = 0; n->val.val.str[pos]; pos++)
{
/* % and _ are wildcard characters in LIKE */
if (n->val.val.str[pos] == '%' ||
n->val.val.str[pos] == '_')
break;
/* Backslash quotes the next character */
if (n->val.val.str[pos] == '\\')
{
pos++;
if (n->val.val.str[pos] == '\0')
break;
}
/*
* NOTE: this code used to think that %% meant a literal %,
* but textlike() itself does not think that, and the SQL92
* spec doesn't say any such thing either.
*/
match_least[match_pos] = n->val.val.str[pos];
match_most[match_pos++] = n->val.val.str[pos];
}
if (match_pos != 0)
{
A_Const *least = makeNode(A_Const);
A_Const *most = makeNode(A_Const);
/* make strings to be used in index use */
match_least[match_pos] = '\0';
match_most[match_pos] = '\377';
match_most[match_pos+1] = '\0';
least->val.type = T_String;
least->val.val.str = match_least;
most->val.type = T_String;
most->val.val.str = match_most;
#ifdef USE_LOCALE
result = makeA_Expr(AND, NULL,
makeA_Expr(OP, "~~", lexpr, rexpr),
makeA_Expr(OP, ">=", lexpr, (Node *)least));
#else
result = makeA_Expr(AND, NULL,
makeA_Expr(OP, "~~", lexpr, rexpr),
makeA_Expr(AND, NULL,
makeA_Expr(OP, ">=", lexpr, (Node *)least),
makeA_Expr(OP, "<=", lexpr, (Node *)most)));
#endif
}
}
}
if (result == NULL)
result = makeA_Expr(OP, opname, lexpr, rexpr);
return result;
} /* makeIndexable() */
/* xlateSqlFunc()
* Convert alternate type names to internal Postgres types.