mirror of
https://github.com/postgres/postgres.git
synced 2025-07-12 21:01:52 +03:00
Back-patch fix for extraction of fixed prefixes from regular expressions.
Back-patch of commits 628cbb50ba and c6aae3042b.
This has been broken since 7.3, so back-patch to all supported branches.
This commit is contained in:
@ -12,7 +12,7 @@ subdir = src/backend/regex
|
|||||||
top_builddir = ../../..
|
top_builddir = ../../..
|
||||||
include $(top_builddir)/src/Makefile.global
|
include $(top_builddir)/src/Makefile.global
|
||||||
|
|
||||||
OBJS = regcomp.o regerror.o regexec.o regfree.o
|
OBJS = regcomp.o regerror.o regexec.o regfree.o regprefix.o
|
||||||
|
|
||||||
include $(top_srcdir)/src/backend/common.mk
|
include $(top_srcdir)/src/backend/common.mk
|
||||||
|
|
||||||
|
@ -7,12 +7,13 @@ So this file is an attempt to reverse-engineer some docs.
|
|||||||
General source-file layout
|
General source-file layout
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
There are four separately-compilable source files, each exposing exactly
|
There are five separately-compilable source files, each exposing exactly
|
||||||
one exported function:
|
one exported function:
|
||||||
regcomp.c: pg_regcomp
|
regcomp.c: pg_regcomp
|
||||||
regexec.c: pg_regexec
|
regexec.c: pg_regexec
|
||||||
regerror.c: pg_regerror
|
regerror.c: pg_regerror
|
||||||
regfree.c: pg_regfree
|
regfree.c: pg_regfree
|
||||||
|
regprefix.c: pg_regprefix
|
||||||
(The pg_ prefixes were added by the Postgres project to distinguish this
|
(The pg_ prefixes were added by the Postgres project to distinguish this
|
||||||
library version from any similar one that might be present on a particular
|
library version from any similar one that might be present on a particular
|
||||||
system. They'd need to be removed or replaced in any standalone version
|
system. They'd need to be removed or replaced in any standalone version
|
||||||
@ -44,6 +45,7 @@ regexec.c Top-level regex execution code
|
|||||||
rege_dfa.c DFA creation and execution
|
rege_dfa.c DFA creation and execution
|
||||||
regerror.c pg_regerror: generate text for a regex error code
|
regerror.c pg_regerror: generate text for a regex error code
|
||||||
regfree.c pg_regfree: API to free a no-longer-needed regex_t
|
regfree.c pg_regfree: API to free a no-longer-needed regex_t
|
||||||
|
regprefix.c Code for extracting a common prefix from a regex_t
|
||||||
|
|
||||||
The locale-specific code is concerned primarily with case-folding and with
|
The locale-specific code is concerned primarily with case-folding and with
|
||||||
expanding locale-specific character classes, such as [[:alnum:]]. It
|
expanding locale-specific character classes, such as [[:alnum:]]. It
|
||||||
|
@ -66,8 +66,9 @@ initcm(struct vars * v,
|
|||||||
cd = cm->cd; /* cm->cd[WHITE] */
|
cd = cm->cd; /* cm->cd[WHITE] */
|
||||||
cd->sub = NOSUB;
|
cd->sub = NOSUB;
|
||||||
cd->arcs = NULL;
|
cd->arcs = NULL;
|
||||||
cd->flags = 0;
|
cd->firstchr = CHR_MIN;
|
||||||
cd->nchrs = CHR_MAX - CHR_MIN + 1;
|
cd->nchrs = CHR_MAX - CHR_MIN + 1;
|
||||||
|
cd->flags = 0;
|
||||||
|
|
||||||
/* upper levels of tree */
|
/* upper levels of tree */
|
||||||
for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--)
|
for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--)
|
||||||
@ -272,6 +273,7 @@ newcolor(struct colormap * cm)
|
|||||||
cd->nchrs = 0;
|
cd->nchrs = 0;
|
||||||
cd->sub = NOSUB;
|
cd->sub = NOSUB;
|
||||||
cd->arcs = NULL;
|
cd->arcs = NULL;
|
||||||
|
cd->firstchr = CHR_MIN; /* in case never set otherwise */
|
||||||
cd->flags = 0;
|
cd->flags = 0;
|
||||||
cd->block = NULL;
|
cd->block = NULL;
|
||||||
|
|
||||||
@ -371,6 +373,8 @@ subcolor(struct colormap * cm, chr c)
|
|||||||
if (co == sco) /* already in an open subcolor */
|
if (co == sco) /* already in an open subcolor */
|
||||||
return co; /* rest is redundant */
|
return co; /* rest is redundant */
|
||||||
cm->cd[co].nchrs--;
|
cm->cd[co].nchrs--;
|
||||||
|
if (cm->cd[sco].nchrs == 0)
|
||||||
|
cm->cd[sco].firstchr = c;
|
||||||
cm->cd[sco].nchrs++;
|
cm->cd[sco].nchrs++;
|
||||||
setcolor(cm, c, sco);
|
setcolor(cm, c, sco);
|
||||||
return sco;
|
return sco;
|
||||||
@ -438,6 +442,11 @@ subrange(struct vars * v,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* subblock - allocate new subcolors for one tree block of chrs, fill in arcs
|
* subblock - allocate new subcolors for one tree block of chrs, fill in arcs
|
||||||
|
*
|
||||||
|
* Note: subcolors that are created during execution of this function
|
||||||
|
* will not be given a useful value of firstchr; it'll be left as CHR_MIN.
|
||||||
|
* For the current usage of firstchr in pg_regprefix, this does not matter
|
||||||
|
* because such subcolors won't occur in the common prefix of a regex.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
subblock(struct vars * v,
|
subblock(struct vars * v,
|
||||||
|
@ -1330,14 +1330,16 @@ compact(struct nfa * nfa,
|
|||||||
for (s = nfa->states; s != NULL; s = s->next)
|
for (s = nfa->states; s != NULL; s = s->next)
|
||||||
{
|
{
|
||||||
nstates++;
|
nstates++;
|
||||||
narcs += 1 + s->nouts + 1;
|
narcs += s->nouts + 1; /* need one extra for endmarker */
|
||||||
/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
|
||||||
cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
|
cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
|
||||||
cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
|
cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
|
||||||
if (cnfa->states == NULL || cnfa->arcs == NULL)
|
if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
|
||||||
{
|
{
|
||||||
|
if (cnfa->stflags != NULL)
|
||||||
|
FREE(cnfa->stflags);
|
||||||
if (cnfa->states != NULL)
|
if (cnfa->states != NULL)
|
||||||
FREE(cnfa->states);
|
FREE(cnfa->states);
|
||||||
if (cnfa->arcs != NULL)
|
if (cnfa->arcs != NULL)
|
||||||
@ -1359,9 +1361,8 @@ compact(struct nfa * nfa,
|
|||||||
for (s = nfa->states; s != NULL; s = s->next)
|
for (s = nfa->states; s != NULL; s = s->next)
|
||||||
{
|
{
|
||||||
assert((size_t) s->no < nstates);
|
assert((size_t) s->no < nstates);
|
||||||
|
cnfa->stflags[s->no] = 0;
|
||||||
cnfa->states[s->no] = ca;
|
cnfa->states[s->no] = ca;
|
||||||
ca->co = 0; /* clear and skip flags "arc" */
|
|
||||||
ca++;
|
|
||||||
first = ca;
|
first = ca;
|
||||||
for (a = s->outs; a != NULL; a = a->outchain)
|
for (a = s->outs; a != NULL; a = a->outchain)
|
||||||
switch (a->type)
|
switch (a->type)
|
||||||
@ -1392,8 +1393,8 @@ compact(struct nfa * nfa,
|
|||||||
|
|
||||||
/* mark no-progress states */
|
/* mark no-progress states */
|
||||||
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
|
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
|
||||||
cnfa->states[a->to->no]->co = 1;
|
cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
|
||||||
cnfa->states[nfa->pre->no]->co = 1;
|
cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa)
|
|||||||
{
|
{
|
||||||
assert(cnfa->nstates != 0); /* not empty already */
|
assert(cnfa->nstates != 0); /* not empty already */
|
||||||
cnfa->nstates = 0;
|
cnfa->nstates = 0;
|
||||||
|
FREE(cnfa->stflags);
|
||||||
FREE(cnfa->states);
|
FREE(cnfa->states);
|
||||||
FREE(cnfa->arcs);
|
FREE(cnfa->arcs);
|
||||||
}
|
}
|
||||||
@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa,
|
|||||||
fprintf(f, ", haslacons");
|
fprintf(f, ", haslacons");
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
for (st = 0; st < cnfa->nstates; st++)
|
for (st = 0; st < cnfa->nstates; st++)
|
||||||
dumpcstate(st, cnfa->states[st], cnfa, f);
|
dumpcstate(st, cnfa, f);
|
||||||
fflush(f);
|
fflush(f);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa,
|
|||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
dumpcstate(int st,
|
dumpcstate(int st,
|
||||||
struct carc * ca,
|
|
||||||
struct cnfa * cnfa,
|
struct cnfa * cnfa,
|
||||||
FILE *f)
|
FILE *f)
|
||||||
{
|
{
|
||||||
int i;
|
struct carc * ca;
|
||||||
int pos;
|
int pos;
|
||||||
|
|
||||||
fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
|
fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
|
||||||
pos = 1;
|
pos = 1;
|
||||||
for (i = 1; ca[i].co != COLORLESS; i++)
|
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
|
||||||
{
|
{
|
||||||
if (ca[i].co < cnfa->ncolors)
|
if (ca->co < cnfa->ncolors)
|
||||||
fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
|
fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
|
||||||
else
|
else
|
||||||
fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
|
fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
|
||||||
ca[i].to);
|
|
||||||
if (pos == 5)
|
if (pos == 5)
|
||||||
{
|
{
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
@ -1653,7 +1653,7 @@ dumpcstate(int st,
|
|||||||
else
|
else
|
||||||
pos++;
|
pos++;
|
||||||
}
|
}
|
||||||
if (i == 1 || pos != 1)
|
if (ca == cnfa->states[st] || pos != 1)
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
fflush(f);
|
fflush(f);
|
||||||
}
|
}
|
||||||
|
@ -162,7 +162,7 @@ static void dumparcs(struct state *, FILE *);
|
|||||||
static int dumprarcs(struct arc *, struct state *, FILE *, int);
|
static int dumprarcs(struct arc *, struct state *, FILE *, int);
|
||||||
static void dumparc(struct arc *, struct state *, FILE *);
|
static void dumparc(struct arc *, struct state *, FILE *);
|
||||||
static void dumpcnfa(struct cnfa *, FILE *);
|
static void dumpcnfa(struct cnfa *, FILE *);
|
||||||
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
|
static void dumpcstate(int, struct cnfa *, FILE *);
|
||||||
#endif
|
#endif
|
||||||
/* === regc_cvec.c === */
|
/* === regc_cvec.c === */
|
||||||
static struct cvec *newcvec(int, int);
|
static struct cvec *newcvec(int, int);
|
||||||
|
@ -457,14 +457,14 @@ miss(struct vars * v, /* used only for debug flags */
|
|||||||
gotstate = 0;
|
gotstate = 0;
|
||||||
for (i = 0; i < d->nstates; i++)
|
for (i = 0; i < d->nstates; i++)
|
||||||
if (ISBSET(css->states, i))
|
if (ISBSET(css->states, i))
|
||||||
for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++)
|
for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
|
||||||
if (ca->co == co)
|
if (ca->co == co)
|
||||||
{
|
{
|
||||||
BSET(d->work, ca->to);
|
BSET(d->work, ca->to);
|
||||||
gotstate = 1;
|
gotstate = 1;
|
||||||
if (ca->to == cnfa->post)
|
if (ca->to == cnfa->post)
|
||||||
ispost = 1;
|
ispost = 1;
|
||||||
if (!cnfa->states[ca->to]->co)
|
if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
|
||||||
noprogress = 0;
|
noprogress = 0;
|
||||||
FDEBUG(("%d -> %d\n", i, ca->to));
|
FDEBUG(("%d -> %d\n", i, ca->to));
|
||||||
}
|
}
|
||||||
@ -475,10 +475,9 @@ miss(struct vars * v, /* used only for debug flags */
|
|||||||
dolacons = 0;
|
dolacons = 0;
|
||||||
for (i = 0; i < d->nstates; i++)
|
for (i = 0; i < d->nstates; i++)
|
||||||
if (ISBSET(d->work, i))
|
if (ISBSET(d->work, i))
|
||||||
for (ca = cnfa->states[i] + 1; ca->co != COLORLESS;
|
for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
|
||||||
ca++)
|
|
||||||
{
|
{
|
||||||
if (ca->co <= cnfa->ncolors)
|
if (ca->co < cnfa->ncolors)
|
||||||
continue; /* NOTE CONTINUE */
|
continue; /* NOTE CONTINUE */
|
||||||
sawlacons = 1;
|
sawlacons = 1;
|
||||||
if (ISBSET(d->work, ca->to))
|
if (ISBSET(d->work, ca->to))
|
||||||
@ -489,7 +488,7 @@ miss(struct vars * v, /* used only for debug flags */
|
|||||||
dolacons = 1;
|
dolacons = 1;
|
||||||
if (ca->to == cnfa->post)
|
if (ca->to == cnfa->post)
|
||||||
ispost = 1;
|
ispost = 1;
|
||||||
if (!cnfa->states[ca->to]->co)
|
if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
|
||||||
noprogress = 0;
|
noprogress = 0;
|
||||||
FDEBUG(("%d :> %d\n", i, ca->to));
|
FDEBUG(("%d :> %d\n", i, ca->to));
|
||||||
}
|
}
|
||||||
|
259
src/backend/regex/regprefix.c
Normal file
259
src/backend/regex/regprefix.c
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* regprefix.c
|
||||||
|
* Extract a common prefix, if any, from a compiled regex.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 2012, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1998, 1999 Henry Spencer
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* src/backend/regex/regprefix.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "regex/regguts.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* forward declarations
|
||||||
|
*/
|
||||||
|
static int findprefix(struct cnfa * cnfa, struct colormap * cm,
|
||||||
|
chr *string, size_t *slength);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pg_regprefix - get common prefix for regular expression
|
||||||
|
*
|
||||||
|
* Returns one of:
|
||||||
|
* REG_NOMATCH: there is no common prefix of strings matching the regex
|
||||||
|
* REG_PREFIX: there is a common prefix of strings matching the regex
|
||||||
|
* REG_EXACT: all strings satisfying the regex must match the same string
|
||||||
|
* or a REG_XXX error code
|
||||||
|
*
|
||||||
|
* In the non-failure cases, *string is set to a malloc'd string containing
|
||||||
|
* the common prefix or exact value, of length *slength (measured in chrs
|
||||||
|
* not bytes!).
|
||||||
|
*
|
||||||
|
* This function does not analyze all complex cases (such as lookahead
|
||||||
|
* constraints) exactly. Therefore it is possible that some strings matching
|
||||||
|
* the reported prefix or exact-match string do not satisfy the regex. But
|
||||||
|
* it should never be the case that a string satisfying the regex does not
|
||||||
|
* match the reported prefix or exact-match string.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
pg_regprefix(regex_t *re,
|
||||||
|
chr **string,
|
||||||
|
size_t *slength)
|
||||||
|
{
|
||||||
|
struct guts *g;
|
||||||
|
struct cnfa *cnfa;
|
||||||
|
int st;
|
||||||
|
|
||||||
|
/* sanity checks */
|
||||||
|
if (string == NULL || slength == NULL)
|
||||||
|
return REG_INVARG;
|
||||||
|
*string = NULL; /* initialize for failure cases */
|
||||||
|
*slength = 0;
|
||||||
|
if (re == NULL || re->re_magic != REMAGIC)
|
||||||
|
return REG_INVARG;
|
||||||
|
if (re->re_csize != sizeof(chr))
|
||||||
|
return REG_MIXED;
|
||||||
|
|
||||||
|
/* Initialize locale-dependent support */
|
||||||
|
pg_set_regex_collation(re->re_collation);
|
||||||
|
|
||||||
|
/* setup */
|
||||||
|
g = (struct guts *) re->re_guts;
|
||||||
|
if (g->info & REG_UIMPOSSIBLE)
|
||||||
|
return REG_NOMATCH;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This implementation considers only the search NFA for the topmost regex
|
||||||
|
* tree node. Therefore, constraints such as backrefs are not fully
|
||||||
|
* applied, which is allowed per the function's API spec.
|
||||||
|
*/
|
||||||
|
assert(g->tree != NULL);
|
||||||
|
cnfa = &g->tree->cnfa;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since a correct NFA should never contain any exit-free loops, it should
|
||||||
|
* not be possible for our traversal to return to a previously visited
|
||||||
|
* NFA state. Hence we need at most nstates chrs in the output string.
|
||||||
|
*/
|
||||||
|
*string = (chr *) MALLOC(cnfa->nstates * sizeof(chr));
|
||||||
|
if (*string == NULL)
|
||||||
|
return REG_ESPACE;
|
||||||
|
|
||||||
|
/* do it */
|
||||||
|
st = findprefix(cnfa, &g->cmap, *string, slength);
|
||||||
|
|
||||||
|
assert(*slength <= cnfa->nstates);
|
||||||
|
|
||||||
|
/* clean up */
|
||||||
|
if (st != REG_PREFIX && st != REG_EXACT)
|
||||||
|
{
|
||||||
|
FREE(*string);
|
||||||
|
*string = NULL;
|
||||||
|
*slength = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return st;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* findprefix - extract common prefix from cNFA
|
||||||
|
*
|
||||||
|
* Results are returned into the preallocated chr array string[], with
|
||||||
|
* *slength (which must be preset to zero) incremented for each chr.
|
||||||
|
*/
|
||||||
|
static int /* regprefix return code */
|
||||||
|
findprefix(struct cnfa * cnfa,
|
||||||
|
struct colormap * cm,
|
||||||
|
chr *string,
|
||||||
|
size_t *slength)
|
||||||
|
{
|
||||||
|
int st;
|
||||||
|
int nextst;
|
||||||
|
color thiscolor;
|
||||||
|
chr c;
|
||||||
|
struct carc *ca;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The "pre" state must have only BOS/BOL outarcs, else pattern isn't
|
||||||
|
* anchored left. If we have both BOS and BOL, they must go to the
|
||||||
|
* same next state.
|
||||||
|
*/
|
||||||
|
st = cnfa->pre;
|
||||||
|
nextst = -1;
|
||||||
|
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
|
||||||
|
{
|
||||||
|
if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
|
||||||
|
{
|
||||||
|
if (nextst == -1)
|
||||||
|
nextst = ca->to;
|
||||||
|
else if (nextst != ca->to)
|
||||||
|
return REG_NOMATCH;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return REG_NOMATCH;
|
||||||
|
}
|
||||||
|
if (nextst == -1)
|
||||||
|
return REG_NOMATCH;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Scan through successive states, stopping as soon as we find one with
|
||||||
|
* more than one acceptable transition character (either multiple colors
|
||||||
|
* on out-arcs, or a color with more than one member chr).
|
||||||
|
*
|
||||||
|
* We could find a state with multiple out-arcs that are all labeled with
|
||||||
|
* the same singleton color; this comes from patterns like "^ab(cde|cxy)".
|
||||||
|
* In that case we add the chr "c" to the output string but then exit the
|
||||||
|
* loop with nextst == -1. This leaves a little bit on the table: if the
|
||||||
|
* pattern is like "^ab(cde|cdy)", we won't notice that "d" could be added
|
||||||
|
* to the prefix. But chasing multiple parallel state chains doesn't seem
|
||||||
|
* worth the trouble.
|
||||||
|
*/
|
||||||
|
do
|
||||||
|
{
|
||||||
|
st = nextst;
|
||||||
|
nextst = -1;
|
||||||
|
thiscolor = COLORLESS;
|
||||||
|
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
|
||||||
|
{
|
||||||
|
/* We ignore lookahead constraints */
|
||||||
|
if (ca->co >= cnfa->ncolors)
|
||||||
|
continue;
|
||||||
|
/* We can also ignore BOS/BOL arcs */
|
||||||
|
if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
|
||||||
|
continue;
|
||||||
|
/* ... but EOS/EOL arcs terminate the search */
|
||||||
|
if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1])
|
||||||
|
{
|
||||||
|
thiscolor = COLORLESS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (thiscolor == COLORLESS)
|
||||||
|
{
|
||||||
|
/* First plain outarc */
|
||||||
|
thiscolor = ca->co;
|
||||||
|
nextst = ca->to;
|
||||||
|
}
|
||||||
|
else if (thiscolor == ca->co)
|
||||||
|
{
|
||||||
|
/* Another plain outarc for same color */
|
||||||
|
nextst = -1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* More than one plain outarc color terminates the search */
|
||||||
|
thiscolor = COLORLESS;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Done if we didn't find exactly one color on plain outarcs */
|
||||||
|
if (thiscolor == COLORLESS)
|
||||||
|
break;
|
||||||
|
/* The color must be a singleton */
|
||||||
|
if (cm->cd[thiscolor].nchrs != 1)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Identify the color's sole member chr and add it to the prefix
|
||||||
|
* string. In general the colormap data structure doesn't provide a
|
||||||
|
* way to find color member chrs, except by trying GETCOLOR() on each
|
||||||
|
* possible chr value, which won't do at all. However, for the cases
|
||||||
|
* we care about it should be sufficient to test the "firstchr" value,
|
||||||
|
* that is the first chr ever added to the color. There are cases
|
||||||
|
* where this might no longer be a member of the color (so we do need
|
||||||
|
* to test), but none of them are likely to arise for a character that
|
||||||
|
* is a member of a common prefix. If we do hit such a corner case,
|
||||||
|
* we just fall out without adding anything to the prefix string.
|
||||||
|
*/
|
||||||
|
c = cm->cd[thiscolor].firstchr;
|
||||||
|
if (GETCOLOR(cm, c) != thiscolor)
|
||||||
|
break;
|
||||||
|
|
||||||
|
string[(*slength)++] = c;
|
||||||
|
|
||||||
|
/* Advance to next state, but only if we have a unique next state */
|
||||||
|
} while (nextst != -1);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we ended at a state that only has EOS/EOL outarcs leading to the
|
||||||
|
* "post" state, then we have an exact-match string. Note this is true
|
||||||
|
* even if the string is of zero length.
|
||||||
|
*/
|
||||||
|
nextst = -1;
|
||||||
|
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
|
||||||
|
{
|
||||||
|
if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1])
|
||||||
|
{
|
||||||
|
if (nextst == -1)
|
||||||
|
nextst = ca->to;
|
||||||
|
else if (nextst != ca->to)
|
||||||
|
{
|
||||||
|
nextst = -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
nextst = -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (nextst == cnfa->post)
|
||||||
|
return REG_EXACT;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Otherwise, if we were unable to identify any prefix characters, say
|
||||||
|
* NOMATCH --- the pattern is anchored left, but doesn't specify any
|
||||||
|
* particular first character.
|
||||||
|
*/
|
||||||
|
if (*slength > 0)
|
||||||
|
return REG_PREFIX;
|
||||||
|
|
||||||
|
return REG_NOMATCH;
|
||||||
|
}
|
@ -1170,3 +1170,68 @@ build_regexp_split_result(regexp_matches_ctx *splitctx)
|
|||||||
Int32GetDatum(startpos + 1));
|
Int32GetDatum(startpos + 1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
|
||||||
|
*
|
||||||
|
* The result is NULL if there is no fixed prefix, else a palloc'd string.
|
||||||
|
* If it is an exact match, not just a prefix, *exact is returned as TRUE.
|
||||||
|
*/
|
||||||
|
char *
|
||||||
|
regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
|
||||||
|
bool *exact)
|
||||||
|
{
|
||||||
|
char *result;
|
||||||
|
regex_t *re;
|
||||||
|
int cflags;
|
||||||
|
int re_result;
|
||||||
|
pg_wchar *str;
|
||||||
|
size_t slen;
|
||||||
|
size_t maxlen;
|
||||||
|
char errMsg[100];
|
||||||
|
|
||||||
|
*exact = false; /* default result */
|
||||||
|
|
||||||
|
/* Compile RE */
|
||||||
|
cflags = REG_ADVANCED;
|
||||||
|
if (case_insensitive)
|
||||||
|
cflags |= REG_ICASE;
|
||||||
|
|
||||||
|
re = RE_compile_and_cache(text_re, cflags, collation);
|
||||||
|
|
||||||
|
/* Examine it to see if there's a fixed prefix */
|
||||||
|
re_result = pg_regprefix(re, &str, &slen);
|
||||||
|
|
||||||
|
switch (re_result)
|
||||||
|
{
|
||||||
|
case REG_NOMATCH:
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
case REG_PREFIX:
|
||||||
|
/* continue with wchar conversion */
|
||||||
|
break;
|
||||||
|
|
||||||
|
case REG_EXACT:
|
||||||
|
*exact = true;
|
||||||
|
/* continue with wchar conversion */
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
/* re failed??? */
|
||||||
|
pg_regerror(re_result, re, errMsg, sizeof(errMsg));
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
|
||||||
|
errmsg("regular expression failed: %s", errMsg)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert pg_wchar result back to database encoding */
|
||||||
|
maxlen = pg_database_encoding_max_length() * slen + 1;
|
||||||
|
result = (char *) palloc(maxlen);
|
||||||
|
slen = pg_wchar2mb_with_len(str, result, slen);
|
||||||
|
Assert(slen < maxlen);
|
||||||
|
|
||||||
|
free(str);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
@ -195,7 +195,8 @@ static Selectivity prefix_selectivity(PlannerInfo *root,
|
|||||||
static Selectivity like_selectivity(const char *patt, int pattlen,
|
static Selectivity like_selectivity(const char *patt, int pattlen,
|
||||||
bool case_insensitive);
|
bool case_insensitive);
|
||||||
static Selectivity regex_selectivity(const char *patt, int pattlen,
|
static Selectivity regex_selectivity(const char *patt, int pattlen,
|
||||||
bool case_insensitive);
|
bool case_insensitive,
|
||||||
|
int fixed_prefix_len);
|
||||||
static Datum string_to_datum(const char *str, Oid datatype);
|
static Datum string_to_datum(const char *str, Oid datatype);
|
||||||
static Const *string_to_const(const char *str, Oid datatype);
|
static Const *string_to_const(const char *str, Oid datatype);
|
||||||
static Const *string_to_bytea_const(const char *str, size_t str_len);
|
static Const *string_to_bytea_const(const char *str, size_t str_len);
|
||||||
@ -5255,18 +5256,9 @@ static Pattern_Prefix_Status
|
|||||||
regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
|
regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
|
||||||
Const **prefix_const, Selectivity *rest_selec)
|
Const **prefix_const, Selectivity *rest_selec)
|
||||||
{
|
{
|
||||||
char *match;
|
|
||||||
int pos,
|
|
||||||
match_pos,
|
|
||||||
prev_pos,
|
|
||||||
prev_match_pos;
|
|
||||||
bool have_leading_paren;
|
|
||||||
char *patt;
|
|
||||||
char *rest;
|
|
||||||
Oid typeid = patt_const->consttype;
|
Oid typeid = patt_const->consttype;
|
||||||
bool is_multibyte = (pg_database_encoding_max_length() > 1);
|
char *prefix;
|
||||||
pg_locale_t locale = 0;
|
bool exact;
|
||||||
bool locale_is_c = false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Should be unnecessary, there are no bytea regex operators defined. As
|
* Should be unnecessary, there are no bytea regex operators defined. As
|
||||||
@ -5278,185 +5270,54 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
|
|||||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
errmsg("regular-expression matching not supported on type bytea")));
|
errmsg("regular-expression matching not supported on type bytea")));
|
||||||
|
|
||||||
if (case_insensitive)
|
/* Use the regexp machinery to extract the prefix, if any */
|
||||||
{
|
prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
|
||||||
/* If case-insensitive, we need locale info */
|
case_insensitive, collation,
|
||||||
if (lc_ctype_is_c(collation))
|
&exact);
|
||||||
locale_is_c = true;
|
|
||||||
else if (collation != DEFAULT_COLLATION_OID)
|
|
||||||
{
|
|
||||||
if (!OidIsValid(collation))
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* This typically means that the parser could not resolve a
|
|
||||||
* conflict of implicit collations, so report it that way.
|
|
||||||
*/
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_INDETERMINATE_COLLATION),
|
|
||||||
errmsg("could not determine which collation to use for regular expression"),
|
|
||||||
errhint("Use the COLLATE clause to set the collation explicitly.")));
|
|
||||||
}
|
|
||||||
locale = pg_newlocale_from_collation(collation);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* the right-hand const is type text for all of these */
|
if (prefix == NULL)
|
||||||
patt = TextDatumGetCString(patt_const->constvalue);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check for ARE director prefix. It's worth our trouble to recognize
|
|
||||||
* this because similar_escape() used to use it, and some other code might
|
|
||||||
* still use it, to force ARE mode.
|
|
||||||
*/
|
|
||||||
pos = 0;
|
|
||||||
if (strncmp(patt, "***:", 4) == 0)
|
|
||||||
pos = 4;
|
|
||||||
|
|
||||||
/* Pattern must be anchored left */
|
|
||||||
if (patt[pos] != '^')
|
|
||||||
{
|
{
|
||||||
*prefix_const = NULL;
|
*prefix_const = NULL;
|
||||||
|
|
||||||
if (rest_selec != NULL)
|
if (rest_selec != NULL)
|
||||||
|
{
|
||||||
|
char *patt = TextDatumGetCString(patt_const->constvalue);
|
||||||
|
|
||||||
*rest_selec = regex_selectivity(patt, strlen(patt),
|
*rest_selec = regex_selectivity(patt, strlen(patt),
|
||||||
case_insensitive);
|
case_insensitive,
|
||||||
|
0);
|
||||||
return Pattern_Prefix_None;
|
pfree(patt);
|
||||||
}
|
}
|
||||||
pos++;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If '|' is present in pattern, then there may be multiple alternatives
|
|
||||||
* for the start of the string. (There are cases where this isn't so, for
|
|
||||||
* instance if the '|' is inside parens, but detecting that reliably is
|
|
||||||
* too hard.)
|
|
||||||
*/
|
|
||||||
if (strchr(patt + pos, '|') != NULL)
|
|
||||||
{
|
|
||||||
*prefix_const = NULL;
|
|
||||||
|
|
||||||
if (rest_selec != NULL)
|
|
||||||
*rest_selec = regex_selectivity(patt, strlen(patt),
|
|
||||||
case_insensitive);
|
|
||||||
|
|
||||||
return Pattern_Prefix_None;
|
return Pattern_Prefix_None;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* OK, allocate space for pattern */
|
*prefix_const = string_to_const(prefix, typeid);
|
||||||
match = palloc(strlen(patt) + 1);
|
|
||||||
prev_match_pos = match_pos = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We special-case the syntax '^(...)$' because psql uses it. But beware:
|
|
||||||
* sequences beginning "(?" are not what they seem, unless they're "(?:".
|
|
||||||
* (We must recognize that because of similar_escape().)
|
|
||||||
*/
|
|
||||||
have_leading_paren = false;
|
|
||||||
if (patt[pos] == '(' &&
|
|
||||||
(patt[pos + 1] != '?' || patt[pos + 2] == ':'))
|
|
||||||
{
|
|
||||||
have_leading_paren = true;
|
|
||||||
pos += (patt[pos + 1] != '?' ? 1 : 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Scan remainder of pattern */
|
|
||||||
prev_pos = pos;
|
|
||||||
while (patt[pos])
|
|
||||||
{
|
|
||||||
int len;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check for characters that indicate multiple possible matches here.
|
|
||||||
* Also, drop out at ')' or '$' so the termination test works right.
|
|
||||||
*/
|
|
||||||
if (patt[pos] == '.' ||
|
|
||||||
patt[pos] == '(' ||
|
|
||||||
patt[pos] == ')' ||
|
|
||||||
patt[pos] == '[' ||
|
|
||||||
patt[pos] == '^' ||
|
|
||||||
patt[pos] == '$')
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Stop if case-varying character (it's sort of a wildcard) */
|
|
||||||
if (case_insensitive &&
|
|
||||||
pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
|
|
||||||
break;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check for quantifiers. Except for +, this means the preceding
|
|
||||||
* character is optional, so we must remove it from the prefix too!
|
|
||||||
*/
|
|
||||||
if (patt[pos] == '*' ||
|
|
||||||
patt[pos] == '?' ||
|
|
||||||
patt[pos] == '{')
|
|
||||||
{
|
|
||||||
match_pos = prev_match_pos;
|
|
||||||
pos = prev_pos;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (patt[pos] == '+')
|
|
||||||
{
|
|
||||||
pos = prev_pos;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Normally, backslash quotes the next character. But in AREs,
|
|
||||||
* backslash followed by alphanumeric is an escape, not a quoted
|
|
||||||
* character. Must treat it as having multiple possible matches.
|
|
||||||
* Note: since only ASCII alphanumerics are escapes, we don't have to
|
|
||||||
* be paranoid about multibyte or collations here.
|
|
||||||
*/
|
|
||||||
if (patt[pos] == '\\')
|
|
||||||
{
|
|
||||||
if (isalnum((unsigned char) patt[pos + 1]))
|
|
||||||
break;
|
|
||||||
pos++;
|
|
||||||
if (patt[pos] == '\0')
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* save position in case we need to back up on next loop cycle */
|
|
||||||
prev_match_pos = match_pos;
|
|
||||||
prev_pos = pos;
|
|
||||||
/* must use encoding-aware processing here */
|
|
||||||
len = pg_mblen(&patt[pos]);
|
|
||||||
memcpy(&match[match_pos], &patt[pos], len);
|
|
||||||
match_pos += len;
|
|
||||||
pos += len;
|
|
||||||
}
|
|
||||||
|
|
||||||
match[match_pos] = '\0';
|
|
||||||
rest = &patt[pos];
|
|
||||||
|
|
||||||
if (have_leading_paren && patt[pos] == ')')
|
|
||||||
pos++;
|
|
||||||
|
|
||||||
if (patt[pos] == '$' && patt[pos + 1] == '\0')
|
|
||||||
{
|
|
||||||
*prefix_const = string_to_const(match, typeid);
|
|
||||||
|
|
||||||
if (rest_selec != NULL)
|
|
||||||
*rest_selec = 1.0;
|
|
||||||
|
|
||||||
pfree(patt);
|
|
||||||
pfree(match);
|
|
||||||
|
|
||||||
return Pattern_Prefix_Exact; /* pattern specifies exact match */
|
|
||||||
}
|
|
||||||
|
|
||||||
*prefix_const = string_to_const(match, typeid);
|
|
||||||
|
|
||||||
if (rest_selec != NULL)
|
if (rest_selec != NULL)
|
||||||
*rest_selec = regex_selectivity(rest, strlen(rest),
|
{
|
||||||
case_insensitive);
|
if (exact)
|
||||||
|
{
|
||||||
|
/* Exact match, so there's no additional selectivity */
|
||||||
|
*rest_selec = 1.0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
char *patt = TextDatumGetCString(patt_const->constvalue);
|
||||||
|
|
||||||
pfree(patt);
|
*rest_selec = regex_selectivity(patt, strlen(patt),
|
||||||
pfree(match);
|
case_insensitive,
|
||||||
|
strlen(prefix));
|
||||||
|
pfree(patt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (match_pos > 0)
|
pfree(prefix);
|
||||||
|
|
||||||
|
if (exact)
|
||||||
|
return Pattern_Prefix_Exact; /* pattern specifies exact match */
|
||||||
|
else
|
||||||
return Pattern_Prefix_Partial;
|
return Pattern_Prefix_Partial;
|
||||||
|
|
||||||
return Pattern_Prefix_None;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Pattern_Prefix_Status
|
Pattern_Prefix_Status
|
||||||
@ -5741,7 +5602,8 @@ regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static Selectivity
|
static Selectivity
|
||||||
regex_selectivity(const char *patt, int pattlen, bool case_insensitive)
|
regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
|
||||||
|
int fixed_prefix_len)
|
||||||
{
|
{
|
||||||
Selectivity sel;
|
Selectivity sel;
|
||||||
|
|
||||||
@ -5757,9 +5619,14 @@ regex_selectivity(const char *patt, int pattlen, bool case_insensitive)
|
|||||||
/* no trailing $ */
|
/* no trailing $ */
|
||||||
sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
|
sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
|
||||||
sel *= FULL_WILDCARD_SEL;
|
sel *= FULL_WILDCARD_SEL;
|
||||||
if (sel > 1.0)
|
|
||||||
sel = 1.0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If there's a fixed prefix, discount its selectivity */
|
||||||
|
if (fixed_prefix_len > 0)
|
||||||
|
sel /= pow(FIXED_CHAR_SEL, fixed_prefix_len);
|
||||||
|
|
||||||
|
/* Make sure result stays in range */
|
||||||
|
CLAMP_PROBABILITY(sel);
|
||||||
return sel;
|
return sel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,6 +156,9 @@ typedef struct
|
|||||||
/* two specials for debugging and testing */
|
/* two specials for debugging and testing */
|
||||||
#define REG_ATOI 101 /* convert error-code name to number */
|
#define REG_ATOI 101 /* convert error-code name to number */
|
||||||
#define REG_ITOA 102 /* convert error-code number to name */
|
#define REG_ITOA 102 /* convert error-code number to name */
|
||||||
|
/* non-error result codes for pg_regprefix */
|
||||||
|
#define REG_PREFIX (-1) /* identified a common prefix */
|
||||||
|
#define REG_EXACT (-2) /* identified an exact match */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -164,6 +167,7 @@ typedef struct
|
|||||||
*/
|
*/
|
||||||
extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid);
|
extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid);
|
||||||
extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
|
extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int);
|
||||||
|
extern int pg_regprefix(regex_t *, pg_wchar **, size_t *);
|
||||||
extern void pg_regfree(regex_t *);
|
extern void pg_regfree(regex_t *);
|
||||||
extern size_t pg_regerror(int, const regex_t *, char *, size_t);
|
extern size_t pg_regerror(int, const regex_t *, char *, size_t);
|
||||||
extern void pg_set_regex_collation(Oid collation);
|
extern void pg_set_regex_collation(Oid collation);
|
||||||
|
@ -199,19 +199,21 @@ struct colordesc
|
|||||||
color sub; /* open subcolor, if any; or free-chain ptr */
|
color sub; /* open subcolor, if any; or free-chain ptr */
|
||||||
#define NOSUB COLORLESS /* value of "sub" when no open subcolor */
|
#define NOSUB COLORLESS /* value of "sub" when no open subcolor */
|
||||||
struct arc *arcs; /* chain of all arcs of this color */
|
struct arc *arcs; /* chain of all arcs of this color */
|
||||||
|
chr firstchr; /* char first assigned to this color */
|
||||||
int flags; /* bit values defined next */
|
int flags; /* bit values defined next */
|
||||||
#define FREECOL 01 /* currently free */
|
#define FREECOL 01 /* currently free */
|
||||||
#define PSEUDO 02 /* pseudocolor, no real chars */
|
#define PSEUDO 02 /* pseudocolor, no real chars */
|
||||||
#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
|
#define UNUSEDCOLOR(cd) ((cd)->flags & FREECOL)
|
||||||
union tree *block; /* block of solid color, if any */
|
union tree *block; /* block of solid color, if any */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The color map itself
|
* The color map itself
|
||||||
*
|
*
|
||||||
* Only the "tree" part is used at execution time, and that only via the
|
* Much of the data in the colormap struct is only used at compile time.
|
||||||
* GETCOLOR() macro. Possibly that should be separated from the compile-time
|
* However, the bulk of the space usage is in the "tree" structure, so it's
|
||||||
* data.
|
* not clear that there's much point in converting the rest to a more compact
|
||||||
|
* form when compilation is finished.
|
||||||
*/
|
*/
|
||||||
struct colormap
|
struct colormap
|
||||||
{
|
{
|
||||||
@ -279,15 +281,14 @@ struct state;
|
|||||||
|
|
||||||
struct arc
|
struct arc
|
||||||
{
|
{
|
||||||
int type;
|
int type; /* 0 if free, else an NFA arc type code */
|
||||||
#define ARCFREE '\0'
|
|
||||||
color co;
|
color co;
|
||||||
struct state *from; /* where it's from (and contained within) */
|
struct state *from; /* where it's from (and contained within) */
|
||||||
struct state *to; /* where it's to */
|
struct state *to; /* where it's to */
|
||||||
struct arc *outchain; /* *from's outs chain or free chain */
|
struct arc *outchain; /* link in *from's outs chain or free chain */
|
||||||
#define freechain outchain
|
#define freechain outchain
|
||||||
struct arc *inchain; /* *to's ins chain */
|
struct arc *inchain; /* link in *to's ins chain */
|
||||||
struct arc *colorchain; /* color's arc chain */
|
struct arc *colorchain; /* link in color's arc chain */
|
||||||
struct arc *colorchainRev; /* back-link in color's arc chain */
|
struct arc *colorchainRev; /* back-link in color's arc chain */
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -339,24 +340,38 @@ struct nfa
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* definitions for compacted NFA
|
* definitions for compacted NFA
|
||||||
|
*
|
||||||
|
* The main space savings in a compacted NFA is from making the arcs as small
|
||||||
|
* as possible. We store only the transition color and next-state number for
|
||||||
|
* each arc. The list of out arcs for each state is an array beginning at
|
||||||
|
* cnfa.states[statenumber], and terminated by a dummy carc struct with
|
||||||
|
* co == COLORLESS.
|
||||||
|
*
|
||||||
|
* The non-dummy carc structs are of two types: plain arcs and LACON arcs.
|
||||||
|
* Plain arcs just store the transition color number as "co". LACON arcs
|
||||||
|
* store the lookahead constraint number plus cnfa.ncolors as "co". LACON
|
||||||
|
* arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
|
||||||
*/
|
*/
|
||||||
struct carc
|
struct carc
|
||||||
{
|
{
|
||||||
color co; /* COLORLESS is list terminator */
|
color co; /* COLORLESS is list terminator */
|
||||||
int to; /* state number */
|
int to; /* next-state number */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct cnfa
|
struct cnfa
|
||||||
{
|
{
|
||||||
int nstates; /* number of states */
|
int nstates; /* number of states */
|
||||||
int ncolors; /* number of colors */
|
int ncolors; /* number of colors (max color in use + 1) */
|
||||||
int flags;
|
int flags;
|
||||||
#define HASLACONS 01 /* uses lookahead constraints */
|
#define HASLACONS 01 /* uses lookahead constraints */
|
||||||
int pre; /* setup state number */
|
int pre; /* setup state number */
|
||||||
int post; /* teardown state number */
|
int post; /* teardown state number */
|
||||||
color bos[2]; /* colors, if any, assigned to BOS and BOL */
|
color bos[2]; /* colors, if any, assigned to BOS and BOL */
|
||||||
color eos[2]; /* colors, if any, assigned to EOS and EOL */
|
color eos[2]; /* colors, if any, assigned to EOS and EOL */
|
||||||
|
char *stflags; /* vector of per-state flags bytes */
|
||||||
|
#define CNFA_NOPROGRESS 01 /* flag bit for a no-progress state */
|
||||||
struct carc **states; /* vector of pointers to outarc lists */
|
struct carc **states; /* vector of pointers to outarc lists */
|
||||||
|
/* states[n] are pointers into a single malloc'd array of arcs */
|
||||||
struct carc *arcs; /* the area for the lists */
|
struct carc *arcs; /* the area for the lists */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -568,6 +568,8 @@ extern Datum regexp_split_to_table(PG_FUNCTION_ARGS);
|
|||||||
extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS);
|
extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS);
|
||||||
extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
|
extern Datum regexp_split_to_array(PG_FUNCTION_ARGS);
|
||||||
extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS);
|
extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS);
|
||||||
|
extern char *regexp_fixed_prefix(text *text_re, bool case_insensitive,
|
||||||
|
Oid collation, bool *exact);
|
||||||
|
|
||||||
/* regproc.c */
|
/* regproc.c */
|
||||||
extern Datum regprocin(PG_FUNCTION_ARGS);
|
extern Datum regprocin(PG_FUNCTION_ARGS);
|
||||||
|
@ -90,3 +90,66 @@ select substring('a' from '((a)+)');
|
|||||||
a
|
a
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- Test conversion of regex patterns to indexable conditions
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ 'abc';
|
||||||
|
QUERY PLAN
|
||||||
|
-----------------------------------
|
||||||
|
Seq Scan on pg_proc
|
||||||
|
Filter: (proname ~ 'abc'::text)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^abc';
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
||||||
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
||||||
|
Filter: (proname ~ '^abc'::text)
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^abc$';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------------------------
|
||||||
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
||||||
|
Index Cond: (proname = 'abc'::name)
|
||||||
|
Filter: (proname ~ '^abc$'::text)
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
||||||
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
||||||
|
Filter: (proname ~ '^abcd*e'::text)
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^abc+d';
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------
|
||||||
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
||||||
|
Index Cond: ((proname >= 'abc'::name) AND (proname < 'abd'::name))
|
||||||
|
Filter: (proname ~ '^abc+d'::text)
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
||||||
|
Index Cond: ((proname >= 'abcdef'::name) AND (proname < 'abcdeg'::name))
|
||||||
|
Filter: (proname ~ '^(abc)(def)'::text)
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------------------------
|
||||||
|
Index Scan using pg_proc_proname_args_nsp_index on pg_proc
|
||||||
|
Index Cond: (proname = 'abc'::name)
|
||||||
|
Filter: (proname ~ '^(abc)$'::text)
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
|
||||||
|
QUERY PLAN
|
||||||
|
----------------------------------------
|
||||||
|
Seq Scan on pg_proc
|
||||||
|
Filter: (proname ~ '^(abc)?d'::text)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
@ -24,3 +24,13 @@ select 'abc abc abd' ~ '^(.+)( \1)+$' as f;
|
|||||||
select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
|
select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
|
||||||
select substring('a' from '((a))+');
|
select substring('a' from '((a))+');
|
||||||
select substring('a' from '((a)+)');
|
select substring('a' from '((a)+)');
|
||||||
|
|
||||||
|
-- Test conversion of regex patterns to indexable conditions
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ 'abc';
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^abc';
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^abc$';
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^abcd*e';
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^abc+d';
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)(def)';
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)$';
|
||||||
|
explain (costs off) select * from pg_proc where proname ~ '^(abc)?d';
|
||||||
|
Reference in New Issue
Block a user