mirror of
https://github.com/postgres/postgres.git
synced 2025-08-31 17:02:12 +03:00
Simplify and document regex library's compact-NFA representation.
The previous coding abused the first element of a cNFA state's arcs list to hold a per-state flag bit, which was confusing, undocumented, and not even particularly efficient. Get rid of that in favor of a separate "stflags" vector. Since there's only one bit in use, I chose to allocate a char per state; we could possibly replace this with a bitmap at some point, but that would make accesses a little slower. It's already about 8X smaller than before, so let's not get overly tense.

Also document the representation better than it was before, which is to say not at all.

This patch is a byproduct of investigations towards extracting a "fixed prefix" string from the compact-NFA representation of regex patterns. Might need to back-patch it if we decide to back-patch that fix, but for now it's just code cleanup so I'll just put it in HEAD.
This commit is contained in:
@@ -1330,14 +1330,16 @@ compact(struct nfa * nfa,
|
||||
for (s = nfa->states; s != NULL; s = s->next)
|
||||
{
|
||||
nstates++;
|
||||
narcs += 1 + s->nouts + 1;
|
||||
/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
|
||||
narcs += s->nouts + 1; /* need one extra for endmarker */
|
||||
}
|
||||
|
||||
cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
|
||||
cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
|
||||
cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
|
||||
if (cnfa->states == NULL || cnfa->arcs == NULL)
|
||||
if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
|
||||
{
|
||||
if (cnfa->stflags != NULL)
|
||||
FREE(cnfa->stflags);
|
||||
if (cnfa->states != NULL)
|
||||
FREE(cnfa->states);
|
||||
if (cnfa->arcs != NULL)
|
||||
@@ -1359,9 +1361,8 @@ compact(struct nfa * nfa,
|
||||
for (s = nfa->states; s != NULL; s = s->next)
|
||||
{
|
||||
assert((size_t) s->no < nstates);
|
||||
cnfa->stflags[s->no] = 0;
|
||||
cnfa->states[s->no] = ca;
|
||||
ca->co = 0; /* clear and skip flags "arc" */
|
||||
ca++;
|
||||
first = ca;
|
||||
for (a = s->outs; a != NULL; a = a->outchain)
|
||||
switch (a->type)
|
||||
@@ -1392,8 +1393,8 @@ compact(struct nfa * nfa,
|
||||
|
||||
/* mark no-progress states */
|
||||
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
|
||||
cnfa->states[a->to->no]->co = 1;
|
||||
cnfa->states[nfa->pre->no]->co = 1;
|
||||
cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
|
||||
cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa)
|
||||
{
|
||||
assert(cnfa->nstates != 0); /* not empty already */
|
||||
cnfa->nstates = 0;
|
||||
FREE(cnfa->stflags);
|
||||
FREE(cnfa->states);
|
||||
FREE(cnfa->arcs);
|
||||
}
|
||||
@@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa,
|
||||
fprintf(f, ", haslacons");
|
||||
fprintf(f, "\n");
|
||||
for (st = 0; st < cnfa->nstates; st++)
|
||||
dumpcstate(st, cnfa->states[st], cnfa, f);
|
||||
dumpcstate(st, cnfa, f);
|
||||
fflush(f);
|
||||
}
|
||||
#endif
|
||||
@@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa,
|
||||
*/
|
||||
static void
|
||||
dumpcstate(int st,
|
||||
struct carc * ca,
|
||||
struct cnfa * cnfa,
|
||||
FILE *f)
|
||||
{
|
||||
int i;
|
||||
struct carc * ca;
|
||||
int pos;
|
||||
|
||||
fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
|
||||
fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
|
||||
pos = 1;
|
||||
for (i = 1; ca[i].co != COLORLESS; i++)
|
||||
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
|
||||
{
|
||||
if (ca[i].co < cnfa->ncolors)
|
||||
fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
|
||||
if (ca->co < cnfa->ncolors)
|
||||
fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
|
||||
else
|
||||
fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
|
||||
ca[i].to);
|
||||
fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
|
||||
if (pos == 5)
|
||||
{
|
||||
fprintf(f, "\n");
|
||||
@@ -1653,7 +1653,7 @@ dumpcstate(int st,
|
||||
else
|
||||
pos++;
|
||||
}
|
||||
if (i == 1 || pos != 1)
|
||||
if (ca == cnfa->states[st] || pos != 1)
|
||||
fprintf(f, "\n");
|
||||
fflush(f);
|
||||
}
|
||||
|
Reference in New Issue
Block a user