mirror of
https://github.com/postgres/postgres.git
synced 2025-08-19 23:22:23 +03:00
Improve memory-usage accounting in regular-expression compiler.
This code previously counted the number of NFA states it created, and complained if a limit was exceeded, so as to prevent bizarre regex patterns from consuming unreasonable time or memory. That's fine as far as it went, but the code paid no attention to how many arcs linked those states. Since regexes can be contrived that have O(N) states but will need O(N^2) arcs after fixempties() processing, it was still possible to blow out memory, and take a long time doing it too.

To fix, modify the bookkeeping to count space used by both states and arcs.

I did not bother with including the "color map" in the accounting; it can only grow to a few megabytes, which is not a lot in comparison to what we're allowing for states+arcs (about 150MB on 64-bit machines or half that on 32-bit machines). Looking at some of the larger real-world regexes captured in the Tcl regression test suite suggests that the most that is likely to be needed for regexes found in the wild is under 10MB, so I believe that the current limit has enough headroom to make it okay to keep it as a hard-wired limit.

In connection with this, redefine REG_ETOOBIG as meaning "regular expression is too complex"; the previous wording of "nfa has too many states" was already somewhat inapropos because of the error code's use for stack depth overrun, and it was not very user-friendly either.

Back-patch to all supported branches.
This commit is contained in:
@@ -63,7 +63,6 @@ newnfa(struct vars * v,
|
||||
nfa->nstates = 0;
|
||||
nfa->cm = cm;
|
||||
nfa->v = v;
|
||||
nfa->size = 0;
|
||||
nfa->bos[0] = nfa->bos[1] = COLORLESS;
|
||||
nfa->eos[0] = nfa->eos[1] = COLORLESS;
|
||||
nfa->parent = parent; /* Precedes newfstate so parent is valid. */
|
||||
@@ -92,57 +91,6 @@ newnfa(struct vars * v,
|
||||
return nfa;
|
||||
}
|
||||
|
||||
/*
|
||||
 * TooManyStates - report whether the state count exceeds REG_MAX_STATES
|
||||
 *
|
||||
 * Follows the parent links from nfa up to the outermost NFA and compares
|
||||
 * that NFA's size counter (or nfa's own counter if it has no parent)
|
||||
 * against the compile-time limit REG_MAX_STATES.
|
||||
 *
|
||||
 * Returns 1 if the limit is exceeded, 0 otherwise.
|
||||
 */
|
||||
static int
|
||||
TooManyStates(struct nfa * nfa)
|
||||
{
|
||||
struct nfa *parent = nfa->parent;
|
||||
size_t sz = nfa->size; /* used as-is when nfa has no parent */
|
||||
|
||||
/* climb to the topmost ancestor; only its counter survives the loop */
|
||||
while (parent != NULL)
|
||||
{
|
||||
sz = parent->size; /* overwrite, not add: IncrementSize already bumps every ancestor */
|
||||
|
||||
parent = parent->parent;
|
||||
}
|
||||
if (sz > REG_MAX_STATES)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
 * IncrementSize - increases the tracked size of the NFA and its parents.
|
||||
 *
|
||||
 * Adds one to nfa->size and to the size counter of every NFA reachable
|
||||
 * through the chain of parent links, so each ancestor's counter also
|
||||
 * reflects states counted in its descendants.
|
||||
 */
|
||||
static void
|
||||
IncrementSize(struct nfa * nfa)
|
||||
{
|
||||
struct nfa *parent = nfa->parent;
|
||||
|
||||
nfa->size++; /* count it in this NFA first */
|
||||
|
||||
/* then propagate the same increment to every ancestor */
|
||||
while (parent != NULL)
|
||||
{
|
||||
parent->size++;
|
||||
parent = parent->parent;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
 * DecrementSize - decreases the tracked size of the NFA and its parents.
|
||||
 *
|
||||
 * Subtracts one from nfa->size and from the size counter of every NFA
|
||||
 * reachable through the chain of parent links; the exact inverse of
|
||||
 * IncrementSize.
|
||||
 *
|
||||
 * NOTE(review): there is no guard against a counter dropping below zero;
|
||||
 * presumably each call is paired with an earlier IncrementSize -- confirm
|
||||
 * against the callers.
|
||||
 */
|
||||
static void
|
||||
DecrementSize(struct nfa * nfa)
|
||||
{
|
||||
struct nfa *parent = nfa->parent;
|
||||
|
||||
nfa->size--; /* uncount it in this NFA first */
|
||||
|
||||
/* then propagate the same decrement to every ancestor */
|
||||
while (parent != NULL)
|
||||
{
|
||||
parent->size--;
|
||||
parent = parent->parent;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* freenfa - free an entire NFA
|
||||
*/
|
||||
@@ -188,12 +136,6 @@ newstate(struct nfa * nfa)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (TooManyStates(nfa))
|
||||
{
|
||||
NERR(REG_ETOOBIG);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (nfa->free != NULL)
|
||||
{
|
||||
s = nfa->free;
|
||||
@@ -201,12 +143,18 @@ newstate(struct nfa * nfa)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
|
||||
{
|
||||
NERR(REG_ETOOBIG);
|
||||
return NULL;
|
||||
}
|
||||
s = (struct state *) MALLOC(sizeof(struct state));
|
||||
if (s == NULL)
|
||||
{
|
||||
NERR(REG_ESPACE);
|
||||
return NULL;
|
||||
}
|
||||
nfa->v->spaceused += sizeof(struct state);
|
||||
s->oas.next = NULL;
|
||||
s->free = NULL;
|
||||
s->noas = 0;
|
||||
@@ -230,8 +178,6 @@ newstate(struct nfa * nfa)
|
||||
}
|
||||
s->prev = nfa->slast;
|
||||
nfa->slast = s;
|
||||
/* track the current size and the parent size */
|
||||
IncrementSize(nfa);
|
||||
return s;
|
||||
}
|
||||
|
||||
@@ -294,7 +240,6 @@ freestate(struct nfa * nfa,
|
||||
s->prev = NULL;
|
||||
s->next = nfa->free; /* don't delete it, put it on the free list */
|
||||
nfa->free = s;
|
||||
DecrementSize(nfa);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -312,11 +257,13 @@ destroystate(struct nfa * nfa,
|
||||
{
|
||||
abnext = ab->next;
|
||||
FREE(ab);
|
||||
nfa->v->spaceused -= sizeof(struct arcbatch);
|
||||
}
|
||||
s->ins = NULL;
|
||||
s->outs = NULL;
|
||||
s->next = NULL;
|
||||
FREE(s);
|
||||
nfa->v->spaceused -= sizeof(struct state);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -437,12 +384,18 @@ allocarc(struct nfa * nfa,
|
||||
struct arcbatch *newAb;
|
||||
int i;
|
||||
|
||||
if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
|
||||
{
|
||||
NERR(REG_ETOOBIG);
|
||||
return NULL;
|
||||
}
|
||||
newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
|
||||
if (newAb == NULL)
|
||||
{
|
||||
NERR(REG_ESPACE);
|
||||
return NULL;
|
||||
}
|
||||
nfa->v->spaceused += sizeof(struct arcbatch);
|
||||
newAb->next = s->oas.next;
|
||||
s->oas.next = newAb;
|
||||
|
||||
|
@@ -248,6 +248,7 @@ struct vars
|
||||
struct cvec *cv2; /* utility cvec */
|
||||
struct subre *lacons; /* lookahead-constraint vector */
|
||||
int nlacons; /* size of lacons */
|
||||
size_t spaceused; /* approx. space used for compilation */
|
||||
};
|
||||
|
||||
/* parsing macros; most know that `v' is the struct vars pointer */
|
||||
@@ -363,6 +364,7 @@ pg_regcomp(regex_t *re,
|
||||
v->cv2 = NULL;
|
||||
v->lacons = NULL;
|
||||
v->nlacons = 0;
|
||||
v->spaceused = 0;
|
||||
re->re_magic = REMAGIC;
|
||||
re->re_info = 0; /* bits get set during parse */
|
||||
re->re_csize = sizeof(chr);
|
||||
|
Reference in New Issue
Block a user