mirror of
https://github.com/postgres/postgres.git
synced 2025-07-27 12:41:57 +03:00
Invent "rainbow" arcs within the regex engine.
Some regular expression constructs, most notably the "." match-anything metacharacter, produce a sheaf of parallel NFA arcs covering all possible colors (that is, character equivalence classes). We can make a noticeable improvement in the space and time needed to process large regexes by replacing such cases with a single arc bearing the special color code "RAINBOW". This requires only minor additional complication in places such as pull() and push().

Callers of pg_reg_getoutarcs() must now be prepared for the possibility of seeing a RAINBOW arc. For the one known user, contrib/pg_trgm, that's a net benefit since it cuts the number of arcs to be dealt with, and the handling isn't any different than for other colors that contain too many characters to be dealt with individually.

This is part of a patch series that in total reduces the regex engine's runtime by about a factor of four on a large corpus of real-world regexes.

Patch by me, reviewed by Joel Jacobson

Discussion: https://postgr.es/m/1340281.1613018383@sss.pgh.pa.us
This commit is contained in:
@ -282,8 +282,8 @@ typedef struct
|
||||
typedef int TrgmColor;
|
||||
|
||||
/* We assume that colors returned by the regexp engine cannot be these: */
|
||||
#define COLOR_UNKNOWN (-1)
|
||||
#define COLOR_BLANK (-2)
|
||||
#define COLOR_UNKNOWN (-3)
|
||||
#define COLOR_BLANK (-4)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -780,7 +780,8 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA)
|
||||
palloc0(colorsCount * sizeof(TrgmColorInfo));
|
||||
|
||||
/*
|
||||
* Loop over colors, filling TrgmColorInfo about each.
|
||||
* Loop over colors, filling TrgmColorInfo about each. Note we include
|
||||
* WHITE (0) even though we know it'll be reported as non-expandable.
|
||||
*/
|
||||
for (i = 0; i < colorsCount; i++)
|
||||
{
|
||||
@ -1098,9 +1099,9 @@ addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
|
||||
/* Add enter key to this state */
|
||||
addKeyToQueue(trgmNFA, &destKey);
|
||||
}
|
||||
else
|
||||
else if (arc->co >= 0)
|
||||
{
|
||||
/* Regular color */
|
||||
/* Regular color (including WHITE) */
|
||||
TrgmColorInfo *colorInfo = &trgmNFA->colorInfo[arc->co];
|
||||
|
||||
if (colorInfo->expandable)
|
||||
@ -1156,6 +1157,14 @@ addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key)
|
||||
addKeyToQueue(trgmNFA, &destKey);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* RAINBOW: treat as unexpandable color */
|
||||
destKey.prefix.colors[0] = COLOR_UNKNOWN;
|
||||
destKey.prefix.colors[1] = COLOR_UNKNOWN;
|
||||
destKey.nstate = arc->to;
|
||||
addKeyToQueue(trgmNFA, &destKey);
|
||||
}
|
||||
}
|
||||
|
||||
pfree(arcs);
|
||||
@ -1216,10 +1225,10 @@ addArcs(TrgmNFA *trgmNFA, TrgmState *state)
|
||||
/*
|
||||
* Ignore non-expandable colors; addKey already handled the case.
|
||||
*
|
||||
* We need no special check for begin/end pseudocolors here. We
|
||||
* don't need to do any processing for them, and they will be
|
||||
* marked non-expandable since the regex engine will have reported
|
||||
* them that way.
|
||||
* We need no special check for WHITE or begin/end pseudocolors
|
||||
* here. We don't need to do any processing for them, and they
|
||||
* will be marked non-expandable since the regex engine will have
|
||||
* reported them that way.
|
||||
*/
|
||||
if (!colorInfo->expandable)
|
||||
continue;
|
||||
|
Reference in New Issue
Block a user