mirror of
https://github.com/postgres/postgres.git
synced 2025-05-03 22:24:49 +03:00
293 lines
6.9 KiB
C
293 lines
6.9 KiB
C
/*-------------------------------------------------------------------------
 *
 * regexport.c
 *	  Functions for exporting info about a regex's NFA
 *
 * In this implementation, the NFA defines a necessary but not sufficient
 * condition for a string to match the regex: that is, there can be strings
 * that match the NFA but don't match the full regex, but not vice versa.
 * Thus, for example, it is okay for the functions below to ignore lookahead
 * constraints, which merely constrain the string some more.
 *
 * Notice that these functions return info into caller-provided arrays
 * rather than doing their own malloc's.  This simplifies the APIs by
 * eliminating a class of error conditions, and in the case of colors
 * allows the caller to decide how big is too big to bother with.
 *
 *
 * Portions Copyright (c) 2013-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1998, 1999 Henry Spencer
 *
 * IDENTIFICATION
 *	  src/backend/regex/regexport.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "regex/regguts.h"
|
|
|
|
#include "regex/regexport.h"
|
|
|
|
static void scancolormap(struct colormap * cm, int co,
|
|
union tree * t, int level, chr partial,
|
|
pg_wchar **chars, int *chars_len);
|
|
|
|
|
|
/*
|
|
* Get total number of NFA states.
|
|
*/
|
|
int
|
|
pg_reg_getnumstates(const regex_t *regex)
|
|
{
|
|
struct cnfa *cnfa;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cnfa = &((struct guts *) regex->re_guts)->search;
|
|
|
|
return cnfa->nstates;
|
|
}
|
|
|
|
/*
|
|
* Get initial state of NFA.
|
|
*/
|
|
int
|
|
pg_reg_getinitialstate(const regex_t *regex)
|
|
{
|
|
struct cnfa *cnfa;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cnfa = &((struct guts *) regex->re_guts)->search;
|
|
|
|
return cnfa->pre;
|
|
}
|
|
|
|
/*
|
|
* Get final state of NFA.
|
|
*/
|
|
int
|
|
pg_reg_getfinalstate(const regex_t *regex)
|
|
{
|
|
struct cnfa *cnfa;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cnfa = &((struct guts *) regex->re_guts)->search;
|
|
|
|
return cnfa->post;
|
|
}
|
|
|
|
/*
|
|
* Get number of outgoing NFA arcs of state number "st".
|
|
*
|
|
* Note: LACON arcs are ignored, both here and in pg_reg_getoutarcs().
|
|
*/
|
|
int
|
|
pg_reg_getnumoutarcs(const regex_t *regex, int st)
|
|
{
|
|
struct cnfa *cnfa;
|
|
struct carc *ca;
|
|
int count;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cnfa = &((struct guts *) regex->re_guts)->search;
|
|
|
|
if (st < 0 || st >= cnfa->nstates)
|
|
return 0;
|
|
count = 0;
|
|
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
|
|
{
|
|
if (ca->co < cnfa->ncolors)
|
|
count++;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
/*
|
|
* Write array of outgoing NFA arcs of state number "st" into arcs[],
|
|
* whose length arcs_len must be at least as long as indicated by
|
|
* pg_reg_getnumoutarcs(), else not all arcs will be returned.
|
|
*/
|
|
void
|
|
pg_reg_getoutarcs(const regex_t *regex, int st,
|
|
regex_arc_t *arcs, int arcs_len)
|
|
{
|
|
struct cnfa *cnfa;
|
|
struct carc *ca;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cnfa = &((struct guts *) regex->re_guts)->search;
|
|
|
|
if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
|
|
return;
|
|
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
|
|
{
|
|
if (ca->co < cnfa->ncolors)
|
|
{
|
|
arcs->co = ca->co;
|
|
arcs->to = ca->to;
|
|
arcs++;
|
|
if (--arcs_len == 0)
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Get total number of colors.
|
|
*/
|
|
int
|
|
pg_reg_getnumcolors(const regex_t *regex)
|
|
{
|
|
struct colormap *cm;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cm = &((struct guts *) regex->re_guts)->cmap;
|
|
|
|
return cm->max + 1;
|
|
}
|
|
|
|
/*
|
|
* Check if color is beginning of line/string.
|
|
*
|
|
* (We might at some point need to offer more refined handling of pseudocolors,
|
|
* but this will do for now.)
|
|
*/
|
|
int
|
|
pg_reg_colorisbegin(const regex_t *regex, int co)
|
|
{
|
|
struct cnfa *cnfa;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cnfa = &((struct guts *) regex->re_guts)->search;
|
|
|
|
if (co == cnfa->bos[0] || co == cnfa->bos[1])
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Check if color is end of line/string.
|
|
*/
|
|
int
|
|
pg_reg_colorisend(const regex_t *regex, int co)
|
|
{
|
|
struct cnfa *cnfa;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cnfa = &((struct guts *) regex->re_guts)->search;
|
|
|
|
if (co == cnfa->eos[0] || co == cnfa->eos[1])
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Get number of member chrs of color number "co".
|
|
*
|
|
* Note: we return -1 if the color number is invalid, or if it is a special
|
|
* color (WHITE or a pseudocolor), or if the number of members is uncertain.
|
|
* The latter case cannot arise right now but is specified to allow for future
|
|
* improvements (see musings about run-time handling of higher character codes
|
|
* in regex/README). Callers should not try to extract the members if -1 is
|
|
* returned.
|
|
*/
|
|
int
|
|
pg_reg_getnumcharacters(const regex_t *regex, int co)
|
|
{
|
|
struct colormap *cm;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cm = &((struct guts *) regex->re_guts)->cmap;
|
|
|
|
if (co <= 0 || co > cm->max) /* we reject 0 which is WHITE */
|
|
return -1;
|
|
if (cm->cd[co].flags & PSEUDO) /* also pseudocolors (BOS etc) */
|
|
return -1;
|
|
|
|
return cm->cd[co].nchrs;
|
|
}
|
|
|
|
/*
|
|
* Write array of member chrs of color number "co" into chars[],
|
|
* whose length chars_len must be at least as long as indicated by
|
|
* pg_reg_getnumcharacters(), else not all chars will be returned.
|
|
*
|
|
* Fetching the members of WHITE or a pseudocolor is not supported.
|
|
*
|
|
* Caution: this is a relatively expensive operation.
|
|
*/
|
|
void
|
|
pg_reg_getcharacters(const regex_t *regex, int co,
|
|
pg_wchar *chars, int chars_len)
|
|
{
|
|
struct colormap *cm;
|
|
|
|
assert(regex != NULL && regex->re_magic == REMAGIC);
|
|
cm = &((struct guts *) regex->re_guts)->cmap;
|
|
|
|
if (co <= 0 || co > cm->max || chars_len <= 0)
|
|
return;
|
|
if (cm->cd[co].flags & PSEUDO)
|
|
return;
|
|
|
|
/* Recursively search the colormap tree */
|
|
scancolormap(cm, co, cm->tree, 0, 0, &chars, &chars_len);
|
|
}
|
|
|
|
/*
|
|
* Recursively scan the colormap tree to find chrs belonging to color "co".
|
|
* See regex/README for info about the tree structure.
|
|
*
|
|
* t: tree block to scan
|
|
* level: level (from 0) of t
|
|
* partial: partial chr code for chrs within t
|
|
* chars, chars_len: output area
|
|
*/
|
|
static void
|
|
scancolormap(struct colormap * cm, int co,
|
|
union tree * t, int level, chr partial,
|
|
pg_wchar **chars, int *chars_len)
|
|
{
|
|
int i;
|
|
|
|
if (level < NBYTS - 1)
|
|
{
|
|
/* non-leaf node */
|
|
for (i = 0; i < BYTTAB; i++)
|
|
{
|
|
/*
|
|
* We do not support search for chrs of color 0 (WHITE), so
|
|
* all-white subtrees need not be searched. These can be
|
|
* recognized because they are represented by the fill blocks in
|
|
* the colormap struct. This typically allows us to avoid
|
|
* scanning large regions of higher-numbered chrs.
|
|
*/
|
|
if (t->tptr[i] == &cm->tree[level + 1])
|
|
continue;
|
|
|
|
/* Recursively scan next level down */
|
|
scancolormap(cm, co,
|
|
t->tptr[i], level + 1,
|
|
(partial | (chr) i) << BYTBITS,
|
|
chars, chars_len);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* leaf node */
|
|
for (i = 0; i < BYTTAB; i++)
|
|
{
|
|
if (t->tcolor[i] == co)
|
|
{
|
|
if (*chars_len > 0)
|
|
{
|
|
**chars = partial | (chr) i;
|
|
(*chars)++;
|
|
(*chars_len)--;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|