mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-29 11:41:21 +03:00
[BZ #558]
Update.

2005-01-27  Paolo Bonzini  <bonzini@gnu.org>

    [BZ #558]
    * posix/regcomp.c (calc_inveclosure): Return reg_errcode_t.
    Initialize the node sets in dfa->inveclosures.
    (analyze): Initialize inveclosures only if it is needed.
    Check errors from calc_inveclosure.
    * posix/regex_internal.c (re_dfa_add_node): Do not initialize
    the inveclosure node set.
    * posix/regexec.c (re_search_internal): If nmatch includes unused
    subexpressions, reset them to { rm_so: -1, rm_eo: -1 } here.

    * posix/regcomp.c (parse_bracket_exp) [!RE_ENABLE_I18N]:
    Do build a SIMPLE_BRACKET token.

    * posix/regexec.c (transit_state_mb): Do not examine nodes
    where ACCEPT_MB is not set.
This commit is contained in:
18
ChangeLog
18
ChangeLog
@ -1,3 +1,21 @@
|
|||||||
|
2005-01-27 Paolo Bonzini <bonzini@gnu.org>
|
||||||
|
|
||||||
|
[BZ #558]
|
||||||
|
* posix/regcomp.c (calc_inveclosure): Return reg_errcode_t.
|
||||||
|
Initialize the node sets in dfa->inveclosures.
|
||||||
|
(analyze): Initialize inveclosures only if it is needed.
|
||||||
|
Check errors from calc_inveclosure.
|
||||||
|
* posix/regex_internal.c (re_dfa_add_node): Do not initialize
|
||||||
|
the inveclosure node set.
|
||||||
|
* posix/regexec.c (re_search_internal): If nmatch includes unused
|
||||||
|
subexpressions, reset them to { rm_so: -1, rm_eo: -1 } here.
|
||||||
|
|
||||||
|
* posix/regcomp.c (parse_bracket_exp) [!RE_ENABLE_I18N]:
|
||||||
|
Do build a SIMPLE_BRACKET token.
|
||||||
|
|
||||||
|
* posix/regexec.c (transit_state_mb): Do not examine nodes
|
||||||
|
where ACCEPT_MB is not set.
|
||||||
|
|
||||||
2005-01-27 Jakub Jelinek <jakub@redhat.com>
|
2005-01-27 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
* stdlib/tst-fmtmsg.c: Include stdlib.h.
|
* stdlib/tst-fmtmsg.c: Include stdlib.h.
|
||||||
|
@ -58,7 +58,7 @@ static int search_duplicated_node (re_dfa_t *dfa, int org_node,
|
|||||||
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
|
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
|
||||||
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
|
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
|
||||||
int node, int root);
|
int node, int root);
|
||||||
static void calc_inveclosure (re_dfa_t *dfa);
|
static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
|
||||||
static int fetch_number (re_string_t *input, re_token_t *token,
|
static int fetch_number (re_string_t *input, re_token_t *token,
|
||||||
reg_syntax_t syntax);
|
reg_syntax_t syntax);
|
||||||
static void fetch_token (re_token_t *result, re_string_t *input,
|
static void fetch_token (re_token_t *result, re_string_t *input,
|
||||||
@ -1132,9 +1132,8 @@ analyze (preg)
|
|||||||
dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
|
dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
|
||||||
dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
|
dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
|
||||||
dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
|
dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
|
||||||
dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
|
|
||||||
if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
|
if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
|
||||||
|| dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
|
|| dfa->eclosures == NULL, 0))
|
||||||
return REG_ESPACE;
|
return REG_ESPACE;
|
||||||
|
|
||||||
dfa->subexp_map = re_malloc (int, preg->re_nsub);
|
dfa->subexp_map = re_malloc (int, preg->re_nsub);
|
||||||
@ -1167,7 +1166,18 @@ analyze (preg)
|
|||||||
ret = calc_eclosure (dfa);
|
ret = calc_eclosure (dfa);
|
||||||
if (BE (ret != REG_NOERROR, 0))
|
if (BE (ret != REG_NOERROR, 0))
|
||||||
return ret;
|
return ret;
|
||||||
calc_inveclosure (dfa);
|
|
||||||
|
/* We only need this during the prune_impossible_nodes pass in regexec.c;
|
||||||
|
skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
|
||||||
|
if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
|
||||||
|
|| dfa->nbackref)
|
||||||
|
{
|
||||||
|
dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
|
||||||
|
if (BE (dfa->inveclosures == NULL, 0))
|
||||||
|
return REG_ESPACE;
|
||||||
|
ret = calc_inveclosure (dfa);
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1597,19 +1607,26 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
|
|||||||
return REG_NOERROR;
|
return REG_NOERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static reg_errcode_t
|
||||||
calc_inveclosure (dfa)
|
calc_inveclosure (dfa)
|
||||||
re_dfa_t *dfa;
|
re_dfa_t *dfa;
|
||||||
{
|
{
|
||||||
int src, idx, dest;
|
int src, idx, ret;
|
||||||
|
for (idx = 0; idx < dfa->nodes_len; ++idx)
|
||||||
|
re_node_set_init_empty (dfa->inveclosures + idx);
|
||||||
|
|
||||||
for (src = 0; src < dfa->nodes_len; ++src)
|
for (src = 0; src < dfa->nodes_len; ++src)
|
||||||
{
|
{
|
||||||
|
int *elems = dfa->eclosures[src].elems;
|
||||||
for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
|
for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
|
||||||
{
|
{
|
||||||
dest = dfa->eclosures[src].elems[idx];
|
ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
|
||||||
re_node_set_insert_last (dfa->inveclosures + dest, src);
|
if (BE (ret == -1, 0))
|
||||||
|
return REG_ESPACE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return REG_NOERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Calculate "eclosure" for all the node in DFA. */
|
/* Calculate "eclosure" for all the node in DFA. */
|
||||||
@ -3304,17 +3321,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
#endif /* not RE_ENABLE_I18N */
|
||||||
{
|
{
|
||||||
|
#ifdef RE_ENABLE_I18N
|
||||||
|
free_charset (mbcset);
|
||||||
|
#endif
|
||||||
/* Build a tree for simple bracket. */
|
/* Build a tree for simple bracket. */
|
||||||
br_token.type = SIMPLE_BRACKET;
|
br_token.type = SIMPLE_BRACKET;
|
||||||
br_token.opr.sbcset = sbcset;
|
br_token.opr.sbcset = sbcset;
|
||||||
work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
|
work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
|
||||||
if (BE (work_tree == NULL, 0))
|
if (BE (work_tree == NULL, 0))
|
||||||
goto parse_bracket_exp_espace;
|
goto parse_bracket_exp_espace;
|
||||||
|
|
||||||
free_charset (mbcset);
|
|
||||||
}
|
}
|
||||||
#endif /* not RE_ENABLE_I18N */
|
|
||||||
return work_tree;
|
return work_tree;
|
||||||
|
|
||||||
parse_bracket_exp_espace:
|
parse_bracket_exp_espace:
|
||||||
|
@ -1339,7 +1339,7 @@ re_dfa_add_node (dfa, token)
|
|||||||
{
|
{
|
||||||
int new_nodes_alloc = dfa->nodes_alloc * 2;
|
int new_nodes_alloc = dfa->nodes_alloc * 2;
|
||||||
int *new_nexts, *new_indices;
|
int *new_nexts, *new_indices;
|
||||||
re_node_set *new_edests, *new_eclosures, *new_inveclosures;
|
re_node_set *new_edests, *new_eclosures;
|
||||||
|
|
||||||
re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
|
re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
|
||||||
new_nodes_alloc);
|
new_nodes_alloc);
|
||||||
@ -1350,17 +1350,13 @@ re_dfa_add_node (dfa, token)
|
|||||||
new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
|
new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
|
||||||
new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
|
new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
|
||||||
new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
|
new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
|
||||||
new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
|
|
||||||
new_nodes_alloc);
|
|
||||||
if (BE (new_nexts == NULL || new_indices == NULL
|
if (BE (new_nexts == NULL || new_indices == NULL
|
||||||
|| new_edests == NULL || new_eclosures == NULL
|
|| new_edests == NULL || new_eclosures == NULL, 0))
|
||||||
|| new_inveclosures == NULL, 0))
|
|
||||||
return -1;
|
return -1;
|
||||||
dfa->nexts = new_nexts;
|
dfa->nexts = new_nexts;
|
||||||
dfa->org_indices = new_indices;
|
dfa->org_indices = new_indices;
|
||||||
dfa->edests = new_edests;
|
dfa->edests = new_edests;
|
||||||
dfa->eclosures = new_eclosures;
|
dfa->eclosures = new_eclosures;
|
||||||
dfa->inveclosures = new_inveclosures;
|
|
||||||
dfa->nodes_alloc = new_nodes_alloc;
|
dfa->nodes_alloc = new_nodes_alloc;
|
||||||
}
|
}
|
||||||
dfa->nodes[dfa->nodes_len] = token;
|
dfa->nodes[dfa->nodes_len] = token;
|
||||||
@ -1372,7 +1368,6 @@ re_dfa_add_node (dfa, token)
|
|||||||
dfa->nexts[dfa->nodes_len] = -1;
|
dfa->nexts[dfa->nodes_len] = -1;
|
||||||
re_node_set_init_empty (dfa->edests + dfa->nodes_len);
|
re_node_set_init_empty (dfa->edests + dfa->nodes_len);
|
||||||
re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
|
re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
|
||||||
re_node_set_init_empty (dfa->inveclosures + dfa->nodes_len);
|
|
||||||
return dfa->nodes_len++;
|
return dfa->nodes_len++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -605,6 +605,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
|
|||||||
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
|
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
|
||||||
int left_lim, right_lim, incr;
|
int left_lim, right_lim, incr;
|
||||||
int fl_longest_match, match_first, match_kind, match_last = -1;
|
int fl_longest_match, match_first, match_kind, match_last = -1;
|
||||||
|
int extra_nmatch;
|
||||||
int sb, ch;
|
int sb, ch;
|
||||||
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
|
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
|
||||||
re_match_context_t mctx = { .dfa = dfa };
|
re_match_context_t mctx = { .dfa = dfa };
|
||||||
@ -620,6 +621,9 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
|
|||||||
mctx.dfa = dfa;
|
mctx.dfa = dfa;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
|
||||||
|
nmatch -= extra_nmatch;
|
||||||
|
|
||||||
/* Check if the DFA haven't been compiled. */
|
/* Check if the DFA haven't been compiled. */
|
||||||
if (BE (preg->used == 0 || dfa->init_state == NULL
|
if (BE (preg->used == 0 || dfa->init_state == NULL
|
||||||
|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|
|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|
||||||
@ -882,11 +886,14 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
|
|||||||
pmatch[reg_idx].rm_so += match_first;
|
pmatch[reg_idx].rm_so += match_first;
|
||||||
pmatch[reg_idx].rm_eo += match_first;
|
pmatch[reg_idx].rm_eo += match_first;
|
||||||
}
|
}
|
||||||
|
for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
|
||||||
|
{
|
||||||
|
pmatch[nmatch + reg_idx].rm_so = -1;
|
||||||
|
pmatch[nmatch + reg_idx].rm_eo = -1;
|
||||||
|
}
|
||||||
|
|
||||||
if (dfa->subexp_map)
|
if (dfa->subexp_map)
|
||||||
for (reg_idx = 0;
|
for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
|
||||||
reg_idx + 1 < nmatch && reg_idx < preg->re_nsub;
|
|
||||||
reg_idx++)
|
|
||||||
if (dfa->subexp_map[reg_idx] != reg_idx)
|
if (dfa->subexp_map[reg_idx] != reg_idx)
|
||||||
{
|
{
|
||||||
pmatch[reg_idx + 1].rm_so
|
pmatch[reg_idx + 1].rm_so
|
||||||
@ -1371,7 +1378,7 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
|
|||||||
int fl_backtrack;
|
int fl_backtrack;
|
||||||
{
|
{
|
||||||
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
|
||||||
int idx, cur_node, real_nmatch;
|
int idx, cur_node;
|
||||||
re_node_set eps_via_nodes;
|
re_node_set eps_via_nodes;
|
||||||
struct re_fail_stack_t *fs;
|
struct re_fail_stack_t *fs;
|
||||||
struct re_fail_stack_t fs_body = { 0, 2, NULL };
|
struct re_fail_stack_t fs_body = { 0, 2, NULL };
|
||||||
@ -1392,15 +1399,14 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
|
|||||||
fs = NULL;
|
fs = NULL;
|
||||||
|
|
||||||
cur_node = dfa->init_node;
|
cur_node = dfa->init_node;
|
||||||
real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
|
|
||||||
re_node_set_init_empty (&eps_via_nodes);
|
re_node_set_init_empty (&eps_via_nodes);
|
||||||
|
|
||||||
prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
|
prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch);
|
||||||
memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
|
memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
|
||||||
|
|
||||||
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
|
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
|
||||||
{
|
{
|
||||||
update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
|
update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
|
||||||
|
|
||||||
if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
|
if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
|
||||||
{
|
{
|
||||||
@ -2469,10 +2475,13 @@ transit_state_mb (mctx, pstate)
|
|||||||
{
|
{
|
||||||
re_node_set dest_nodes, *new_nodes;
|
re_node_set dest_nodes, *new_nodes;
|
||||||
int cur_node_idx = pstate->nodes.elems[i];
|
int cur_node_idx = pstate->nodes.elems[i];
|
||||||
int naccepted = 0, dest_idx;
|
int naccepted, dest_idx;
|
||||||
unsigned int context;
|
unsigned int context;
|
||||||
re_dfastate_t *dest_state;
|
re_dfastate_t *dest_state;
|
||||||
|
|
||||||
|
if (!dfa->nodes[cur_node_idx].accept_mb)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (dfa->nodes[cur_node_idx].constraint)
|
if (dfa->nodes[cur_node_idx].constraint)
|
||||||
{
|
{
|
||||||
context = re_string_context_at (&mctx->input,
|
context = re_string_context_at (&mctx->input,
|
||||||
@ -2484,9 +2493,8 @@ transit_state_mb (mctx, pstate)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* How many bytes the node can accept? */
|
/* How many bytes the node can accept? */
|
||||||
if (dfa->nodes[cur_node_idx].accept_mb)
|
naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
|
||||||
naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
|
re_string_cur_idx (&mctx->input));
|
||||||
re_string_cur_idx (&mctx->input));
|
|
||||||
if (naccepted == 0)
|
if (naccepted == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
@ -2500,9 +2508,7 @@ transit_state_mb (mctx, pstate)
|
|||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
assert (dfa->nexts[cur_node_idx] != -1);
|
assert (dfa->nexts[cur_node_idx] != -1);
|
||||||
#endif
|
#endif
|
||||||
/* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
|
new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
|
||||||
then we use pstate->nodes.elems[i] instead. */
|
|
||||||
new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
|
|
||||||
|
|
||||||
dest_state = mctx->state_log[dest_idx];
|
dest_state = mctx->state_log[dest_idx];
|
||||||
if (dest_state == NULL)
|
if (dest_state == NULL)
|
||||||
|
Reference in New Issue
Block a user