mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-30 22:43:12 +03:00
Update. 2004-12-13 Paolo Bonzini <bonzini@gnu.org> Separate parsing and creation of the NFA. Avoided recursion on the (very unbalanced) parse tree. [BZ #611] * posix/regcomp.c (struct subexp_optimize, analyze_tree, calc_epsdest, re_dfa_add_tree_node, mark_opt_subexp_iter): Removed. (optimize_subexps, duplicate_tree, calc_first, calc_next, mark_opt_subexp): Rewritten. (preorder, postorder, lower_subexps, lower_subexp, link_nfa_nodes, create_token_tree, free_tree, free_token): New. (analyze): Accept a regex_t *. Invoke the passes via the preorder and postorder generic visitors. Do not initialize the fields in the re_dfa_t that represent the transitions. (free_dfa_content): Use free_token. (re_compile_internal): Analyze before UTF-8 optimizations. Do not include optimization of subexpressions. (create_initial_state): Fetch the DFA node index from the first node's bin_tree_t *. (optimize_utf8): Abort on unexpected nodes, including OP_DUP_QUESTION. Return on COMPLEX_BRACKET. (duplicate_node_closure): Fix comment. (duplicate_node): Do not initialize the fields in the re_dfa_t that represent the transitions. (calc_eclosure, calc_inveclosure): Do not handle OP_DELETED_SUBEXP. (create_tree): Remove final argument. All callers adjusted. Rewritten to use create_token_tree. (parse_reg_exp, parse_branch, parse_expression, parse_bracket_exp, build_charclass_op): Use create_tree or create_token_tree instead of re_dfa_add_tree_node. (parse_dup_op): Likewise. Also free the tree using free_tree for "<re>{0}", and lower OP_DUP_QUESTION to OP_ALT: "a?" is equivalent to "a|". Adjust invocation of mark_opt_subexp. (parse_sub_exp): Create a single SUBEXP node. * posix/regex_internal.c (re_dfa_add_node): Remove last parameter, always perform as if it was 1. Do not initialize OPT_SUBEXP and DUPLICATED, and initialize the DFA fields representing the transitions. * posix/regex_internal.h (re_dfa_add_node): Adjust prototype. 
(re_token_type_t): Move OP_DUP_PLUS and OP_DUP_QUESTION to the tokens section. Add a tree-only code SUBEXP. Remove OP_DELETED_SUBEXP. (bin_tree_t): Include a full re_token_t for TOKEN. Turn FIRST and NEXT into pointers to trees. Remove ECLOSURE. 2004-12-28 Paolo Bonzini <bonzini@gnu.org> [BZ #605] * posix/regcomp.c (parse_bracket_exp): Do not modify DFA nodes that were already created. * posix/regex_internal.c (re_dfa_add_node): Set accept_mb field in the token if needed. (create_ci_newstate, create_cd_newstate): Set accept_mb field from the tokens' field. * posix/regex_internal.h (re_token_t): Add accept_mb field. (ACCEPT_MB_NODE): Removed. * posix/regexec.c (proceed_next_node, transit_state_mb, build_sifted_states, check_arrival_add_next_nodes): Use accept_mb instead of ACCEPT_MB_NODE.
This commit is contained in:
@ -1262,7 +1262,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
|
||||
re_token_type_t type = dfa->nodes[node].type;
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (ACCEPT_MB_NODE (type))
|
||||
if (dfa->nodes[node].accept_mb)
|
||||
naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
|
||||
else
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
@ -1624,15 +1624,13 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest)
|
||||
int naccepted = 0;
|
||||
int ret;
|
||||
|
||||
#if defined DEBUG || defined RE_ENABLE_I18N
|
||||
re_token_type_t type = dfa->nodes[prev_node].type;
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
re_token_type_t type = dfa->nodes[prev_node].type;
|
||||
assert (!IS_EPSILON_NODE (type));
|
||||
#endif
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* If the node may accept `multi byte'. */
|
||||
if (ACCEPT_MB_NODE (type))
|
||||
if (dfa->nodes[prev_node].accept_mb)
|
||||
naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
|
||||
str_idx, sctx->last_str_idx);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
@ -2486,7 +2484,7 @@ transit_state_mb (mctx, pstate)
|
||||
}
|
||||
|
||||
/* How many bytes the node can accept? */
|
||||
if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
|
||||
if (dfa->nodes[cur_node_idx].accept_mb)
|
||||
naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
|
||||
re_string_cur_idx (&mctx->input));
|
||||
if (naccepted == 0)
|
||||
@ -3020,15 +3018,13 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
|
||||
{
|
||||
int naccepted = 0;
|
||||
int cur_node = cur_nodes->elems[cur_idx];
|
||||
#if defined DEBUG || defined RE_ENABLE_I18N
|
||||
re_token_type_t type = dfa->nodes[cur_node].type;
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
re_token_type_t type = dfa->nodes[cur_node].type;
|
||||
assert (!IS_EPSILON_NODE (type));
|
||||
#endif
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* If the node may accept `multi byte'. */
|
||||
if (ACCEPT_MB_NODE (type))
|
||||
if (dfa->nodes[cur_node].accept_mb)
|
||||
{
|
||||
naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
|
||||
str_idx);
|
||||
|
Reference in New Issue
Block a user