1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-30 22:43:12 +03:00
Update.
2004-12-13  Paolo Bonzini  <bonzini@gnu.org>

	Separate parsing and creation of the NFA.  Avoided recursion on
	the (very unbalanced) parse tree.
	[BZ #611]
	* posix/regcomp.c (struct subexp_optimize, analyze_tree, calc_epsdest,
	re_dfa_add_tree_node, mark_opt_subexp_iter): Removed.
	(optimize_subexps, duplicate_tree, calc_first, calc_next,
	mark_opt_subexp): Rewritten.
	(preorder, postorder, lower_subexps, lower_subexp, link_nfa_nodes,
	create_token_tree, free_tree, free_token): New.
	(analyze): Accept a regex_t *.  Invoke the passes via the preorder and
	postorder generic visitors.  Do not initialize the fields in the
	re_dfa_t that represent the transitions.
	(free_dfa_content): Use free_token.
	(re_compile_internal): Analyze before UTF-8 optimizations.  Do not
	include optimization of subexpressions.
	(create_initial_state): Fetch the DFA node index from the first node's
	bin_tree_t *.
	(optimize_utf8): Abort on unexpected nodes, including OP_DUP_QUESTION.
	Return on COMPLEX_BRACKET.
	(duplicate_node_closure): Fix comment.
	(duplicate_node): Do not initialize the fields in the
	re_dfa_t that represent the transitions.
	(calc_eclosure, calc_inveclosure): Do not handle OP_DELETED_SUBEXP.
	(create_tree): Remove final argument.  All callers adjusted.  Rewritten
	to use create_token_tree.
	(parse_reg_exp, parse_branch, parse_expression, parse_bracket_exp,
	build_charclass_op): Use create_tree or create_token_tree instead
	of re_dfa_add_tree_node.
	(parse_dup_op): Likewise.  Also free the tree using free_tree for
	"<re>{0}", and lower OP_DUP_QUESTION to OP_ALT: "a?" is equivalent
	to "a|".  Adjust invocation of mark_opt_subexp.
	(parse_sub_exp): Create a single SUBEXP node.
	* posix/regex_internal.c (re_dfa_add_node): Remove last parameter,
	always perform as if it was 1.  Do not initialize OPT_SUBEXP and
	DUPLICATED, and initialize the DFA fields representing the transitions.
	* posix/regex_internal.h (re_dfa_add_node): Adjust prototype.
	(re_token_type_t): Move OP_DUP_PLUS and OP_DUP_QUESTION to the tokens
	section.  Add a tree-only code SUBEXP.  Remove OP_DELETED_SUBEXP.
	(bin_tree_t): Include a full re_token_t for TOKEN.  Turn FIRST and
	NEXT into pointers to trees.  Remove ECLOSURE.

2004-12-28  Paolo Bonzini  <bonzini@gnu.org >

	[BZ #605]
	* posix/regcomp.c (parse_bracket_exp): Do not modify DFA nodes
	that were already created.
	* posix/regex_internal.c (re_dfa_add_node): Set accept_mb field
	in the token if needed.
	(create_ci_newstate, create_cd_newstate): Set accept_mb field
	from the tokens' field.
	* posix/regex_internal.h (re_token_t): Add accept_mb field.
	(ACCEPT_MB_NODE): Removed.
	* posix/regexec.c (proceed_next_node, transit_states_mb,
	build_sifted_states, check_arrival_add_next_nodes): Use
	accept_mb instead of ACCEPT_MB_NODE.
This commit is contained in:
Ulrich Drepper
2005-01-26 22:42:49 +00:00
parent 629311b74a
commit 02f3550c8b
5 changed files with 583 additions and 566 deletions

View File

@ -1262,7 +1262,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
re_token_type_t type = dfa->nodes[node].type;
#ifdef RE_ENABLE_I18N
if (ACCEPT_MB_NODE (type))
if (dfa->nodes[node].accept_mb)
naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
else
#endif /* RE_ENABLE_I18N */
@ -1624,15 +1624,13 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest)
int naccepted = 0;
int ret;
#if defined DEBUG || defined RE_ENABLE_I18N
re_token_type_t type = dfa->nodes[prev_node].type;
#endif
#ifdef DEBUG
re_token_type_t type = dfa->nodes[prev_node].type;
assert (!IS_EPSILON_NODE (type));
#endif
#ifdef RE_ENABLE_I18N
/* If the node may accept `multi byte'. */
if (ACCEPT_MB_NODE (type))
if (dfa->nodes[prev_node].accept_mb)
naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
str_idx, sctx->last_str_idx);
#endif /* RE_ENABLE_I18N */
@ -2486,7 +2484,7 @@ transit_state_mb (mctx, pstate)
}
/* How many bytes the node can accept? */
if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
if (dfa->nodes[cur_node_idx].accept_mb)
naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
re_string_cur_idx (&mctx->input));
if (naccepted == 0)
@ -3020,15 +3018,13 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
{
int naccepted = 0;
int cur_node = cur_nodes->elems[cur_idx];
#if defined DEBUG || defined RE_ENABLE_I18N
re_token_type_t type = dfa->nodes[cur_node].type;
#endif
#ifdef DEBUG
re_token_type_t type = dfa->nodes[cur_node].type;
assert (!IS_EPSILON_NODE (type));
#endif
#ifdef RE_ENABLE_I18N
/* If the node may accept `multi byte'. */
if (ACCEPT_MB_NODE (type))
if (dfa->nodes[cur_node].accept_mb)
{
naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
str_idx);