1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-08-05 19:35:52 +03:00
2002-02-28  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regcomp.c (regcomp): Remove a redundant condition.
	(init_word_char): Add a check on malloc failure.
	(create_initial_state): Likewise.
	(duplicate_node): Likewise.
	(calc_eclosure): Likewise.
	(calc_eclosure_iter): Likewise.
	(parse_expression): Likewise.
	(parse_bracket_exp): Remove unnecessary malloc invocations.
	(build_equiv_class): Likewise.
	(build_charclass): Likewise.
	* posix/regex_internal.c (re_node_set_intersect): Add a check
	on malloc failure.
	(re_node_set_add_intersect): Likewise.
	(re_node_set_merge): Likewise.
	(re_acquire_state): Likewise.
	(re_acquire_state_context): Likewise.
	(create_newstate_common): Likewise.
	(register_state): Likewise.
	(create_ci_newstate): Likewise.
	(create_cd_newstate): Likewise.
	* posix/regex_internal.h: Fix prototypes of re_acquire_state
	and re_acquire_state_context.
	* posix/regexec.c (regexec): Suit it to the error handling of
	re_search_internal.
	(re_match): Likewise.
	(re_search): Likewise.
	(re_search_internal): Add a check on malloc failure.
	(acquire_init_state_context): Likewise.
	(check_matching): Likewise.
	(proceed_next_node): Likewise.
	(set_regs): Likewise.
	(sift_states_backward): Likewise.
	(sift_states_iter_bkref): Likewise.
	(add_epsilon_backreference): Likewise.
	(transit_state): Likewise.
	(transit_state_sb): Likewise.
	(transit_state_mb): Likewise.
	(transit_state_bkref_loop): Likewise.
	(build_trtable): Likewise.
	(group_nodes_into_DFAstates): Likewise.
	(match_ctx_init): Likewise.
	(match_ctx_add_entry): Likewise.
This commit is contained in:
Ulrich Drepper
2002-02-28 07:43:13 +00:00
parent 51f38e87b1
commit a9388965cc
5 changed files with 612 additions and 281 deletions

View File

@@ -1,3 +1,48 @@
2002-02-28 Isamu Hasegawa <isamu@yamato.ibm.com>
* posix/regcomp.c (regcomp): Remove a redundant condition.
(init_word_char): Add a check on malloc failure.
(create_initial_state): Likewise.
(duplicate_node): Likewise.
(calc_eclosure): Likewise.
(calc_eclosure_iter): Likewise.
(parse_expression): Likewise.
(parse_bracket_exp): Remove unnecessary malloc invocations.
(build_equiv_class): Likewise.
(build_charclass): Likewise.
* posix/regex_internal.c (re_node_set_intersect): Add a check
on malloc failure.
(re_node_set_add_intersect): Likewise.
(re_node_set_merge): Likewise.
(re_acquire_state): Likewise.
(re_acquire_state_context): Likewise.
(create_newstate_common): Likewise.
(register_state): Likewise.
(create_ci_newstate): Likewise.
(create_cd_newstate): Likewise.
* posix/regex_internal.h: Fix prototypes of re_acquire_state
and re_acquire_state_context.
* posix/regexec.c (regexec): Suit it to the error handling of
re_search_internal.
(re_match): Likewise.
(re_search): Likewise.
(re_search_internal): Add a check on malloc failure.
(acquire_init_state_context): Likewise.
(check_matching): Likewise.
(proceed_next_node): Likewise.
(set_regs): Likewise.
(sift_states_backward): Likewise.
(sift_states_iter_bkref): Likewise.
(add_epsilon_backreference): Likewise.
(transit_state): Likewise.
(transit_state_sb): Likewise.
(transit_state_mb): Likewise.
(transit_state_bkref_loop): Likewise.
(build_trtable): Likewise.
(group_nodes_into_DFAstates): Likewise.
(match_ctx_init): Likewise.
(match_ctx_add_entry): Likewise.
2002-02-27 Ulrich Drepper <drepper@redhat.com> 2002-02-27 Ulrich Drepper <drepper@redhat.com>
* elf/dl-load.c (_dl_map_object_from_fd): Always add SONAME to * elf/dl-load.c (_dl_map_object_from_fd): Always add SONAME to

View File

@@ -63,7 +63,7 @@ static void re_compile_fastmap_iter (regex_t *bufp,
const re_dfastate_t *init_state, const re_dfastate_t *init_state,
char *fastmap); char *fastmap);
static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
static void init_word_char (re_dfa_t *dfa); static reg_errcode_t init_word_char (re_dfa_t *dfa);
static void free_charset (re_charset_t *cset); static void free_charset (re_charset_t *cset);
static void free_workarea_compile (regex_t *preg); static void free_workarea_compile (regex_t *preg);
static reg_errcode_t create_initial_state (re_dfa_t *dfa); static reg_errcode_t create_initial_state (re_dfa_t *dfa);
@@ -72,10 +72,11 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
static void calc_first (re_dfa_t *dfa, bin_tree_t *node); static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
static void calc_next (re_dfa_t *dfa, bin_tree_t *node); static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node); static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
static int duplicate_node (re_dfa_t *dfa, int org_idx, static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
unsigned int constraint); unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa); static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
static re_node_set calc_eclosure_iter (re_dfa_t *dfa, int node, int root); static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
int node, int root);
static void calc_inveclosure (re_dfa_t *dfa); static void calc_inveclosure (re_dfa_t *dfa);
static int fetch_number (re_string_t *input, re_token_t *token, static int fetch_number (re_string_t *input, re_token_t *token,
reg_syntax_t syntax); reg_syntax_t syntax);
@@ -446,8 +447,8 @@ regcomp (preg, pattern, cflags)
if (ret == REG_ERPAREN) if (ret == REG_ERPAREN)
ret = REG_EPAREN; ret = REG_EPAREN;
/* XXX Why the test for preg->fastmap != NULL? */ /* We have already checked preg->fastmap != NULL. */
if (ret == REG_NOERROR && preg->fastmap != NULL) if (ret == REG_NOERROR)
{ {
/* Compute the fastmap now, since regexec cannot modify the pattern /* Compute the fastmap now, since regexec cannot modify the pattern
buffer. */ buffer. */
@@ -772,16 +773,19 @@ init_dfa (dfa, pat_len)
"word". In this case "word" means that it is the word construction "word". In this case "word" means that it is the word construction
character used by some operators like "\<", "\>", etc. */ character used by some operators like "\<", "\>", etc. */
static void static reg_errcode_t
init_word_char (dfa) init_word_char (dfa)
re_dfa_t *dfa; re_dfa_t *dfa;
{ {
int i, j, ch; int i, j, ch;
dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1); dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
if (dfa->word_char == NULL)
return REG_ESPACE;
for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch) for (j = 0; j < UINT_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_') if (isalnum (ch) || ch == '_')
dfa->word_char[i] |= 1 << j; dfa->word_char[i] |= 1 << j;
return REG_NOERROR;
} }
/* Free the work area which are only used while compiling. */ /* Free the work area which are only used while compiling. */
@@ -844,24 +848,28 @@ create_initial_state (dfa)
} }
/* It must be the first time to invoke acquire_state. */ /* It must be the first time to invoke acquire_state. */
dfa->init_state = re_acquire_state_context (dfa, &init_nodes, 0); dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
/* We don't check ERR here, since the initial state must not be NULL. */
if (dfa->init_state == NULL)
return err;
if (dfa->init_state->has_constraint) if (dfa->init_state->has_constraint)
{ {
dfa->init_state_word = re_acquire_state_context (dfa, &init_nodes, dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_WORD); CONTEXT_WORD);
dfa->init_state_nl = re_acquire_state_context (dfa, &init_nodes, dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_NEWLINE); CONTEXT_NEWLINE);
dfa->init_state_begbuf = re_acquire_state_context (dfa, &init_nodes, dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
&init_nodes,
CONTEXT_NEWLINE CONTEXT_NEWLINE
| CONTEXT_BEGBUF); | CONTEXT_BEGBUF);
if (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|| dfa->init_state_begbuf == NULL)
return err;
} }
else else
dfa->init_state_word = dfa->init_state_nl dfa->init_state_word = dfa->init_state_nl
= dfa->init_state_begbuf = dfa->init_state; = dfa->init_state_begbuf = dfa->init_state;
if (dfa->init_state == NULL || dfa->init_state_word == NULL
|| dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL )
return REG_ESPACE;
re_node_set_free (&init_nodes); re_node_set_free (&init_nodes);
return REG_NOERROR; return REG_NOERROR;
} }
@@ -1114,20 +1122,30 @@ calc_epsdest (dfa, node)
} }
} }
static int /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
duplicate_node (dfa, org_idx, constraint) The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
otherwise return the error code. */
static reg_errcode_t
duplicate_node (new_idx, dfa, org_idx, constraint)
re_dfa_t *dfa; re_dfa_t *dfa;
int org_idx; int *new_idx, org_idx;
unsigned int constraint; unsigned int constraint;
{ {
re_token_t dup; re_token_t dup;
int dup_idx; int dup_idx;
reg_errcode_t err;
dup.type = OP_CONTEXT_NODE; dup.type = OP_CONTEXT_NODE;
if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE) if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
{ {
/* If the node whose index is ORG_IDX is the same as the intended
node, use it. */
if (dfa->nodes[org_idx].constraint == constraint) if (dfa->nodes[org_idx].constraint == constraint)
return org_idx; {
*new_idx = org_idx;
return REG_NOERROR;
}
dup.constraint = constraint | dup.constraint = constraint |
dfa->nodes[org_idx].constraint; dfa->nodes[org_idx].constraint;
} }
@@ -1137,23 +1155,32 @@ duplicate_node (dfa, org_idx, constraint)
/* In case that `entity' points OP_CONTEXT_NODE, /* In case that `entity' points OP_CONTEXT_NODE,
we correct `entity' to real entity in calc_inveclosures(). */ we correct `entity' to real entity in calc_inveclosures(). */
dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info)); dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
dup_idx = re_dfa_add_node (dfa, dup, 1);
if (dup.opr.ctx_info == NULL || dup_idx == -1)
return REG_ESPACE;
dup.opr.ctx_info->entity = org_idx; dup.opr.ctx_info->entity = org_idx;
dup.opr.ctx_info->bkref_eclosure = NULL; dup.opr.ctx_info->bkref_eclosure = NULL;
dup_idx = re_dfa_add_node (dfa, dup, 1);
dfa->nodes[dup_idx].duplicated = 1;
dfa->nodes[dup_idx].duplicated = 1;
dfa->firsts[dup_idx] = dfa->firsts[org_idx]; dfa->firsts[dup_idx] = dfa->firsts[org_idx];
dfa->nexts[dup_idx] = dfa->nexts[org_idx]; dfa->nexts[dup_idx] = dfa->nexts[org_idx];
re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx); err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
if (err != REG_NOERROR)
return err;
/* Since we don't duplicate epsilon nodes, epsilon closure have /* Since we don't duplicate epsilon nodes, epsilon closure have
only itself. */ only itself. */
re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx); err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx); if (err != REG_NOERROR)
return err;
err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
if (err != REG_NOERROR)
return err;
/* Then we must update inveclosure for this node. /* Then we must update inveclosure for this node.
We process them at last part of calc_eclosure(), We process them at last part of calc_eclosure(),
since we don't complete to calculate them here. */ since we don't complete to calculate them here. */
return dup_idx; *new_idx = dup_idx;
return REG_NOERROR;
} }
static void static void
@@ -1193,6 +1220,7 @@ calc_eclosure (dfa)
/* For each nodes, calculate epsilon closure. */ /* For each nodes, calculate epsilon closure. */
for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx) for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
{ {
reg_errcode_t err;
re_node_set eclosure_elem; re_node_set eclosure_elem;
if (node_idx == max) if (node_idx == max)
{ {
@@ -1210,7 +1238,9 @@ calc_eclosure (dfa)
if (dfa->eclosures[node_idx].nelem != 0) if (dfa->eclosures[node_idx].nelem != 0)
continue; continue;
/* Calculate epsilon closure of `node_idx'. */ /* Calculate epsilon closure of `node_idx'. */
eclosure_elem = calc_eclosure_iter (dfa, node_idx, 1); err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
if (err != REG_NOERROR)
return err;
if (dfa->eclosures[node_idx].nelem == 0) if (dfa->eclosures[node_idx].nelem == 0)
{ {
@@ -1241,7 +1271,13 @@ calc_eclosure (dfa)
{ {
int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i]; int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type)) if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
dest_node_idx = duplicate_node (dfa, dest_node_idx, constraint); {
reg_errcode_t err;
err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
constraint);
if (err != REG_NOERROR)
return err;
}
re_node_set_insert (bkref_eclosure, dest_node_idx); re_node_set_insert (bkref_eclosure, dest_node_idx);
} }
dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure; dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
@@ -1252,15 +1288,19 @@ calc_eclosure (dfa)
/* Calculate epsilon closure of NODE. */ /* Calculate epsilon closure of NODE. */
static re_node_set static reg_errcode_t
calc_eclosure_iter (dfa, node, root) calc_eclosure_iter (new_set, dfa, node, root)
re_node_set *new_set;
re_dfa_t *dfa; re_dfa_t *dfa;
int node, root; int node, root;
{ {
reg_errcode_t err;
unsigned int constraint; unsigned int constraint;
int i, max, incomplete = 0; int i, max, incomplete = 0;
re_node_set eclosure; re_node_set eclosure;
re_node_set_alloc (&eclosure, 1); err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
if (err != REG_NOERROR)
return err;
/* This indicates that we are calculating this node now. /* This indicates that we are calculating this node now.
We reference this value to avoid infinite loop. */ We reference this value to avoid infinite loop. */
@@ -1285,7 +1325,11 @@ calc_eclosure_iter (dfa, node, root)
/* If we haven't calculated the epsilon closure of `edest' yet, /* If we haven't calculated the epsilon closure of `edest' yet,
calculate now. Otherwise use calculated epsilon closure. */ calculate now. Otherwise use calculated epsilon closure. */
if (dfa->eclosures[edest].nelem == 0) if (dfa->eclosures[edest].nelem == 0)
eclosure_elem = calc_eclosure_iter (dfa, edest, 0); {
err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
if (err != REG_NOERROR)
return err;
}
else else
eclosure_elem = dfa->eclosures[edest]; eclosure_elem = dfa->eclosures[edest];
/* Merge the epsilon closure of `edest'. */ /* Merge the epsilon closure of `edest'. */
@@ -1307,7 +1351,11 @@ calc_eclosure_iter (dfa, node, root)
int dest = eclosure.elems[i]; int dest = eclosure.elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest].type)) if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
{ {
int dup_dest = duplicate_node (dfa, dest, constraint); int dup_dest;
reg_errcode_t err;
err = duplicate_node (&dup_dest, dfa, dest, constraint);
if (err != REG_NOERROR)
return err;
if (dest != dup_dest) if (dest != dup_dest)
{ {
re_node_set_remove_at (&eclosure, i--); re_node_set_remove_at (&eclosure, i--);
@@ -1323,7 +1371,8 @@ calc_eclosure_iter (dfa, node, root)
dfa->eclosures[node].nelem = 0; dfa->eclosures[node].nelem = 0;
else else
dfa->eclosures[node] = eclosure; dfa->eclosures[node] = eclosure;
return eclosure; *new_set = eclosure;
return REG_NOERROR;
} }
/* Functions for token which are used in the parser. */ /* Functions for token which are used in the parser. */
@@ -1865,7 +1914,11 @@ parse_expression (regexp, preg, token, syntax, nest, err)
break; break;
case ANCHOR: case ANCHOR:
if (dfa->word_char == NULL) if (dfa->word_char == NULL)
init_word_char (dfa); {
*err = init_word_char (dfa);
if (*err != REG_NOERROR)
return NULL;
}
if (token->opr.ctx_type == WORD_DELIM) if (token->opr.ctx_type == WORD_DELIM)
{ {
bin_tree_t *tree_first, *tree_last; bin_tree_t *tree_first, *tree_last;
@@ -2137,28 +2190,6 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
I'm not sure, but maybe enough. */ I'm not sure, but maybe enough. */
#define BRACKET_NAME_BUF_SIZE 32 #define BRACKET_NAME_BUF_SIZE 32
static inline void *
extend_array_for_cset (array, num, alloc, type_size)
void *array;
int num, *alloc, type_size;
{
void *new_array = array;
if (*alloc == num)
{
if (*alloc == 0)
{
new_array = malloc (type_size);
*alloc = 1;
}
else
{
new_array = realloc (array, type_size * num * 2);
*alloc = 2 * num;
}
}
return new_array;
}
/* This function parse bracket expression like "[abc]", "[a-c]", /* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */ "[[.a-a.]]" etc. */
@@ -2299,22 +2330,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
uint32_t *new_array_end; uint32_t *new_array_end;
int new_nranges; int new_nranges;
/* XXX If mbcset->range_starts and mbcset->range_ends are NULL /* +1 in case of mbcset->nranges is 0. */
if *range_alloc == 0 then we do not need the if. */ new_nranges = 2 * mbcset->nranges + 1;
if (*range_alloc == 0) /* Use realloc since mbcset->range_starts and mbcset->range_ends
{ are NULL if *range_alloc == 0. */
new_nranges = 1; new_array_start = re_realloc (mbcset->range_starts, uint32_t,
new_array_start = re_malloc (uint32_t, 1); new_nranges);
new_array_end = re_malloc (uint32_t, 1); new_array_end = re_realloc (mbcset->range_ends, uint32_t,
} new_nranges);
else
{
new_nranges = 2 * mbcset->nranges;
new_array_start = re_realloc (mbcset->range_starts, uint32_t,
new_nranges);
new_array_end = re_realloc (mbcset->range_ends, uint32_t,
new_nranges);
}
if (new_array_start == NULL || new_array_end == NULL) if (new_array_start == NULL || new_array_end == NULL)
return REG_ESPACE; return REG_ESPACE;
@@ -2394,13 +2418,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
/* Got valid collation sequence, add it as a new entry. */ /* Got valid collation sequence, add it as a new entry. */
/* Check the space of the arrays. */ /* Check the space of the arrays. */
mbcset->coll_syms = extend_array_for_cset (mbcset->coll_syms, if (*coll_sym_alloc == mbcset->ncoll_syms)
mbcset->ncoll_syms, {
coll_sym_alloc, /* Not enough, realloc it. */
sizeof (int32_t)); /* +1 in case of mbcset->ncoll_syms is 0. */
if (mbcset->coll_syms == NULL) *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
return REG_ESPACE; /* Use realloc since mbcset->coll_syms is NULL
if *alloc == 0. */
mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
*coll_sym_alloc);
if (mbcset->coll_syms == NULL)
return REG_ESPACE;
}
mbcset->coll_syms[mbcset->ncoll_syms++] = idx; mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
return REG_NOERROR; return REG_NOERROR;
} }
@@ -2557,12 +2586,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
bitset_set (sbcset, start_elem.opr.ch); bitset_set (sbcset, start_elem.opr.ch);
break; break;
case MB_CHAR: case MB_CHAR:
mbcset->mbchars = extend_array_for_cset (mbcset->mbchars, /* Check whether the array has enough space. */
mbcset->nmbchars, if (mbchar_alloc == mbcset->nmbchars)
&mbchar_alloc, {
sizeof (wchar_t)); /* Not enough, realloc it. */
if (mbcset->mbchars == NULL) /* +1 in case of mbcset->nmbchars is 0. */
goto parse_bracket_exp_espace; mbchar_alloc = 2 * mbcset->nmbchars + 1;
/* Use realloc since array is NULL if *alloc == 0. */
mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
mbchar_alloc);
if (mbcset->mbchars == NULL)
goto parse_bracket_exp_espace;
}
mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
break; break;
case EQUIV_CLASS: case EQUIV_CLASS:
@@ -2779,14 +2814,18 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name)
bitset_set (sbcset, ch); bitset_set (sbcset, ch);
} }
} }
/* Check the space of the arrays, and extend if we need. */ /* Check whether the array has enough space. */
mbcset->equiv_classes = extend_array_for_cset (mbcset->equiv_classes, if (*equiv_class_alloc == mbcset->nequiv_classes)
mbcset->nequiv_classes, {
equiv_class_alloc, /* Not enough, realloc it. */
sizeof (int32_t)); /* +1 in case of mbcset->nequiv_classes is 0. */
if (mbcset->equiv_classes == NULL) *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
return REG_ESPACE; /* Use realloc since the array is NULL if *alloc == 0. */
mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
*equiv_class_alloc);
if (mbcset->equiv_classes == NULL)
return REG_ESPACE;
}
mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
} }
else else
@@ -2815,12 +2854,17 @@ build_charclass (mbcset, sbcset, char_class_alloc, name)
int i; int i;
/* Check the space of the arrays. */ /* Check the space of the arrays. */
mbcset->char_classes = extend_array_for_cset (mbcset->char_classes, if (*char_class_alloc == mbcset->nchar_classes)
mbcset->nchar_classes, {
char_class_alloc, /* Not enough, realloc it. */
sizeof (wctype_t)); /* +1 in case of mbcset->nchar_classes is 0. */
if (mbcset->char_classes == NULL) *char_class_alloc = 2 * mbcset->nchar_classes + 1;
return REG_ESPACE; /* Use realloc since array is NULL if *alloc == 0. */
mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
*char_class_alloc);
if (mbcset->char_classes == NULL)
return REG_ESPACE;
}
mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);

View File

@@ -68,6 +68,8 @@ static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
const re_node_set *nodes, const re_node_set *nodes,
unsigned int hash); unsigned int hash);
static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
unsigned int hash);
static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
const re_node_set *nodes, const re_node_set *nodes,
unsigned int hash); unsigned int hash);
@@ -473,6 +475,10 @@ re_node_set_init_copy (dest, src)
return REG_NOERROR; return REG_NOERROR;
} }
/* Calculate the intersection of the sets SRC1 and SRC2, and store it in
DEST. The return value is the error code, or REG_NOERROR on success.
Note: We assume dest->elems is NULL when dest->alloc is 0. */
static reg_errcode_t static reg_errcode_t
re_node_set_intersect (dest, src1, src2) re_node_set_intersect (dest, src1, src2)
re_node_set *dest; re_node_set *dest;
@@ -483,31 +489,28 @@ re_node_set_intersect (dest, src1, src2)
{ {
if (src1->nelem + src2->nelem > dest->alloc) if (src1->nelem + src2->nelem > dest->alloc)
{ {
int *new_array;
if (dest->alloc == 0)
new_array = re_malloc (int, src1->nelem + src2->nelem);
else
new_array = re_realloc (dest->elems, int,
src1->nelem + src2->nelem);
dest->alloc = src1->nelem + src2->nelem; dest->alloc = src1->nelem + src2->nelem;
if (new_array == NULL) dest->elems = re_realloc (dest->elems, int, dest->alloc);
if (dest->elems == NULL)
return REG_ESPACE; return REG_ESPACE;
dest->elems = new_array;
} }
} }
else else
{ {
/* The intersection of empty sets is also an empty set. */
dest->nelem = 0; dest->nelem = 0;
return REG_NOERROR; return REG_NOERROR;
} }
for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) for (i1 = i2 = id = 0; i1 < src1->nelem && i2 < src2->nelem; )
{ {
if (src1->elems[i1] > src2->elems[i2]) if (src1->elems[i1] > src2->elems[i2])
{ {
++i2; ++i2;
continue; continue;
} }
/* The intersection must have the elements which are in both of
SRC1 and SRC2. */
if (src1->elems[i1] == src2->elems[i2]) if (src1->elems[i1] == src2->elems[i2])
dest->elems[id++] = src2->elems[i2++]; dest->elems[id++] = src2->elems[i2++];
++i1; ++i1;
@@ -516,6 +519,10 @@ re_node_set_intersect (dest, src1, src2)
return REG_NOERROR; return REG_NOERROR;
} }
/* Calculate the intersection of the sets SRC1 and SRC2, and merge it into
DEST. The return value is the error code, or REG_NOERROR on success.
Note: We assume dest->elems is NULL when dest->alloc is 0. */
static reg_errcode_t static reg_errcode_t
re_node_set_add_intersect (dest, src1, src2) re_node_set_add_intersect (dest, src1, src2)
re_node_set *dest; re_node_set *dest;
@@ -526,16 +533,10 @@ re_node_set_add_intersect (dest, src1, src2)
{ {
if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
{ {
int *new_array;
if (dest->alloc == 0)
new_array = re_malloc (int, src1->nelem + src2->nelem);
else
new_array = re_realloc (dest->elems, int,
src1->nelem + src2->nelem + dest->nelem);
dest->alloc = src1->nelem + src2->nelem + dest->nelem; dest->alloc = src1->nelem + src2->nelem + dest->nelem;
if (new_array == NULL) dest->elems = re_realloc (dest->elems, int, dest->alloc);
if (dest->elems == NULL)
return REG_ESPACE; return REG_ESPACE;
dest->elems = new_array;
} }
} }
else else
@@ -567,6 +568,9 @@ re_node_set_add_intersect (dest, src1, src2)
return REG_NOERROR; return REG_NOERROR;
} }
/* Calculate the union of the sets SRC1 and SRC2, and store it in
DEST. The return value is the error code, or REG_NOERROR on success. */
static reg_errcode_t static reg_errcode_t
re_node_set_init_union (dest, src1, src2) re_node_set_init_union (dest, src1, src2)
re_node_set *dest; re_node_set *dest;
@@ -617,6 +621,9 @@ re_node_set_init_union (dest, src1, src2)
return REG_NOERROR; return REG_NOERROR;
} }
/* Calculate the union of the sets DEST and SRC, and store it in
DEST. The return value is the error code, or REG_NOERROR on success. */
static reg_errcode_t static reg_errcode_t
re_node_set_merge (dest, src) re_node_set_merge (dest, src)
re_node_set *dest; re_node_set *dest;
@@ -628,12 +635,16 @@ re_node_set_merge (dest, src)
else if (dest == NULL) else if (dest == NULL)
{ {
dest = re_malloc (re_node_set, 1); dest = re_malloc (re_node_set, 1);
if (dest == NULL)
return REG_ESPACE;
return re_node_set_init_copy (dest, src); return re_node_set_init_copy (dest, src);
} }
if (dest->alloc < src->nelem + dest->nelem) if (dest->alloc < src->nelem + dest->nelem)
{ {
dest->alloc = 2 * (src->nelem + dest->alloc); dest->alloc = 2 * (src->nelem + dest->alloc);
dest->elems = re_realloc (dest->elems, int, dest->alloc); dest->elems = re_realloc (dest->elems, int, dest->alloc);
if (dest->elems == NULL)
return REG_ESPACE;
} }
for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;) for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;)
@@ -860,18 +871,28 @@ calc_state_hash (nodes, context)
/* Search for the state whose node_set is equivalent to NODES. /* Search for the state whose node_set is equivalent to NODES.
Return the pointer to the state, if we found it in the DFA. Return the pointer to the state, if we found it in the DFA.
Otherwise create the new one and return it. */ Otherwise create the new one and return it. In case of an error
return NULL and set the error code in ERR.
Note: - We assume NULL as the invalid state, then it is possible that
return value is NULL and ERR is REG_NOERROR.
- We never return non-NULL value in case of any errors, it is for
optimization. */
static re_dfastate_t * static re_dfastate_t*
re_acquire_state (dfa, nodes) re_acquire_state (err, dfa, nodes)
reg_errcode_t *err;
re_dfa_t *dfa; re_dfa_t *dfa;
const re_node_set *nodes; const re_node_set *nodes;
{ {
unsigned int hash; unsigned int hash;
re_dfastate_t *new_state;
struct re_state_table_entry *spot; struct re_state_table_entry *spot;
int i; int i;
if (nodes->nelem == 0) if (nodes->nelem == 0)
return NULL; {
*err = REG_NOERROR;
return NULL;
}
hash = calc_state_hash (nodes, 0); hash = calc_state_hash (nodes, 0);
spot = dfa->state_table + (hash & dfa->state_hash_mask); spot = dfa->state_table + (hash & dfa->state_hash_mask);
@@ -893,25 +914,42 @@ re_acquire_state (dfa, nodes)
} }
/* There are no appropriate state in the dfa, create the new one. */ /* There are no appropriate state in the dfa, create the new one. */
return create_ci_newstate (dfa, nodes, hash); new_state = create_ci_newstate (dfa, nodes, hash);
if (new_state != NULL)
return new_state;
else
{
*err = REG_ESPACE;
return NULL;
}
} }
/* Search for the state whose node_set is equivalent to NODES and /* Search for the state whose node_set is equivalent to NODES and
whose context is equivalent to CONTEXT. whose context is equivalent to CONTEXT.
Return the pointer to the state, if we found it in the DFA. Return the pointer to the state, if we found it in the DFA.
Otherwise create the new one and return it. */ Otherwise create the new one and return it. In case of an error
return NULL and set the error code in ERR.
Note: - We assume NULL as the invalid state, then it is possible that
return value is NULL and ERR is REG_NOERROR.
- We never return non-NULL value in case of any errors, it is for
optimization. */
static re_dfastate_t * static re_dfastate_t*
re_acquire_state_context (dfa, nodes, context) re_acquire_state_context (err, dfa, nodes, context)
reg_errcode_t *err;
re_dfa_t *dfa; re_dfa_t *dfa;
const re_node_set *nodes; const re_node_set *nodes;
unsigned int context; unsigned int context;
{ {
unsigned int hash; unsigned int hash;
re_dfastate_t *new_state;
struct re_state_table_entry *spot; struct re_state_table_entry *spot;
int i; int i;
if (nodes->nelem == 0) if (nodes->nelem == 0)
return NULL; {
*err = REG_NOERROR;
return NULL;
}
hash = calc_state_hash (nodes, context); hash = calc_state_hash (nodes, context);
spot = dfa->state_table + (hash & dfa->state_hash_mask); spot = dfa->state_table + (hash & dfa->state_hash_mask);
@@ -934,9 +972,19 @@ re_acquire_state_context (dfa, nodes, context)
return state; return state;
} }
/* There are no appropriate state in `dfa', create the new one. */ /* There are no appropriate state in `dfa', create the new one. */
return create_cd_newstate (dfa, nodes, context, hash); new_state = create_cd_newstate (dfa, nodes, context, hash);
if (new_state != NULL)
return new_state;
else
{
*err = REG_ESPACE;
return NULL;
}
} }
/* Allocate memory for DFA state and initialize common properties.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t * static re_dfastate_t *
create_newstate_common (dfa, nodes, hash) create_newstate_common (dfa, nodes, hash)
re_dfa_t *dfa; re_dfa_t *dfa;
@@ -945,6 +993,8 @@ create_newstate_common (dfa, nodes, hash)
{ {
re_dfastate_t *newstate; re_dfastate_t *newstate;
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
if (newstate == NULL)
return NULL;
re_node_set_init_copy (&newstate->nodes, nodes); re_node_set_init_copy (&newstate->nodes, nodes);
newstate->trtable = NULL; newstate->trtable = NULL;
newstate->trtable_search = NULL; newstate->trtable_search = NULL;
@@ -952,7 +1002,10 @@ create_newstate_common (dfa, nodes, hash)
return newstate; return newstate;
} }
static void /* Store the new state NEWSTATE whose hash value is HASH in appropriate
position. Return value indicate the error code if failed. */
static reg_errcode_t
register_state (dfa, newstate, hash) register_state (dfa, newstate, hash)
re_dfa_t *dfa; re_dfa_t *dfa;
re_dfastate_t *newstate; re_dfastate_t *newstate;
@@ -972,8 +1025,7 @@ register_state (dfa, newstate, hash)
spot->alloc = 4; spot->alloc = 4;
new_array = re_malloc (re_dfastate_t *, spot->alloc); new_array = re_malloc (re_dfastate_t *, spot->alloc);
if (new_array == NULL) if (new_array == NULL)
/* XXX return value */ return REG_ESPACE;
return;
new_array[0] = spot->entry.state; new_array[0] = spot->entry.state;
} }
else else
@@ -985,8 +1037,12 @@ register_state (dfa, newstate, hash)
spot->entry.array = new_array; spot->entry.array = new_array;
} }
spot->entry.array[spot->num++] = newstate; spot->entry.array[spot->num++] = newstate;
return REG_NOERROR;
} }
/* Create the new state which is independent of contexts.
Return the new state on success, otherwise return NULL. */
static re_dfastate_t * static re_dfastate_t *
create_ci_newstate (dfa, nodes, hash) create_ci_newstate (dfa, nodes, hash)
re_dfa_t *dfa; re_dfa_t *dfa;
@@ -994,8 +1050,11 @@ create_ci_newstate (dfa, nodes, hash)
unsigned int hash; unsigned int hash;
{ {
int i; int i;
reg_errcode_t err;
re_dfastate_t *newstate; re_dfastate_t *newstate;
newstate = create_newstate_common (dfa, nodes, hash); newstate = create_newstate_common (dfa, nodes, hash);
if (newstate == NULL)
return NULL;
newstate->entrance_nodes = &newstate->nodes; newstate->entrance_nodes = &newstate->nodes;
for (i = 0 ; i < nodes->nelem ; i++) for (i = 0 ; i < nodes->nelem ; i++)
@@ -1021,11 +1080,13 @@ create_ci_newstate (dfa, nodes, hash)
newstate->halt = 1; newstate->halt = 1;
} }
} }
err = register_state (dfa, newstate, hash);
register_state (dfa, newstate, hash); return (err != REG_NOERROR) ? NULL : newstate;
return newstate;
} }
/* Create the new state which depends on the context CONTEXT.
Return the new state if succeeded, otherwise return NULL. */
static re_dfastate_t * static re_dfastate_t *
create_cd_newstate (dfa, nodes, context, hash) create_cd_newstate (dfa, nodes, context, hash)
re_dfa_t *dfa; re_dfa_t *dfa;
@@ -1033,9 +1094,12 @@ create_cd_newstate (dfa, nodes, context, hash)
unsigned int context, hash; unsigned int context, hash;
{ {
int i, nctx_nodes = 0; int i, nctx_nodes = 0;
reg_errcode_t err;
re_dfastate_t *newstate; re_dfastate_t *newstate;
newstate = create_newstate_common (dfa, nodes, hash); newstate = create_newstate_common (dfa, nodes, hash);
if (newstate == NULL)
return NULL;
newstate->context = context; newstate->context = context;
newstate->entrance_nodes = &newstate->nodes; newstate->entrance_nodes = &newstate->nodes;
@@ -1076,7 +1140,6 @@ create_cd_newstate (dfa, nodes, context, hash)
{ {
newstate->entrance_nodes = re_malloc (re_node_set, 1); newstate->entrance_nodes = re_malloc (re_node_set, 1);
if (newstate->entrance_nodes == NULL) if (newstate->entrance_nodes == NULL)
/* XXX Return which value? */
return NULL; return NULL;
re_node_set_init_copy (newstate->entrance_nodes, nodes); re_node_set_init_copy (newstate->entrance_nodes, nodes);
nctx_nodes = 0; nctx_nodes = 0;
@@ -1090,6 +1153,6 @@ create_cd_newstate (dfa, nodes, context, hash)
} }
} }
} }
register_state (dfa, newstate, hash); err = register_state (dfa, newstate, hash);
return newstate; return (err != REG_NOERROR) ? NULL : newstate;
} }

View File

@@ -426,9 +426,10 @@ static void re_node_set_remove_at (re_node_set *set, int idx);
#define re_node_set_empty(p) ((p)->nelem = 0) #define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems) #define re_node_set_free(set) re_free ((set)->elems)
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode); static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
static re_dfastate_t *re_acquire_state (re_dfa_t *dfa, static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
const re_node_set *nodes); const re_node_set *nodes);
static re_dfastate_t *re_acquire_state_context (re_dfa_t *dfa, static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
re_dfa_t *dfa,
const re_node_set *nodes, const re_node_set *nodes,
unsigned int context); unsigned int context);

View File

@@ -38,15 +38,19 @@
#include "regex.h" #include "regex.h"
#include "regex_internal.h" #include "regex_internal.h"
static void match_ctx_init (re_match_context_t *cache, int eflags, int n); static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
int n);
static void match_ctx_free (re_match_context_t *cache); static void match_ctx_free (re_match_context_t *cache);
static void match_ctx_add_entry (re_match_context_t *cache, int node, int from, static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
int to); int from, int to);
static int re_search_internal (const regex_t *preg, const char *string, static reg_errcode_t re_search_internal (const regex_t *preg,
int length, int start, int range, size_t nmatch, const char *string, int length,
regmatch_t pmatch[], int eflags); int start, int range, size_t nmatch,
static inline re_dfastate_t *acquire_init_state_context (const regex_t *preg, regmatch_t pmatch[], int eflags);
const re_string_t *input, int idx, int eflags); static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
const regex_t *preg,
const re_string_t *input,
int idx, int eflags);
static int check_matching (const regex_t *preg, re_string_t *input, static int check_matching (const regex_t *preg, re_string_t *input,
re_match_context_t *mctx, re_dfastate_t **state_log, re_match_context_t *mctx, re_dfastate_t **state_log,
int start_idx, int fl_search, int fl_longest_match); int start_idx, int fl_search, int fl_longest_match);
@@ -61,9 +65,10 @@ static int proceed_next_node (const regex_t *preg,
const re_match_context_t *mctx, const re_match_context_t *mctx,
const re_string_t *input, const re_string_t *input,
int *pidx, int node, re_node_set *eps_via_nodes); int *pidx, int node, re_node_set *eps_via_nodes);
static void set_regs (const regex_t *preg, re_dfastate_t **state_log, static reg_errcode_t set_regs (const regex_t *preg, re_dfastate_t **state_log,
const re_match_context_t *mctx, const re_string_t *input, const re_match_context_t *mctx,
size_t nmatch, regmatch_t *pmatch, int last); const re_string_t *input, size_t nmatch,
regmatch_t *pmatch, int last);
static int sift_states_iter_mb (const regex_t *preg, re_dfastate_t **state_log, static int sift_states_iter_mb (const regex_t *preg, re_dfastate_t **state_log,
const re_match_context_t *mctx, const re_match_context_t *mctx,
const re_string_t *input, int node_idx, const re_string_t *input, int node_idx,
@@ -73,36 +78,40 @@ static int sift_states_iter_bkref (const re_dfa_t *dfa,
struct re_backref_cache_entry *mctx_entry, struct re_backref_cache_entry *mctx_entry,
int node_idx, int idx, int match_first, int node_idx, int idx, int match_first,
int match_last); int match_last);
static void sift_states_backward (const regex_t *preg, static reg_errcode_t sift_states_backward (const regex_t *preg,
re_dfastate_t **state_log, re_dfastate_t **state_log,
const re_match_context_t *mctx, const re_match_context_t *mctx,
const re_string_t *input, int last_node); const re_string_t *input,
static void add_epsilon_backreference (const re_dfa_t *dfa, int last_node);
const re_match_context_t *mctx, static reg_errcode_t add_epsilon_backreference (const re_dfa_t *dfa,
const re_node_set *plog, int idx, const re_match_context_t *mctx,
re_node_set *state_buf); const re_node_set *plog,
static re_dfastate_t *transit_state (const regex_t *preg, re_dfastate_t *state, int idx,
re_string_t *input, int fl_search, re_node_set *state_buf);
re_dfastate_t **state_log, static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
re_dfastate_t *state, re_string_t *input,
int fl_search, re_dfastate_t **state_log,
re_match_context_t *mctx); re_match_context_t *mctx);
static re_dfastate_t *transit_state_sb (const regex_t *preg, static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
re_dfastate_t *pstate, re_dfastate_t *pstate,
re_string_t *input, int fl_search, re_string_t *input, int fl_search,
re_match_context_t *mctx); re_match_context_t *mctx);
static void transit_state_mb (const regex_t *preg, re_dfastate_t *pstate, static reg_errcode_t transit_state_mb (const regex_t *preg,
const re_string_t *input, re_dfastate_t *pstate,
re_dfastate_t **state_log, const re_string_t *input,
re_match_context_t *mctx); re_dfastate_t **state_log,
static void transit_state_bkref (const regex_t *preg, re_dfastate_t *pstate, re_match_context_t *mctx);
const re_string_t *input, static reg_errcode_t transit_state_bkref (const regex_t *preg,
re_dfastate_t **state_log, re_dfastate_t *pstate,
re_match_context_t *mctx); const re_string_t *input,
static void transit_state_bkref_loop (const regex_t *preg, re_dfastate_t **state_log,
const re_string_t *input, re_match_context_t *mctx);
re_node_set *nodes, static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
re_dfastate_t **work_state_log, const re_string_t *input,
re_dfastate_t **state_log, re_node_set *nodes,
re_match_context_t *mctx); re_dfastate_t **work_state_log,
re_dfastate_t **state_log,
re_match_context_t *mctx);
static re_dfastate_t **build_trtable (const regex_t *dfa, static re_dfastate_t **build_trtable (const regex_t *dfa,
const re_dfastate_t *state, const re_dfastate_t *state,
int fl_search); int fl_search);
@@ -141,13 +150,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
regmatch_t pmatch[]; regmatch_t pmatch[];
int eflags; int eflags;
{ {
reg_errcode_t err;
int length = strlen (string); int length = strlen (string);
if (preg->no_sub) if (preg->no_sub)
return re_search_internal (preg, string, length, 0, length, 0, err = re_search_internal (preg, string, length, 0, length, 0,
NULL, eflags); NULL, eflags);
else else
return re_search_internal (preg, string, length, 0, length, nmatch, err = re_search_internal (preg, string, length, 0, length, nmatch,
pmatch, eflags); pmatch, eflags);
return err != REG_NOERROR;
} }
#ifdef _LIBC #ifdef _LIBC
weak_alias (__regexec, regexec) weak_alias (__regexec, regexec)
@@ -164,7 +175,8 @@ re_match (buffer, string, length, start, regs)
int length, start; int length, start;
struct re_registers *regs; struct re_registers *regs;
{ {
int i, nregs, result, rval, eflags = 0; reg_errcode_t result;
int i, nregs, rval, eflags = 0;
regmatch_t *pmatch; regmatch_t *pmatch;
eflags |= (buffer->not_bol) ? REG_NOTBOL : 0; eflags |= (buffer->not_bol) ? REG_NOTBOL : 0;
@@ -238,7 +250,7 @@ re_match (buffer, string, length, start, regs)
} }
} }
/* Return value is -1 if not match, the length of matching otherwise. */ /* Return value is -1 if not match, the length of matching otherwise. */
rval = (result) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so; rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
re_free (pmatch); re_free (pmatch);
return rval; return rval;
} }
@@ -290,7 +302,8 @@ re_search (bufp, string, size, startpos, range, regs)
int size, startpos, range; int size, startpos, range;
struct re_registers *regs; struct re_registers *regs;
{ {
int i, nregs, result, real_range, rval, eflags = 0; reg_errcode_t result;
int i, nregs, real_range, rval, eflags = 0;
regmatch_t *pmatch; regmatch_t *pmatch;
eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
@@ -376,7 +389,7 @@ re_search (bufp, string, size, startpos, range, regs)
} }
/* Return value is -1 if not match, the position where the matching starts /* Return value is -1 if not match, the position where the matching starts
otherwise. */ otherwise. */
rval = (result) ? -1 : pmatch[0].rm_so; rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_so;
re_free (pmatch); re_free (pmatch);
return rval; return rval;
} }
@@ -486,11 +499,12 @@ static re_node_set empty_set;
length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
meanings with regexec. START, and RANGE have the same meanings meanings with regexec. START, and RANGE have the same meanings
with re_search. with re_search.
Return 0 if we find a match and REG_NOMATCH if not. Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
otherwise return the error code.
Note: We assume front end functions already check ranges. Note: We assume front end functions already check ranges.
(START + RANGE >= 0 && START + RANGE <= LENGTH) */ (START + RANGE >= 0 && START + RANGE <= LENGTH) */
static int static reg_errcode_t
re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags) re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
const regex_t *preg; const regex_t *preg;
const char *string; const char *string;
@@ -498,6 +512,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
size_t nmatch; size_t nmatch;
regmatch_t pmatch[]; regmatch_t pmatch[];
{ {
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer; re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
re_string_t input; re_string_t input;
re_dfastate_t **state_log; re_dfastate_t **state_log;
@@ -510,7 +525,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
if (preg->used == 0 || dfa->init_state == NULL if (preg->used == 0 || dfa->init_state == NULL
|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
|| dfa->init_state_begbuf == NULL) || dfa->init_state_begbuf == NULL)
return 1; return REG_NOMATCH;
re_node_set_init_empty (&empty_set); re_node_set_init_empty (&empty_set);
@@ -522,16 +537,24 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
back-reference or a node which can accept multibyte character or back-reference or a node which can accept multibyte character or
multi character collating element. */ multi character collating element. */
if (nmatch > 1 || dfa->has_mb_node) if (nmatch > 1 || dfa->has_mb_node)
state_log = re_malloc (re_dfastate_t *, length + 1); {
state_log = re_malloc (re_dfastate_t *, length + 1);
if (state_log == NULL)
return REG_ESPACE;
}
else else
state_log = NULL; state_log = NULL;
if (preg->syntax & RE_ICASE) if (preg->syntax & RE_ICASE)
re_string_construct_toupper (&input, string, length, preg->translate); err = re_string_construct_toupper (&input, string, length, preg->translate);
else else
re_string_construct (&input, string, length, preg->translate); err = re_string_construct (&input, string, length, preg->translate);
if (err != REG_NOERROR)
return err;
match_ctx_init (&mctx, eflags, dfa->nbackref * 2); err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
if (err != REG_NOERROR)
return err;
#ifdef DEBUG #ifdef DEBUG
/* We assume front-end functions already check them. */ /* We assume front-end functions already check them. */
@@ -557,7 +580,12 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
match_last = check_matching (preg, &input, &mctx, state_log, match_last = check_matching (preg, &input, &mctx, state_log,
match_first, 0, fl_longest_match); match_first, 0, fl_longest_match);
if (match_last != -1) if (match_last != -1)
break; {
if (match_last == -2)
return REG_ESPACE;
else
break; /* We found a matching. */
}
} }
} }
/* Update counter. */ /* Update counter. */
@@ -598,8 +626,13 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
#endif #endif
halt_node = check_halt_state_context (preg, pstate, &input, halt_node = check_halt_state_context (preg, pstate, &input,
match_last, eflags); match_last, eflags);
sift_states_backward (preg, state_log, &mctx, &input, halt_node); err = sift_states_backward (preg, state_log, &mctx, &input, halt_node);
set_regs (preg, state_log, &mctx, &input, nmatch, pmatch, halt_node); if (err != REG_NOERROR)
return err;
err = set_regs (preg, state_log, &mctx, &input, nmatch, pmatch,
halt_node);
if (err != REG_NOERROR)
return err;
} }
} }
@@ -607,21 +640,23 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
if (dfa->nbackref) if (dfa->nbackref)
match_ctx_free (&mctx); match_ctx_free (&mctx);
re_string_destruct (&input); re_string_destruct (&input);
return match_last == -1; return (match_last == -1) ? REG_NOMATCH : REG_NOERROR;
} }
/* Acquire an initial state. /* Acquire an initial state and return it.
We must select appropriate initial state depending on the context, We must select appropriate initial state depending on the context,
since initial states may have constraints like "\<", "^", etc.. */ since initial states may have constraints like "\<", "^", etc.. */
static inline re_dfastate_t * static inline re_dfastate_t *
acquire_init_state_context (preg, input, idx, eflags) acquire_init_state_context (err, preg, input, idx, eflags)
const regex_t *preg; reg_errcode_t *err;
const re_string_t *input; const regex_t *preg;
int idx, eflags; const re_string_t *input;
int idx, eflags;
{ {
re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
*err = REG_NOERROR;
if (dfa->init_state->has_constraint) if (dfa->init_state->has_constraint)
{ {
unsigned int context; unsigned int context;
@@ -636,9 +671,12 @@ acquire_init_state_context (preg, input, idx, eflags)
else if (IS_NEWLINE_CONTEXT (context)) else if (IS_NEWLINE_CONTEXT (context))
return dfa->init_state_nl; return dfa->init_state_nl;
else if (IS_BEGBUF_CONTEXT (context)) else if (IS_BEGBUF_CONTEXT (context))
/* It is relatively rare case, then calculate on demand. */ {
return re_acquire_state_context (dfa, dfa->init_state->entrance_nodes, /* It is relatively rare case, then calculate on demand. */
context); return re_acquire_state_context (err, dfa,
dfa->init_state->entrance_nodes,
context);
}
else else
/* Must not happen? */ /* Must not happen? */
return dfa->init_state; return dfa->init_state;
@@ -648,7 +686,8 @@ acquire_init_state_context (preg, input, idx, eflags)
} }
/* Check whether the regular expression matches input string INPUT or not, /* Check whether the regular expression matches input string INPUT or not,
and return the index where the matching end, or return -1 if not match. and return the index where the matching end, return -1 if not match,
or return -2 in case of an error.
FL_SEARCH means we must search where the matching starts, FL_SEARCH means we must search where the matching starts,
FL_LONGEST_MATCH means we want the POSIX longest matching. */ FL_LONGEST_MATCH means we want the POSIX longest matching. */
@@ -661,11 +700,15 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
re_dfastate_t **state_log; re_dfastate_t **state_log;
int start_idx, fl_search, fl_longest_match; int start_idx, fl_search, fl_longest_match;
{ {
reg_errcode_t err;
int match = 0, match_last = -1; int match = 0, match_last = -1;
re_dfastate_t *cur_state; re_dfastate_t *cur_state;
cur_state = acquire_init_state_context (preg, input, start_idx, cur_state = acquire_init_state_context (&err, preg, input, start_idx,
mctx->eflags); mctx->eflags);
/* An initial state must not be NULL(invalid state). */
if (cur_state == NULL)
return -2;
if (state_log != NULL) if (state_log != NULL)
state_log[start_idx] = cur_state; state_log[start_idx] = cur_state;
/* If the RE accepts NULL string. */ /* If the RE accepts NULL string. */
@@ -687,11 +730,13 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
while (!re_string_eoi (input)) while (!re_string_eoi (input))
{ {
cur_state = transit_state (preg, cur_state, input, fl_search && !match, cur_state = transit_state (&err, preg, cur_state, input,
state_log, mctx); fl_search && !match, state_log, mctx);
if (cur_state == NULL) /* Reached at the invalid state. */ if (cur_state == NULL) /* Reached at the invalid state or an error. */
{ {
int cur_str_idx = re_string_cur_idx (input); int cur_str_idx = re_string_cur_idx (input);
if (err != REG_NOERROR)
return -2;
if (fl_search && !match) if (fl_search && !match)
{ {
/* Restart from initial state, since we are searching /* Restart from initial state, since we are searching
@@ -699,9 +744,11 @@ check_matching (preg, input, mctx, state_log, start_idx, fl_search,
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
if (MB_CUR_MAX == 1 || re_string_first_byte (input, cur_str_idx)) if (MB_CUR_MAX == 1 || re_string_first_byte (input, cur_str_idx))
#endif /* RE_ENABLE_I18N */ #endif /* RE_ENABLE_I18N */
cur_state = acquire_init_state_context (preg, input, cur_state = acquire_init_state_context (&err, preg, input,
cur_str_idx, cur_str_idx,
mctx->eflags); mctx->eflags);
if (cur_state == NULL && err != REG_NOERROR)
return -2;
if (state_log != NULL) if (state_log != NULL)
state_log[cur_str_idx] = cur_state; state_log[cur_str_idx] = cur_state;
} }
@@ -787,9 +834,10 @@ check_halt_state_context (preg, state, input, idx, eflags)
return 0; return 0;
} }
/* Compute the next node to which "NFA" transit from NODE. /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
Return the destination node, and update EPS_VIA_NODES. corresponding to the DFA).
("NFA" is a NFA corresponding to the DFA. */ Return the destination node, and update EPS_VIA_NODES, return -1 in case
of errors. */
static int static int
proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes) proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
@@ -801,10 +849,12 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
re_node_set *eps_via_nodes; re_node_set *eps_via_nodes;
{ {
re_dfa_t *dfa = (re_dfa_t *)preg->buffer; re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
int i, dest_node = -1; int i, dest_node = -1, err;
if (IS_EPSILON_NODE (dfa->nodes[node].type)) if (IS_EPSILON_NODE (dfa->nodes[node].type))
{ {
re_node_set_insert (eps_via_nodes, node); err = re_node_set_insert (eps_via_nodes, node);
if (err < 0)
return -1;
for (i = 0; i < state_log[*pidx]->nodes.nelem; ++i) for (i = 0; i < state_log[*pidx]->nodes.nelem; ++i)
{ {
int candidate = state_log[*pidx]->nodes.elems[i]; int candidate = state_log[*pidx]->nodes.elems[i];
@@ -845,7 +895,9 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
} }
if (naccepted == 0) if (naccepted == 0)
{ {
re_node_set_insert (eps_via_nodes, node); err = re_node_set_insert (eps_via_nodes, node);
if (err < 0)
return -1;
dest_node = dfa->nexts[node]; dest_node = dfa->nexts[node];
if (re_node_set_contains (&state_log[*pidx]->nodes, dest_node)) if (re_node_set_contains (&state_log[*pidx]->nodes, dest_node))
return dest_node; return dest_node;
@@ -885,7 +937,7 @@ proceed_next_node (preg, state_log, mctx, input, pidx, node, eps_via_nodes)
Note: We assume that pmatch[0] is already set, and Note: We assume that pmatch[0] is already set, and
pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */ pmatch[i].rm_so == pmatch[i].rm_eo == -1 (i > 1). */
static void static reg_errcode_t
set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node) set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
const regex_t *preg; const regex_t *preg;
re_dfastate_t **state_log; re_dfastate_t **state_log;
@@ -944,9 +996,11 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
/* Proceed to next node. */ /* Proceed to next node. */
cur_node = proceed_next_node (preg, state_log, mctx, input, &idx, cur_node = proceed_next_node (preg, state_log, mctx, input, &idx,
cur_node, &eps_via_nodes); cur_node, &eps_via_nodes);
if (cur_node < 0)
return REG_ESPACE;
} }
re_node_set_free (&eps_via_nodes); re_node_set_free (&eps_via_nodes);
return; return REG_NOERROR;
} }
#define NUMBER_OF_STATE 1 #define NUMBER_OF_STATE 1
@@ -974,7 +1028,7 @@ set_regs (preg, state_log, mctx, input, nmatch, pmatch, last_node)
#define STATE_NODE_CONTAINS(state,node) \ #define STATE_NODE_CONTAINS(state,node) \
((state) != NULL && re_node_set_contains (&(state)->nodes, node)) ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
static void static reg_errcode_t
sift_states_backward (preg, state_log, mctx, input, last_node) sift_states_backward (preg, state_log, mctx, input, last_node)
const regex_t *preg; const regex_t *preg;
re_dfastate_t **state_log; re_dfastate_t **state_log;
@@ -982,6 +1036,7 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
const re_string_t *input; const re_string_t *input;
int last_node; int last_node;
{ {
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *)preg->buffer; re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
re_node_set state_buf; re_node_set state_buf;
int str_idx = mctx->match_last; int str_idx = mctx->match_last;
@@ -990,18 +1045,28 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
#ifdef DEBUG #ifdef DEBUG
assert (state_log != NULL && state_log[str_idx] != NULL); assert (state_log != NULL && state_log[str_idx] != NULL);
#endif #endif
re_node_set_alloc (&state_buf, NUMBER_OF_STATE); err = re_node_set_alloc (&state_buf, NUMBER_OF_STATE);
if (err != REG_NOERROR)
return err;
plog = &state_log[str_idx]->nodes; plog = &state_log[str_idx]->nodes;
/* Build sifted state_log[str_idx]. It has the nodes which can epsilon /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
transit to the last_node and the last_node itself. */ transit to the last_node and the last_node itself. */
re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node); err = re_node_set_intersect (&state_buf, plog, dfa->inveclosures + last_node);
if (err != REG_NOERROR)
return err;
if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref) if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf); {
err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
if (err != REG_NOERROR)
return err;
}
/* Update state log. */ /* Update state log. */
state_log[str_idx] = re_acquire_state (dfa, &state_buf); state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
if (state_log[str_idx] == NULL && err != REG_NOERROR)
return err;
/* Then check each state in the state_log. */ /* Then check each state in the state_log. */
while (str_idx > mctx->match_first) while (str_idx > mctx->match_first)
@@ -1062,17 +1127,26 @@ sift_states_backward (preg, state_log, mctx, input, last_node)
/* `prev_node' may point the entity of the OP_CONTEXT_NODE, /* `prev_node' may point the entity of the OP_CONTEXT_NODE,
then we use plog->elems[i] instead. */ then we use plog->elems[i] instead. */
re_node_set_add_intersect (&state_buf, plog, err = re_node_set_add_intersect (&state_buf, plog,
dfa->inveclosures + prev_node); dfa->inveclosures + prev_node);
if (err != REG_NOERROR)
return err;
} }
if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref) if (state_log[str_idx] != NULL && state_log[str_idx]->has_backref)
add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf); {
err = add_epsilon_backreference (dfa, mctx, plog, str_idx, &state_buf);
if (err != REG_NOERROR)
return err;
}
/* Update state_log. */ /* Update state_log. */
state_log[str_idx] = re_acquire_state (dfa, &state_buf); state_log[str_idx] = re_acquire_state (&err, dfa, &state_buf);
if (state_log[str_idx] == NULL && err != REG_NOERROR)
return err;
} }
re_node_set_free (&state_buf); re_node_set_free (&state_buf);
return REG_NOERROR;
} }
/* Helper functions. */ /* Helper functions. */
@@ -1136,7 +1210,7 @@ sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_first,
return naccepted; return naccepted;
} }
static void static reg_errcode_t
add_epsilon_backreference (dfa, mctx, plog, idx, state_buf) add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
const re_dfa_t *dfa; const re_dfa_t *dfa;
const re_match_context_t *mctx; const re_match_context_t *mctx;
@@ -1164,12 +1238,16 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
} }
if (j < mctx->nbkref_ents || idx == mctx->match_first) if (j < mctx->nbkref_ents || idx == mctx->match_first)
{ {
re_node_set_add_intersect (state_buf, plog, reg_errcode_t err;
dfa->inveclosures + node_idx); err = re_node_set_add_intersect (state_buf, plog,
dfa->inveclosures + node_idx);
if (err != REG_NOERROR)
return err;
i = 0; i = 0;
} }
} }
} }
return REG_NOERROR;
} }
/* Functions for state transition. */ /* Functions for state transition. */
@@ -1180,17 +1258,19 @@ add_epsilon_backreference (dfa, mctx, plog, idx, state_buf)
update the destination of STATE_LOG. */ update the destination of STATE_LOG. */
static re_dfastate_t * static re_dfastate_t *
transit_state (preg, state, input, fl_search, state_log, mctx) transit_state (err, preg, state, input, fl_search, state_log, mctx)
const regex_t *preg; reg_errcode_t *err;
re_dfastate_t *state, **state_log; const regex_t *preg;
re_string_t *input; re_dfastate_t *state, **state_log;
int fl_search; re_string_t *input;
re_match_context_t *mctx; int fl_search;
re_match_context_t *mctx;
{ {
re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_dfastate_t **trtable, *next_state; re_dfastate_t **trtable, *next_state;
unsigned char ch; unsigned char ch;
*err = REG_NOERROR;
if (state == NULL) if (state == NULL)
{ {
next_state = state; next_state = state;
@@ -1200,7 +1280,11 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
{ {
/* If the current state can accept multibyte. */ /* If the current state can accept multibyte. */
if (state->accept_mb) if (state->accept_mb)
transit_state_mb (preg, state, input, state_log, mctx); {
*err = transit_state_mb (preg, state, input, state_log, mctx);
if (*err != REG_NOERROR)
return NULL;
}
/* Then decide the next state with the single byte. */ /* Then decide the next state with the single byte. */
if (1) if (1)
@@ -1221,7 +1305,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
else else
{ {
/* don't use transition table */ /* don't use transition table */
next_state = transit_state_sb (preg, state, input, fl_search, mctx); next_state = transit_state_sb (err, preg, state, input, fl_search,
mctx);
if (next_state == NULL && err != REG_NOERROR)
return NULL;
} }
} }
@@ -1252,7 +1339,10 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
if (next_state != NULL) if (next_state != NULL)
{ {
table_nodes = next_state->entrance_nodes; table_nodes = next_state->entrance_nodes;
re_node_set_init_union (&next_nodes, table_nodes, log_nodes); *err = re_node_set_init_union (&next_nodes, table_nodes,
log_nodes);
if (*err != REG_NOERROR)
return NULL;
} }
else else
next_nodes = *log_nodes; next_nodes = *log_nodes;
@@ -1262,14 +1352,19 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
context = re_string_context_at (input, re_string_cur_idx (input) - 1, context = re_string_context_at (input, re_string_cur_idx (input) - 1,
mctx->eflags, preg->newline_anchor); mctx->eflags, preg->newline_anchor);
next_state = state_log[cur_idx] next_state = state_log[cur_idx]
= re_acquire_state_context (dfa, &next_nodes, context); = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
this function is next_state and ERR is already set. */
if (table_nodes != NULL) if (table_nodes != NULL)
re_node_set_free (&next_nodes); re_node_set_free (&next_nodes);
} }
/* If the next state has back references. */ /* If the next state has back references. */
if (next_state != NULL && next_state->has_backref) if (next_state != NULL && next_state->has_backref)
{ {
transit_state_bkref (preg, next_state, input, state_log, mctx); *err = transit_state_bkref (preg, next_state, input, state_log, mctx);
if (*err != REG_NOERROR)
return NULL;
next_state = state_log[cur_idx]; next_state = state_log[cur_idx];
} }
} }
@@ -1282,12 +1377,13 @@ transit_state (preg, state, input, fl_search, state_log, mctx)
accepting the current input byte. */ accepting the current input byte. */
static re_dfastate_t * static re_dfastate_t *
transit_state_sb (preg, state, input, fl_search, mctx) transit_state_sb (err, preg, state, input, fl_search, mctx)
const regex_t *preg; reg_errcode_t *err;
re_dfastate_t *state; const regex_t *preg;
re_string_t *input; re_dfastate_t *state;
int fl_search; re_string_t *input;
re_match_context_t *mctx; int fl_search;
re_match_context_t *mctx;
{ {
re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
re_node_set next_nodes; re_node_set next_nodes;
@@ -1295,14 +1391,20 @@ transit_state_sb (preg, state, input, fl_search, mctx)
int node_cnt, cur_str_idx = re_string_cur_idx (input); int node_cnt, cur_str_idx = re_string_cur_idx (input);
unsigned int context; unsigned int context;
re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
if (*err != REG_NOERROR)
return NULL;
for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
{ {
int cur_node = state->nodes.elems[node_cnt]; int cur_node = state->nodes.elems[node_cnt];
if (check_node_accept (preg, dfa->nodes + cur_node, input, if (check_node_accept (preg, dfa->nodes + cur_node, input,
cur_str_idx, mctx->eflags)) cur_str_idx, mctx->eflags))
re_node_set_merge (&next_nodes, {
dfa->eclosures + dfa->nexts[cur_node]); *err = re_node_set_merge (&next_nodes,
dfa->eclosures + dfa->nexts[cur_node]);
if (*err != REG_NOERROR)
return NULL;
}
} }
if (fl_search) if (fl_search)
{ {
@@ -1317,23 +1419,32 @@ transit_state_sb (preg, state, input, fl_search, mctx)
} }
if (!not_initial) if (!not_initial)
#endif #endif
re_node_set_merge (&next_nodes, dfa->init_state->entrance_nodes); {
*err = re_node_set_merge (&next_nodes,
dfa->init_state->entrance_nodes);
if (*err != REG_NOERROR)
return NULL;
}
} }
context = re_string_context_at (input, cur_str_idx, mctx->eflags, context = re_string_context_at (input, cur_str_idx, mctx->eflags,
preg->newline_anchor); preg->newline_anchor);
next_state = re_acquire_state_context (dfa, &next_nodes, context); next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
/* We don't need to check errors here, since the return value of
this function is next_state and ERR is already set. */
re_node_set_free (&next_nodes); re_node_set_free (&next_nodes);
re_string_skip_bytes (input, 1); re_string_skip_bytes (input, 1);
return next_state; return next_state;
} }
static void static reg_errcode_t
transit_state_mb (preg, pstate, input, state_log, mctx) transit_state_mb (preg, pstate, input, state_log, mctx)
const regex_t *preg; const regex_t *preg;
re_dfastate_t *pstate, **state_log; re_dfastate_t *pstate, **state_log;
const re_string_t *input; const re_string_t *input;
re_match_context_t *mctx; re_match_context_t *mctx;
{ {
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i; int i;
@@ -1376,39 +1487,50 @@ transit_state_mb (preg, pstate, input, state_log, mctx)
if (dest_state == NULL) if (dest_state == NULL)
dest_nodes = *new_nodes; dest_nodes = *new_nodes;
else else
re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes, {
new_nodes); err = re_node_set_init_union (&dest_nodes,
dest_state->entrance_nodes, new_nodes);
if (err != REG_NOERROR)
return err;
}
context = re_string_context_at (input, dest_idx - 1, mctx->eflags, context = re_string_context_at (input, dest_idx - 1, mctx->eflags,
preg->newline_anchor); preg->newline_anchor);
state_log[dest_idx] = re_acquire_state_context (dfa, &dest_nodes, context); state_log[dest_idx] = re_acquire_state_context (&err, dfa, &dest_nodes,
context);
if (state_log[dest_idx] == NULL && err != REG_NOERROR)
return err;
if (dest_state != NULL) if (dest_state != NULL)
re_node_set_free (&dest_nodes); re_node_set_free (&dest_nodes);
} }
return REG_NOERROR;
} }
static void static reg_errcode_t
transit_state_bkref (preg, pstate, input, state_log, mctx) transit_state_bkref (preg, pstate, input, state_log, mctx)
const regex_t *preg; const regex_t *preg;
re_dfastate_t *pstate, **state_log; re_dfastate_t *pstate, **state_log;
const re_string_t *input; const re_string_t *input;
re_match_context_t *mctx; re_match_context_t *mctx;
{ {
reg_errcode_t err;
re_dfastate_t **work_state_log; re_dfastate_t **work_state_log;
#ifdef DEBUG #ifdef DEBUG
assert (mctx->match_first != -1); assert (mctx->match_first != -1);
#endif #endif
work_state_log = re_malloc (re_dfastate_t *, re_string_cur_idx (input) + 1); work_state_log = re_malloc (re_dfastate_t *, re_string_cur_idx (input) + 1);
if (work_state_log == NULL)
return REG_ESPACE;
transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log, err = transit_state_bkref_loop (preg, input, &pstate->nodes, work_state_log,
state_log, mctx); state_log, mctx);
re_free (work_state_log); re_free (work_state_log);
return err;
} }
/* Caller must allocate `work_state_log'. */ /* Caller must allocate `work_state_log'. */
static void static reg_errcode_t
transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx) transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
const regex_t *preg; const regex_t *preg;
const re_string_t *input; const re_string_t *input;
@@ -1416,10 +1538,13 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
re_dfastate_t **work_state_log, **state_log; re_dfastate_t **work_state_log, **state_log;
re_match_context_t *mctx; re_match_context_t *mctx;
{ {
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j; int i, j;
regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1); regmatch_t *cur_regs = re_malloc (regmatch_t, preg->re_nsub + 1);
int cur_str_idx = re_string_cur_idx (input); int cur_str_idx = re_string_cur_idx (input);
if (cur_regs == NULL)
return REG_ESPACE;
for (i = 0; i < nodes->nelem; ++i) for (i = 0; i < nodes->nelem; ++i)
{ {
@@ -1474,7 +1599,9 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
/* Successfully matched, add a new cache entry. */ /* Successfully matched, add a new cache entry. */
dest_str_idx = cur_str_idx + subexp_len; dest_str_idx = cur_str_idx + subexp_len;
match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx); err = match_ctx_add_entry (mctx, node_idx, cur_str_idx, dest_str_idx);
if (err != REG_NOERROR)
return err;
clean_state_log_if_need (state_log, mctx, dest_str_idx); clean_state_log_if_need (state_log, mctx, dest_str_idx);
/* And add the epsilon closures (which is `new_dest_nodes') of /* And add the epsilon closures (which is `new_dest_nodes') of
@@ -1494,29 +1621,44 @@ transit_state_bkref_loop (preg, input, nodes, work_state_log, state_log, mctx)
: state_log[cur_str_idx]->nodes.nelem); : state_log[cur_str_idx]->nodes.nelem);
/* Add `new_dest_node' to state_log. */ /* Add `new_dest_node' to state_log. */
if (dest_state == NULL) if (dest_state == NULL)
state_log[dest_str_idx] = re_acquire_state_context (dfa, {
new_dest_nodes, state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
context); new_dest_nodes,
context);
if (state_log[dest_str_idx] == NULL && err != REG_NOERROR)
return err;
}
else else
{ {
re_node_set dest_nodes; re_node_set dest_nodes;
re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes, err = re_node_set_init_union (&dest_nodes, dest_state->entrance_nodes,
new_dest_nodes); new_dest_nodes);
state_log[dest_str_idx] = re_acquire_state_context (dfa, &dest_nodes, if (err != REG_NOERROR)
return err;
state_log[dest_str_idx] = re_acquire_state_context (&err, dfa,
&dest_nodes,
context); context);
if (state_log[dest_str_idx] == NULL && err != REG_NOERROR)
return err;
re_node_set_free (&dest_nodes); re_node_set_free (&dest_nodes);
} }
/* We need to check recursively if the backreference can epsilon /* We need to check recursively if the backreference can epsilon
transit. */ transit. */
if (subexp_len == 0 && state_log[cur_str_idx]->nodes.nelem > prev_nelem) if (subexp_len == 0 && state_log[cur_str_idx]->nodes.nelem > prev_nelem)
transit_state_bkref_loop (preg, input, new_dest_nodes, work_state_log, {
state_log, mctx); err = transit_state_bkref_loop (preg, input, new_dest_nodes,
work_state_log, state_log, mctx);
if (err != REG_NOERROR)
return err;
}
} }
re_free (cur_regs); re_free (cur_regs);
return REG_NOERROR;
} }
/* Build transition table for the state. */ /* Build transition table for the state.
Return the new table if succeeded, otherwise return NULL. */
static re_dfastate_t ** static re_dfastate_t **
build_trtable (preg, state, fl_search) build_trtable (preg, state, fl_search)
@@ -1524,6 +1666,7 @@ build_trtable (preg, state, fl_search)
const re_dfastate_t *state; const re_dfastate_t *state;
int fl_search; int fl_search;
{ {
reg_errcode_t err;
re_dfa_t *dfa = (re_dfa_t *) preg->buffer; re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j, k, ch; int i, j, k, ch;
int ndests; /* Number of the destination states from `state'. */ int ndests; /* Number of the destination states from `state'. */
@@ -1541,15 +1684,18 @@ build_trtable (preg, state, fl_search)
/* Initialize transiton table. */ /* Initialize transiton table. */
trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
if (dests_node == NULL || dests_ch == NULL || trtable == NULL)
return NULL;
/* At first, group all nodes belonging to `state' into several /* At first, group all nodes belonging to `state' into several
destinations. */ destinations. */
ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch); ndests = group_nodes_into_DFAstates (preg, state, dests_node, dests_ch);
if (ndests == 0) if (ndests <= 0)
{ {
re_free (dests_node); re_free (dests_node);
re_free (dests_ch); re_free (dests_ch);
return trtable; /* Return NULL in case of an error, trtable otherwise. */
return (ndests < 0) ? NULL : trtable;
} }
dest_states = re_malloc (re_dfastate_t *, ndests); dest_states = re_malloc (re_dfastate_t *, ndests);
@@ -1557,7 +1703,11 @@ build_trtable (preg, state, fl_search)
dest_states_nl = re_malloc (re_dfastate_t *, ndests); dest_states_nl = re_malloc (re_dfastate_t *, ndests);
bitset_empty (acceptable); bitset_empty (acceptable);
re_node_set_alloc (&follows, ndests + 1); err = re_node_set_alloc (&follows, ndests + 1);
if (dest_states == NULL || dest_states_word == NULL || dest_states_nl == NULL
|| err != REG_NOERROR)
return NULL;
/* Then build the states for all destinations. */ /* Then build the states for all destinations. */
for (i = 0; i < ndests; ++i) for (i = 0; i < ndests; ++i)
{ {
@@ -1569,7 +1719,9 @@ build_trtable (preg, state, fl_search)
next_node = dfa->nexts[dests_node[i].elems[j]]; next_node = dfa->nexts[dests_node[i].elems[j]];
if (next_node != -1) if (next_node != -1)
{ {
re_node_set_merge (&follows, dfa->eclosures + next_node); err = re_node_set_merge (&follows, dfa->eclosures + next_node);
if (err != REG_NOERROR)
return NULL;
} }
} }
/* If search flag is set, merge the initial state. */ /* If search flag is set, merge the initial state. */
@@ -1585,17 +1737,28 @@ build_trtable (preg, state, fl_search)
} }
if (!not_initial) if (!not_initial)
#endif #endif
re_node_set_merge (&follows, dfa->init_state->entrance_nodes); {
err = re_node_set_merge (&follows,
dfa->init_state->entrance_nodes);
if (err != REG_NOERROR)
return NULL;
}
} }
dest_states[i] = re_acquire_state_context (dfa, &follows, 0); dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
if (dest_states[i] == NULL && err != REG_NOERROR)
return NULL;
/* If the new state has context constraint, /* If the new state has context constraint,
build appropriate states for these contexts. */ build appropriate states for these contexts. */
if (dest_states[i]->has_constraint) if (dest_states[i]->has_constraint)
{ {
dest_states_word[i] = re_acquire_state_context (dfa, &follows, dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_WORD); CONTEXT_WORD);
dest_states_nl[i] = re_acquire_state_context (dfa, &follows, if (dest_states_word[i] == NULL && err != REG_NOERROR)
return NULL;
dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_NEWLINE); CONTEXT_NEWLINE);
if (dest_states_nl[i] == NULL && err != REG_NOERROR)
return NULL;
} }
else else
{ {
@@ -1654,6 +1817,7 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
re_node_set *dests_node; re_node_set *dests_node;
bitset *dests_ch; bitset *dests_ch;
{ {
reg_errcode_t err;
const re_dfa_t *dfa = (re_dfa_t *) preg->buffer; const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
int i, j, k; int i, j, k;
int ndests; /* Number of the destinations from `state'. */ int ndests; /* Number of the destinations from `state'. */
@@ -1750,12 +1914,16 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
{ {
bitset_copy (dests_ch[ndests], remains); bitset_copy (dests_ch[ndests], remains);
bitset_copy (dests_ch[j], intersec); bitset_copy (dests_ch[j], intersec);
re_node_set_init_copy (dests_node + ndests, &dests_node[j]); err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
if (err != REG_NOERROR)
return -1;
++ndests; ++ndests;
} }
/* Put the position in the current group. */ /* Put the position in the current group. */
re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
if (err < 0)
return -1;
/* If all characters are consumed, go to next node. */ /* If all characters are consumed, go to next node. */
if (!not_consumed) if (!not_consumed)
@@ -1765,7 +1933,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
if (j == ndests) if (j == ndests)
{ {
bitset_copy (dests_ch[ndests], accepts); bitset_copy (dests_ch[ndests], accepts);
re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
if (err != REG_NOERROR)
return -1;
++ndests; ++ndests;
bitset_empty (accepts); bitset_empty (accepts);
} }
@@ -2028,7 +2198,7 @@ check_node_accept (preg, node, input, idx, eflags)
/* Functions for matching context. */ /* Functions for matching context. */
static void static reg_errcode_t
match_ctx_init (mctx, eflags, n) match_ctx_init (mctx, eflags, n)
re_match_context_t *mctx; re_match_context_t *mctx;
int eflags; int eflags;
@@ -2037,12 +2207,17 @@ match_ctx_init (mctx, eflags, n)
mctx->eflags = eflags; mctx->eflags = eflags;
mctx->match_first = mctx->match_last = -1; mctx->match_first = mctx->match_last = -1;
if (n > 0) if (n > 0)
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); {
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
if (mctx->bkref_ents == NULL)
return REG_ESPACE;
}
else else
mctx->bkref_ents = NULL; mctx->bkref_ents = NULL;
mctx->nbkref_ents = 0; mctx->nbkref_ents = 0;
mctx->abkref_ents = n; mctx->abkref_ents = n;
mctx->max_bkref_len = 0; mctx->max_bkref_len = 0;
return REG_NOERROR;
} }
static void static void
@@ -2054,7 +2229,7 @@ match_ctx_free (mctx)
/* Add a new backreference entry to the cache. */ /* Add a new backreference entry to the cache. */
static void static reg_errcode_t
match_ctx_add_entry (mctx, node, from, to) match_ctx_add_entry (mctx, node, from, to)
re_match_context_t *mctx; re_match_context_t *mctx;
int node, from, to; int node, from, to;
@@ -2064,6 +2239,8 @@ match_ctx_add_entry (mctx, node, from, to)
mctx->bkref_ents = re_realloc (mctx->bkref_ents, mctx->bkref_ents = re_realloc (mctx->bkref_ents,
struct re_backref_cache_entry, struct re_backref_cache_entry,
mctx->abkref_ents * 2); mctx->abkref_ents * 2);
if (mctx->bkref_ents == NULL)
return REG_ESPACE;
memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
mctx->abkref_ents *= 2; mctx->abkref_ents *= 2;
@@ -2073,4 +2250,5 @@ match_ctx_add_entry (mctx, node, from, to)
mctx->bkref_ents[mctx->nbkref_ents++].to = to; mctx->bkref_ents[mctx->nbkref_ents++].to = to;
if (mctx->max_bkref_len < to - from) if (mctx->max_bkref_len < to - from)
mctx->max_bkref_len = to - from; mctx->max_bkref_len = to - from;
return REG_NOERROR;
} }