1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-29 11:41:21 +03:00

* posix/regex_internal.c (re_string_reconstruct): Avoid calling
mbrtowc for very simple UTF-8 case.

2005-09-01  Paul Eggert  <eggert@cs.ucla.edu>

	* posix/regex_internal.c (build_wcs_upper_buffer): Fix portability
	bugs in int versus size_t comparisons.

2005-09-06  Ulrich Drepper  <drepper@redhat.com>

	* posix/regex_internal.c (re_acquire_state): Make DFA pointer arg
	a pointer-to-const.
	(re_acquire_state_context): Likewise.
	* posix/regex_internal.h: Adjust prototypes.

2005-08-31  Jim Meyering  <jim@meyering.net>

	* posix/regcomp.c (search_duplicated_node): Make first pointer arg
	a pointer-to-const.
	* posix/regex_internal.c (create_ci_newstate, create_cd_newstate,
	register_state): Likewise.
	* posix/regexec.c (search_cur_bkref_entry, check_dst_limits):
	(check_dst_limits_calc_pos_1, check_dst_limits_calc_pos):
	(group_nodes_into_DFAstates): Likewise.

	* posix/regexec.c (re_search_internal): Simplify update of
	rm_so and rm_eo by replacing "if (A == B) A += C - B;"
	with the equivalent of "if (A == B) A = C;".

2005-09-06  Ulrich Drepper  <drepper@redhat.com>

	* posix/regcomp.c (re_compile_internal): Change third parameter type
	to size_t.
	(init_dfa): Likewise.  Make sure that arithmetic on pat_len doesn't
	overflow.
	* posix/regex_internal.h (struct re_dfa_t): Change type of nodes_alloc
	and nodes_len to size_t.
	* posix/regex_internal.c (re_dfa_add_node): Use size_t as type for
	new_nodes_alloc.  Check for overflow.

2005-08-31  Paul Eggert  <eggert@cs.ucla.edu>

	* posix/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
	(optimize_subexps, lower_subexp):
	Don't assume 1<<31 has defined behavior on hosts with 32-bit int,
	since the signed shift might overflow.  Use 1u<<31 instead.
	* posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain):
	Likewise.
	* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise.
	(check_subexp_matching_top): Likewise.
	* posix/regcomp.c (optimize_subexps, lower_subexp):
	Use CHAR_BIT rather than 8, for clarity.
	* posix/regexec.c (check_dst_limits_calc_pos_1):
	(check_subexp_matching_top): Likewise.
	* posix/regcomp.c (init_dfa): Make table_size unsigned, so that we
	don't have to worry about portability issues when shifting it left.
	Remove no-longer-needed test for table_size > 0.
	* posix/regcomp.c (parse_sub_exp): Do not shift more bits than there
	are in a word, as the resulting behavior is undefined.
	* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise;
	in one case, a <= should have been an <, and in another case the
	whole test was missing.
	* posix/regex_internal.h (BYTE_BITS): Remove.  All uses changed to
	the standard name CHAR_BIT.
This commit is contained in:
Ulrich Drepper
2005-09-07 01:15:33 +00:00
parent 2d87db5b53
commit 01ed6ceb7c
5 changed files with 211 additions and 109 deletions

View File

@ -1,5 +1,72 @@
2005-09-06 Ulrich Drepper <drepper@redhat.com>
* posix/regex_internal.c (re_string_reconstruct): Avoid calling
mbrtowc for very simple UTF-8 case.
2005-09-01 Paul Eggert <eggert@cs.ucla.edu>
* posix/regex_internal.c (build_wcs_upper_buffer): Fix portability
bugs in int versus size_t comparisons.
2005-09-06 Ulrich Drepper <drepper@redhat.com>
* posix/regex_internal.c (re_acquire_state): Make DFA pointer arg
a pointer-to-const.
(re_acquire_state_context): Likewise.
* posix/regex_internal.h: Adjust prototypes.
2005-08-31 Jim Meyering <jim@meyering.net>
* posix/regcomp.c (search_duplicated_node): Make first pointer arg
a pointer-to-const.
* posix/regex_internal.c (create_ci_newstate, create_cd_newstate,
register_state): Likewise.
* posix/regexec.c (search_cur_bkref_entry, check_dst_limits):
(check_dst_limits_calc_pos_1, check_dst_limits_calc_pos):
(group_nodes_into_DFAstates): Likewise.
2005-08-31 Paul Eggert <eggert@cs.ucla.edu> 2005-08-31 Paul Eggert <eggert@cs.ucla.edu>
* posix/regexec.c (re_search_internal): Simplify update of
rm_so and rm_eo by replacing "if (A == B) A += C - B;"
with the equivalent of "if (A == B) A = C;".
2005-09-06 Ulrich Drepper <drepper@redhat.com>
* posix/regcomp.c (re_compile_internal): Change third parameter type
to size_t.
(init_dfa): Likewise. Make sure that arithmetic on pat_len doesn't
overflow.
* posix/regex_internal.h (struct re_dfa_t): Change type of nodes_alloc
and nodes_len to size_t.
* posix/regex_internal.c (re_dfa_add_node): Use size_t as type for
new_nodes_alloc. Check for overflow.
2005-08-31 Paul Eggert <eggert@cs.ucla.edu>
* posix/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
(optimize_subexps, lower_subexp):
Don't assume 1<<31 has defined behavior on hosts with 32-bit int,
since the signed shift might overflow. Use 1u<<31 instead.
* posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain):
Likewise.
* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise.
(check_subexp_matching_top): Likewise.
* posix/regcomp.c (optimize_subexps, lower_subexp):
Use CHAR_BIT rather than 8, for clarity.
* posix/regexec.c (check_dst_limits_calc_pos_1):
(check_subexp_matching_top): Likewise.
* posix/regcomp.c (init_dfa): Make table_size unsigned, so that we
don't have to worry about portability issues when shifting it left.
Remove no-longer-needed test for table_size > 0.
* posix/regcomp.c (parse_sub_exp): Do not shift more bits than there
are in a word, as the resulting behavior is undefined.
* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise;
in one case, a <= should have been an <, and in another case the
whole test was missing.
* posix/regex_internal.h (BYTE_BITS): Remove. All uses changed to
the standard name CHAR_BIT.
* posix/regex_internal.h (re_sub_match_top_t): Remove unused member * posix/regex_internal.h (re_sub_match_top_t): Remove unused member
next_last_offset. next_last_offset.
(struct re_dfa_t): Remove unused member states_alloc. (struct re_dfa_t): Remove unused member states_alloc.

View File

@ -19,11 +19,11 @@
02111-1307 USA. */ 02111-1307 USA. */
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
int length, reg_syntax_t syntax); size_t length, reg_syntax_t syntax);
static void re_compile_fastmap_iter (regex_t *bufp, static void re_compile_fastmap_iter (regex_t *bufp,
const re_dfastate_t *init_state, const re_dfastate_t *init_state,
char *fastmap); char *fastmap);
static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
static void init_word_char (re_dfa_t *dfa); static void init_word_char (re_dfa_t *dfa);
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
static void free_charset (re_charset_t *cset); static void free_charset (re_charset_t *cset);
@ -51,7 +51,7 @@ static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
int top_clone_node, int root_node, int top_clone_node, int root_node,
unsigned int constraint); unsigned int constraint);
static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
static int search_duplicated_node (re_dfa_t *dfa, int org_node, static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
unsigned int constraint); unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa); static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
@ -368,7 +368,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
int i, j, ch; int i, j, ch;
for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch) for (j = 0; j < UINT_BITS; ++j, ++ch)
if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
re_set_fastmap (fastmap, icase, ch); re_set_fastmap (fastmap, icase, ch);
} }
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
@ -740,7 +740,7 @@ static reg_errcode_t
re_compile_internal (preg, pattern, length, syntax) re_compile_internal (preg, pattern, length, syntax)
regex_t *preg; regex_t *preg;
const char * pattern; const char * pattern;
int length; size_t length;
reg_syntax_t syntax; reg_syntax_t syntax;
{ {
reg_errcode_t err = REG_NOERROR; reg_errcode_t err = REG_NOERROR;
@ -781,6 +781,7 @@ re_compile_internal (preg, pattern, length, syntax)
return err; return err;
} }
#ifdef DEBUG #ifdef DEBUG
/* Note: length+1 will not overflow since it is checked in init_dfa. */
dfa->re_str = re_malloc (char, length + 1); dfa->re_str = re_malloc (char, length + 1);
strncpy (dfa->re_str, pattern, length + 1); strncpy (dfa->re_str, pattern, length + 1);
#endif #endif
@ -840,9 +841,9 @@ re_compile_internal (preg, pattern, length, syntax)
static reg_errcode_t static reg_errcode_t
init_dfa (dfa, pat_len) init_dfa (dfa, pat_len)
re_dfa_t *dfa; re_dfa_t *dfa;
int pat_len; size_t pat_len;
{ {
int table_size; unsigned int table_size;
#ifndef _LIBC #ifndef _LIBC
char *codeset_name; char *codeset_name;
#endif #endif
@ -852,11 +853,15 @@ init_dfa (dfa, pat_len)
/* Force allocation of str_tree_storage the first time. */ /* Force allocation of str_tree_storage the first time. */
dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
/* Avoid overflows. */
if (pat_len == SIZE_MAX)
return REG_ESPACE;
dfa->nodes_alloc = pat_len + 1; dfa->nodes_alloc = pat_len + 1;
dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
/* table_size = 2 ^ ceil(log pat_len) */ /* table_size = 2 ^ ceil(log pat_len) */
for (table_size = 1; table_size > 0; table_size <<= 1) for (table_size = 1; ; table_size <<= 1)
if (table_size > pat_len) if (table_size > pat_len)
break; break;
@ -916,7 +921,7 @@ init_dfa (dfa, pat_len)
{ {
wint_t wch = __btowc (ch); wint_t wch = __btowc (ch);
if (wch != WEOF) if (wch != WEOF)
dfa->sb_char[i] |= 1 << j; dfa->sb_char[i] |= 1u << j;
# ifndef _LIBC # ifndef _LIBC
if (isascii (ch) && wch != ch) if (isascii (ch) && wch != ch)
dfa->map_notascii = 1; dfa->map_notascii = 1;
@ -944,7 +949,7 @@ init_word_char (dfa)
for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch) for (j = 0; j < UINT_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_') if (isalnum (ch) || ch == '_')
dfa->word_char[i] |= 1 << j; dfa->word_char[i] |= 1u << j;
} }
/* Free the work area which are only used while compiling. */ /* Free the work area which are only used while compiling. */
@ -1277,8 +1282,8 @@ optimize_subexps (extra, node)
node->left->parent = node; node->left->parent = node;
dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
if (other_idx < 8 * sizeof (dfa->used_bkref_map)) if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
dfa->used_bkref_map &= ~(1 << other_idx); dfa->used_bkref_map &= ~(1u << other_idx);
} }
return REG_NOERROR; return REG_NOERROR;
@ -1326,8 +1331,8 @@ lower_subexp (err, preg, node)
very common, so we do not lose much. An example that triggers very common, so we do not lose much. An example that triggers
this case is the sed "script" /\(\)/x. */ this case is the sed "script" /\(\)/x. */
&& node->left != NULL && node->left != NULL
&& (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map) && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
|| !(dfa->used_bkref_map & (1 << node->token.opr.idx)))) || !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
return node->left; return node->left;
/* Convert the SUBEXP node to the concatenation of an /* Convert the SUBEXP node to the concatenation of an
@ -1574,7 +1579,7 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
static int static int
search_duplicated_node (dfa, org_node, constraint) search_duplicated_node (dfa, org_node, constraint)
re_dfa_t *dfa; const re_dfa_t *dfa;
int org_node; int org_node;
unsigned int constraint; unsigned int constraint;
{ {
@ -2492,7 +2497,9 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
if (BE (*err != REG_NOERROR, 0)) if (BE (*err != REG_NOERROR, 0))
return NULL; return NULL;
} }
dfa->completed_bkref_map |= 1 << cur_nsub;
if (cur_nsub <= '9' - '1')
dfa->completed_bkref_map |= 1 << cur_nsub;
tree = create_tree (dfa, tree, NULL, SUBEXP); tree = create_tree (dfa, tree, NULL, SUBEXP);
if (BE (tree == NULL, 0)) if (BE (tree == NULL, 0))

View File

@ -26,12 +26,13 @@ static void re_string_construct_common (const char *str, int len,
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx, static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
wint_t *last_wc) internal_function; wint_t *last_wc) internal_function;
#endif /* RE_ENABLE_I18N */ #endif /* RE_ENABLE_I18N */
static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, static reg_errcode_t register_state (const re_dfa_t *dfa,
re_dfastate_t *newstate,
unsigned int hash) internal_function; unsigned int hash) internal_function;
static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
const re_node_set *nodes, const re_node_set *nodes,
unsigned int hash) internal_function; unsigned int hash) internal_function;
static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
const re_node_set *nodes, const re_node_set *nodes,
unsigned int context, unsigned int context,
unsigned int hash) internal_function; unsigned int hash) internal_function;
@ -654,37 +655,50 @@ re_string_reconstruct (pstr, idx, eflags)
byte other than 0x80 - 0xbf. */ byte other than 0x80 - 0xbf. */
raw = pstr->raw_mbs + pstr->raw_mbs_idx; raw = pstr->raw_mbs + pstr->raw_mbs_idx;
end = raw + (offset - pstr->mb_cur_max); end = raw + (offset - pstr->mb_cur_max);
for (p = raw + offset - 1; p >= end; --p) p = raw + offset - 1;
if ((*p & 0xc0) != 0x80) #ifdef _LIBC
{ /* We know the wchar_t encoding is UCS4, so for the simple
mbstate_t cur_state; case, ASCII characters, skip the conversion step. */
wchar_t wc2; if (isascii (*p) && BE (pstr->trans == NULL, 1))
int mlen = raw + pstr->len - p; {
unsigned char buf[6]; memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
pstr->valid_len = 0;
wc = (wchar_t) *p;
}
else
#endif
for (; p >= end; --p)
if ((*p & 0xc0) != 0x80)
{
mbstate_t cur_state;
wchar_t wc2;
int mlen = raw + pstr->len - p;
unsigned char buf[6];
size_t mbclen;
q = p; q = p;
if (BE (pstr->trans != NULL, 0)) if (BE (pstr->trans != NULL, 0))
{ {
int i = mlen < 6 ? mlen : 6; int i = mlen < 6 ? mlen : 6;
while (--i >= 0) while (--i >= 0)
buf[i] = pstr->trans[p[i]]; buf[i] = pstr->trans[p[i]];
q = buf; q = buf;
} }
/* XXX Don't use mbrtowc, we know which conversion /* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */ to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state)); memset (&cur_state, 0, sizeof (cur_state));
mlen = (mbrtowc (&wc2, (const char *) p, mlen, mbclen = mbrtowc (&wc2, (const char *) p, mlen,
&cur_state) &cur_state);
- (raw + offset - p)); if (raw + offset - p <= mbclen
if (mlen >= 0) && mbclen < (size_t) -2)
{ {
memset (&pstr->cur_state, '\0', memset (&pstr->cur_state, '\0',
sizeof (mbstate_t)); sizeof (mbstate_t));
pstr->valid_len = mlen; pstr->valid_len = mbclen - (raw + offset - p);
wc = wc2; wc = wc2;
} }
break; break;
} }
} }
if (wc == WEOF) if (wc == WEOF)
@ -738,15 +752,15 @@ re_string_reconstruct (pstr, idx, eflags)
} }
else else
#endif /* RE_ENABLE_I18N */ #endif /* RE_ENABLE_I18N */
if (BE (pstr->mbs_allocated, 0)) if (BE (pstr->mbs_allocated, 0))
{ {
if (pstr->icase) if (pstr->icase)
build_upper_buffer (pstr); build_upper_buffer (pstr);
else if (pstr->trans != NULL) else if (pstr->trans != NULL)
re_string_translate_buffer (pstr); re_string_translate_buffer (pstr);
} }
else else
pstr->valid_len = pstr->len; pstr->valid_len = pstr->len;
pstr->cur_idx = 0; pstr->cur_idx = 0;
return REG_NOERROR; return REG_NOERROR;
@ -1345,12 +1359,16 @@ re_dfa_add_node (dfa, token)
int type = token.type; int type = token.type;
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
{ {
int new_nodes_alloc = dfa->nodes_alloc * 2; size_t new_nodes_alloc = dfa->nodes_alloc * 2;
int *new_nexts, *new_indices; int *new_nexts, *new_indices;
re_node_set *new_edests, *new_eclosures; re_node_set *new_edests, *new_eclosures;
re_token_t *new_nodes;
re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t, /* Avoid overflows. */
new_nodes_alloc); if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
return -1;
new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
if (BE (new_nodes == NULL, 0)) if (BE (new_nodes == NULL, 0))
return -1; return -1;
dfa->nodes = new_nodes; dfa->nodes = new_nodes;
@ -1403,7 +1421,7 @@ calc_state_hash (nodes, context)
static re_dfastate_t* static re_dfastate_t*
re_acquire_state (err, dfa, nodes) re_acquire_state (err, dfa, nodes)
reg_errcode_t *err; reg_errcode_t *err;
re_dfa_t *dfa; const re_dfa_t *dfa;
const re_node_set *nodes; const re_node_set *nodes;
{ {
unsigned int hash; unsigned int hash;
@ -1448,7 +1466,7 @@ re_acquire_state (err, dfa, nodes)
static re_dfastate_t* static re_dfastate_t*
re_acquire_state_context (err, dfa, nodes, context) re_acquire_state_context (err, dfa, nodes, context)
reg_errcode_t *err; reg_errcode_t *err;
re_dfa_t *dfa; const re_dfa_t *dfa;
const re_node_set *nodes; const re_node_set *nodes;
unsigned int context; unsigned int context;
{ {
@ -1486,7 +1504,7 @@ re_acquire_state_context (err, dfa, nodes, context)
static reg_errcode_t static reg_errcode_t
register_state (dfa, newstate, hash) register_state (dfa, newstate, hash)
re_dfa_t *dfa; const re_dfa_t *dfa;
re_dfastate_t *newstate; re_dfastate_t *newstate;
unsigned int hash; unsigned int hash;
{ {
@ -1525,7 +1543,7 @@ register_state (dfa, newstate, hash)
static re_dfastate_t * static re_dfastate_t *
create_ci_newstate (dfa, nodes, hash) create_ci_newstate (dfa, nodes, hash)
re_dfa_t *dfa; const re_dfa_t *dfa;
const re_node_set *nodes; const re_node_set *nodes;
unsigned int hash; unsigned int hash;
{ {
@ -1576,7 +1594,7 @@ create_ci_newstate (dfa, nodes, hash)
static re_dfastate_t * static re_dfastate_t *
create_cd_newstate (dfa, nodes, context, hash) create_cd_newstate (dfa, nodes, context, hash)
re_dfa_t *dfa; const re_dfa_t *dfa;
const re_node_set *nodes; const re_node_set *nodes;
unsigned int context, hash; unsigned int context, hash;
{ {

View File

@ -91,8 +91,6 @@
# define inline # define inline
#endif #endif
/* Number of bits in a byte. */
#define BYTE_BITS 8
/* Number of single byte character. */ /* Number of single byte character. */
#define SBC_MAX 256 #define SBC_MAX 256
@ -123,16 +121,16 @@ extern const char __re_error_msgid[] attribute_hidden;
extern const size_t __re_error_msgid_idx[] attribute_hidden; extern const size_t __re_error_msgid_idx[] attribute_hidden;
/* Number of bits in an unsigned int. */ /* Number of bits in an unsigned int. */
#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS) #define UINT_BITS (sizeof (unsigned int) * CHAR_BIT)
/* Number of unsigned ints in a bit_set. */ /* Number of unsigned ints in a bit_set. */
#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS) #define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
typedef unsigned int bitset[BITSET_UINTS]; typedef unsigned int bitset[BITSET_UINTS];
typedef unsigned int *re_bitset_ptr_t; typedef unsigned int *re_bitset_ptr_t;
typedef const unsigned int *re_const_bitset_ptr_t; typedef const unsigned int *re_const_bitset_ptr_t;
#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS) #define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS)
#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS)) #define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS))
#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS)) #define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS))
#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS) #define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
#define bitset_set_all(set) \ #define bitset_set_all(set) \
memset (set, 255, sizeof (unsigned int) * BITSET_UINTS) memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
@ -627,8 +625,8 @@ struct re_fail_stack_t
struct re_dfa_t struct re_dfa_t
{ {
re_token_t *nodes; re_token_t *nodes;
int nodes_alloc; size_t nodes_alloc;
int nodes_len; size_t nodes_len;
int *nexts; int *nexts;
int *org_indices; int *org_indices;
re_node_set *edests; re_node_set *edests;
@ -701,10 +699,12 @@ static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
#define re_node_set_empty(p) ((p)->nelem = 0) #define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems) #define re_node_set_free(set) re_free ((set)->elems)
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function; static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function;
static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, static re_dfastate_t *re_acquire_state (reg_errcode_t *err, const
const re_node_set *nodes) internal_function; re_dfa_t *dfa,
const re_node_set *nodes)
internal_function;
static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err, static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
re_dfa_t *dfa, const re_dfa_t *dfa,
const re_node_set *nodes, const re_node_set *nodes,
unsigned int context) internal_function; unsigned int context) internal_function;
static void free_state (re_dfastate_t *state) internal_function; static void free_state (re_dfastate_t *state) internal_function;

View File

@ -25,7 +25,7 @@ static void match_ctx_free (re_match_context_t *cache) internal_function;
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
int str_idx, int from, int to) int str_idx, int from, int to)
internal_function; internal_function;
static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx) static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
internal_function; internal_function;
static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
int str_idx) internal_function; int str_idx) internal_function;
@ -104,13 +104,14 @@ static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node, static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
re_node_set *dest_nodes, re_node_set *dest_nodes,
const re_node_set *and_nodes) internal_function; const re_node_set *and_nodes) internal_function;
static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits, static int check_dst_limits (const re_match_context_t *mctx,
re_node_set *limits,
int dst_node, int dst_idx, int src_node, int dst_node, int dst_idx, int src_node,
int src_idx) internal_function; int src_idx) internal_function;
static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx, static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
int boundaries, int subexp_idx, int boundaries, int subexp_idx,
int from_node, int bkref_idx) internal_function; int from_node, int bkref_idx) internal_function;
static int check_dst_limits_calc_pos (re_match_context_t *mctx, static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
int limit, int subexp_idx, int limit, int subexp_idx,
int node, int str_idx, int node, int str_idx,
int bkref_idx) internal_function; int bkref_idx) internal_function;
@ -185,7 +186,7 @@ static unsigned int find_collation_sequence_value (const unsigned char *mbs,
size_t name_len) internal_function; size_t name_len) internal_function;
# endif /* _LIBC */ # endif /* _LIBC */
#endif /* RE_ENABLE_I18N */ #endif /* RE_ENABLE_I18N */
static int group_nodes_into_DFAstates (re_dfa_t *dfa, static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
const re_dfastate_t *state, const re_dfastate_t *state,
re_node_set *states_node, re_node_set *states_node,
bitset *states_ch) internal_function; bitset *states_ch) internal_function;
@ -883,14 +884,14 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
#ifdef RE_ENABLE_I18N #ifdef RE_ENABLE_I18N
if (BE (mctx.input.offsets_needed != 0, 0)) if (BE (mctx.input.offsets_needed != 0, 0))
{ {
if (pmatch[reg_idx].rm_so == mctx.input.valid_len) pmatch[reg_idx].rm_so =
pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len; (pmatch[reg_idx].rm_so == mctx.input.valid_len
else ? mctx.input.valid_raw_len
pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so]; : mctx.input.offsets[pmatch[reg_idx].rm_so]);
if (pmatch[reg_idx].rm_eo == mctx.input.valid_len) pmatch[reg_idx].rm_eo =
pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len; (pmatch[reg_idx].rm_eo == mctx.input.valid_len
else ? mctx.input.valid_raw_len
pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo]; : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
} }
#else #else
assert (mctx.input.offsets_needed == 0); assert (mctx.input.offsets_needed == 0);
@ -1887,7 +1888,7 @@ sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
static int static int
check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx) check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
re_match_context_t *mctx; const re_match_context_t *mctx;
re_node_set *limits; re_node_set *limits;
int dst_node, dst_idx, src_node, src_idx; int dst_node, dst_idx, src_node, src_idx;
{ {
@ -1924,7 +1925,7 @@ check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
static int static int
check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
re_match_context_t *mctx; const re_match_context_t *mctx;
int boundaries, subexp_idx, from_node, bkref_idx; int boundaries, subexp_idx, from_node, bkref_idx;
{ {
re_dfa_t *const dfa = mctx->dfa; re_dfa_t *const dfa = mctx->dfa;
@ -1949,8 +1950,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
if (ent->node != node) if (ent->node != node)
continue; continue;
if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map) if (subexp_idx
&& !(ent->eps_reachable_subexps_map & (1 << subexp_idx))) < CHAR_BIT * sizeof ent->eps_reachable_subexps_map
&& !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
continue; continue;
/* Recurse trying to reach the OP_OPEN_SUBEXP and /* Recurse trying to reach the OP_OPEN_SUBEXP and
@ -1976,7 +1978,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
if (cpos == 0 && (boundaries & 2)) if (cpos == 0 && (boundaries & 2))
return 0; return 0;
ent->eps_reachable_subexps_map &= ~(1 << subexp_idx); if (subexp_idx
< CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
} }
while (ent++->more); while (ent++->more);
} }
@ -2002,7 +2006,7 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
static int static int
check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx) check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx)
re_match_context_t *mctx; const re_match_context_t *mctx;
int limit, subexp_idx, from_node, str_idx, bkref_idx; int limit, subexp_idx, from_node, str_idx, bkref_idx;
{ {
struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
@ -2443,8 +2447,8 @@ check_subexp_matching_top (mctx, cur_nodes, str_idx)
{ {
int node = cur_nodes->elems[node_idx]; int node = cur_nodes->elems[node_idx];
if (dfa->nodes[node].type == OP_OPEN_SUBEXP if (dfa->nodes[node].type == OP_OPEN_SUBEXP
&& dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map)) && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
&& dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx)) && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
{ {
err = match_ctx_add_subtop (mctx, node, str_idx); err = match_ctx_add_subtop (mctx, node, str_idx);
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
@ -2557,7 +2561,8 @@ transit_state_mb (mctx, pstate)
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
return err; return err;
} }
context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags); context = re_string_context_at (&mctx->input, dest_idx - 1,
mctx->eflags);
mctx->state_log[dest_idx] mctx->state_log[dest_idx]
= re_acquire_state_context (&err, dfa, &dest_nodes, context); = re_acquire_state_context (&err, dfa, &dest_nodes, context);
if (dest_state != NULL) if (dest_state != NULL)
@ -2696,7 +2701,8 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
if (cache_idx != -1) if (cache_idx != -1)
{ {
const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx; const struct re_backref_cache_entry *entry
= mctx->bkref_ents + cache_idx;
do do
if (entry->node == bkref_node) if (entry->node == bkref_node)
return REG_NOERROR; /* We already checked it. */ return REG_NOERROR; /* We already checked it. */
@ -2743,7 +2749,8 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
buf = (const char *) re_string_get_buffer (&mctx->input); buf = (const char *) re_string_get_buffer (&mctx->input);
} }
if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
break; /* We don't need to search this sub expression any more. */ /* We don't need to search this sub expression any more. */
break;
} }
bkref_str_off += sl_str_diff; bkref_str_off += sl_str_diff;
sl_str += sl_str_diff; sl_str += sl_str_diff;
@ -2794,7 +2801,8 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
continue; continue;
/* Does this state have a ')' of the sub expression? */ /* Does this state have a ')' of the sub expression? */
nodes = &mctx->state_log[sl_str]->nodes; nodes = &mctx->state_log[sl_str]->nodes;
cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP); cls_node = find_subexp_node (dfa, nodes, subexp_num,
OP_CLOSE_SUBEXP);
if (cls_node == -1) if (cls_node == -1)
continue; /* No. */ continue; /* No. */
if (sub_top->path == NULL) if (sub_top->path == NULL)
@ -2807,7 +2815,8 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
in the current context? */ in the current context? */
err = check_arrival (mctx, sub_top->path, sub_top->node, err = check_arrival (mctx, sub_top->path, sub_top->node,
sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP); sub_top->str_idx, cls_node, sl_str,
OP_CLOSE_SUBEXP);
if (err == REG_NOMATCH) if (err == REG_NOMATCH)
continue; continue;
if (BE (err != REG_NOERROR, 0)) if (BE (err != REG_NOERROR, 0))
@ -2841,7 +2850,8 @@ get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str)
int to_idx; int to_idx;
/* Can the subexpression arrive the back reference? */ /* Can the subexpression arrive the back reference? */
err = check_arrival (mctx, &sub_last->path, sub_last->node, err = check_arrival (mctx, &sub_last->path, sub_last->node,
sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP); sub_last->str_idx, bkref_node, bkref_str,
OP_OPEN_SUBEXP);
if (err != REG_NOERROR) if (err != REG_NOERROR)
return err; return err;
err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
@ -3539,10 +3549,10 @@ out_free:
static int static int
group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
re_dfa_t *dfa; const re_dfa_t *dfa;
const re_dfastate_t *state; const re_dfastate_t *state;
re_node_set *dests_node; re_node_set *dests_node;
bitset *dests_ch; bitset *dests_ch;
{ {
reg_errcode_t err; reg_errcode_t err;
int result; int result;
@ -4265,7 +4275,7 @@ match_ctx_add_entry (mctx, node, str_idx, from, to)
static int static int
search_cur_bkref_entry (mctx, str_idx) search_cur_bkref_entry (mctx, str_idx)
re_match_context_t *mctx; const re_match_context_t *mctx;
int str_idx; int str_idx;
{ {
int left, right, mid, last; int left, right, mid, last;