mirror of
https://sourceware.org/git/glibc.git
synced 2025-12-24 17:51:17 +03:00
* posix/regex_internal.c (re_string_reconstruct): Avoid calling
mbrtowc for very simple UTF-8 case.

2005-09-01 Paul Eggert <eggert@cs.ucla.edu>

* posix/regex_internal.c (build_wcs_upper_buffer): Fix portability
bugs in int versus size_t comparisons.

2005-09-06 Ulrich Drepper <drepper@redhat.com>

* posix/regex_internal.c (re_acquire_state): Make DFA pointer arg
a pointer-to-const.
(re_acquire_state_context): Likewise.
* posix/regex_internal.h: Adjust prototypes.

2005-08-31 Jim Meyering <jim@meyering.net>

* posix/regcomp.c (search_duplicated_node): Make first pointer arg
a pointer-to-const.
* posix/regex_internal.c (create_ci_newstate, create_cd_newstate,
register_state): Likewise.
* posix/regexec.c (search_cur_bkref_entry, check_dst_limits):
(check_dst_limits_calc_pos_1, check_dst_limits_calc_pos):
(group_nodes_into_DFAstates): Likewise.

* posix/regexec.c (re_search_internal): Simplify update of rm_so and
rm_eo by replacing "if (A == B) A += C - B;" with the equivalent of
"if (A == B) A = C;".

2005-09-06 Ulrich Drepper <drepper@redhat.com>

* posix/regcomp.c (re_compile_internal): Change third parameter type
to size_t.
(init_dfa): Likewise. Make sure that arithmetic on pat_len doesn't
overflow.
* posix/regex_internal.h (struct re_dfa_t): Change type of nodes_alloc
and nodes_len to size_t.
* posix/regex_internal.c (re_dfa_add_node): Use size_t as type for
new_nodes_alloc. Check for overflow.

2005-08-31 Paul Eggert <eggert@cs.ucla.edu>

* posix/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
(optimize_subexps, lower_subexp): Don't assume 1<<31 has defined
behavior on hosts with 32-bit int, since the signed shift might
overflow. Use 1u<<31 instead.
* posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain):
Likewise.
* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise.
(check_subexp_matching_top): Likewise.

* posix/regcomp.c (optimize_subexps, lower_subexp): Use CHAR_BIT
rather than 8, for clarity.
* posix/regexec.c (check_dst_limits_calc_pos_1):
(check_subexp_matching_top): Likewise.

* posix/regcomp.c (init_dfa): Make table_size unsigned, so that we
don't have to worry about portability issues when shifting it left.
Remove no-longer-needed test for table_size > 0.

* posix/regcomp.c (parse_sub_exp): Do not shift more bits than there
are in a word, as the resulting behavior is undefined.
* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise; in one
case, a <= should have been an <, and in another case the whole test
was missing.

* posix/regex_internal.h (BYTE_BITS): Remove. All uses changed to the
standard name CHAR_BIT.
This commit is contained in:
@@ -19,11 +19,11 @@
|
||||
02111-1307 USA. */
|
||||
|
||||
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
|
||||
int length, reg_syntax_t syntax);
|
||||
size_t length, reg_syntax_t syntax);
|
||||
static void re_compile_fastmap_iter (regex_t *bufp,
|
||||
const re_dfastate_t *init_state,
|
||||
char *fastmap);
|
||||
static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
|
||||
static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
|
||||
static void init_word_char (re_dfa_t *dfa);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static void free_charset (re_charset_t *cset);
|
||||
@@ -51,7 +51,7 @@ static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
|
||||
int top_clone_node, int root_node,
|
||||
unsigned int constraint);
|
||||
static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
|
||||
static int search_duplicated_node (re_dfa_t *dfa, int org_node,
|
||||
static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
|
||||
unsigned int constraint);
|
||||
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
|
||||
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
|
||||
@@ -368,7 +368,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
|
||||
int i, j, ch;
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
|
||||
if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
|
||||
re_set_fastmap (fastmap, icase, ch);
|
||||
}
|
||||
#ifdef RE_ENABLE_I18N
|
||||
@@ -740,7 +740,7 @@ static reg_errcode_t
|
||||
re_compile_internal (preg, pattern, length, syntax)
|
||||
regex_t *preg;
|
||||
const char * pattern;
|
||||
int length;
|
||||
size_t length;
|
||||
reg_syntax_t syntax;
|
||||
{
|
||||
reg_errcode_t err = REG_NOERROR;
|
||||
@@ -781,6 +781,7 @@ re_compile_internal (preg, pattern, length, syntax)
|
||||
return err;
|
||||
}
|
||||
#ifdef DEBUG
|
||||
/* Note: length+1 will not overflow since it is checked in init_dfa. */
|
||||
dfa->re_str = re_malloc (char, length + 1);
|
||||
strncpy (dfa->re_str, pattern, length + 1);
|
||||
#endif
|
||||
@@ -840,9 +841,9 @@ re_compile_internal (preg, pattern, length, syntax)
|
||||
static reg_errcode_t
|
||||
init_dfa (dfa, pat_len)
|
||||
re_dfa_t *dfa;
|
||||
int pat_len;
|
||||
size_t pat_len;
|
||||
{
|
||||
int table_size;
|
||||
unsigned int table_size;
|
||||
#ifndef _LIBC
|
||||
char *codeset_name;
|
||||
#endif
|
||||
@@ -852,11 +853,15 @@ init_dfa (dfa, pat_len)
|
||||
/* Force allocation of str_tree_storage the first time. */
|
||||
dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
|
||||
|
||||
/* Avoid overflows. */
|
||||
if (pat_len == SIZE_MAX)
|
||||
return REG_ESPACE;
|
||||
|
||||
dfa->nodes_alloc = pat_len + 1;
|
||||
dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
|
||||
|
||||
/* table_size = 2 ^ ceil(log pat_len) */
|
||||
for (table_size = 1; table_size > 0; table_size <<= 1)
|
||||
for (table_size = 1; ; table_size <<= 1)
|
||||
if (table_size > pat_len)
|
||||
break;
|
||||
|
||||
@@ -916,7 +921,7 @@ init_dfa (dfa, pat_len)
|
||||
{
|
||||
wint_t wch = __btowc (ch);
|
||||
if (wch != WEOF)
|
||||
dfa->sb_char[i] |= 1 << j;
|
||||
dfa->sb_char[i] |= 1u << j;
|
||||
# ifndef _LIBC
|
||||
if (isascii (ch) && wch != ch)
|
||||
dfa->map_notascii = 1;
|
||||
@@ -944,7 +949,7 @@ init_word_char (dfa)
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
if (isalnum (ch) || ch == '_')
|
||||
dfa->word_char[i] |= 1 << j;
|
||||
dfa->word_char[i] |= 1u << j;
|
||||
}
|
||||
|
||||
/* Free the work area which are only used while compiling. */
|
||||
@@ -1277,8 +1282,8 @@ optimize_subexps (extra, node)
|
||||
node->left->parent = node;
|
||||
|
||||
dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
|
||||
if (other_idx < 8 * sizeof (dfa->used_bkref_map))
|
||||
dfa->used_bkref_map &= ~(1 << other_idx);
|
||||
if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
|
||||
dfa->used_bkref_map &= ~(1u << other_idx);
|
||||
}
|
||||
|
||||
return REG_NOERROR;
|
||||
@@ -1326,8 +1331,8 @@ lower_subexp (err, preg, node)
|
||||
very common, so we do not lose much. An example that triggers
|
||||
this case is the sed "script" /\(\)/x. */
|
||||
&& node->left != NULL
|
||||
&& (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
|
||||
|| !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
|
||||
&& (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
|
||||
|| !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
|
||||
return node->left;
|
||||
|
||||
/* Convert the SUBEXP node to the concatenation of an
|
||||
@@ -1574,7 +1579,7 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
|
||||
|
||||
static int
|
||||
search_duplicated_node (dfa, org_node, constraint)
|
||||
re_dfa_t *dfa;
|
||||
const re_dfa_t *dfa;
|
||||
int org_node;
|
||||
unsigned int constraint;
|
||||
{
|
||||
@@ -2492,7 +2497,9 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
|
||||
if (BE (*err != REG_NOERROR, 0))
|
||||
return NULL;
|
||||
}
|
||||
dfa->completed_bkref_map |= 1 << cur_nsub;
|
||||
|
||||
if (cur_nsub <= '9' - '1')
|
||||
dfa->completed_bkref_map |= 1 << cur_nsub;
|
||||
|
||||
tree = create_tree (dfa, tree, NULL, SUBEXP);
|
||||
if (BE (tree == NULL, 0))
|
||||
|
||||
Reference in New Issue
Block a user