mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-29 11:41:21 +03:00
* posix/regex_internal.c (re_string_reconstruct): Avoid calling
mbrtowc for very simple UTF-8 case. 2005-09-01 Paul Eggert <eggert@cs.ucla.edu> * posix/regex_internal.c (build_wcs_upper_buffer): Fix portability bugs in int versus size_t comparisons. 2005-09-06 Ulrich Drepper <drepper@redhat.com> * posix/regex_internal.c (re_acquire_state): Make DFA pointer arg a pointer-to-const. (re_acquire_state_context): Likewise. * posix/regex_internal.h: Adjust prototypes. 2005-08-31 Jim Meyering <jim@meyering.net> * posix/regcomp.c (search_duplicated_node): Make first pointer arg a pointer-to-const. * posix/regex_internal.c (create_ci_newstate, create_cd_newstate, register_state): Likewise. * posix/regexec.c (search_cur_bkref_entry, check_dst_limits): (check_dst_limits_calc_pos_1, check_dst_limits_calc_pos): (group_nodes_into_DFAstates): Likewise. * posix/regexec.c (re_search_internal): Simplify update of rm_so and rm_eo by replacing "if (A == B) A += C - B;" with the equivalent of "if (A == B) A = C;". 2005-09-06 Ulrich Drepper <drepper@redhat.com> * posix/regcomp.c (re_compile_internal): Change third parameter type to size_t. (init_dfa): Likewise. Make sure that arithmetic on pat_len doesn't overflow. * posix/regex_internal.h (struct re_dfa_t): Change type of nodes_alloc and nodes_len to size_t. * posix/regex_internal.c (re_dfa_add_node): Use size_t as type for new_nodes_alloc. Check for overflow. 2005-08-31 Paul Eggert <eggert@cs.ucla.edu> * posix/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char): (optimize_subexps, lower_subexp): Don't assume 1<<31 has defined behavior on hosts with 32-bit int, since the signed shift might overflow. Use 1u<<31 instead. * posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain): Likewise. * posix/regexec.c (check_dst_limits_calc_pos_1): Likewise. (check_subexp_matching_top): Likewise. * posix/regcomp.c (optimize_subexps, lower_subexp): Use CHAR_BIT rather than 8, for clarity. * posix/regexec.c (check_dst_limits_calc_pos_1): (check_subexp_matching_top): Likewise. * posix/regcomp.c (init_dfa): Make table_size unsigned, so that we don't have to worry about portability issues when shifting it left. Remove no-longer-needed test for table_size > 0. * posix/regcomp.c (parse_sub_exp): Do not shift more bits than there are in a word, as the resulting behavior is undefined. * posix/regexec.c (check_dst_limits_calc_pos_1): Likewise; in one case, a <= should have been an <, and in another case the whole test was missing. * posix/regex_internal.h (BYTE_BITS): Remove. All uses changed to the standard name CHAR_BIT.
This commit is contained in:
@ -26,12 +26,13 @@ static void re_string_construct_common (const char *str, int len,
|
||||
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
|
||||
wint_t *last_wc) internal_function;
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
|
||||
static reg_errcode_t register_state (const re_dfa_t *dfa,
|
||||
re_dfastate_t *newstate,
|
||||
unsigned int hash) internal_function;
|
||||
static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
|
||||
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
|
||||
const re_node_set *nodes,
|
||||
unsigned int hash) internal_function;
|
||||
static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
|
||||
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
|
||||
const re_node_set *nodes,
|
||||
unsigned int context,
|
||||
unsigned int hash) internal_function;
|
||||
@ -654,37 +655,50 @@ re_string_reconstruct (pstr, idx, eflags)
|
||||
byte other than 0x80 - 0xbf. */
|
||||
raw = pstr->raw_mbs + pstr->raw_mbs_idx;
|
||||
end = raw + (offset - pstr->mb_cur_max);
|
||||
for (p = raw + offset - 1; p >= end; --p)
|
||||
if ((*p & 0xc0) != 0x80)
|
||||
{
|
||||
mbstate_t cur_state;
|
||||
wchar_t wc2;
|
||||
int mlen = raw + pstr->len - p;
|
||||
unsigned char buf[6];
|
||||
p = raw + offset - 1;
|
||||
#ifdef _LIBC
|
||||
/* We know the wchar_t encoding is UCS4, so for the simple
|
||||
case, ASCII characters, skip the conversion step. */
|
||||
if (isascii (*p) && BE (pstr->trans == NULL, 1))
|
||||
{
|
||||
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
|
||||
pstr->valid_len = 0;
|
||||
wc = (wchar_t) *p;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
for (; p >= end; --p)
|
||||
if ((*p & 0xc0) != 0x80)
|
||||
{
|
||||
mbstate_t cur_state;
|
||||
wchar_t wc2;
|
||||
int mlen = raw + pstr->len - p;
|
||||
unsigned char buf[6];
|
||||
size_t mbclen;
|
||||
|
||||
q = p;
|
||||
if (BE (pstr->trans != NULL, 0))
|
||||
{
|
||||
int i = mlen < 6 ? mlen : 6;
|
||||
while (--i >= 0)
|
||||
buf[i] = pstr->trans[p[i]];
|
||||
q = buf;
|
||||
}
|
||||
/* XXX Don't use mbrtowc, we know which conversion
|
||||
to use (UTF-8 -> UCS4). */
|
||||
memset (&cur_state, 0, sizeof (cur_state));
|
||||
mlen = (mbrtowc (&wc2, (const char *) p, mlen,
|
||||
&cur_state)
|
||||
- (raw + offset - p));
|
||||
if (mlen >= 0)
|
||||
{
|
||||
memset (&pstr->cur_state, '\0',
|
||||
sizeof (mbstate_t));
|
||||
pstr->valid_len = mlen;
|
||||
wc = wc2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
q = p;
|
||||
if (BE (pstr->trans != NULL, 0))
|
||||
{
|
||||
int i = mlen < 6 ? mlen : 6;
|
||||
while (--i >= 0)
|
||||
buf[i] = pstr->trans[p[i]];
|
||||
q = buf;
|
||||
}
|
||||
/* XXX Don't use mbrtowc, we know which conversion
|
||||
to use (UTF-8 -> UCS4). */
|
||||
memset (&cur_state, 0, sizeof (cur_state));
|
||||
mbclen = mbrtowc (&wc2, (const char *) p, mlen,
|
||||
&cur_state);
|
||||
if (raw + offset - p <= mbclen
|
||||
&& mbclen < (size_t) -2)
|
||||
{
|
||||
memset (&pstr->cur_state, '\0',
|
||||
sizeof (mbstate_t));
|
||||
pstr->valid_len = mbclen - (raw + offset - p);
|
||||
wc = wc2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (wc == WEOF)
|
||||
@ -738,15 +752,15 @@ re_string_reconstruct (pstr, idx, eflags)
|
||||
}
|
||||
else
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
if (BE (pstr->mbs_allocated, 0))
|
||||
{
|
||||
if (pstr->icase)
|
||||
build_upper_buffer (pstr);
|
||||
else if (pstr->trans != NULL)
|
||||
re_string_translate_buffer (pstr);
|
||||
}
|
||||
else
|
||||
pstr->valid_len = pstr->len;
|
||||
if (BE (pstr->mbs_allocated, 0))
|
||||
{
|
||||
if (pstr->icase)
|
||||
build_upper_buffer (pstr);
|
||||
else if (pstr->trans != NULL)
|
||||
re_string_translate_buffer (pstr);
|
||||
}
|
||||
else
|
||||
pstr->valid_len = pstr->len;
|
||||
|
||||
pstr->cur_idx = 0;
|
||||
return REG_NOERROR;
|
||||
@ -1345,12 +1359,16 @@ re_dfa_add_node (dfa, token)
|
||||
int type = token.type;
|
||||
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
|
||||
{
|
||||
int new_nodes_alloc = dfa->nodes_alloc * 2;
|
||||
size_t new_nodes_alloc = dfa->nodes_alloc * 2;
|
||||
int *new_nexts, *new_indices;
|
||||
re_node_set *new_edests, *new_eclosures;
|
||||
re_token_t *new_nodes;
|
||||
|
||||
re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t,
|
||||
new_nodes_alloc);
|
||||
/* Avoid overflows. */
|
||||
if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
|
||||
return -1;
|
||||
|
||||
new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
|
||||
if (BE (new_nodes == NULL, 0))
|
||||
return -1;
|
||||
dfa->nodes = new_nodes;
|
||||
@ -1403,7 +1421,7 @@ calc_state_hash (nodes, context)
|
||||
static re_dfastate_t*
|
||||
re_acquire_state (err, dfa, nodes)
|
||||
reg_errcode_t *err;
|
||||
re_dfa_t *dfa;
|
||||
const re_dfa_t *dfa;
|
||||
const re_node_set *nodes;
|
||||
{
|
||||
unsigned int hash;
|
||||
@ -1448,7 +1466,7 @@ re_acquire_state (err, dfa, nodes)
|
||||
static re_dfastate_t*
|
||||
re_acquire_state_context (err, dfa, nodes, context)
|
||||
reg_errcode_t *err;
|
||||
re_dfa_t *dfa;
|
||||
const re_dfa_t *dfa;
|
||||
const re_node_set *nodes;
|
||||
unsigned int context;
|
||||
{
|
||||
@ -1486,7 +1504,7 @@ re_acquire_state_context (err, dfa, nodes, context)
|
||||
|
||||
static reg_errcode_t
|
||||
register_state (dfa, newstate, hash)
|
||||
re_dfa_t *dfa;
|
||||
const re_dfa_t *dfa;
|
||||
re_dfastate_t *newstate;
|
||||
unsigned int hash;
|
||||
{
|
||||
@ -1525,7 +1543,7 @@ register_state (dfa, newstate, hash)
|
||||
|
||||
static re_dfastate_t *
|
||||
create_ci_newstate (dfa, nodes, hash)
|
||||
re_dfa_t *dfa;
|
||||
const re_dfa_t *dfa;
|
||||
const re_node_set *nodes;
|
||||
unsigned int hash;
|
||||
{
|
||||
@ -1576,7 +1594,7 @@ create_ci_newstate (dfa, nodes, hash)
|
||||
|
||||
static re_dfastate_t *
|
||||
create_cd_newstate (dfa, nodes, context, hash)
|
||||
re_dfa_t *dfa;
|
||||
const re_dfa_t *dfa;
|
||||
const re_node_set *nodes;
|
||||
unsigned int context, hash;
|
||||
{
|
||||
|
Reference in New Issue
Block a user