1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-29 11:41:21 +03:00

* posix/regex_internal.c (re_string_reconstruct): Avoid calling

mbrtowc for very simple UTF-8 case.

2005-09-01  Paul Eggert  <eggert@cs.ucla.edu>

	* posix/regex_internal.c (build_wcs_upper_buffer): Fix portability
	bugs in int versus size_t comparisons.

2005-09-06  Ulrich Drepper  <drepper@redhat.com>

	* posix/regex_internal.c (re_acquire_state): Make DFA pointer arg
	a pointer-to-const.
	(re_acquire_state_context): Likewise.
	* posix/regex_internal.h: Adjust prototypes.

2005-08-31  Jim Meyering  <jim@meyering.net>

	* posix/regcomp.c (search_duplicated_node): Make first pointer arg
	a pointer-to-const.
	* posix/regex_internal.c (create_ci_newstate, create_cd_newstate,
	register_state): Likewise.
	* posix/regexec.c (search_cur_bkref_entry, check_dst_limits):
	(check_dst_limits_calc_pos_1, check_dst_limits_calc_pos):
	(group_nodes_into_DFAstates): Likewise.

	* posix/regexec.c (re_search_internal): Simplify update of
	rm_so and rm_eo by replacing "if (A == B) A += C - B;"
	with the equivalent of "if (A == B) A = C;".

2005-09-06  Ulrich Drepper  <drepper@redhat.com>

	* posix/regcomp.c (re_compile_internal): Change third parameter type
	to size_t.
	(init_dfa): Likewise.  Make sure that arithmetic on pat_len doesn't
	overflow.
	* posix/regex_internal.h (struct re_dfa_t): Change type of nodes_alloc
	and nodes_len to size_t.
	* posix/regex_internal.c (re_dfa_add_node): Use size_t as type for
	new_nodes_alloc.  Check for overflow.

2005-08-31  Paul Eggert  <eggert@cs.ucla.edu>

	* posix/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
	(optimize_subexps, lower_subexp):
	Don't assume 1<<31 has defined behavior on hosts with 32-bit int,
	since the signed shift might overflow.  Use 1u<<31 instead.
	* posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain):
	Likewise.
	* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise.
	(check_subexp_matching_top): Likewise.
	* posix/regcomp.c (optimize_subexps, lower_subexp):
	Use CHAR_BIT rather than 8, for clarity.
	* posix/regexec.c (check_dst_limits_calc_pos_1):
	(check_subexp_matching_top): Likewise.
	* posix/regcomp.c (init_dfa): Make table_size unsigned, so that we
	don't have to worry about portability issues when shifting it left.
	Remove no-longer-needed test for table_size > 0.
	* posix/regcomp.c (parse_sub_exp): Do not shift more bits than there
	are in a word, as the resulting behavior is undefined.
	* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise;
	in one case, a <= should have been an <, and in another case the
	whole test was missing.
	* posix/regex_internal.h (BYTE_BITS): Remove.  All uses changed to
	the standard name CHAR_BIT.
This commit is contained in:
Ulrich Drepper
2005-09-07 01:15:33 +00:00
parent 2d87db5b53
commit 01ed6ceb7c
5 changed files with 211 additions and 109 deletions

View File

@ -26,12 +26,13 @@ static void re_string_construct_common (const char *str, int len,
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
wint_t *last_wc) internal_function;
#endif /* RE_ENABLE_I18N */
static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
static reg_errcode_t register_state (const re_dfa_t *dfa,
re_dfastate_t *newstate,
unsigned int hash) internal_function;
static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash) internal_function;
static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int context,
unsigned int hash) internal_function;
@ -654,37 +655,50 @@ re_string_reconstruct (pstr, idx, eflags)
byte other than 0x80 - 0xbf. */
raw = pstr->raw_mbs + pstr->raw_mbs_idx;
end = raw + (offset - pstr->mb_cur_max);
for (p = raw + offset - 1; p >= end; --p)
if ((*p & 0xc0) != 0x80)
{
mbstate_t cur_state;
wchar_t wc2;
int mlen = raw + pstr->len - p;
unsigned char buf[6];
p = raw + offset - 1;
#ifdef _LIBC
/* We know the wchar_t encoding is UCS4, so for the simple
case, ASCII characters, skip the conversion step. */
if (isascii (*p) && BE (pstr->trans == NULL, 1))
{
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
pstr->valid_len = 0;
wc = (wchar_t) *p;
}
else
#endif
for (; p >= end; --p)
if ((*p & 0xc0) != 0x80)
{
mbstate_t cur_state;
wchar_t wc2;
int mlen = raw + pstr->len - p;
unsigned char buf[6];
size_t mbclen;
q = p;
if (BE (pstr->trans != NULL, 0))
{
int i = mlen < 6 ? mlen : 6;
while (--i >= 0)
buf[i] = pstr->trans[p[i]];
q = buf;
}
/* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state));
mlen = (mbrtowc (&wc2, (const char *) p, mlen,
&cur_state)
- (raw + offset - p));
if (mlen >= 0)
{
memset (&pstr->cur_state, '\0',
sizeof (mbstate_t));
pstr->valid_len = mlen;
wc = wc2;
}
break;
}
q = p;
if (BE (pstr->trans != NULL, 0))
{
int i = mlen < 6 ? mlen : 6;
while (--i >= 0)
buf[i] = pstr->trans[p[i]];
q = buf;
}
/* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state));
mbclen = mbrtowc (&wc2, (const char *) p, mlen,
&cur_state);
if (raw + offset - p <= mbclen
&& mbclen < (size_t) -2)
{
memset (&pstr->cur_state, '\0',
sizeof (mbstate_t));
pstr->valid_len = mbclen - (raw + offset - p);
wc = wc2;
}
break;
}
}
if (wc == WEOF)
@ -738,15 +752,15 @@ re_string_reconstruct (pstr, idx, eflags)
}
else
#endif /* RE_ENABLE_I18N */
if (BE (pstr->mbs_allocated, 0))
{
if (pstr->icase)
build_upper_buffer (pstr);
else if (pstr->trans != NULL)
re_string_translate_buffer (pstr);
}
else
pstr->valid_len = pstr->len;
if (BE (pstr->mbs_allocated, 0))
{
if (pstr->icase)
build_upper_buffer (pstr);
else if (pstr->trans != NULL)
re_string_translate_buffer (pstr);
}
else
pstr->valid_len = pstr->len;
pstr->cur_idx = 0;
return REG_NOERROR;
@ -1345,12 +1359,16 @@ re_dfa_add_node (dfa, token)
int type = token.type;
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
{
int new_nodes_alloc = dfa->nodes_alloc * 2;
size_t new_nodes_alloc = dfa->nodes_alloc * 2;
int *new_nexts, *new_indices;
re_node_set *new_edests, *new_eclosures;
re_token_t *new_nodes;
re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t,
new_nodes_alloc);
/* Avoid overflows. */
if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
return -1;
new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
if (BE (new_nodes == NULL, 0))
return -1;
dfa->nodes = new_nodes;
@ -1403,7 +1421,7 @@ calc_state_hash (nodes, context)
static re_dfastate_t*
re_acquire_state (err, dfa, nodes)
reg_errcode_t *err;
re_dfa_t *dfa;
const re_dfa_t *dfa;
const re_node_set *nodes;
{
unsigned int hash;
@ -1448,7 +1466,7 @@ re_acquire_state (err, dfa, nodes)
static re_dfastate_t*
re_acquire_state_context (err, dfa, nodes, context)
reg_errcode_t *err;
re_dfa_t *dfa;
const re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int context;
{
@ -1486,7 +1504,7 @@ re_acquire_state_context (err, dfa, nodes, context)
static reg_errcode_t
register_state (dfa, newstate, hash)
re_dfa_t *dfa;
const re_dfa_t *dfa;
re_dfastate_t *newstate;
unsigned int hash;
{
@ -1525,7 +1543,7 @@ register_state (dfa, newstate, hash)
static re_dfastate_t *
create_ci_newstate (dfa, nodes, hash)
re_dfa_t *dfa;
const re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int hash;
{
@ -1576,7 +1594,7 @@ create_ci_newstate (dfa, nodes, hash)
static re_dfastate_t *
create_cd_newstate (dfa, nodes, context, hash)
re_dfa_t *dfa;
const re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int context, hash;
{