1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-30 22:43:12 +03:00
2004-11-08  Ulrich Drepper  <drepper@redhat.com>

	* posix/regcomp.c (utf8_sb_map): Define.
	(free_dfa_content): Don't free dfa->sb_char if it's a pointer to
	utf8_sb_map.
	(init_dfa): Use utf8_sb_map instead of initializing memory when the
	encoding is UTF-8.

	* posix/regcomp.c (init_dfa): Get the codeset name outside glibc as
	well.  Check if it is spelled UTF8 as well as UTF-8, and check
	case-insensitively.  Set dfa->map_notascii manually when outside
	glibc.
	* posix/regex_internal.c (build_wcs_upper_buffer) [!_LIBC]: Enable
	optimizations based on map_notascii.
	* posix/regex_internal.h [HAVE_LANGINFO_H || HAVE_LANGINFO_CODESET
	|| _LIBC]: Include langinfo.h.

	* posix/regex_internal.h (struct re_backref_cache_entry): Add "more"
	field.
	* posix/regexec.c (check_dst_limits): Hoist computation of the source
	and destination bkref_idx out of the loop.  Pass it to
	check_dst_limits_calc_pos.
	(check_dst_limits_calc_pos_1): New function, containing the recursive
	loop of check_dst_limits_calc_pos; uses the "more" field of
	struct re_backref_cache_entry to control the loop.
	(check_dst_limits_calc_pos): Store into "boundaries" the position
	relative to lim's start and end positions.  Do not accept eclosures,
	accept bkref_idx instead.  Call check_dst_limits_calc_pos_1 to do the
	work.
	(sift_states_bkref): Use the "more" field of
	struct re_backref_cache_entry to control the loop.  Turn a big "if"
	into a continue and reindent the function.
	(get_subexp): Use the "more" field of struct re_backref_cache_entry
	to control the loop.
	(match_ctx_add_entry): Initialize the bkref_ents' "more" field.
	(search_cur_bkref_entry): Return -1 if out of bounds.

	* posix/regexec.c (empty_set): Remove.
	(sift_states_backward): Remove cur_src variable.  Move inner loop
	to build_sifted_states.
	(build_sifted_states): Extract from sift_states_backward.  Do not
	use empty_set.
	(update_cur_sifted_state): Do not use empty_set.  Special case
	dest_nodes->nelem == 0.
This commit is contained in:
Ulrich Drepper
2004-11-08 22:49:44 +00:00
parent d2c38eb3fa
commit e40a38b383
5 changed files with 363 additions and 221 deletions

View File

@@ -293,9 +293,8 @@ build_wcs_upper_buffer (pstr)
 byte_idx = pstr->valid_len;
 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
-#ifdef _LIBC
-/* The following optimization assumes that the wchar_t encoding is
-always ISO 10646. */
+/* The following optimization assumes that ASCII characters can be
+mapped to wide characters with a simple cast. */
 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
 {
 while (byte_idx < end_idx)
@@ -309,8 +308,7 @@ build_wcs_upper_buffer (pstr)
 pstr->mbs[byte_idx]
 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
 /* The next step uses the assumption that wchar_t is encoded
-with ISO 10646: all ASCII values can be converted like
-this. */
+ASCII-safe: all ASCII values can be converted like this. */
 pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
 ++byte_idx;
 continue;
@@ -368,14 +366,11 @@ build_wcs_upper_buffer (pstr)
 return REG_NOERROR;
 }
 else
-#endif
 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
 {
 wchar_t wc;
 const char *p;
-#ifdef _LIBC
-offsets_needed:
-#endif
+offsets_needed:
 remain_len = end_idx - byte_idx;
 prev_st = pstr->cur_state;
 if (BE (pstr->trans != NULL, 0))
@@ -647,7 +642,6 @@ re_string_reconstruct (pstr, idx, eflags)
 int wcs_idx;
 wint_t wc = WEOF;
-#ifdef _LIBC
 if (pstr->is_utf8)
 {
 const unsigned char *raw, *p, *q, *end;
@@ -687,7 +681,7 @@ re_string_reconstruct (pstr, idx, eflags)
 break;
 }
 }
-#endif
 if (wc == WEOF)
 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
 if (BE (pstr->valid_len, 0))