mirror of
https://sourceware.org/git/glibc.git
synced 2025-08-30 17:41:16 +03:00
Update.
2004-11-08 Ulrich Drepper <drepper@redhat.com> * posix/regcomp.c (utf8_sb_map): Define. (free_dfa_content): Don't free dfa->sb_char if it's a pointer to utf8_sb_map. (init_dfa): Use utf8_sb_map instead of initializing memory when the encoding is UTF-8. * posix/regcomp.c (init_dfa): Get the codeset name outside glibc as well. Check if it is spelled UTF8 as well as UTF-8, and check case-insensitively. Set dfa->map_notascii manually when outside glibc. * posix/regex_internal.c (build_wcs_upper_buffer) [!_LIBC]: Enable optimizations based on map_notascii. * posix/regex_internal.h [HAVE_LANGINFO_H || HAVE_LANGINFO_CODESET || _LIBC]: Include langinfo.h. * posix/regex_internal.h (struct re_backref_cache_entry): Add "more" field. * posix/regexec.c (check_dst_limits): Hoist computation of the source and destination bkref_idx out of the loop. Pass it to check_dst_limits_calc_pos. (check_dst_limits_calc_pos_1): New function, containing the recursive loop of check_dst_limits_calc_pos; uses the "more" field of struct re_backref_cache to control the loop. (check_dst_limits_calc_pos): Store into "boundaries" the position relative to lim's start and end positions. Do not accept eclosures, accept bkref_idx instead. Call check_dst_limits_calc_pos_1 to do the work. (sift_states_bkref): Use the "more" field of struct re_backref_cache to control the loop. A big "if" was turned into a continue and the function was reindented. (get_subexp): Use the "more" field of struct re_backref_cache to control the loop. (match_ctx_add_entry): Initialize the bkref_ents' "more" field. (search_cur_bkref_entry): Return -1 if out of bounds. * posix/regexec.c (empty_set): Remove. (sift_states_backward): Remove cur_src variable. Move inner loop to build_sifted_states. (build_sifted_states): Extract from sift_states_backward. Do not use empty_set. (update_cur_sifted_state): Do not use empty_set. Special case dest_nodes->nelem == 0.
This commit is contained in:
@@ -566,6 +566,23 @@ weak_alias (__regerror, regerror)
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef RE_ENABLE_I18N
/* Shared, read-only map of single-byte characters for the UTF-8 case.
   The contents never vary when UTF-8 is in use, so one static copy
   replaces allocating memory and initializing it identically every
   time.  UTF-8 is the preferred encoding, which makes this a
   worthwhile optimization.  */
static const bitset utf8_sb_map =
{
# if UINT_MAX != 0xffffffff
#  error "Add case for new unsigned int size"
# else
  /* Four all-ones 32-bit words set the first 128 bits; any elements
     not listed here are implicitly zero.  */
  UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX
# endif
};
#endif
|
||||
|
||||
|
||||
static void
|
||||
free_dfa_content (re_dfa_t *dfa)
|
||||
{
|
||||
@@ -613,7 +630,8 @@ free_dfa_content (re_dfa_t *dfa)
|
||||
}
|
||||
re_free (dfa->state_table);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
re_free (dfa->sb_char);
|
||||
if (dfa->sb_char != utf8_sb_map)
|
||||
re_free (dfa->sb_char);
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
re_free (dfa->re_str);
|
||||
@@ -824,6 +842,9 @@ init_dfa (dfa, pat_len)
|
||||
int pat_len;
|
||||
{
|
||||
int table_size;
|
||||
#ifndef _LIBC
|
||||
char *codeset_name;
|
||||
#endif
|
||||
|
||||
memset (dfa, '\0', sizeof (re_dfa_t));
|
||||
|
||||
@@ -853,22 +874,59 @@ init_dfa (dfa, pat_len)
|
||||
dfa->is_utf8 = 1;
|
||||
dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
|
||||
!= 0);
|
||||
#else
|
||||
# ifdef HAVE_LANGINFO_CODESET
|
||||
codeset_name = nl_langinfo (CODESET);
|
||||
# else
|
||||
codeset_name = getenv ("LC_ALL");
|
||||
if (codeset_name == NULL || codeset[0] == '\0')
|
||||
codeset_name = getenv ("LC_CTYPE");
|
||||
if (codeset_name == NULL || codeset[0] == '\0')
|
||||
codeset_name = getenv ("LANG");
|
||||
if (codeset_name == NULL)
|
||||
codeset_name = "";
|
||||
else if (strchr (codeset_name, '.') != NULL)
|
||||
codeset_name = strchr (codeset_name, '.') + 1;
|
||||
# endif
|
||||
|
||||
if (strcasecmp (codeset_name, "UTF-8") == 0
|
||||
|| strcasecmp (codeset_name, "UTF8") == 0)
|
||||
dfa->is_utf8 = 1;
|
||||
|
||||
/* We check exhaustively in the loop below if this charset is a
|
||||
superset of ASCII. */
|
||||
dfa->map_notascii = 0;
|
||||
#endif
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
if (dfa->mb_cur_max > 1)
|
||||
{
|
||||
int i, j, ch;
|
||||
|
||||
dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
|
||||
if (BE (dfa->sb_char == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
if (dfa->is_utf8)
|
||||
memset (dfa->sb_char, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
|
||||
dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
|
||||
else
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
if (__btowc (ch) != WEOF)
|
||||
dfa->sb_char[i] |= 1 << j;
|
||||
{
|
||||
int i, j, ch;
|
||||
|
||||
dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
|
||||
if (BE (dfa->sb_char == NULL, 0))
|
||||
return REG_ESPACE;
|
||||
|
||||
/* Clear all bits, then set those corresponding to single
|
||||
byte chars. */
|
||||
bitset_empty (dfa->sb_char);
|
||||
|
||||
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||
{
|
||||
wchar_t wch = __btowc (ch);
|
||||
if (wch != WEOF)
|
||||
dfa->sb_char[i] |= 1 << j;
|
||||
# ifndef _LIBC
|
||||
if (isascii (ch) && wch != (wchar_t) ch)
|
||||
dfa->map_notascii = 1;
|
||||
# endif
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
Reference in New Issue
Block a user