1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-30 22:43:12 +03:00
* posix/regex_internal.h: Add forward declaration of re_dfa_t.
	Replace last two parameters of re_string_allocate and
	re_string_construct with pointer to DFA.
	(re_dfa_t): Add map_notascii field.
	* posix/regcomp.c (re_compile_internal): Add call of
	re_string_construct.
	(init_dfa): Initialize mpa_notascii.
	* posix/regex_internal.c: Adjust definitions of re_string_allocate
	and re_string_construct.
	Pass DFA to re_string_construct.  Adjust definition.  Initialize
	map_notascii field.
	(build_wcs_upper_buffer): If map_notascii is zero use simplfied
	method to map ASCII values to upper case.
	* posix/regex.c: Include localeinfo.h.
	* posix/regexec.c: Adjust call of re_string_allocate.

	* locale/langinfo.h: Add _NL_CTYPE_MAP_TO_NONASCII.
	* locale/localeinfo.h (LIMAGIC): Change value.
	* locale/categories.def. Add entry for _NL_CTYPE_MAP_TO_NONASCII.
	* locale/C-ctype.h: Likewise.
	* locale/programs/ld-ctype.c: Compute whether any mapping maps from
	ASCII to non-ASCII value.  Write out that value.
This commit is contained in:
Ulrich Drepper
2003-11-16 07:14:28 +00:00
parent 2def87644d
commit f0c7c524bb
13 changed files with 201 additions and 87 deletions

View File

@ -21,7 +21,7 @@
static void re_string_construct_common (const char *str, int len,
re_string_t *pstr,
RE_TRANSLATE_TYPE trans, int icase,
int mb_cur_max, int is_utf8);
const re_dfa_t *dfa);
#ifdef RE_ENABLE_I18N
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
wint_t *last_wc);
@ -47,17 +47,16 @@ static unsigned int inline calc_state_hash (const re_node_set *nodes,
re_string_reconstruct before using the object. */
static reg_errcode_t
re_string_allocate (pstr, str, len, init_len, trans, icase,
mb_cur_max, is_utf8)
re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
re_string_t *pstr;
const char *str;
int len, init_len, icase, mb_cur_max, is_utf8;
int len, init_len, icase;
RE_TRANSLATE_TYPE trans;
const re_dfa_t *dfa;
{
reg_errcode_t ret;
int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
re_string_construct_common (str, len, pstr, trans, icase,
mb_cur_max, is_utf8);
re_string_construct_common (str, len, pstr, trans, icase, dfa);
pstr->stop = pstr->len;
ret = re_string_realloc_buffers (pstr, init_buf_len);
@ -68,22 +67,22 @@ re_string_allocate (pstr, str, len, init_len, trans, icase,
: (unsigned char *) str);
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
|| mb_cur_max > 1) ? pstr->valid_len : len;
|| dfa->mb_cur_max > 1) ? pstr->valid_len : len;
return REG_NOERROR;
}
/* This function allocate the buffers, and initialize them. */
static reg_errcode_t
re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8)
re_string_construct (pstr, str, len, trans, icase, dfa)
re_string_t *pstr;
const char *str;
int len, icase, mb_cur_max, is_utf8;
int len, icase;
RE_TRANSLATE_TYPE trans;
const re_dfa_t *dfa;
{
reg_errcode_t ret;
re_string_construct_common (str, len, pstr, trans, icase,
mb_cur_max, is_utf8);
re_string_construct_common (str, len, pstr, trans, icase, dfa);
pstr->stop = pstr->len;
/* Set 0 so that this function can initialize whole buffers. */
pstr->valid_len = 0;
@ -101,7 +100,7 @@ re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8)
if (icase)
{
#ifdef RE_ENABLE_I18N
if (mb_cur_max > 1)
if (dfa->mb_cur_max > 1)
build_wcs_upper_buffer (pstr);
else
#endif /* RE_ENABLE_I18N */
@ -110,7 +109,7 @@ re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8)
else
{
#ifdef RE_ENABLE_I18N
if (mb_cur_max > 1)
if (dfa->mb_cur_max > 1)
build_wcs_buffer (pstr);
else
#endif /* RE_ENABLE_I18N */
@ -167,20 +166,22 @@ re_string_realloc_buffers (pstr, new_buf_len)
static void
re_string_construct_common (str, len, pstr, trans, icase, mb_cur_max, is_utf8)
re_string_construct_common (str, len, pstr, trans, icase, dfa)
const char *str;
int len;
re_string_t *pstr;
RE_TRANSLATE_TYPE trans;
int icase, mb_cur_max, is_utf8;
int icase;
const re_dfa_t *dfa;
{
memset (pstr, '\0', sizeof (re_string_t));
pstr->raw_mbs = (const unsigned char *) str;
pstr->len = len;
pstr->trans = trans;
pstr->icase = icase ? 1 : 0;
pstr->mb_cur_max = mb_cur_max;
pstr->is_utf8 = is_utf8;
pstr->mb_cur_max = dfa->mb_cur_max;
pstr->is_utf8 = dfa->is_utf8;
pstr->map_notascii = dfa->map_notascii;
}
#ifdef RE_ENABLE_I18N
@ -253,47 +254,110 @@ build_wcs_upper_buffer (pstr)
/* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
{
wchar_t wc;
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
pstr->cur_state = prev_st;
break;
}
else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
#ifdef _LIBC
/* The following optimization assumes that the wchar_t encoding is
always ISO 10646. */
if (! pstr->map_notascii && pstr->trans == NULL)
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
&& mbsinit (&pstr->cur_state))
{
/* In case of a singlebyte character. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
/* Apply the translation if we need. */
if (pstr->trans != NULL && mbclen == 1)
{
ch = pstr->trans[ch];
pstr->mbs_case[byte_idx] = ch;
}
pstr->wcs[byte_idx] = iswlower (wc) ? towupper (wc) : wc;
pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
if (BE (mbclen == (size_t) -1, 0))
pstr->cur_state = prev_st;
pstr->mbs[byte_idx]
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
/* The next step uses the assumption that wchar_t is encoded
with ISO 10646: all ASCII values can be converted like this. */
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
++byte_idx;
}
else /* mbclen > 1 */
else
{
if (iswlower (wc))
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
wchar_t wc;
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen > 1, 1))
{
if (iswlower (wc))
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc),
&prev_st);
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[byte_idx++] = towupper (wc);
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
{
/* In case of a singlebyte character. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
/* Apply the translation if we need. */
if (BE (pstr->trans != NULL, 0) && mbclen == 1)
{
ch = pstr->trans[ch];
pstr->mbs_case[byte_idx] = ch;
}
pstr->wcs[byte_idx] = towupper (wc);
pstr->mbs[byte_idx++] = toupper (ch);
if (BE (mbclen == (size_t) -1, 0))
pstr->cur_state = prev_st;
}
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[byte_idx++] = iswlower (wc) ? towupper (wc) : wc;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
{
/* The buffer doesn't have enough space, finish to build. */
pstr->cur_state = prev_st;
break;
}
}
}
else
#endif
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
{
wchar_t wc;
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
{
/* In case of a singlebyte character. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
/* Apply the translation if we need. */
if (BE (pstr->trans != NULL, 0) && mbclen == 1)
{
ch = pstr->trans[ch];
pstr->mbs_case[byte_idx] = ch;
}
pstr->wcs[byte_idx] = towupper (wc);
pstr->mbs[byte_idx++] = toupper (ch);
if (BE (mbclen == (size_t) -1, 0))
pstr->cur_state = prev_st;
}
else if (BE (mbclen != (size_t) -2, 1))
{
if (iswlower (wc))
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[byte_idx++] = towupper (wc);
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else
{
/* The buffer doesn't have enough space, finish to build. */
pstr->cur_state = prev_st;
break;
}
}
pstr->valid_len = byte_idx;
}