1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-08-07 06:43:00 +03:00
* misc/mntent_r.c (decode_name): Fix decoding of tab, add decoding
	of newline.
	* manual/sysinfo.texi (mtab): Adjust description accordingly.
	Reported by Andries.Brouwer@cwi.nl.
This commit is contained in:
Ulrich Drepper
2003-11-29 06:40:52 +00:00
parent bb3f4825c4
commit 37369d1cef
4 changed files with 360 additions and 126 deletions

View File

@@ -62,17 +62,14 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
init_len = dfa->mb_cur_max;
init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
re_string_construct_common (str, len, pstr, trans, icase, dfa);
pstr->stop = pstr->len;
ret = re_string_realloc_buffers (pstr, init_buf_len);
if (BE (ret != REG_NOERROR, 0))
return ret;
pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
: (unsigned char *) str);
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
|| dfa->mb_cur_max > 1) ? pstr->valid_len : len;
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
pstr->valid_raw_len = pstr->valid_len;
return REG_NOERROR;
}
@@ -88,9 +85,6 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
{
reg_errcode_t ret;
re_string_construct_common (str, len, pstr, trans, icase, dfa);
pstr->stop = pstr->len;
/* Set 0 so that this function can initialize whole buffers. */
pstr->valid_len = 0;
if (len > 0)
{
@@ -98,15 +92,27 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
if (BE (ret != REG_NOERROR, 0))
return ret;
}
pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
: (unsigned char *) str);
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
if (icase)
{
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
build_wcs_upper_buffer (pstr);
{
while (1)
{
ret = build_wcs_upper_buffer (pstr);
if (BE (ret != REG_NOERROR, 0))
return ret;
if (pstr->valid_raw_len >= len)
break;
if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
break;
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
}
else
#endif /* RE_ENABLE_I18N */
build_upper_buffer (pstr);
@@ -122,12 +128,13 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
if (trans != NULL)
re_string_translate_buffer (pstr);
else
pstr->valid_len = len;
{
pstr->valid_len = pstr->bufs_len;
pstr->valid_raw_len = pstr->bufs_len;
}
}
}
/* Initialized whole buffers, then valid_len == bufs_len. */
pstr->valid_len = pstr->bufs_len;
return REG_NOERROR;
}
@@ -145,9 +152,16 @@ re_string_realloc_buffers (pstr, new_buf_len)
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->wcs = new_array;
if (pstr->offsets != NULL)
{
int *new_array = re_realloc (pstr->offsets, int, new_buf_len);
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->offsets = new_array;
}
}
#endif /* RE_ENABLE_I18N */
if (MBS_ALLOCATED (pstr))
if (pstr->mbs_allocated)
{
unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
new_buf_len);
@@ -155,16 +169,6 @@ re_string_realloc_buffers (pstr, new_buf_len)
return REG_ESPACE;
pstr->mbs = new_array;
}
if (MBS_CASE_ALLOCATED (pstr))
{
unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
new_buf_len);
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->mbs_case = new_array;
if (!MBS_ALLOCATED (pstr))
pstr->mbs = pstr->mbs_case;
}
pstr->bufs_len = new_buf_len;
return REG_NOERROR;
}
@@ -182,11 +186,15 @@ re_string_construct_common (str, len, pstr, trans, icase, dfa)
memset (pstr, '\0', sizeof (re_string_t));
pstr->raw_mbs = (const unsigned char *) str;
pstr->len = len;
pstr->raw_len = len;
pstr->trans = trans;
pstr->icase = icase ? 1 : 0;
pstr->mbs_allocated = (trans != NULL || icase);
pstr->mb_cur_max = dfa->mb_cur_max;
pstr->is_utf8 = dfa->is_utf8;
pstr->map_notascii = dfa->map_notascii;
pstr->stop = pstr->len;
pstr->raw_stop = pstr->stop;
}
#ifdef RE_ENABLE_I18N
@@ -206,18 +214,39 @@ static void
build_wcs_buffer (pstr)
re_string_t *pstr;
{
#ifdef _LIBC
unsigned char buf[pstr->mb_cur_max];
#else
unsigned char buf[64];
#endif
mbstate_t prev_st;
int byte_idx, end_idx, mbclen, remain_len;
/* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
{
wchar_t wc;
const char *p;
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
/* Apply the translation if we need. */
if (BE (pstr->trans != NULL, 0))
{
int i, ch;
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
{
ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
buf[i] = pstr->trans[ch];
}
p = (const char *) buf;
}
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
@@ -229,15 +258,11 @@ build_wcs_buffer (pstr)
/* We treat these cases as a singlebyte character. */
mbclen = 1;
wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
if (BE (pstr->trans != NULL, 0))
wc = pstr->trans[wc];
pstr->cur_state = prev_st;
}
/* Apply the translation if we need. */
if (pstr->trans != NULL && mbclen == 1)
{
int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
pstr->mbs_case[byte_idx] = ch;
}
/* Write wide character and padding. */
pstr->wcs[byte_idx++] = wc;
/* Write paddings. */
@@ -245,61 +270,83 @@ build_wcs_buffer (pstr)
pstr->wcs[byte_idx++] = WEOF;
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = byte_idx;
}
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
but for REG_ICASE. */
static void
static int
build_wcs_upper_buffer (pstr)
re_string_t *pstr;
{
mbstate_t prev_st;
int byte_idx, end_idx, mbclen, remain_len;
/* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
int src_idx, byte_idx, end_idx, mbclen, remain_len;
#ifdef _LIBC
unsigned char buf[pstr->mb_cur_max];
#else
unsigned char buf[64];
#endif
byte_idx = pstr->valid_len;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
#ifdef _LIBC
/* The following optimization assumes that the wchar_t encoding is
always ISO 10646. */
if (! pstr->map_notascii && pstr->trans == NULL)
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
&& mbsinit (&pstr->cur_state))
{
/* In case of a singlebyte character. */
pstr->mbs[byte_idx]
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
/* The next step uses the assumption that wchar_t is encoded
with ISO 10646: all ASCII values can be converted like this. */
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
++byte_idx;
}
else
if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
{
while (byte_idx < end_idx)
{
wchar_t wc;
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
&& mbsinit (&pstr->cur_state))
{
/* In case of a singlebyte character. */
pstr->mbs[byte_idx]
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
/* The next step uses the assumption that wchar_t is encoded
with ISO 10646: all ASCII values can be converted like
this. */
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
++byte_idx;
continue;
}
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen > 1, 1))
if (BE (mbclen > 0, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc),
&prev_st);
{
int mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
else
{
src_idx = byte_idx;
goto offsets_needed;
}
}
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[byte_idx++] = towupper (wc);
pstr->wcs[byte_idx++] = wcu;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
{
/* It is an invalid character. Just use the byte. */
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
pstr->mbs[byte_idx] = ch;
/* And also cast it to wide char. */
@@ -314,48 +361,116 @@ build_wcs_upper_buffer (pstr)
break;
}
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = byte_idx;
return REG_NOERROR;
}
else
#endif
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
{
wchar_t wc;
const char *p;
offsets_needed:
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
if (mbclen == 1)
if (BE (pstr->trans != NULL, 0))
{
/* In case of a singlebyte character. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
/* Apply the translation if we need. */
if (BE (pstr->trans != NULL, 0) && mbclen == 1)
int i, ch;
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
{
ch = pstr->trans[ch];
pstr->mbs_case[byte_idx] = ch;
ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
buf[i] = pstr->trans[ch];
}
pstr->wcs[byte_idx] = towupper (wc);
pstr->mbs[byte_idx++] = toupper (ch);
if (BE (mbclen == (size_t) -1, 0))
pstr->cur_state = prev_st;
p = (const char *) buf;
}
else if (BE (mbclen != (size_t) -2, 1))
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen > 0, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
{
int mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
else
{
int i;
if (byte_idx + mbcdlen > pstr->bufs_len)
{
pstr->cur_state = prev_st;
break;
}
if (pstr->offsets == NULL)
{
pstr->offsets = re_malloc (int, pstr->bufs_len);
if (pstr->offsets == NULL)
return REG_ESPACE;
}
if (!pstr->offsets_needed)
{
for (i = 0; i < byte_idx; ++i)
pstr->offsets[i] = i;
pstr->offsets_needed = 1;
}
memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
pstr->wcs[byte_idx] = wcu;
pstr->offsets[byte_idx] = src_idx;
for (i = 1; i < mbcdlen; ++i)
{
pstr->offsets[byte_idx + i]
= src_idx + (i < mbclen ? i : mbclen - 1);
pstr->wcs[byte_idx + i] = WEOF;
}
pstr->len += mbcdlen - mbclen;
if (pstr->raw_stop > src_idx)
pstr->stop += mbcdlen - mbclen;
end_idx = (pstr->bufs_len > pstr->len)
? pstr->len : pstr->bufs_len;
byte_idx += mbcdlen;
src_idx += mbclen;
continue;
}
}
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[byte_idx++] = towupper (wc);
memcpy (pstr->mbs + byte_idx, p, mbclen);
if (BE (pstr->offsets_needed != 0, 0))
{
int i;
for (i = 0; i < mbclen; ++i)
pstr->offsets[byte_idx + i] = src_idx + i;
}
src_idx += mbclen;
pstr->wcs[byte_idx++] = wcu;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
{
/* It is an invalid character. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans [ch];
pstr->mbs[byte_idx] = ch;
if (BE (pstr->offsets_needed != 0, 0))
pstr->offsets[byte_idx] = src_idx;
++src_idx;
/* And also cast it to wide char. */
pstr->wcs[byte_idx++] = (wchar_t) ch;
if (BE (mbclen == (size_t) -1, 0))
@@ -369,6 +484,8 @@ build_wcs_upper_buffer (pstr)
}
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = src_idx;
return REG_NOERROR;
}
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
@@ -385,7 +502,7 @@ re_string_skip_chars (pstr, new_raw_idx, last_wc)
wchar_t wc = 0;
/* Skip the characters which are not necessary to check. */
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
rawbuf_idx < new_raw_idx;)
{
int remain_len;
@@ -420,17 +537,15 @@ build_upper_buffer (pstr)
for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
{
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
if (pstr->trans != NULL)
{
ch = pstr->trans[ch];
pstr->mbs_case[char_idx] = ch;
}
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans[ch];
if (islower (ch))
pstr->mbs[char_idx] = toupper (ch);
else
pstr->mbs[char_idx] = ch;
}
pstr->valid_len = char_idx;
pstr->valid_raw_len = char_idx;
}
/* Apply TRANS to the buffer in PSTR. */
@@ -445,10 +560,11 @@ re_string_translate_buffer (pstr)
for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
{
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
pstr->mbs_case[buf_idx] = pstr->trans[ch];
pstr->mbs[buf_idx] = pstr->trans[ch];
}
pstr->valid_len = buf_idx;
pstr->valid_raw_len = buf_idx;
}
/* This function re-construct the buffers.
@@ -468,14 +584,15 @@ re_string_reconstruct (pstr, idx, eflags, newline)
if (pstr->mb_cur_max > 1)
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
#endif /* RE_ENABLE_I18N */
pstr->len += pstr->raw_mbs_idx;
pstr->stop += pstr->raw_mbs_idx;
pstr->valid_len = pstr->raw_mbs_idx = 0;
pstr->len = pstr->raw_len;
pstr->stop = pstr->raw_stop;
pstr->valid_len = 0;
pstr->raw_mbs_idx = 0;
pstr->valid_raw_len = 0;
pstr->offsets_needed = 0;
pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
: CONTEXT_NEWLINE | CONTEXT_BEGBUF);
if (!MBS_CASE_ALLOCATED (pstr))
pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
if (!pstr->mbs_allocated)
pstr->mbs = (unsigned char *) pstr->raw_mbs;
offset = idx;
}
@@ -483,7 +600,13 @@ re_string_reconstruct (pstr, idx, eflags, newline)
if (offset != 0)
{
/* Are the characters which are already checked remain? */
if (offset < pstr->valid_len)
if (offset < pstr->valid_raw_len
#ifdef RE_ENABLE_I18N
/* Handling this would enlarge the code too much.
Accept a slowdown in that case. */
&& pstr->offsets_needed == 0
#endif
)
{
/* Yes, move them to the front of the buffer. */
pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
@@ -493,13 +616,11 @@ re_string_reconstruct (pstr, idx, eflags, newline)
memmove (pstr->wcs, pstr->wcs + offset,
(pstr->valid_len - offset) * sizeof (wint_t));
#endif /* RE_ENABLE_I18N */
if (MBS_ALLOCATED (pstr))
if (pstr->mbs_allocated)
memmove (pstr->mbs, pstr->mbs + offset,
pstr->valid_len - offset);
if (MBS_CASE_ALLOCATED (pstr))
memmove (pstr->mbs_case, pstr->mbs_case + offset,
pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
#if DEBUG
assert (pstr->valid_len > 0);
#endif
@@ -507,16 +628,26 @@ re_string_reconstruct (pstr, idx, eflags, newline)
else
{
/* No, skip all characters until IDX. */
#ifdef RE_ENABLE_I18N
if (BE (pstr->offsets_needed, 0))
{
pstr->len = pstr->raw_len - idx + offset;
pstr->stop = pstr->raw_stop - idx + offset;
pstr->offsets_needed = 0;
}
#endif
pstr->valid_len = 0;
pstr->valid_raw_len = 0;
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
int wcs_idx;
wint_t wc = WEOF;
#ifdef _LIBC
if (pstr->is_utf8)
{
const unsigned char *raw, *p, *end;
const unsigned char *raw, *p, *q, *end;
/* Special case UTF-8. Multi-byte chars start with any
byte other than 0x80 - 0xbf. */
@@ -527,13 +658,22 @@ re_string_reconstruct (pstr, idx, eflags, newline)
{
mbstate_t cur_state;
wchar_t wc2;
int mlen;
int mlen = raw + pstr->len - p;
unsigned char buf[6];
q = p;
if (BE (pstr->trans != NULL, 0))
{
int i = mlen < 6 ? mlen : 6;
while (--i >= 0)
buf[i] = pstr->trans[p[i]];
q = buf;
}
/* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state));
mlen = mbrtowc (&wc2, p, raw + pstr->len - p,
&cur_state) - (raw + offset - p);
mlen = mbrtowc (&wc2, p, mlen, &cur_state)
- (raw + offset - p);
if (mlen >= 0)
{
memset (&pstr->cur_state, '\0',
@@ -544,12 +684,17 @@ re_string_reconstruct (pstr, idx, eflags, newline)
break;
}
}
#endif
if (wc == WEOF)
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
pstr->wcs[wcs_idx] = WEOF;
if (pstr->trans && wc <= 0xff)
wc = pstr->trans[wc];
if (BE (pstr->valid_len, 0))
{
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
pstr->wcs[wcs_idx] = WEOF;
if (pstr->mbs_allocated)
memset (pstr->mbs, 255, pstr->valid_len);
}
pstr->valid_raw_len = pstr->valid_len;
pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
: ((newline && IS_WIDE_NEWLINE (wc))
? CONTEXT_NEWLINE : 0));
@@ -565,13 +710,8 @@ re_string_reconstruct (pstr, idx, eflags, newline)
? CONTEXT_NEWLINE : 0));
}
}
if (!MBS_CASE_ALLOCATED (pstr))
{
pstr->mbs_case += offset;
/* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
if (!MBS_ALLOCATED (pstr))
pstr->mbs += offset;
}
if (!pstr->mbs_allocated)
pstr->mbs += offset;
}
pstr->raw_mbs_idx = idx;
pstr->len -= offset;
@@ -582,7 +722,11 @@ re_string_reconstruct (pstr, idx, eflags, newline)
if (pstr->mb_cur_max > 1)
{
if (pstr->icase)
build_wcs_upper_buffer (pstr);
{
int ret = build_wcs_upper_buffer (pstr);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
else
build_wcs_buffer (pstr);
}
@@ -601,17 +745,95 @@ re_string_reconstruct (pstr, idx, eflags, newline)
return REG_NOERROR;
}
static unsigned char
re_string_peek_byte_case (const re_string_t *pstr,
int idx)
{
int ch, off;
/* Handle the common (easiest) cases first. */
if (BE (!pstr->icase, 1))
return re_string_peek_byte (pstr, idx);
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1
&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
return re_string_peek_byte (pstr, idx);
#endif
off = pstr->cur_idx + idx;
#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
off = pstr->offsets[off];
#endif
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
if (pstr->trans)
ch = pstr->trans[ch];
#ifdef RE_ENABLE_I18N
/* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
this function returns CAPITAL LETTER I instead of first byte of
DOTLESS SMALL LETTER I. The latter would confuse the parser,
since peek_byte_case doesn't advance cur_idx in any way. */
if (pstr->offsets_needed && !isascii (ch))
return re_string_peek_byte (pstr, idx);
#endif
return ch;
}
static unsigned char
re_string_fetch_byte_case (re_string_t *pstr)
{
int ch, off;
if (BE (!pstr->icase, 1))
return re_string_fetch_byte (pstr);
#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
{
/* For tr_TR.UTF-8 [[:islower:]] there is
[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
in that case the whole multi-byte character and return
the original letter. On the other side, with
[[: DOTLESS SMALL LETTER I return [[:I, as doing
anything else would complicate things too much. */
if (!re_string_first_byte (pstr, pstr->cur_idx))
return re_string_fetch_byte (pstr);
off = pstr->offsets[pstr->cur_idx];
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
if (pstr->trans)
ch = pstr->trans[ch];
if (! isascii (ch))
return re_string_fetch_byte (pstr);
re_string_skip_bytes (pstr,
re_string_char_size_at (pstr, pstr->cur_idx));
return ch;
}
#endif
ch = pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
if (pstr->trans)
ch = pstr->trans[ch];
return ch;
}
static void
re_string_destruct (pstr)
re_string_t *pstr;
{
#ifdef RE_ENABLE_I18N
re_free (pstr->wcs);
re_free (pstr->offsets);
#endif /* RE_ENABLE_I18N */
if (MBS_ALLOCATED (pstr))
if (pstr->mbs_allocated)
re_free (pstr->mbs);
if (MBS_CASE_ALLOCATED (pstr))
re_free (pstr->mbs_case);
}
/* Return the context at IDX in INPUT. */