mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-30 22:43:12 +03:00
* posix/regex_internal.c (re_string_skip_chars): If no character has
been converted at all, set *last_wc to WEOF. If mbrtowc failed, set wc to the byte which couldn't be converted. (re_string_reconstruct): Don't clear valid_raw_len before calling re_string_skip_chars. If wc is WEOF after re_string_skip_chars, set tip_context using re_string_context_at. * posix/Makefile: Add rules to build and run bug-regex25 test. * posix/bug-regex25.c: New test.
This commit is contained in:
11
ChangeLog
11
ChangeLog
@ -1,3 +1,14 @@
|
|||||||
|
2006-06-02 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
|
* posix/regex_internal.c (re_string_skip_chars): If no character has
|
||||||
|
been converted at all, set *last_wc to WEOF. If mbrtowc failed, set wc
|
||||||
|
to the byte which couldn't be converted.
|
||||||
|
(re_string_reconstruct): Don't clear valid_raw_len before calling
|
||||||
|
re_string_skip_chars. If wc is WEOF after re_string_skip_chars, set
|
||||||
|
tip_context using re_string_context_at.
|
||||||
|
* posix/Makefile: Add rules to build and run bug-regex25 test.
|
||||||
|
* posix/bug-regex25.c: New test.
|
||||||
|
|
||||||
2006-06-02 Ryan S. Arnold <rsa@us.ibm.com>
|
2006-06-02 Ryan S. Arnold <rsa@us.ibm.com>
|
||||||
|
|
||||||
[BZ #2703]
|
[BZ #2703]
|
||||||
|
@ -81,7 +81,7 @@ tests := tstgetopt testfnm runtests runptests \
|
|||||||
bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
|
bug-regex13 bug-regex14 bug-regex15 bug-regex16 \
|
||||||
bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
|
bug-regex17 bug-regex18 bug-regex19 bug-regex20 \
|
||||||
bug-regex21 bug-regex22 bug-regex23 bug-regex24 \
|
bug-regex21 bug-regex22 bug-regex23 bug-regex24 \
|
||||||
tst-nice tst-nanosleep tst-regex2 \
|
bug-regex25 tst-nice tst-nanosleep tst-regex2 \
|
||||||
transbug tst-rxspencer tst-pcre tst-boost \
|
transbug tst-rxspencer tst-pcre tst-boost \
|
||||||
bug-ga1 tst-vfork1 tst-vfork2 tst-waitid \
|
bug-ga1 tst-vfork1 tst-vfork2 tst-waitid \
|
||||||
tst-getaddrinfo2 bug-glob1 bug-glob2 tst-sysconf \
|
tst-getaddrinfo2 bug-glob1 bug-glob2 tst-sysconf \
|
||||||
@ -188,6 +188,7 @@ bug-regex19-ENV = LOCPATH=$(common-objpfx)localedata
|
|||||||
bug-regex20-ENV = LOCPATH=$(common-objpfx)localedata
|
bug-regex20-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
bug-regex22-ENV = LOCPATH=$(common-objpfx)localedata
|
bug-regex22-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
bug-regex23-ENV = LOCPATH=$(common-objpfx)localedata
|
bug-regex23-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
|
bug-regex25-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
tst-rxspencer-ARGS = --utf8 rxspencer/tests
|
tst-rxspencer-ARGS = --utf8 rxspencer/tests
|
||||||
tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
|
tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
|
||||||
tst-pcre-ARGS = PCRE.tests
|
tst-pcre-ARGS = PCRE.tests
|
||||||
|
57
posix/bug-regex25.c
Normal file
57
posix/bug-regex25.c
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
/* Test re_search in multibyte locale other than UTF-8.
|
||||||
|
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
Contributed by Jakub Jelinek <jakub@redhat.com>, 2006.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <locale.h>
|
||||||
|
#include <regex.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
const char *str1 = "\xa3\xd8\xa3\xc9\xa3\xc9";
|
||||||
|
const char *str2 = "\xa3\xd8\xa3\xc9";
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
setlocale (LC_ALL, "ja_JP.eucJP");
|
||||||
|
|
||||||
|
re_set_syntax (RE_SYNTAX_SED);
|
||||||
|
|
||||||
|
struct re_pattern_buffer re;
|
||||||
|
memset (&re, 0, sizeof (re));
|
||||||
|
|
||||||
|
struct re_registers regs;
|
||||||
|
memset (&regs, 0, sizeof (regs));
|
||||||
|
|
||||||
|
re_compile_pattern ("$", 1, &re);
|
||||||
|
|
||||||
|
int ret = 0, r = re_search (&re, str1, 4, 0, 4, &regs);
|
||||||
|
if (r != 4)
|
||||||
|
{
|
||||||
|
printf ("First re_search returned %d\n", r);
|
||||||
|
ret = 1;
|
||||||
|
}
|
||||||
|
r = re_search (&re, str2, 4, 0, 4, &regs);
|
||||||
|
if (r != 4)
|
||||||
|
{
|
||||||
|
printf ("Second re_search returned %d\n", r);
|
||||||
|
ret = 1;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
@ -482,7 +482,7 @@ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
|
|||||||
mbstate_t prev_st;
|
mbstate_t prev_st;
|
||||||
int rawbuf_idx;
|
int rawbuf_idx;
|
||||||
size_t mbclen;
|
size_t mbclen;
|
||||||
wchar_t wc = 0;
|
wchar_t wc = WEOF;
|
||||||
|
|
||||||
/* Skip the characters which are not necessary to check. */
|
/* Skip the characters which are not necessary to check. */
|
||||||
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
|
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
|
||||||
@ -495,7 +495,11 @@ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc)
|
|||||||
remain_len, &pstr->cur_state);
|
remain_len, &pstr->cur_state);
|
||||||
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
|
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
|
||||||
{
|
{
|
||||||
/* We treat these cases as a singlebyte character. */
|
/* We treat these cases as a single byte character. */
|
||||||
|
if (mbclen == 0 || remain_len == 0)
|
||||||
|
wc = L'\0';
|
||||||
|
else
|
||||||
|
wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
|
||||||
mbclen = 1;
|
mbclen = 1;
|
||||||
pstr->cur_state = prev_st;
|
pstr->cur_state = prev_st;
|
||||||
}
|
}
|
||||||
@ -618,7 +622,6 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
pstr->valid_len = 0;
|
pstr->valid_len = 0;
|
||||||
pstr->valid_raw_len = 0;
|
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
if (pstr->mb_cur_max > 1)
|
if (pstr->mb_cur_max > 1)
|
||||||
{
|
{
|
||||||
@ -681,6 +684,16 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
|
|||||||
|
|
||||||
if (wc == WEOF)
|
if (wc == WEOF)
|
||||||
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
|
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
|
||||||
|
if (wc == WEOF)
|
||||||
|
pstr->tip_context
|
||||||
|
= re_string_context_at (pstr, pstr->valid_raw_len - 1, eflags);
|
||||||
|
else
|
||||||
|
pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
|
||||||
|
&& IS_WIDE_WORD_CHAR (wc))
|
||||||
|
? CONTEXT_WORD
|
||||||
|
: ((IS_WIDE_NEWLINE (wc)
|
||||||
|
&& pstr->newline_anchor)
|
||||||
|
? CONTEXT_NEWLINE : 0));
|
||||||
if (BE (pstr->valid_len, 0))
|
if (BE (pstr->valid_len, 0))
|
||||||
{
|
{
|
||||||
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
|
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
|
||||||
@ -689,17 +702,12 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
|
|||||||
memset (pstr->mbs, 255, pstr->valid_len);
|
memset (pstr->mbs, 255, pstr->valid_len);
|
||||||
}
|
}
|
||||||
pstr->valid_raw_len = pstr->valid_len;
|
pstr->valid_raw_len = pstr->valid_len;
|
||||||
pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
|
|
||||||
&& IS_WIDE_WORD_CHAR (wc))
|
|
||||||
? CONTEXT_WORD
|
|
||||||
: ((IS_WIDE_NEWLINE (wc)
|
|
||||||
&& pstr->newline_anchor)
|
|
||||||
? CONTEXT_NEWLINE : 0));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif /* RE_ENABLE_I18N */
|
#endif /* RE_ENABLE_I18N */
|
||||||
{
|
{
|
||||||
int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
|
int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
|
||||||
|
pstr->valid_raw_len = 0;
|
||||||
if (pstr->trans)
|
if (pstr->trans)
|
||||||
c = pstr->trans[c];
|
c = pstr->trans[c];
|
||||||
pstr->tip_context = (bitset_contain (pstr->word_char, c)
|
pstr->tip_context = (bitset_contain (pstr->word_char, c)
|
||||||
|
Reference in New Issue
Block a user