1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-29 11:41:21 +03:00

posix: Sync gnulib regex implementation

This patch syncs the regex implementation with gnulib (commit 0ee5212).
Only two changes in GLIBC regex testing are required:

  1. posix/bug-regex28.c: as previously discussed [1] the change of
     expected results on the pattern should be safe.

  2. posix/PCRE.tests: the ERE (a)|\1 is malformed (in the sense that
     the \1 doesn't mean anything) and, although current GLIBC accepts
     it, its behavior is undefined.  This patch removes the specific test.

This sync contains some patches from thread 'Regex: Make libc regex
more usable outside GLIBC.' [2] which have been pushed upstream in
gnulib.  This patch also fixes some regex issues (BZ #23233,
BZ #21163, BZ #18986, BZ #13762).  I did not add testcases for
either #23233 or #13762 because I couldn't think of a simple way to
trigger the expected failure paths.

Checked on x86_64-linux-gnu and i686-linux-gnu.

	[BZ #23233]
	[BZ #21163]
	[BZ #18986]
	[BZ #13762]
	* posix/Makefile (tests): Add bug-regex37 and bug-regex38.
	* posix/PCRE.tests: Remove invalid test.
	* posix/bug-regex28.c: Fix expected values for used syntax.
	* posix/bug-regex37.c: New file.
	* posix/bug-regex38.c: Likewise.
	* posix/regcomp.c: Sync with gnulib.
	* posix/regex.c: Likewise.
	* posix/regex.h: Likewise.
	* posix/regex_internal.c: Likewise.
	* posix/regex_internal.h: Likewise.
	* posix/regexec.c: Likewise.

[1] https://sourceware.org/ml/libc-alpha/2017-12/msg00807.html
[2] https://sourceware.org/ml/libc-alpha/2017-12/msg00237.html
This commit is contained in:
Adhemerval Zanella
2017-12-20 09:47:44 -02:00
parent b11643c21c
commit eb04c21373
12 changed files with 1610 additions and 1162 deletions

View File

@ -21,18 +21,22 @@
#include <stdio.h>
#include <string.h>
#include <support/test-driver.h>
#include <support/check.h>
struct tests
{
const char *regex;
const char *string;
reg_syntax_t syntax;
int retval;
} tests[] = {
};
static const struct tests tests[] = {
#define EGREP RE_SYNTAX_EGREP
#define EGREP_NL (RE_SYNTAX_EGREP | RE_DOT_NEWLINE) & ~RE_HAT_LISTS_NOT_NEWLINE
{ "a.b", "a\nb", EGREP, -1 },
{ "a.b", "a\nb", EGREP, 0 },
{ "a.b", "a\nb", EGREP_NL, 0 },
{ "a[^x]b", "a\nb", EGREP, -1 },
{ "a[^x]b", "a\nb", EGREP, 0 },
{ "a[^x]b", "a\nb", EGREP_NL, 0 },
/* While \S and \W are internally handled as [^[:space:]] and [^[:alnum:]_],
RE_HAT_LISTS_NOT_NEWLINE did not make any difference, so ensure
@ -42,33 +46,33 @@ struct tests
{ "a\\Wb", "a\nb", EGREP, 0 },
{ "a\\Wb", "a\nb", EGREP_NL, 0 }
};
static const size_t tests_size = sizeof (tests) / sizeof (tests[0]);
int
main (void)
static int
do_test (void)
{
struct re_pattern_buffer r;
size_t i;
int ret = 0;
for (i = 0; i < sizeof (tests) / sizeof (tests[i]); ++i)
for (size_t i = 0; i < tests_size; i++)
{
re_set_syntax (tests[i].syntax);
memset (&r, 0, sizeof (r));
if (re_compile_pattern (tests[i].regex, strlen (tests[i].regex), &r))
{
printf ("re_compile_pattern %zd failed\n", i);
ret = 1;
continue;
}
const char *re = re_compile_pattern (tests[i].regex,
strlen (tests[i].regex), &r);
TEST_VERIFY (re == NULL);
if (re != NULL)
continue;
size_t len = strlen (tests[i].string);
int rv = re_search (&r, tests[i].string, len, 0, len, NULL);
if (rv != tests[i].retval)
{
printf ("re_search %zd unexpected value %d != %d\n",
i, rv, tests[i].retval);
ret = 1;
}
TEST_VERIFY (rv == tests[i].retval);
if (test_verbose > 0)
printf ("info: i=%zu rv=%d expected=%d\n", i, rv, tests[i].retval);
regfree (&r);
}
return ret;
return 0;
}
#include <support/test-driver.c>