mirror of
https://git.savannah.gnu.org/git/gnulib.git
synced 2025-08-10 04:43:00 +03:00
regex-quote: New API.
* lib/regex-quote.h: Include <stdbool.h>. (struct regex_quote_spec): New type. (regex_quote_spec_posix, regex_quote_spec_gnu, regex_quote_spec_pcre): New declarations. (regex_quote_length, regex_quote_copy, regex_quote): Take a 'const struct regex_quote_spec *' argument. * lib/regex-quote.c (RE_*, PCRE_*): New macros. (pcre_special): New constant. (regex_quote_spec_posix, regex_quote_spec_gnu, regex_quote_spec_pcre): New functions. (regex_quote_length, regex_quote_copy, regex_quote): Take a 'const struct regex_quote_spec *' argument. * modules/regex-quote (Depends-on): Add stdbool. * tests/test-regex-quote.c (check): Update for new API. Add test for anchored results. * NEWS: Mention the API change. Reported by Reuben Thomas and Eric Blake.
This commit is contained in:
21
ChangeLog
21
ChangeLog
@@ -1,3 +1,24 @@
|
||||
2011-03-08 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
regex-quote: New API.
|
||||
* lib/regex-quote.h: Include <stdbool.h>.
|
||||
(struct regex_quote_spec): New type.
|
||||
(regex_quote_spec_posix, regex_quote_spec_gnu, regex_quote_spec_pcre):
|
||||
New declarations.
|
||||
(regex_quote_length, regex_quote_copy, regex_quote): Take a
|
||||
'const struct regex_quote_spec *' argument.
|
||||
* lib/regex-quote.c (RE_*, PCRE_*): New macros.
|
||||
(pcre_special): New constant.
|
||||
(regex_quote_spec_posix, regex_quote_spec_gnu, regex_quote_spec_pcre):
|
||||
New functions.
|
||||
(regex_quote_length, regex_quote_copy, regex_quote): Take a
|
||||
'const struct regex_quote_spec *' argument.
|
||||
* modules/regex-quote (Depends-on): Add stdbool.
|
||||
* tests/test-regex-quote.c (check): Update for new API. Add test for
|
||||
anchored results.
|
||||
* NEWS: Mention the API change.
|
||||
Reported by Reuben Thomas and Eric Blake.
|
||||
|
||||
2011-03-06 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
regex-quote: Fix creation of POSIX extended regular expressions.
|
||||
|
4
NEWS
4
NEWS
@@ -12,6 +12,10 @@ User visible incompatible changes
|
||||
|
||||
Date Modules Changes
|
||||
|
||||
2011-03-08 regex-quote The last argument is no longer an 'int cflags'
|
||||
but instead a pointer to a previously constructed
|
||||
'struct regex_quote_spec'.
|
||||
|
||||
2011-02-25 dirname These modules no longer put #defines for the
|
||||
dirname-lgpl following symbols into <config.h>: ISSLASH,
|
||||
backupfile FILE_SYSTEM_ACCEPTS_DRIVE_LETTER_PREFIX,
|
||||
|
@@ -31,14 +31,115 @@ static const char bre_special[] = "$^.*[]\\";
|
||||
/* Characters that are special in an ERE. */
|
||||
static const char ere_special[] = "$^.*[]\\+?{}()|";
|
||||
|
||||
size_t
|
||||
regex_quote_length (const char *string, int cflags)
|
||||
struct regex_quote_spec
|
||||
regex_quote_spec_posix (int cflags, bool anchored)
|
||||
{
|
||||
const char *special = (cflags != 0 ? ere_special : bre_special);
|
||||
struct regex_quote_spec result;
|
||||
|
||||
strcpy (result.special, cflags != 0 ? ere_special : bre_special);
|
||||
result.multibyte = true;
|
||||
result.anchored = anchored;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Syntax bit values, defined in GNU <regex.h>. We don't include it here,
|
||||
otherwise this module would need to depend on gnulib module 'regex'. */
|
||||
#define RE_BK_PLUS_QM 0x00000002
|
||||
#define RE_INTERVALS 0x00000200
|
||||
#define RE_LIMITED_OPS 0x00000400
|
||||
#define RE_NEWLINE_ALT 0x00000800
|
||||
#define RE_NO_BK_BRACES 0x00001000
|
||||
#define RE_NO_BK_PARENS 0x00002000
|
||||
#define RE_NO_BK_VBAR 0x00008000
|
||||
|
||||
struct regex_quote_spec
|
||||
regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored)
|
||||
{
|
||||
struct regex_quote_spec result;
|
||||
char *p;
|
||||
|
||||
p = result.special;
|
||||
memcpy (p, bre_special, sizeof (bre_special) - 1);
|
||||
p += sizeof (bre_special) - 1;
|
||||
if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_BK_PLUS_QM) == 0)
|
||||
{
|
||||
*p++ = '+';
|
||||
*p++ = '?';
|
||||
}
|
||||
if ((syntax & RE_INTERVALS) != 0 && (syntax & RE_NO_BK_BRACES) != 0)
|
||||
{
|
||||
*p++ = '{';
|
||||
*p++ = '}';
|
||||
}
|
||||
if ((syntax & RE_NO_BK_PARENS) != 0)
|
||||
{
|
||||
*p++ = '(';
|
||||
*p++ = ')';
|
||||
}
|
||||
if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_NO_BK_VBAR) != 0)
|
||||
*p++ = '|';
|
||||
if ((syntax & RE_NEWLINE_ALT) != 0)
|
||||
*p++ = '\n';
|
||||
*p = '\0';
|
||||
|
||||
result.multibyte = true;
|
||||
result.anchored = anchored;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Characters that are special in a PCRE. */
|
||||
static const char pcre_special[] = "$^.*[]\\+?{}()|";
|
||||
|
||||
/* Options bit values, defined in <pcre.h>. We don't include it here, because
|
||||
it is not a standard header. */
|
||||
#define PCRE_ANCHORED 0x00000010
|
||||
#define PCRE_EXTENDED 0x00000008
|
||||
|
||||
struct regex_quote_spec
|
||||
regex_quote_spec_pcre (int options, bool anchored)
|
||||
{
|
||||
struct regex_quote_spec result;
|
||||
char *p;
|
||||
|
||||
p = result.special;
|
||||
memcpy (p, bre_special, sizeof (pcre_special) - 1);
|
||||
p += sizeof (pcre_special) - 1;
|
||||
if (options & PCRE_EXTENDED)
|
||||
{
|
||||
*p++ = ' ';
|
||||
*p++ = '\t';
|
||||
*p++ = '\n';
|
||||
*p++ = '\v';
|
||||
*p++ = '\f';
|
||||
*p++ = '\r';
|
||||
*p++ = '#';
|
||||
}
|
||||
*p = '\0';
|
||||
|
||||
/* PCRE regular expressions consist of UTF-8 characters of options contains
|
||||
PCRE_UTF8 and of single bytes otherwise. */
|
||||
result.multibyte = false;
|
||||
/* If options contains PCRE_ANCHORED, the anchoring is implicit. */
|
||||
result.anchored = (options & PCRE_ANCHORED ? 0 : anchored);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t
|
||||
regex_quote_length (const char *string, const struct regex_quote_spec *spec)
|
||||
{
|
||||
const char *special = spec->special;
|
||||
size_t length;
|
||||
mbui_iterator_t iter;
|
||||
|
||||
length = 0;
|
||||
if (spec->anchored)
|
||||
length += 2; /* for '^' at the beginning and '$' at the end */
|
||||
if (spec->multibyte)
|
||||
{
|
||||
mbui_iterator_t iter;
|
||||
|
||||
for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
|
||||
{
|
||||
/* We know that special contains only ASCII characters. */
|
||||
@@ -47,16 +148,31 @@ regex_quote_length (const char *string, int cflags)
|
||||
length += 1;
|
||||
length += mb_len (mbui_cur (iter));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const char *iter;
|
||||
|
||||
for (iter = string; *iter != '\0'; iter++)
|
||||
{
|
||||
if (strchr (special, *iter))
|
||||
length += 1;
|
||||
length += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
/* Copies the quoted string to p and returns the incremented p.
|
||||
There must be room for regex_quote_length (string, spec) + 1 bytes at p.
|
||||
*/
|
||||
char *
|
||||
regex_quote_copy (char *p, const char *string, int cflags)
|
||||
regex_quote_copy (char *p, const char *string, const struct regex_quote_spec *spec)
|
||||
{
|
||||
const char *special = spec->special;
|
||||
|
||||
if (spec->anchored)
|
||||
*p++ = '^';
|
||||
if (spec->multibyte)
|
||||
{
|
||||
const char *special = (cflags != 0 ? ere_special : bre_special);
|
||||
mbui_iterator_t iter;
|
||||
|
||||
for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
|
||||
@@ -68,19 +184,33 @@ regex_quote_copy (char *p, const char *string, int cflags)
|
||||
memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter)));
|
||||
p += mb_len (mbui_cur (iter));
|
||||
}
|
||||
}
|
||||
else
  {
    /* Unibyte case: each byte of STRING is treated as one character.  */
    const char *iter;

    for (iter = string; *iter != '\0'; iter++)
      {
        if (strchr (special, *iter))
          *p++ = '\\';
        /* Copy the byte without advancing ITER; the loop header already
           steps it.  A second increment here would skip every other byte
           and could step over the terminating NUL, whereas
           regex_quote_length counts every byte in its unibyte branch.  */
        *p++ = *iter;
      }
  }
|
||||
if (spec->anchored)
|
||||
*p++ = '$';
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Returns the freshly allocated quoted string. */
|
||||
char *
|
||||
regex_quote (const char *string, int cflags)
|
||||
regex_quote (const char *string, const struct regex_quote_spec *spec)
|
||||
{
|
||||
size_t length = regex_quote_length (string, cflags);
|
||||
size_t length = regex_quote_length (string, spec);
|
||||
char *result = XNMALLOC (length + 1, char);
|
||||
char *p;
|
||||
|
||||
p = result;
|
||||
p = regex_quote_copy (p, string, cflags);
|
||||
p = regex_quote_copy (p, string, spec);
|
||||
*p = '\0';
|
||||
return result;
|
||||
}
|
||||
|
@@ -15,27 +15,74 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <stddef.h>
|
||||
#ifndef _REGEX_QUOTE_H
|
||||
#define _REGEX_QUOTE_H
|
||||
|
||||
/* regex_quote converts a literal string to a regular expression that will
|
||||
look for this literal string.
|
||||
cflags can be 0 or REG_EXTENDED.
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
/* Specifies a quotation task for converting a fixed string to a regular
|
||||
expression pattern. */
|
||||
struct regex_quote_spec
|
||||
{
|
||||
/* True if the regular expression pattern consists of multibyte characters
|
||||
(in the encoding given by the LC_CTYPE category of the locale),
|
||||
false if it consists of single bytes or UTF-8 characters. */
|
||||
unsigned int /*bool*/ multibyte : 1;
|
||||
/* True if the regular expression pattern shall match only entire lines. */
|
||||
unsigned int /*bool*/ anchored : 1;
|
||||
/* Set of characters that need to be escaped (all ASCII), as a
|
||||
NUL-terminated string. */
|
||||
char special[30 + 1];
|
||||
};
|
||||
|
||||
|
||||
/* Creates a quotation task that produces a POSIX regular expression, that is,
|
||||
a pattern that can be compiled with regcomp().
|
||||
CFLAGS can be 0 or REG_EXTENDED.
|
||||
If it is 0, the result is a Basic Regular Expression (BRE)
|
||||
<http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03>.
|
||||
If it is REG_EXTENDED, the result is an Extended Regular Expression (ERE)
|
||||
<http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04>.
|
||||
The result is not anchored; if you want it to match only complete lines,
|
||||
you need to add "^" at the beginning of the result and "$" at the end of the
|
||||
result.
|
||||
*/
|
||||
If ANCHORED is false, the regular expression will match substrings of lines.
|
||||
If ANCHORED is true, it will match only complete lines. */
|
||||
extern struct regex_quote_spec
|
||||
regex_quote_spec_posix (int cflags, bool anchored);
|
||||
|
||||
/* Creates a quotation task that produces a regular expression that can be
|
||||
compiled with the GNU API function re_compile_pattern().
|
||||
SYNTAX describes the syntax of the regular expression (such as
|
||||
RE_SYNTAX_POSIX_BASIC, RE_SYNTAX_POSIX_EXTENDED, RE_SYNTAX_EMACS, all
|
||||
defined in <regex.h>). It must be the same value as 're_syntax_options'
|
||||
at the moment of the re_compile_pattern() call.
|
||||
If ANCHORED is false, the regular expression will match substrings of lines.
|
||||
If ANCHORED is true, it will match only complete lines. */
|
||||
extern struct regex_quote_spec
|
||||
regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored);
|
||||
|
||||
/* Creates a quotation task that produces a PCRE regular expression, that is,
|
||||
a pattern that can be compiled with pcre_compile().
|
||||
OPTIONS is the same value as the second argument passed to pcre_compile().
|
||||
If ANCHORED is false, the regular expression will match substrings of lines.
|
||||
If ANCHORED is true, it will match only complete lines. */
|
||||
extern struct regex_quote_spec
|
||||
regex_quote_spec_pcre (int options, bool anchored);
|
||||
|
||||
|
||||
/* Returns the number of bytes needed for the quoted string. */
|
||||
extern size_t regex_quote_length (const char *string, int cflags);
|
||||
extern size_t
|
||||
regex_quote_length (const char *string, const struct regex_quote_spec *spec);
|
||||
|
||||
/* Copies the quoted string to p and returns the incremented p.
|
||||
There must be room for regex_quote_length (string, cflags) + 1 bytes at p.
|
||||
*/
|
||||
extern char * regex_quote_copy (char *p, const char *string, int cflags);
|
||||
There must be room for regex_quote_length (string, spec) + 1 bytes at p. */
|
||||
extern char *
|
||||
regex_quote_copy (char *p,
|
||||
const char *string, const struct regex_quote_spec *spec);
|
||||
|
||||
/* Returns the freshly allocated quoted string. */
|
||||
extern char * regex_quote (const char *string, int cflags);
|
||||
extern char *
|
||||
regex_quote (const char *string, const struct regex_quote_spec *spec);
|
||||
|
||||
|
||||
#endif /* _REGEX_QUOTE_H */
|
||||
|
@@ -6,6 +6,7 @@ lib/regex-quote.h
|
||||
lib/regex-quote.c
|
||||
|
||||
Depends-on:
|
||||
stdbool
|
||||
xalloc
|
||||
mbuiter
|
||||
|
||||
|
@@ -29,18 +29,37 @@
|
||||
static void
|
||||
check (const char *literal, int cflags, const char *expected)
|
||||
{
|
||||
struct regex_quote_spec spec;
|
||||
char *result;
|
||||
size_t length;
|
||||
|
||||
result = regex_quote (literal, cflags);
|
||||
spec = regex_quote_spec_posix (cflags, false);
|
||||
result = regex_quote (literal, &spec);
|
||||
ASSERT (strcmp (result, expected) == 0);
|
||||
length = regex_quote_length (literal, cflags);
|
||||
length = regex_quote_length (literal, &spec);
|
||||
ASSERT (length == strlen (result));
|
||||
free (result);
|
||||
|
||||
result = (char *) xmalloc (1 + length + 1 + 1);
|
||||
result[0] = '^';
|
||||
strcpy (regex_quote_copy (result + 1, literal, cflags), "$");
|
||||
strcpy (regex_quote_copy (result + 1, literal, &spec), "$");
|
||||
{
  /* RESULT holds '^' + quoted literal + '$' at this point: it must compile
     with the same CFLAGS and match LITERAL from its first to its last
     byte.  */
  regex_t regex;
  regmatch_t match[1];

  ASSERT (regcomp (&regex, result, cflags) == 0);

  ASSERT (regexec (&regex, literal, 1, match, 0) == 0);
  ASSERT (match[0].rm_so == 0);
  ASSERT (match[0].rm_eo == strlen (literal));
  regfree (&regex);
}
|
||||
free (result);
|
||||
|
||||
spec = regex_quote_spec_posix (cflags, true);
|
||||
result = regex_quote (literal, &spec);
|
||||
length = regex_quote_length (literal, &spec);
|
||||
ASSERT (length == strlen (result));
|
||||
{
|
||||
regex_t regex;
|
||||
regmatch_t match[1];
|
||||
|
Reference in New Issue
Block a user