mirror of
https://git.savannah.gnu.org/git/gnulib.git
synced 2025-08-10 04:43:00 +03:00
* lib/regex-quote.h: Include <stdbool.h>. (struct regex_quote_spec): New type. (regex_quote_spec_posix, regex_quote_spec_gnu, regex_quote_spec_pcre): New declarations. (regex_quote_length, regex_quote_copy, regex_quote): Take a 'const struct regex_quote_spec *' argument. * lib/regex-quote.c (RE_*, PCRE_*): New macros. (pcre_special): New constant. (regex_quote_spec_posix, regex_quote_spec_gnu, regex_quote_spec_pcre): New functions. (regex_quote_length, regex_quote_copy, regex_quote): Take a 'const struct regex_quote_spec *' argument. * modules/regex-quote (Depends-on): Add stdbool. * tests/test-regex-quote.c (check): Update for new API. Add test for anchored results. * NEWS: Mention the API change. Reported by Reuben Thomas and Eric Blake.
217 lines
5.6 KiB
C
217 lines
5.6 KiB
C
/* Construct a regular expression from a literal string.
|
|
Copyright (C) 1995, 2010-2011 Free Software Foundation, Inc.
|
|
Written by Bruno Haible <haible@clisp.cons.org>, 2010.
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
|
|
|
#include <config.h>
|
|
|
|
/* Specification. */
|
|
#include "regex-quote.h"
|
|
|
|
#include <string.h>
|
|
|
|
#include "mbuiter.h"
|
|
#include "xalloc.h"
|
|
|
|
/* Set of characters that get a backslash prefix when quoting for a
   POSIX basic regular expression (BRE).  */
static const char bre_special[] = "$^.*[]\\";

/* Set of characters that get a backslash prefix when quoting for a
   POSIX extended regular expression (ERE): the BRE set plus the
   additional ERE operators.  */
static const char ere_special[] = "$^.*[]\\+?{}()|";
|
|
|
|
struct regex_quote_spec
|
|
regex_quote_spec_posix (int cflags, bool anchored)
|
|
{
|
|
struct regex_quote_spec result;
|
|
|
|
strcpy (result.special, cflags != 0 ? ere_special : bre_special);
|
|
result.multibyte = true;
|
|
result.anchored = anchored;
|
|
|
|
return result;
|
|
}
|
|
|
|
/* Syntax bit values, as defined in GNU <regex.h>.  They are repeated here
   rather than obtained by including that header, because including it
   would make this module depend on the gnulib module 'regex'.  */
#define RE_BK_PLUS_QM   (1 << 1)   /* 0x00000002 */
#define RE_INTERVALS    (1 << 9)   /* 0x00000200 */
#define RE_LIMITED_OPS  (1 << 10)  /* 0x00000400 */
#define RE_NEWLINE_ALT  (1 << 11)  /* 0x00000800 */
#define RE_NO_BK_BRACES (1 << 12)  /* 0x00001000 */
#define RE_NO_BK_PARENS (1 << 13)  /* 0x00002000 */
#define RE_NO_BK_VBAR   (1 << 15)  /* 0x00008000 */
|
|
|
|
struct regex_quote_spec
|
|
regex_quote_spec_gnu (unsigned long /*reg_syntax_t*/ syntax, bool anchored)
|
|
{
|
|
struct regex_quote_spec result;
|
|
char *p;
|
|
|
|
p = result.special;
|
|
memcpy (p, bre_special, sizeof (bre_special) - 1);
|
|
p += sizeof (bre_special) - 1;
|
|
if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_BK_PLUS_QM) == 0)
|
|
{
|
|
*p++ = '+';
|
|
*p++ = '?';
|
|
}
|
|
if ((syntax & RE_INTERVALS) != 0 && (syntax & RE_NO_BK_BRACES) != 0)
|
|
{
|
|
*p++ = '{';
|
|
*p++ = '}';
|
|
}
|
|
if ((syntax & RE_NO_BK_PARENS) != 0)
|
|
{
|
|
*p++ = '(';
|
|
*p++ = ')';
|
|
}
|
|
if ((syntax & RE_LIMITED_OPS) == 0 && (syntax & RE_NO_BK_VBAR) != 0)
|
|
*p++ = '|';
|
|
if ((syntax & RE_NEWLINE_ALT) != 0)
|
|
*p++ = '\n';
|
|
*p = '\0';
|
|
|
|
result.multibyte = true;
|
|
result.anchored = anchored;
|
|
|
|
return result;
|
|
}
|
|
|
|
/* Set of characters that get a backslash prefix when quoting for a PCRE
   pattern, before any option-dependent additions.  */
static const char pcre_special[] = "$^.*[]\\+?{}()|";

/* Option bit values, as defined in <pcre.h>.  They are repeated here
   rather than obtained by including that header, because it is not a
   standard header.  */
#define PCRE_ANCHORED (1 << 4)  /* 0x00000010 */
#define PCRE_EXTENDED (1 << 3)  /* 0x00000008 */
|
|
|
|
struct regex_quote_spec
|
|
regex_quote_spec_pcre (int options, bool anchored)
|
|
{
|
|
struct regex_quote_spec result;
|
|
char *p;
|
|
|
|
p = result.special;
|
|
memcpy (p, bre_special, sizeof (pcre_special) - 1);
|
|
p += sizeof (pcre_special) - 1;
|
|
if (options & PCRE_EXTENDED)
|
|
{
|
|
*p++ = ' ';
|
|
*p++ = '\t';
|
|
*p++ = '\n';
|
|
*p++ = '\v';
|
|
*p++ = '\f';
|
|
*p++ = '\r';
|
|
*p++ = '#';
|
|
}
|
|
*p = '\0';
|
|
|
|
/* PCRE regular expressions consist of UTF-8 characters of options contains
|
|
PCRE_UTF8 and of single bytes otherwise. */
|
|
result.multibyte = false;
|
|
/* If options contains PCRE_ANCHORED, the anchoring is implicit. */
|
|
result.anchored = (options & PCRE_ANCHORED ? 0 : anchored);
|
|
|
|
return result;
|
|
}
|
|
|
|
size_t
|
|
regex_quote_length (const char *string, const struct regex_quote_spec *spec)
|
|
{
|
|
const char *special = spec->special;
|
|
size_t length;
|
|
|
|
length = 0;
|
|
if (spec->anchored)
|
|
length += 2; /* for '^' at the beginning and '$' at the end */
|
|
if (spec->multibyte)
|
|
{
|
|
mbui_iterator_t iter;
|
|
|
|
for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
|
|
{
|
|
/* We know that special contains only ASCII characters. */
|
|
if (mb_len (mbui_cur (iter)) == 1
|
|
&& strchr (special, * mbui_cur_ptr (iter)))
|
|
length += 1;
|
|
length += mb_len (mbui_cur (iter));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const char *iter;
|
|
|
|
for (iter = string; *iter != '\0'; iter++)
|
|
{
|
|
if (strchr (special, *iter))
|
|
length += 1;
|
|
length += 1;
|
|
}
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
char *
|
|
regex_quote_copy (char *p, const char *string, const struct regex_quote_spec *spec)
|
|
{
|
|
const char *special = spec->special;
|
|
|
|
if (spec->anchored)
|
|
*p++ = '^';
|
|
if (spec->multibyte)
|
|
{
|
|
mbui_iterator_t iter;
|
|
|
|
for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
|
|
{
|
|
/* We know that special contains only ASCII characters. */
|
|
if (mb_len (mbui_cur (iter)) == 1
|
|
&& strchr (special, * mbui_cur_ptr (iter)))
|
|
*p++ = '\\';
|
|
memcpy (p, mbui_cur_ptr (iter), mb_len (mbui_cur (iter)));
|
|
p += mb_len (mbui_cur (iter));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const char *iter;
|
|
|
|
for (iter = string; *iter != '\0'; iter++)
|
|
{
|
|
if (strchr (special, *iter))
|
|
*p++ = '\\';
|
|
*p++ = *iter++;
|
|
}
|
|
}
|
|
if (spec->anchored)
|
|
*p++ = '$';
|
|
|
|
return p;
|
|
}
|
|
|
|
char *
|
|
regex_quote (const char *string, const struct regex_quote_spec *spec)
|
|
{
|
|
size_t length = regex_quote_length (string, spec);
|
|
char *result = XNMALLOC (length + 1, char);
|
|
char *p;
|
|
|
|
p = result;
|
|
p = regex_quote_copy (p, string, spec);
|
|
*p = '\0';
|
|
return result;
|
|
}
|