mirror of
https://github.com/postgres/postgres.git
synced 2025-06-30 21:42:05 +03:00
Postgres95 1.01 Distribution - Virgin Sources
This commit is contained in:
56
src/backend/regex/COPYRIGHT
Normal file
56
src/backend/regex/COPYRIGHT
Normal file
@ -0,0 +1,56 @@
|
||||
Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved.
|
||||
This software is not subject to any license of the American Telephone
|
||||
and Telegraph Company or of the Regents of the University of California.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose on
|
||||
any computer system, and to alter it and redistribute it, subject
|
||||
to the following restrictions:
|
||||
|
||||
1. The author is not responsible for the consequences of use of this
|
||||
software, no matter how awful, even if they arise from flaws in it.
|
||||
|
||||
2. The origin of this software must not be misrepresented, either by
|
||||
explicit claim or by omission. Since few users ever read sources,
|
||||
credits must appear in the documentation.
|
||||
|
||||
3. Altered versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software. Since few users
|
||||
ever read sources, credits must appear in the documentation.
|
||||
|
||||
4. This notice may not be removed or altered.
|
||||
|
||||
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
/*-
|
||||
* Copyright (c) 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94
|
||||
*/
|
14
src/backend/regex/Makefile.inc
Normal file
14
src/backend/regex/Makefile.inc
Normal file
@ -0,0 +1,14 @@
|
||||
# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93
|
||||
|
||||
# regex sources
|
||||
VPATH:=$(VPATH):$(CURDIR)/regex
|
||||
|
||||
CFLAGS+=-DPOSIX_MISTAKE -I$(CURDIR)/regex
|
||||
|
||||
SRCS_REGEX= regcomp.c regerror.c regexec.c regfree.c
|
||||
|
||||
MAN3+= regex.0
|
||||
MAN7+= re_format.0
|
||||
|
||||
MLINKS+=regex.3 regcomp.3 regex.3 regexec.3 regex.3 regerror.3
|
||||
MLINKS+=regexec.3 regfree.3
|
94
src/backend/regex/WHATSNEW
Normal file
94
src/backend/regex/WHATSNEW
Normal file
@ -0,0 +1,94 @@
|
||||
# @(#)WHATSNEW 8.3 (Berkeley) 3/18/94
|
||||
|
||||
New in alpha3.4: The complex bug alluded to below has been fixed (in a
|
||||
slightly kludgey temporary way that may hurt efficiency a bit; this is
|
||||
another "get it out the door for 4.4" release). The tests at the end of
|
||||
the tests file have accordingly been uncommented. The primary sign of
|
||||
the bug was that something like a?b matching ab matched b rather than ab.
|
||||
(The bug was essentially specific to this exact situation, else it would
|
||||
have shown up earlier.)
|
||||
|
||||
New in alpha3.3: The definition of word boundaries has been altered
|
||||
slightly, to more closely match the usual programming notion that "_"
|
||||
is an alphabetic. Stuff used for pre-ANSI systems is now in a subdir,
|
||||
and the makefile no longer alludes to it in mysterious ways. The
|
||||
makefile has generally been cleaned up some. Fixes have been made
|
||||
(again!) so that the regression test will run without -DREDEBUG, at
|
||||
the cost of weaker checking. A workaround for a bug in some folks'
|
||||
<assert.h> has been added. And some more things have been added to
|
||||
tests, including a couple right at the end which are commented out
|
||||
because the code currently flunks them (complex bug; fix coming).
|
||||
Plus the usual minor cleanup.
|
||||
|
||||
New in alpha3.2: Assorted bits of cleanup and portability improvement
|
||||
(the development base is now a BSDI system using GCC instead of an ancient
|
||||
Sun system, and the newer compiler exposed some glitches). Fix for a
|
||||
serious bug that affected REs using many [] (including REG_ICASE REs
|
||||
because of the way they are implemented), *sometimes*, depending on
|
||||
memory-allocation patterns. The header-file prototypes no longer name
|
||||
the parameters, avoiding possible name conflicts. The possibility that
|
||||
some clot has defined CHAR_MIN as (say) `-128' instead of `(-128)' is
|
||||
now handled gracefully. "uchar" is no longer used as an internal type
|
||||
name (too many people have the same idea). Still the same old lousy
|
||||
performance, alas.
|
||||
|
||||
New in alpha3.1: Basically nothing, this release is just a bookkeeping
|
||||
convenience. Stay tuned.
|
||||
|
||||
New in alpha3.0: Performance is no better, alas, but some fixes have been
|
||||
made and some functionality has been added. (This is basically the "get
|
||||
it out the door in time for 4.4" release.) One bug fix: regfree() didn't
|
||||
free the main internal structure (how embarrassing). It is now possible
|
||||
to put NULs in either the RE or the target string, using (resp.) a new
|
||||
REG_PEND flag and the old REG_STARTEND flag. The REG_NOSPEC flag to
|
||||
regcomp() makes all characters ordinary, so you can match a literal
|
||||
string easily (this will become more useful when performance improves!).
|
||||
There are now primitives to match beginnings and ends of words, although
|
||||
the syntax is disgusting and so is the implementation. The REG_ATOI
|
||||
debugging interface has changed a bit. And there has been considerable
|
||||
internal cleanup of various kinds.
|
||||
|
||||
New in alpha2.3: Split change list out of README, and moved flags notes
|
||||
into Makefile. Macro-ized the name of regex(7) in regex(3), since it has
|
||||
to change for 4.4BSD. Cleanup work in engine.c, and some new regression
|
||||
tests to catch tricky cases thereof.
|
||||
|
||||
New in alpha2.2: Out-of-date manpages updated. Regerror() acquires two
|
||||
small extensions -- REG_ITOA and REG_ATOI -- which avoid debugging kludges
|
||||
in my own test program and might be useful to others for similar purposes.
|
||||
The regression test will now compile (and run) without REDEBUG. The
|
||||
BRE \$ bug is fixed. Most uses of "uchar" are gone; it's all chars now.
|
||||
Char/uchar parameters are now written int/unsigned, to avoid possible
|
||||
portability problems with unpromoted parameters. Some unsigned casts have
|
||||
been introduced to minimize portability problems with shifting into sign
|
||||
bits.
|
||||
|
||||
New in alpha2.1: Lots of little stuff, cleanup and fixes. The one big
|
||||
thing is that regex.h is now generated, using mkh, rather than being
|
||||
supplied in the distribution; due to circularities in dependencies,
|
||||
you have to build regex.h explicitly by "make h". The two known bugs
|
||||
have been fixed (and the regression test now checks for them), as has a
|
||||
problem with assertions not being suppressed in the absence of REDEBUG.
|
||||
No performance work yet.
|
||||
|
||||
New in alpha2: Backslash-anything is an ordinary character, not an
|
||||
error (except, of course, for the handful of backslashed metacharacters
|
||||
in BREs), which should reduce script breakage. The regression test
|
||||
checks *where* null strings are supposed to match, and has generally
|
||||
been tightened up somewhat. Small bug fixes in parameter passing (not
|
||||
harmful, but technically errors) and some other areas. Debugging
|
||||
invoked by defining REDEBUG rather than not defining NDEBUG.
|
||||
|
||||
New in alpha+3: full prototyping for internal routines, using a little
|
||||
helper program, mkh, which extracts prototypes given in stylized comments.
|
||||
More minor cleanup. Buglet fix: it's CHAR_BIT, not CHAR_BITS. Simple
|
||||
pre-screening of input when a literal string is known to be part of the
|
||||
RE; this does wonders for performance.
|
||||
|
||||
New in alpha+2: minor bits of cleanup. Notably, the number "32" for the
|
||||
word width isn't hardwired into regexec.c any more, the public header
|
||||
file prototypes the functions if __STDC__ is defined, and some small typos
|
||||
in the manpages have been fixed.
|
||||
|
||||
New in alpha+1: improvements to the manual pages, and an important
|
||||
extension, the REG_STARTEND option to regexec().
|
70
src/backend/regex/cclass.h
Normal file
70
src/backend/regex/cclass.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)cclass.h 8.3 (Berkeley) 3/20/94
|
||||
*/
|
||||
|
||||
/* character-class table */
|
||||
static struct cclass {
|
||||
char *name;
|
||||
char *chars;
|
||||
char *multis;
|
||||
} cclasses[] = {
|
||||
{"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789", ""},
|
||||
{"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
|
||||
""},
|
||||
{"blank", " \t", ""},
|
||||
{"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
|
||||
\25\26\27\30\31\32\33\34\35\36\37\177", ""},
|
||||
{"digit", "0123456789", ""},
|
||||
{"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
||||
""},
|
||||
{"lower", "abcdefghijklmnopqrstuvwxyz",
|
||||
""},
|
||||
{"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
|
||||
""},
|
||||
{"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
||||
""},
|
||||
{"space", "\t\n\v\f\r ", ""},
|
||||
{"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||
""},
|
||||
{"xdigit", "0123456789ABCDEFabcdef",
|
||||
""},
|
||||
{NULL, 0, ""}
|
||||
};
|
144
src/backend/regex/cdefs.h
Normal file
144
src/backend/regex/cdefs.h
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
* ++Copyright++ 1991, 1993
|
||||
* -
|
||||
* Copyright (c) 1991, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* -
|
||||
* Portions Copyright (c) 1993 by Digital Equipment Corporation.
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies, and that
|
||||
* the name of Digital Equipment Corporation not be used in advertising or
|
||||
* publicity pertaining to distribution of the document or software without
|
||||
* specific, written prior permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
|
||||
* WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
|
||||
* CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
|
||||
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
||||
* SOFTWARE.
|
||||
* -
|
||||
* --Copyright--
|
||||
*/
|
||||
|
||||
/*
|
||||
* @(#)cdefs.h 8.1 (Berkeley) 6/2/93
|
||||
* $Id: cdefs.h,v 1.1.1.1 1996/07/09 06:21:49 scrappy Exp $
|
||||
*/
|
||||
|
||||
#ifndef _CDEFS_H_
|
||||
#define _CDEFS_H_
|
||||
|
||||
#if defined(__cplusplus)
|
||||
#define __BEGIN_DECLS extern "C" {
|
||||
#define __END_DECLS };
|
||||
#else
|
||||
#define __BEGIN_DECLS
|
||||
#define __END_DECLS
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The __CONCAT macro is used to concatenate parts of symbol names, e.g.
|
||||
* with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo.
|
||||
* The __CONCAT macro is a bit tricky -- make sure you don't put spaces
|
||||
* in between its arguments. __CONCAT can also concatenate double-quoted
|
||||
* strings produced by the __STRING macro, but this only works with ANSI C.
|
||||
*/
|
||||
#if defined(__STDC__) || defined(__cplusplus)
|
||||
#define __P(protos) protos /* full-blown ANSI C */
|
||||
#define __CONCAT(x,y) x ## y
|
||||
#define __STRING(x) #x
|
||||
|
||||
#define __const const /* define reserved names to standard */
|
||||
#define __signed signed
|
||||
#define __volatile volatile
|
||||
#if defined(__cplusplus)
|
||||
#define __inline inline /* convert to C++ keyword */
|
||||
#else
|
||||
#ifndef __GNUC__
|
||||
#define __inline /* delete GCC keyword */
|
||||
#endif /* !__GNUC__ */
|
||||
#endif /* !__cplusplus */
|
||||
|
||||
#else /* !(__STDC__ || __cplusplus) */
|
||||
#define __P(protos) () /* traditional C preprocessor */
|
||||
#define __CONCAT(x,y) x/**/y
|
||||
#define __STRING(x) "x"
|
||||
|
||||
#ifndef __GNUC__
|
||||
#define __const /* delete pseudo-ANSI C keywords */
|
||||
#define __inline
|
||||
#define __signed
|
||||
#define __volatile
|
||||
/*
|
||||
* In non-ANSI C environments, new programs will want ANSI-only C keywords
|
||||
* deleted from the program and old programs will want them left alone.
|
||||
* When using a compiler other than gcc, programs using the ANSI C keywords
|
||||
* const, inline etc. as normal identifiers should define -DNO_ANSI_KEYWORDS.
|
||||
* When using "gcc -traditional", we assume that this is the intent; if
|
||||
* __GNUC__ is defined but __STDC__ is not, we leave the new keywords alone.
|
||||
*/
|
||||
#ifndef NO_ANSI_KEYWORDS
|
||||
#define const /* delete ANSI C keywords */
|
||||
#define inline
|
||||
#define signed
|
||||
#define volatile
|
||||
#endif
|
||||
#endif /* !__GNUC__ */
|
||||
#endif /* !(__STDC__ || __cplusplus) */
|
||||
|
||||
/*
|
||||
* GCC1 and some versions of GCC2 declare dead (non-returning) and
|
||||
* pure (no side effects) functions using "volatile" and "const";
|
||||
* unfortunately, these then cause warnings under "-ansi -pedantic".
|
||||
* GCC2 uses a new, peculiar __attribute__((attrs)) style. All of
|
||||
* these work for GNU C++ (modulo a slight glitch in the C++ grammar
|
||||
* in the distribution version of 2.5.5).
|
||||
*/
|
||||
#if !defined(__GNUC__) || __GNUC__ < 2 || __GNUC_MINOR__ < 5
|
||||
#define __attribute__(x) /* delete __attribute__ if non-gcc or gcc1 */
|
||||
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
|
||||
#define __dead __volatile
|
||||
#define __pure __const
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Delete pseudo-keywords wherever they are not available or needed. */
|
||||
#ifndef __dead
|
||||
#define __dead
|
||||
#define __pure
|
||||
#endif
|
||||
|
||||
#endif /* !_CDEFS_H_ */
|
141
src/backend/regex/cname.h
Normal file
141
src/backend/regex/cname.h
Normal file
@ -0,0 +1,141 @@
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)cname.h 8.3 (Berkeley) 3/20/94
|
||||
*/
|
||||
|
||||
/* character-name table */
|
||||
static struct cname {
|
||||
char *name;
|
||||
char code;
|
||||
} cnames[] = {
|
||||
{"NUL", '\0'},
|
||||
{"SOH", '\001'},
|
||||
{"STX", '\002'},
|
||||
{"ETX", '\003'},
|
||||
{"EOT", '\004'},
|
||||
{"ENQ", '\005'},
|
||||
{"ACK", '\006'},
|
||||
{"BEL", '\007'},
|
||||
{"alert", '\007'},
|
||||
{"BS", '\010'},
|
||||
{"backspace", '\b'},
|
||||
{"HT", '\011'},
|
||||
{"tab", '\t'},
|
||||
{"LF", '\012'},
|
||||
{"newline", '\n'},
|
||||
{"VT", '\013'},
|
||||
{"vertical-tab", '\v'},
|
||||
{"FF", '\014'},
|
||||
{"form-feed", '\f'},
|
||||
{"CR", '\015'},
|
||||
{"carriage-return", '\r'},
|
||||
{"SO", '\016'},
|
||||
{"SI", '\017'},
|
||||
{"DLE", '\020'},
|
||||
{"DC1", '\021'},
|
||||
{"DC2", '\022'},
|
||||
{"DC3", '\023'},
|
||||
{"DC4", '\024'},
|
||||
{"NAK", '\025'},
|
||||
{"SYN", '\026'},
|
||||
{"ETB", '\027'},
|
||||
{"CAN", '\030'},
|
||||
{"EM", '\031'},
|
||||
{"SUB", '\032'},
|
||||
{"ESC", '\033'},
|
||||
{"IS4", '\034'},
|
||||
{"FS", '\034'},
|
||||
{"IS3", '\035'},
|
||||
{"GS", '\035'},
|
||||
{"IS2", '\036'},
|
||||
{"RS", '\036'},
|
||||
{"IS1", '\037'},
|
||||
{"US", '\037'},
|
||||
{"space", ' '},
|
||||
{"exclamation-mark", '!'},
|
||||
{"quotation-mark", '"'},
|
||||
{"number-sign", '#'},
|
||||
{"dollar-sign", '$'},
|
||||
{"percent-sign", '%'},
|
||||
{"ampersand", '&'},
|
||||
{"apostrophe", '\''},
|
||||
{"left-parenthesis", '('},
|
||||
{"right-parenthesis", ')'},
|
||||
{"asterisk", '*'},
|
||||
{"plus-sign", '+'},
|
||||
{"comma", ','},
|
||||
{"hyphen", '-'},
|
||||
{"hyphen-minus", '-'},
|
||||
{"period", '.'},
|
||||
{"full-stop", '.'},
|
||||
{"slash", '/'},
|
||||
{"solidus", '/'},
|
||||
{"zero", '0'},
|
||||
{"one", '1'},
|
||||
{"two", '2'},
|
||||
{"three", '3'},
|
||||
{"four", '4'},
|
||||
{"five", '5'},
|
||||
{"six", '6'},
|
||||
{"seven", '7'},
|
||||
{"eight", '8'},
|
||||
{"nine", '9'},
|
||||
{"colon", ':'},
|
||||
{"semicolon", ';'},
|
||||
{"less-than-sign", '<'},
|
||||
{"equals-sign", '='},
|
||||
{"greater-than-sign", '>'},
|
||||
{"question-mark", '?'},
|
||||
{"commercial-at", '@'},
|
||||
{"left-square-bracket", '['},
|
||||
{"backslash", '\\'},
|
||||
{"reverse-solidus", '\\'},
|
||||
{"right-square-bracket", ']'},
|
||||
{"circumflex", '^'},
|
||||
{"circumflex-accent", '^'},
|
||||
{"underscore", '_'},
|
||||
{"low-line", '_'},
|
||||
{"grave-accent", '`'},
|
||||
{"left-brace", '{'},
|
||||
{"left-curly-bracket", '{'},
|
||||
{"vertical-line", '|'},
|
||||
{"right-brace", '}'},
|
||||
{"right-curly-bracket", '}'},
|
||||
{"tilde", '~'},
|
||||
{"DEL", '\177'},
|
||||
{NULL, 0}
|
||||
};
|
1092
src/backend/regex/engine.c
Normal file
1092
src/backend/regex/engine.c
Normal file
File diff suppressed because it is too large
Load Diff
269
src/backend/regex/re_format.7
Normal file
269
src/backend/regex/re_format.7
Normal file
@ -0,0 +1,269 @@
|
||||
.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
.\" Copyright (c) 1992, 1993, 1994
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
.\"
|
||||
.\" This code is derived from software contributed to Berkeley by
|
||||
.\" Henry Spencer.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\" 3. All advertising materials mentioning features or use of this software
|
||||
.\" must display the following acknowledgement:
|
||||
.\" This product includes software developed by the University of
|
||||
.\" California, Berkeley and its contributors.
|
||||
.\" 4. Neither the name of the University nor the names of its contributors
|
||||
.\" may be used to endorse or promote products derived from this software
|
||||
.\" without specific prior written permission.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)re_format.7 8.3 (Berkeley) 3/20/94
|
||||
.\"
|
||||
.TH RE_FORMAT 7 "March 20, 1994"
|
||||
.SH NAME
|
||||
re_format \- POSIX 1003.2 regular expressions
|
||||
.SH DESCRIPTION
|
||||
Regular expressions (``RE''s),
|
||||
as defined in POSIX 1003.2, come in two forms:
|
||||
modern REs (roughly those of
|
||||
.IR egrep ;
|
||||
1003.2 calls these ``extended'' REs)
|
||||
and obsolete REs (roughly those of
|
||||
.IR ed ;
|
||||
1003.2 ``basic'' REs).
|
||||
Obsolete REs mostly exist for backward compatibility in some old programs;
|
||||
they will be discussed at the end.
|
||||
1003.2 leaves some aspects of RE syntax and semantics open;
|
||||
`\(dg' marks decisions on these aspects that
|
||||
may not be fully portable to other 1003.2 implementations.
|
||||
.PP
|
||||
A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR,
|
||||
separated by `|'.
|
||||
It matches anything that matches one of the branches.
|
||||
.PP
|
||||
A branch is one\(dg or more \fIpieces\fR, concatenated.
|
||||
It matches a match for the first, followed by a match for the second, etc.
|
||||
.PP
|
||||
A piece is an \fIatom\fR possibly followed
|
||||
by a single\(dg `*', `+', `?', or \fIbound\fR.
|
||||
An atom followed by `*' matches a sequence of 0 or more matches of the atom.
|
||||
An atom followed by `+' matches a sequence of 1 or more matches of the atom.
|
||||
An atom followed by `?' matches a sequence of 0 or 1 matches of the atom.
|
||||
.PP
|
||||
A \fIbound\fR is `{' followed by an unsigned decimal integer,
|
||||
possibly followed by `,'
|
||||
possibly followed by another unsigned decimal integer,
|
||||
always followed by `}'.
|
||||
The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive,
|
||||
and if there are two of them, the first may not exceed the second.
|
||||
An atom followed by a bound containing one integer \fIi\fR
|
||||
and no comma matches
|
||||
a sequence of exactly \fIi\fR matches of the atom.
|
||||
An atom followed by a bound
|
||||
containing one integer \fIi\fR and a comma matches
|
||||
a sequence of \fIi\fR or more matches of the atom.
|
||||
An atom followed by a bound
|
||||
containing two integers \fIi\fR and \fIj\fR matches
|
||||
a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom.
|
||||
.PP
|
||||
An atom is a regular expression enclosed in `()' (matching a match for the
|
||||
regular expression),
|
||||
an empty set of `()' (matching the null string)\(dg,
|
||||
a \fIbracket expression\fR (see below), `.'
|
||||
(matching any single character), `^' (matching the null string at the
|
||||
beginning of a line), `$' (matching the null string at the
|
||||
end of a line), a `\e' followed by one of the characters
|
||||
`^.[$()|*+?{\e'
|
||||
(matching that character taken as an ordinary character),
|
||||
a `\e' followed by any other character\(dg
|
||||
(matching that character taken as an ordinary character,
|
||||
as if the `\e' had not been present\(dg),
|
||||
or a single character with no other significance (matching that character).
|
||||
A `{' followed by a character other than a digit is an ordinary
|
||||
character, not the beginning of a bound\(dg.
|
||||
It is illegal to end an RE with `\e'.
|
||||
.PP
|
||||
A \fIbracket expression\fR is a list of characters enclosed in `[]'.
|
||||
It normally matches any single character from the list (but see below).
|
||||
If the list begins with `^',
|
||||
it matches any single character
|
||||
(but see below) \fInot\fR from the rest of the list.
|
||||
If two characters in the list are separated by `\-', this is shorthand
|
||||
for the full \fIrange\fR of characters between those two (inclusive) in the
|
||||
collating sequence,
|
||||
e.g. `[0-9]' in ASCII matches any decimal digit.
|
||||
It is illegal\(dg for two ranges to share an
|
||||
endpoint, e.g. `a-c-e'.
|
||||
Ranges are very collating-sequence-dependent,
|
||||
and portable programs should avoid relying on them.
|
||||
.PP
|
||||
To include a literal `]' in the list, make it the first character
|
||||
(following a possible `^').
|
||||
To include a literal `\-', make it the first or last character,
|
||||
or the second endpoint of a range.
|
||||
To use a literal `\-' as the first endpoint of a range,
|
||||
enclose it in `[.' and `.]' to make it a collating element (see below).
|
||||
With the exception of these and some combinations using `[' (see next
|
||||
paragraphs), all other special characters, including `\e', lose their
|
||||
special significance within a bracket expression.
|
||||
.PP
|
||||
Within a bracket expression, a collating element (a character,
|
||||
a multi-character sequence that collates as if it were a single character,
|
||||
or a collating-sequence name for either)
|
||||
enclosed in `[.' and `.]' stands for the
|
||||
sequence of characters of that collating element.
|
||||
The sequence is a single element of the bracket expression's list.
|
||||
A bracket expression containing a multi-character collating element
|
||||
can thus match more than one character,
|
||||
e.g. if the collating sequence includes a `ch' collating element,
|
||||
then the RE `[[.ch.]]*c' matches the first five characters
|
||||
of `chchcc'.
|
||||
.PP
|
||||
Within a bracket expression, a collating element enclosed in `[=' and
|
||||
`=]' is an equivalence class, standing for the sequences of characters
|
||||
of all collating elements equivalent to that one, including itself.
|
||||
(If there are no other equivalent collating elements,
|
||||
the treatment is as if the enclosing delimiters were `[.' and `.]'.)
|
||||
For example, if o and \o'o^' are the members of an equivalence class,
|
||||
then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous.
|
||||
An equivalence class may not\(dg be an endpoint
|
||||
of a range.
|
||||
.PP
|
||||
Within a bracket expression, the name of a \fIcharacter class\fR enclosed
|
||||
in `[:' and `:]' stands for the list of all characters belonging to that
|
||||
class.
|
||||
Standard character class names are:
|
||||
.PP
|
||||
.RS
|
||||
.nf
|
||||
.ta 3c 6c 9c
|
||||
alnum digit punct
|
||||
alpha graph space
|
||||
blank lower upper
|
||||
cntrl print xdigit
|
||||
.fi
|
||||
.RE
|
||||
.PP
|
||||
These stand for the character classes defined in
|
||||
.IR ctype (3).
|
||||
A locale may provide others.
|
||||
A character class may not be used as an endpoint of a range.
|
||||
.PP
|
||||
There are two special cases\(dg of bracket expressions:
|
||||
the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at
|
||||
the beginning and end of a word respectively.
|
||||
A word is defined as a sequence of
|
||||
word characters
|
||||
which is neither preceded nor followed by
|
||||
word characters.
|
||||
A word character is an
|
||||
.I alnum
|
||||
character (as defined by
|
||||
.IR ctype (3))
|
||||
or an underscore.
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
.PP
|
||||
In the event that an RE could match more than one substring of a given
|
||||
string,
|
||||
the RE matches the one starting earliest in the string.
|
||||
If the RE could match more than one substring starting at that point,
|
||||
it matches the longest.
|
||||
Subexpressions also match the longest possible substrings, subject to
|
||||
the constraint that the whole match be as long as possible,
|
||||
with subexpressions starting earlier in the RE taking priority over
|
||||
ones starting later.
|
||||
Note that higher-level subexpressions thus take priority over
|
||||
their lower-level component subexpressions.
|
||||
.PP
|
||||
Match lengths are measured in characters, not collating elements.
|
||||
A null string is considered longer than no match at all.
|
||||
For example,
|
||||
`bb*' matches the three middle characters of `abbbc',
|
||||
`(wee|week)(knights|nights)' matches all ten characters of `weeknights',
|
||||
when `(.*).*' is matched against `abc' the parenthesized subexpression
|
||||
matches all three characters, and
|
||||
when `(a*)*' is matched against `bc' both the whole RE and the parenthesized
|
||||
subexpression match the null string.
|
||||
.PP
|
||||
If case-independent matching is specified,
|
||||
the effect is much as if all case distinctions had vanished from the
|
||||
alphabet.
|
||||
When an alphabetic that exists in multiple cases appears as an
|
||||
ordinary character outside a bracket expression, it is effectively
|
||||
transformed into a bracket expression containing both cases,
|
||||
e.g. `x' becomes `[xX]'.
|
||||
When it appears inside a bracket expression, all case counterparts
|
||||
of it are added to the bracket expression, so that (e.g.) `[x]'
|
||||
becomes `[xX]' and `[^x]' becomes `[^xX]'.
|
||||
.PP
|
||||
No particular limit is imposed on the length of REs\(dg.
|
||||
Programs intended to be portable should not employ REs longer
|
||||
than 256 bytes,
|
||||
as an implementation can refuse to accept such REs and remain
|
||||
POSIX-compliant.
|
||||
.PP
|
||||
Obsolete (``basic'') regular expressions differ in several respects.
|
||||
`|', `+', and `?' are ordinary characters and there is no equivalent
|
||||
for their functionality.
|
||||
The delimiters for bounds are `\e{' and `\e}',
|
||||
with `{' and `}' by themselves ordinary characters.
|
||||
The parentheses for nested subexpressions are `\e(' and `\e)',
|
||||
with `(' and `)' by themselves ordinary characters.
|
||||
`^' is an ordinary character except at the beginning of the
|
||||
RE or\(dg the beginning of a parenthesized subexpression,
|
||||
`$' is an ordinary character except at the end of the
|
||||
RE or\(dg the end of a parenthesized subexpression,
|
||||
and `*' is an ordinary character if it appears at the beginning of the
|
||||
RE or the beginning of a parenthesized subexpression
|
||||
(after a possible leading `^').
|
||||
Finally, there is one new type of atom, a \fIback reference\fR:
|
||||
`\e' followed by a non-zero decimal digit \fId\fR
|
||||
matches the same sequence of characters
|
||||
matched by the \fId\fRth parenthesized subexpression
|
||||
(numbering subexpressions by the positions of their opening parentheses,
|
||||
left to right),
|
||||
so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'.
|
||||
.SH SEE ALSO
|
||||
regex(3)
|
||||
.PP
|
||||
POSIX 1003.2, section 2.8 (Regular Expression Notation).
|
||||
.SH BUGS
|
||||
Having two kinds of REs is a botch.
|
||||
.PP
|
||||
The current 1003.2 spec says that `)' is an ordinary character in
|
||||
the absence of an unmatched `(';
|
||||
this was an unintentional result of a wording error,
|
||||
and change is likely.
|
||||
Avoid relying on it.
|
||||
.PP
|
||||
Back references are a dreadful botch,
|
||||
posing major problems for efficient implementations.
|
||||
They are also somewhat vaguely defined
|
||||
(does
|
||||
`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?).
|
||||
Avoid using them.
|
||||
.PP
|
||||
1003.2's specification of case-independent matching is vague.
|
||||
The ``one case implies all cases'' definition given above
|
||||
is current consensus among implementors as to the right interpretation.
|
||||
.PP
|
||||
The syntax for word boundaries is incredibly ugly.
|
1701
src/backend/regex/regcomp.c
Normal file
1701
src/backend/regex/regcomp.c
Normal file
File diff suppressed because it is too large
Load Diff
178
src/backend/regex/regerror.c
Normal file
178
src/backend/regex/regerror.c
Normal file
@ -0,0 +1,178 @@
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)regerror.c 8.4 (Berkeley) 3/20/94
|
||||
*/
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
/* ========= begin header generated by ./mkh ========= */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === regerror.c === */
|
||||
static char *regatoi __P((const regex_t *preg, char *localbuf));
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ./mkh ========= */
|
||||
/*
|
||||
= #define REG_NOMATCH 1
|
||||
= #define REG_BADPAT 2
|
||||
= #define REG_ECOLLATE 3
|
||||
= #define REG_ECTYPE 4
|
||||
= #define REG_EESCAPE 5
|
||||
= #define REG_ESUBREG 6
|
||||
= #define REG_EBRACK 7
|
||||
= #define REG_EPAREN 8
|
||||
= #define REG_EBRACE 9
|
||||
= #define REG_BADBR 10
|
||||
= #define REG_ERANGE 11
|
||||
= #define REG_ESPACE 12
|
||||
= #define REG_BADRPT 13
|
||||
= #define REG_EMPTY 14
|
||||
= #define REG_ASSERT 15
|
||||
= #define REG_INVARG 16
|
||||
= #define REG_ATOI 255 // convert name to number (!)
|
||||
= #define REG_ITOA 0400 // convert number to name (!)
|
||||
*/
|
||||
static struct rerr {
|
||||
int code;
|
||||
char *name;
|
||||
char *explain;
|
||||
} rerrs[] = {
|
||||
{REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match"},
|
||||
{REG_BADPAT, "REG_BADPAT", "invalid regular expression"},
|
||||
{REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"},
|
||||
{REG_ECTYPE, "REG_ECTYPE", "invalid character class"},
|
||||
{REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)"},
|
||||
{REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"},
|
||||
{REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced"},
|
||||
{REG_EPAREN, "REG_EPAREN", "parentheses not balanced"},
|
||||
{REG_EBRACE, "REG_EBRACE", "braces not balanced"},
|
||||
{REG_BADBR, "REG_BADBR", "invalid repetition count(s)"},
|
||||
{REG_ERANGE, "REG_ERANGE", "invalid character range"},
|
||||
{REG_ESPACE, "REG_ESPACE", "out of memory"},
|
||||
{REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid"},
|
||||
{REG_EMPTY, "REG_EMPTY", "empty (sub)expression"},
|
||||
{REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"},
|
||||
{REG_INVARG, "REG_INVARG", "invalid argument to regex routine"},
|
||||
{0, "", "*** unknown regexp error code ***"}
|
||||
};
|
||||
|
||||
/*
|
||||
- regerror - the interface to error numbers
|
||||
= extern size_t regerror(int, const regex_t *, char *, size_t);
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
size_t
|
||||
pg95_regerror(errcode, preg, errbuf, errbuf_size)
|
||||
int errcode;
|
||||
const regex_t *preg;
|
||||
char *errbuf;
|
||||
size_t errbuf_size;
|
||||
{
|
||||
register struct rerr *r;
|
||||
register size_t len;
|
||||
register int target = errcode &~ REG_ITOA;
|
||||
register char *s;
|
||||
char convbuf[50];
|
||||
|
||||
if (errcode == REG_ATOI)
|
||||
s = regatoi(preg, convbuf);
|
||||
else {
|
||||
for (r = rerrs; r->code != 0; r++)
|
||||
if (r->code == target)
|
||||
break;
|
||||
|
||||
if (errcode®_ITOA) {
|
||||
if (r->code != 0)
|
||||
(void) strcpy(convbuf, r->name);
|
||||
else
|
||||
sprintf(convbuf, "REG_0x%x", target);
|
||||
assert(strlen(convbuf) < sizeof(convbuf));
|
||||
s = convbuf;
|
||||
} else
|
||||
s = r->explain;
|
||||
}
|
||||
|
||||
len = strlen(s) + 1;
|
||||
if (errbuf_size > 0) {
|
||||
if (errbuf_size > len)
|
||||
(void) strcpy(errbuf, s);
|
||||
else {
|
||||
(void) strncpy(errbuf, s, errbuf_size-1);
|
||||
errbuf[errbuf_size-1] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
return(len);
|
||||
}
|
||||
|
||||
/*
|
||||
- regatoi - internal routine to implement REG_ATOI
|
||||
== static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
*/
|
||||
static char *
|
||||
regatoi(preg, localbuf)
|
||||
const regex_t *preg;
|
||||
char *localbuf;
|
||||
{
|
||||
register struct rerr *r;
|
||||
|
||||
for (r = rerrs; r->code != 0; r++)
|
||||
if (strcmp(r->name, preg->re_endp) == 0)
|
||||
break;
|
||||
if (r->code == 0)
|
||||
return("0");
|
||||
|
||||
sprintf(localbuf, "%d", r->code);
|
||||
return(localbuf);
|
||||
}
|
538
src/backend/regex/regex.3
Normal file
538
src/backend/regex/regex.3
Normal file
@ -0,0 +1,538 @@
|
||||
.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
.\" Copyright (c) 1992, 1993, 1994
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
.\"
|
||||
.\" This code is derived from software contributed to Berkeley by
|
||||
.\" Henry Spencer.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\" 3. All advertising materials mentioning features or use of this software
|
||||
.\" must display the following acknowledgement:
|
||||
.\" This product includes software developed by the University of
|
||||
.\" California, Berkeley and its contributors.
|
||||
.\" 4. Neither the name of the University nor the names of its contributors
|
||||
.\" may be used to endorse or promote products derived from this software
|
||||
.\" without specific prior written permission.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" @(#)regex.3 8.4 (Berkeley) 3/20/94
|
||||
.\"
|
||||
.TH REGEX 3 "March 20, 1994"
|
||||
.de ZR
|
||||
.\" one other place knows this name: the SEE ALSO section
|
||||
.IR re_format (7) \\$1
|
||||
..
|
||||
.SH NAME
|
||||
regcomp, regexec, regerror, regfree \- regular-expression library
|
||||
.SH SYNOPSIS
|
||||
.ft B
|
||||
.\".na
|
||||
#include <sys/types.h>
|
||||
.br
|
||||
#include <regex.h>
|
||||
.HP 10
|
||||
int regcomp(regex_t\ *preg, const\ char\ *pattern, int\ cflags);
|
||||
.HP
|
||||
int\ regexec(const\ regex_t\ *preg, const\ char\ *string,
|
||||
size_t\ nmatch, regmatch_t\ pmatch[], int\ eflags);
|
||||
.HP
|
||||
size_t\ regerror(int\ errcode, const\ regex_t\ *preg,
|
||||
char\ *errbuf, size_t\ errbuf_size);
|
||||
.HP
|
||||
void\ regfree(regex_t\ *preg);
|
||||
.\".ad
|
||||
.ft
|
||||
.SH DESCRIPTION
|
||||
These routines implement POSIX 1003.2 regular expressions (``RE''s);
|
||||
see
|
||||
.ZR .
|
||||
.I Regcomp
|
||||
compiles an RE written as a string into an internal form,
|
||||
.I regexec
|
||||
matches that internal form against a string and reports results,
|
||||
.I regerror
|
||||
transforms error codes from either into human-readable messages,
|
||||
and
|
||||
.I regfree
|
||||
frees any dynamically-allocated storage used by the internal form
|
||||
of an RE.
|
||||
.PP
|
||||
The header
|
||||
.I <regex.h>
|
||||
declares two structure types,
|
||||
.I regex_t
|
||||
and
|
||||
.IR regmatch_t ,
|
||||
the former for compiled internal forms and the latter for match reporting.
|
||||
It also declares the four functions,
|
||||
a type
|
||||
.IR regoff_t ,
|
||||
and a number of constants with names starting with ``REG_''.
|
||||
.PP
|
||||
.I Regcomp
|
||||
compiles the regular expression contained in the
|
||||
.I pattern
|
||||
string,
|
||||
subject to the flags in
|
||||
.IR cflags ,
|
||||
and places the results in the
|
||||
.I regex_t
|
||||
structure pointed to by
|
||||
.IR preg .
|
||||
.I Cflags
|
||||
is the bitwise OR of zero or more of the following flags:
|
||||
.IP REG_EXTENDED \w'REG_EXTENDED'u+2n
|
||||
Compile modern (``extended'') REs,
|
||||
rather than the obsolete (``basic'') REs that
|
||||
are the default.
|
||||
.IP REG_BASIC
|
||||
This is a synonym for 0,
|
||||
provided as a counterpart to REG_EXTENDED to improve readability.
|
||||
.IP REG_NOSPEC
|
||||
Compile with recognition of all special characters turned off.
|
||||
All characters are thus considered ordinary,
|
||||
so the ``RE'' is a literal string.
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
REG_EXTENDED and REG_NOSPEC may not be used
|
||||
in the same call to
|
||||
.IR regcomp .
|
||||
.IP REG_ICASE
|
||||
Compile for matching that ignores upper/lower case distinctions.
|
||||
See
|
||||
.ZR .
|
||||
.IP REG_NOSUB
|
||||
Compile for matching that need only report success or failure,
|
||||
not what was matched.
|
||||
.IP REG_NEWLINE
|
||||
Compile for newline-sensitive matching.
|
||||
By default, newline is a completely ordinary character with no special
|
||||
meaning in either REs or strings.
|
||||
With this flag,
|
||||
`[^' bracket expressions and `.' never match newline,
|
||||
a `^' anchor matches the null string after any newline in the string
|
||||
in addition to its normal function,
|
||||
and the `$' anchor matches the null string before any newline in the
|
||||
string in addition to its normal function.
|
||||
.IP REG_PEND
|
||||
The regular expression ends,
|
||||
not at the first NUL,
|
||||
but just before the character pointed to by the
|
||||
.I re_endp
|
||||
member of the structure pointed to by
|
||||
.IR preg .
|
||||
The
|
||||
.I re_endp
|
||||
member is of type
|
||||
.IR const\ char\ * .
|
||||
This flag permits inclusion of NULs in the RE;
|
||||
they are considered ordinary characters.
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
.PP
|
||||
When successful,
|
||||
.I regcomp
|
||||
returns 0 and fills in the structure pointed to by
|
||||
.IR preg .
|
||||
One member of that structure
|
||||
(other than
|
||||
.IR re_endp )
|
||||
is publicized:
|
||||
.IR re_nsub ,
|
||||
of type
|
||||
.IR size_t ,
|
||||
contains the number of parenthesized subexpressions within the RE
|
||||
(except that the value of this member is undefined if the
|
||||
REG_NOSUB flag was used).
|
||||
If
|
||||
.I regcomp
|
||||
fails, it returns a non-zero error code;
|
||||
see DIAGNOSTICS.
|
||||
.PP
|
||||
.I Regexec
|
||||
matches the compiled RE pointed to by
|
||||
.I preg
|
||||
against the
|
||||
.IR string ,
|
||||
subject to the flags in
|
||||
.IR eflags ,
|
||||
and reports results using
|
||||
.IR nmatch ,
|
||||
.IR pmatch ,
|
||||
and the returned value.
|
||||
The RE must have been compiled by a previous invocation of
|
||||
.IR regcomp .
|
||||
The compiled form is not altered during execution of
|
||||
.IR regexec ,
|
||||
so a single compiled RE can be used simultaneously by multiple threads.
|
||||
.PP
|
||||
By default,
|
||||
the NUL-terminated string pointed to by
|
||||
.I string
|
||||
is considered to be the text of an entire line, minus any terminating
|
||||
newline.
|
||||
The
|
||||
.I eflags
|
||||
argument is the bitwise OR of zero or more of the following flags:
|
||||
.IP REG_NOTBOL \w'REG_STARTEND'u+2n
|
||||
The first character of
|
||||
the string
|
||||
is not the beginning of a line, so the `^' anchor should not match before it.
|
||||
This does not affect the behavior of newlines under REG_NEWLINE.
|
||||
.IP REG_NOTEOL
|
||||
The NUL terminating
|
||||
the string
|
||||
does not end a line, so the `$' anchor should not match before it.
|
||||
This does not affect the behavior of newlines under REG_NEWLINE.
|
||||
.IP REG_STARTEND
|
||||
The string is considered to start at
|
||||
\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR
|
||||
and to have a terminating NUL located at
|
||||
\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR
|
||||
(there need not actually be a NUL at that location),
|
||||
regardless of the value of
|
||||
.IR nmatch .
|
||||
See below for the definition of
|
||||
.IR pmatch
|
||||
and
|
||||
.IR nmatch .
|
||||
This is an extension,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
Note that a non-zero \fIrm_so\fR does not imply REG_NOTBOL;
|
||||
REG_STARTEND affects only the location of the string,
|
||||
not how it is matched.
|
||||
.PP
|
||||
See
|
||||
.ZR
|
||||
for a discussion of what is matched in situations where an RE or a
|
||||
portion thereof could match any of several substrings of
|
||||
.IR string .
|
||||
.PP
|
||||
Normally,
|
||||
.I regexec
|
||||
returns 0 for success and the non-zero code REG_NOMATCH for failure.
|
||||
Other non-zero error codes may be returned in exceptional situations;
|
||||
see DIAGNOSTICS.
|
||||
.PP
|
||||
If REG_NOSUB was specified in the compilation of the RE,
|
||||
or if
|
||||
.I nmatch
|
||||
is 0,
|
||||
.I regexec
|
||||
ignores the
|
||||
.I pmatch
|
||||
argument (but see below for the case where REG_STARTEND is specified).
|
||||
Otherwise,
|
||||
.I pmatch
|
||||
points to an array of
|
||||
.I nmatch
|
||||
structures of type
|
||||
.IR regmatch_t .
|
||||
Such a structure has at least the members
|
||||
.I rm_so
|
||||
and
|
||||
.IR rm_eo ,
|
||||
both of type
|
||||
.I regoff_t
|
||||
(a signed arithmetic type at least as large as an
|
||||
.I off_t
|
||||
and a
|
||||
.IR ssize_t ),
|
||||
containing respectively the offset of the first character of a substring
|
||||
and the offset of the first character after the end of the substring.
|
||||
Offsets are measured from the beginning of the
|
||||
.I string
|
||||
argument given to
|
||||
.IR regexec .
|
||||
An empty substring is denoted by equal offsets,
|
||||
both indicating the character following the empty substring.
|
||||
.PP
|
||||
The 0th member of the
|
||||
.I pmatch
|
||||
array is filled in to indicate what substring of
|
||||
.I string
|
||||
was matched by the entire RE.
|
||||
Remaining members report what substring was matched by parenthesized
|
||||
subexpressions within the RE;
|
||||
member
|
||||
.I i
|
||||
reports subexpression
|
||||
.IR i ,
|
||||
with subexpressions counted (starting at 1) by the order of their opening
|
||||
parentheses in the RE, left to right.
|
||||
Unused entries in the array\(emcorresponding either to subexpressions that
|
||||
did not participate in the match at all, or to subexpressions that do not
|
||||
exist in the RE (that is, \fIi\fR\ > \fIpreg\fR\->\fIre_nsub\fR)\(emhave both
|
||||
.I rm_so
|
||||
and
|
||||
.I rm_eo
|
||||
set to \-1.
|
||||
If a subexpression participated in the match several times,
|
||||
the reported substring is the last one it matched.
|
||||
(Note, as an example in particular, that when the RE `(b*)+' matches `bbb',
|
||||
the parenthesized subexpression matches each of the three `b's and then
|
||||
an infinite number of empty strings following the last `b',
|
||||
so the reported substring is one of the empties.)
|
||||
.PP
|
||||
If REG_STARTEND is specified,
|
||||
.I pmatch
|
||||
must point to at least one
|
||||
.I regmatch_t
|
||||
(even if
|
||||
.I nmatch
|
||||
is 0 or REG_NOSUB was specified),
|
||||
to hold the input offsets for REG_STARTEND.
|
||||
Use for output is still entirely controlled by
|
||||
.IR nmatch ;
|
||||
if
|
||||
.I nmatch
|
||||
is 0 or REG_NOSUB was specified,
|
||||
the value of
|
||||
.IR pmatch [0]
|
||||
will not be changed by a successful
|
||||
.IR regexec .
|
||||
.PP
|
||||
.I Regerror
|
||||
maps a non-zero
|
||||
.I errcode
|
||||
from either
|
||||
.I regcomp
|
||||
or
|
||||
.I regexec
|
||||
to a human-readable, printable message.
|
||||
If
|
||||
.I preg
|
||||
is non-NULL,
|
||||
the error code should have arisen from use of
|
||||
the
|
||||
.I regex_t
|
||||
pointed to by
|
||||
.IR preg ,
|
||||
and if the error code came from
|
||||
.IR regcomp ,
|
||||
it should have been the result from the most recent
|
||||
.I regcomp
|
||||
using that
|
||||
.IR regex_t .
|
||||
.RI ( Regerror
|
||||
may be able to supply a more detailed message using information
|
||||
from the
|
||||
.IR regex_t .)
|
||||
.I Regerror
|
||||
places the NUL-terminated message into the buffer pointed to by
|
||||
.IR errbuf ,
|
||||
limiting the length (including the NUL) to at most
|
||||
.I errbuf_size
|
||||
bytes.
|
||||
If the whole message won't fit,
|
||||
as much of it as will fit before the terminating NUL is supplied.
|
||||
In any case,
|
||||
the returned value is the size of buffer needed to hold the whole
|
||||
message (including terminating NUL).
|
||||
If
|
||||
.I errbuf_size
|
||||
is 0,
|
||||
.I errbuf
|
||||
is ignored but the return value is still correct.
|
||||
.PP
|
||||
If the
|
||||
.I errcode
|
||||
given to
|
||||
.I regerror
|
||||
is first ORed with REG_ITOA,
|
||||
the ``message'' that results is the printable name of the error code,
|
||||
e.g. ``REG_NOMATCH'',
|
||||
rather than an explanation thereof.
|
||||
If
|
||||
.I errcode
|
||||
is REG_ATOI,
|
||||
then
|
||||
.I preg
|
||||
shall be non-NULL and the
|
||||
.I re_endp
|
||||
member of the structure it points to
|
||||
must point to the printable name of an error code;
|
||||
in this case, the result in
|
||||
.I errbuf
|
||||
is the decimal digits of
|
||||
the numeric value of the error code
|
||||
(0 if the name is not recognized).
|
||||
REG_ITOA and REG_ATOI are intended primarily as debugging facilities;
|
||||
they are extensions,
|
||||
compatible with but not specified by POSIX 1003.2,
|
||||
and should be used with
|
||||
caution in software intended to be portable to other systems.
|
||||
Be warned also that they are considered experimental and changes are possible.
|
||||
.PP
|
||||
.I Regfree
|
||||
frees any dynamically-allocated storage associated with the compiled RE
|
||||
pointed to by
|
||||
.IR preg .
|
||||
The remaining
|
||||
.I regex_t
|
||||
is no longer a valid compiled RE
|
||||
and the effect of supplying it to
|
||||
.I regexec
|
||||
or
|
||||
.I regerror
|
||||
is undefined.
|
||||
.PP
|
||||
None of these functions references global variables except for tables
|
||||
of constants;
|
||||
all are safe for use from multiple threads if the arguments are safe.
|
||||
.SH IMPLEMENTATION CHOICES
|
||||
There are a number of decisions that 1003.2 leaves up to the implementor,
|
||||
either by explicitly saying ``undefined'' or by virtue of them being
|
||||
forbidden by the RE grammar.
|
||||
This implementation treats them as follows.
|
||||
.PP
|
||||
See
|
||||
.ZR
|
||||
for a discussion of the definition of case-independent matching.
|
||||
.PP
|
||||
There is no particular limit on the length of REs,
|
||||
except insofar as memory is limited.
|
||||
Memory usage is approximately linear in RE size, and largely insensitive
|
||||
to RE complexity, except for bounded repetitions.
|
||||
See BUGS for one short RE using them
|
||||
that will run almost any system out of memory.
|
||||
.PP
|
||||
A backslashed character other than one specifically given a magic meaning
|
||||
by 1003.2 (such magic meanings occur only in obsolete [``basic''] REs)
|
||||
is taken as an ordinary character.
|
||||
.PP
|
||||
Any unmatched [ is a REG_EBRACK error.
|
||||
.PP
|
||||
Equivalence classes cannot begin or end bracket-expression ranges.
|
||||
The endpoint of one range cannot begin another.
|
||||
.PP
|
||||
RE_DUP_MAX, the limit on repetition counts in bounded repetitions, is 255.
|
||||
.PP
|
||||
A repetition operator (?, *, +, or bounds) cannot follow another
|
||||
repetition operator.
|
||||
A repetition operator cannot begin an expression or subexpression
|
||||
or follow `^' or `|'.
|
||||
.PP
|
||||
`|' cannot appear first or last in a (sub)expression or after another `|',
|
||||
i.e. an operand of `|' cannot be an empty subexpression.
|
||||
An empty parenthesized subexpression, `()', is legal and matches an
|
||||
empty (sub)string.
|
||||
An empty string is not a legal RE.
|
||||
.PP
|
||||
A `{' followed by a digit is considered the beginning of bounds for a
|
||||
bounded repetition, which must then follow the syntax for bounds.
|
||||
A `{' \fInot\fR followed by a digit is considered an ordinary character.
|
||||
.PP
|
||||
`^' and `$' beginning and ending subexpressions in obsolete (``basic'')
|
||||
REs are anchors, not ordinary characters.
|
||||
.SH SEE ALSO
|
||||
grep(1), re_format(7)
|
||||
.PP
|
||||
POSIX 1003.2, sections 2.8 (Regular Expression Notation)
|
||||
and
|
||||
B.5 (C Binding for Regular Expression Matching).
|
||||
.SH DIAGNOSTICS
|
||||
Non-zero error codes from
|
||||
.I regcomp
|
||||
and
|
||||
.I regexec
|
||||
include the following:
|
||||
.PP
|
||||
.nf
|
||||
.ta \w'REG_ECOLLATE'u+3n
|
||||
REG_NOMATCH regexec() failed to match
|
||||
REG_BADPAT invalid regular expression
|
||||
REG_ECOLLATE invalid collating element
|
||||
REG_ECTYPE invalid character class
|
||||
REG_EESCAPE \e applied to unescapable character
|
||||
REG_ESUBREG invalid backreference number
|
||||
REG_EBRACK brackets [ ] not balanced
|
||||
REG_EPAREN parentheses ( ) not balanced
|
||||
REG_EBRACE braces { } not balanced
|
||||
REG_BADBR invalid repetition count(s) in { }
|
||||
REG_ERANGE invalid character range in [ ]
|
||||
REG_ESPACE ran out of memory
|
||||
REG_BADRPT ?, *, or + operand invalid
|
||||
REG_EMPTY empty (sub)expression
|
||||
REG_ASSERT ``can't happen''\(emyou found a bug
|
||||
REG_INVARG invalid argument, e.g. negative-length string
|
||||
.fi
|
||||
.SH HISTORY
|
||||
Originally written by Henry Spencer.
|
||||
Altered for inclusion in the 4.4BSD distribution.
|
||||
.SH BUGS
|
||||
This is an alpha release with known defects.
|
||||
Please report problems.
|
||||
.PP
|
||||
There is one known functionality bug.
|
||||
The implementation of internationalization is incomplete:
|
||||
the locale is always assumed to be the default one of 1003.2,
|
||||
and only the collating elements etc. of that locale are available.
|
||||
.PP
|
||||
The back-reference code is subtle and doubts linger about its correctness
|
||||
in complex cases.
|
||||
.PP
|
||||
.I Regexec
|
||||
performance is poor.
|
||||
This will improve with later releases.
|
||||
.I Nmatch
|
||||
exceeding 0 is expensive;
|
||||
.I nmatch
|
||||
exceeding 1 is worse.
|
||||
.I Regexec
|
||||
is largely insensitive to RE complexity \fIexcept\fR that back
|
||||
references are massively expensive.
|
||||
RE length does matter; in particular, there is a strong speed bonus
|
||||
for keeping RE length under about 30 characters,
|
||||
with most special characters counting roughly double.
|
||||
.PP
|
||||
.I Regcomp
|
||||
implements bounded repetitions by macro expansion,
|
||||
which is costly in time and space if counts are large
|
||||
or bounded repetitions are nested.
|
||||
An RE like, say,
|
||||
`((((a{1,100}){1,100}){1,100}){1,100}){1,100}'
|
||||
will (eventually) run almost any existing machine out of swap space.
|
||||
.PP
|
||||
There are suspected problems with response to obscure error conditions.
|
||||
Notably,
|
||||
certain kinds of internal overflow,
|
||||
produced only by truly enormous REs or by multiply nested bounded repetitions,
|
||||
are probably not handled well.
|
||||
.PP
|
||||
Due to a mistake in 1003.2, things like `a)b' are legal REs because `)' is
|
||||
a special character only in the presence of a previous unmatched `('.
|
||||
This can't be fixed until the spec is fixed.
|
||||
.PP
|
||||
The standard's definition of back references is vague.
|
||||
For example, does
|
||||
`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?
|
||||
Until the standard is clarified,
|
||||
behavior in such cases should not be relied on.
|
||||
.PP
|
||||
The implementation of word-boundary matching is a bit of a kludge,
|
||||
and bugs may lurk in combinations of word-boundary matching and anchoring.
|
108
src/backend/regex/regex.h
Normal file
108
src/backend/regex/regex.h
Normal file
@ -0,0 +1,108 @@
|
||||
/*-
|
||||
* Copyright (c) 1992 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Henry Spencer of the University of Toronto.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)regex.h 8.2 (Berkeley) 1/3/94
|
||||
*/
|
||||
|
||||
#ifndef _REGEX_H_
|
||||
#define _REGEX_H_
|
||||
|
||||
/* #include <sys/cdefs.h> */
|
||||
/* since not all systems have cdefs.h, we'll use our own here - jolly */
|
||||
#include "cdefs.h"
|
||||
|
||||
/* types */
|
||||
typedef off_t regoff_t;
|
||||
|
||||
typedef struct {
|
||||
int re_magic;
|
||||
size_t re_nsub; /* number of parenthesized subexpressions */
|
||||
__const char *re_endp; /* end pointer for REG_PEND */
|
||||
struct re_guts *re_g; /* none of your business :-) */
|
||||
} regex_t;
|
||||
|
||||
typedef struct {
|
||||
regoff_t rm_so; /* start of match */
|
||||
regoff_t rm_eo; /* end of match */
|
||||
} regmatch_t;
|
||||
|
||||
/* regcomp() flags */
|
||||
#define REG_BASIC 0000
|
||||
#define REG_EXTENDED 0001
|
||||
#define REG_ICASE 0002
|
||||
#define REG_NOSUB 0004
|
||||
#define REG_NEWLINE 0010
|
||||
#define REG_NOSPEC 0020
|
||||
#define REG_PEND 0040
|
||||
#define REG_DUMP 0200
|
||||
|
||||
/* regerror() flags */
|
||||
#define REG_NOMATCH 1
|
||||
#define REG_BADPAT 2
|
||||
#define REG_ECOLLATE 3
|
||||
#define REG_ECTYPE 4
|
||||
#define REG_EESCAPE 5
|
||||
#define REG_ESUBREG 6
|
||||
#define REG_EBRACK 7
|
||||
#define REG_EPAREN 8
|
||||
#define REG_EBRACE 9
|
||||
#define REG_BADBR 10
|
||||
#define REG_ERANGE 11
|
||||
#define REG_ESPACE 12
|
||||
#define REG_BADRPT 13
|
||||
#define REG_EMPTY 14
|
||||
#define REG_ASSERT 15
|
||||
#define REG_INVARG 16
|
||||
#define REG_ATOI 255 /* convert name to number (!) */
|
||||
#define REG_ITOA 0400 /* convert number to name (!) */
|
||||
|
||||
/* regexec() flags */
|
||||
#define REG_NOTBOL 00001
|
||||
#define REG_NOTEOL 00002
|
||||
#define REG_STARTEND 00004
|
||||
#define REG_TRACE 00400 /* tracing of execution */
|
||||
#define REG_LARGE 01000 /* force large representation */
|
||||
#define REG_BACKR 02000 /* force use of backref code */
|
||||
|
||||
__BEGIN_DECLS
|
||||
int pg95_regcomp __P((regex_t *, const char *, int));
|
||||
size_t pg95_regerror __P((int, const regex_t *, char *, size_t));
|
||||
int pg95_regexec __P((const regex_t *,
|
||||
const char *, size_t, regmatch_t [], int));
|
||||
void pg95_regfree __P((regex_t *));
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_REGEX_H_ */
|
175
src/backend/regex/regex2.h
Normal file
175
src/backend/regex/regex2.h
Normal file
@ -0,0 +1,175 @@
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
|
||||
*/
|
||||
|
||||
/*
|
||||
* First, the stuff that ends up in the outside-world include file
|
||||
*/
|
||||
/*
|
||||
typedef off_t regoff_t;
|
||||
typedef struct {
|
||||
int re_magic;
|
||||
size_t re_nsub; // number of parenthesized subexpressions
|
||||
const char *re_endp; // end pointer for REG_PEND
|
||||
struct re_guts *re_g; // none of your business :-)
|
||||
} regex_t;
|
||||
typedef struct {
|
||||
regoff_t rm_so; // start of match
|
||||
regoff_t rm_eo; // end of match
|
||||
} regmatch_t;
|
||||
*/
|
||||
/*
|
||||
* internals of regex_t
|
||||
*/
|
||||
#define MAGIC1 ((('r'^0200)<<8) | 'e')
|
||||
|
||||
/*
|
||||
* The internal representation is a *strip*, a sequence of
|
||||
* operators ending with an endmarker. (Some terminology etc. is a
|
||||
* historical relic of earlier versions which used multiple strips.)
|
||||
* Certain oddities in the representation are there to permit running
|
||||
* the machinery backwards; in particular, any deviation from sequential
|
||||
* flow must be marked at both its source and its destination. Some
|
||||
* fine points:
|
||||
*
|
||||
* - OPLUS_ and O_PLUS are *inside* the loop they create.
|
||||
* - OQUEST_ and O_QUEST are *outside* the bypass they create.
|
||||
* - OCH_ and O_CH are *outside* the multi-way branch they create, while
|
||||
* OOR1 and OOR2 are respectively the end and the beginning of one of
|
||||
* the branches. Note that there is an implicit OOR2 following OCH_
|
||||
* and an implicit OOR1 preceding O_CH.
|
||||
*
|
||||
* In state representations, an operator's bit is on to signify a state
|
||||
* immediately *preceding* "execution" of that operator.
|
||||
*/
|
||||
typedef unsigned long sop; /* strip operator */
|
||||
typedef long sopno;
|
||||
#define OPRMASK 0xf8000000
|
||||
#define OPDMASK 0x07ffffff
|
||||
#define OPSHIFT ((unsigned)27)
|
||||
#define OP(n) ((n)&OPRMASK)
|
||||
#define OPND(n) ((n)&OPDMASK)
|
||||
#define SOP(op, opnd) ((op)|(opnd))
|
||||
/* operators meaning operand */
|
||||
/* (back, fwd are offsets) */
|
||||
#define OEND (1<<OPSHIFT) /* endmarker - */
|
||||
#define OCHAR (2<<OPSHIFT) /* character unsigned char */
|
||||
#define OBOL (3<<OPSHIFT) /* left anchor - */
|
||||
#define OEOL (4<<OPSHIFT) /* right anchor - */
|
||||
#define OANY (5<<OPSHIFT) /* . - */
|
||||
#define OANYOF (6<<OPSHIFT) /* [...] set number */
|
||||
#define OBACK_ (7<<OPSHIFT) /* begin \d paren number */
|
||||
#define O_BACK (8<<OPSHIFT) /* end \d paren number */
|
||||
#define OPLUS_ (9<<OPSHIFT) /* + prefix fwd to suffix */
|
||||
#define O_PLUS (10<<OPSHIFT) /* + suffix back to prefix */
|
||||
#define OQUEST_ (11<<OPSHIFT) /* ? prefix fwd to suffix */
|
||||
#define O_QUEST (12<<OPSHIFT) /* ? suffix back to prefix */
|
||||
#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */
|
||||
#define ORPAREN (14<<OPSHIFT) /* ) back to ( */
|
||||
#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */
|
||||
#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
|
||||
#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
|
||||
#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */
|
||||
#define OBOW (19<<OPSHIFT) /* begin word - */
|
||||
#define OEOW (20<<OPSHIFT) /* end word - */
|
||||
|
||||
/*
|
||||
* Structure for [] character-set representation. Character sets are
|
||||
* done as bit vectors, grouped 8 to a byte vector for compactness.
|
||||
* The individual set therefore has both a pointer to the byte vector
|
||||
* and a mask to pick out the relevant bit of each byte. A hash code
|
||||
* simplifies testing whether two sets could be identical.
|
||||
*
|
||||
* This will get trickier for multicharacter collating elements. As
|
||||
* preliminary hooks for dealing with such things, we also carry along
|
||||
* a string of multi-character elements, and decide the size of the
|
||||
* vectors at run time.
|
||||
*/
|
||||
typedef struct {
|
||||
uch *ptr; /* -> uch [csetsize] */
|
||||
uch mask; /* bit within array */
|
||||
uch hash; /* hash code */
|
||||
size_t smultis;
|
||||
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
|
||||
} cset;
|
||||
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
|
||||
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
|
||||
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
|
||||
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
|
||||
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
|
||||
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
|
||||
#define MCin(p, cs, cp) mcin(p, cs, cp)
|
||||
|
||||
/* stuff for character categories */
|
||||
typedef unsigned char cat_t;
|
||||
|
||||
/*
|
||||
* main compiled-expression structure
|
||||
*/
|
||||
struct re_guts {
|
||||
int magic;
|
||||
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
||||
sop *strip; /* malloced area for strip */
|
||||
int csetsize; /* number of bits in a cset vector */
|
||||
int ncsets; /* number of csets in use */
|
||||
cset *sets; /* -> cset [ncsets] */
|
||||
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
||||
int cflags; /* copy of regcomp() cflags argument */
|
||||
sopno nstates; /* = number of sops */
|
||||
sopno firststate; /* the initial OEND (normally 0) */
|
||||
sopno laststate; /* the final OEND */
|
||||
int iflags; /* internal flags */
|
||||
# define USEBOL 01 /* used ^ */
|
||||
# define USEEOL 02 /* used $ */
|
||||
# define BAD 04 /* something wrong */
|
||||
int nbol; /* number of ^ used */
|
||||
int neol; /* number of $ used */
|
||||
int ncategories; /* how many character categories */
|
||||
cat_t *categories; /* ->catspace[-CHAR_MIN] */
|
||||
char *must; /* match must contain this string */
|
||||
int mlen; /* length of must */
|
||||
size_t nsub; /* copy of re_nsub */
|
||||
int backrefs; /* does it use back references? */
|
||||
sopno nplus; /* how deep does it nest +s? */
|
||||
/* catspace must be last */
|
||||
cat_t catspace[1]; /* actually [NC] */
|
||||
};
|
||||
|
||||
/* misc utilities */
|
||||
#define OUT (CHAR_MAX+1) /* a non-character value */
|
||||
#define ISWORD(c) (isalnum(c) || (c) == '_')
|
181
src/backend/regex/regexec.c
Normal file
181
src/backend/regex/regexec.c
Normal file
@ -0,0 +1,181 @@
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)regexec.c 8.3 (Berkeley) 3/20/94
|
||||
*/
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
static char sccsid[] = "@(#)regexec.c 8.3 (Berkeley) 3/20/94";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
|
||||
/*
|
||||
* the outer shell of regexec()
|
||||
*
|
||||
* This file includes engine.c *twice*, after muchos fiddling with the
|
||||
* macros that code uses. This lets the same code operate on two different
|
||||
* representations for state sets.
|
||||
*/
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <ctype.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
|
||||
static int nope = 0; /* for use in asserts; shuts lint up */
|
||||
|
||||
/* macros for manipulating states, small version */
|
||||
#define states long
|
||||
#define states1 states /* for later use in regexec() decision */
|
||||
#define CLEAR(v) ((v) = 0)
|
||||
#define SET0(v, n) ((v) &= ~(1 << (n)))
|
||||
#define SET1(v, n) ((v) |= 1 << (n))
|
||||
#define ISSET(v, n) ((v) & (1 << (n)))
|
||||
#define ASSIGN(d, s) ((d) = (s))
|
||||
#define EQ(a, b) ((a) == (b))
|
||||
#define STATEVARS int dummy /* dummy version */
|
||||
#define STATESETUP(m, n) /* nothing */
|
||||
#define STATETEARDOWN(m) /* nothing */
|
||||
#define SETUP(v) ((v) = 0)
|
||||
#define onestate int
|
||||
#define INIT(o, n) ((o) = (unsigned)1 << (n))
|
||||
#define INC(o) ((o) <<= 1)
|
||||
#define ISSTATEIN(v, o) ((v) & (o))
|
||||
/* some abbreviations; note that some of these know variable names! */
|
||||
/* do "if I'm here, I can also be there" etc without branches */
|
||||
#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n))
|
||||
#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n))
|
||||
#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n)))
|
||||
/* function names */
|
||||
#define SNAMES /* engine.c looks after details */
|
||||
|
||||
#include "engine.c"
|
||||
|
||||
/* now undo things */
|
||||
#undef states
|
||||
#undef CLEAR
|
||||
#undef SET0
|
||||
#undef SET1
|
||||
#undef ISSET
|
||||
#undef ASSIGN
|
||||
#undef EQ
|
||||
#undef STATEVARS
|
||||
#undef STATESETUP
|
||||
#undef STATETEARDOWN
|
||||
#undef SETUP
|
||||
#undef onestate
|
||||
#undef INIT
|
||||
#undef INC
|
||||
#undef ISSTATEIN
|
||||
#undef FWD
|
||||
#undef BACK
|
||||
#undef ISSETBACK
|
||||
#undef SNAMES
|
||||
|
||||
/* macros for manipulating states, large version */
|
||||
#define states char *
|
||||
#define CLEAR(v) memset(v, 0, m->g->nstates)
|
||||
#define SET0(v, n) ((v)[n] = 0)
|
||||
#define SET1(v, n) ((v)[n] = 1)
|
||||
#define ISSET(v, n) ((v)[n])
|
||||
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
|
||||
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
|
||||
#define STATEVARS int vn; char *space
|
||||
#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
|
||||
if ((m)->space == NULL) return(REG_ESPACE); \
|
||||
(m)->vn = 0; }
|
||||
#define STATETEARDOWN(m) { free((m)->space); }
|
||||
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
|
||||
#define onestate int
|
||||
#define INIT(o, n) ((o) = (n))
|
||||
#define INC(o) ((o)++)
|
||||
#define ISSTATEIN(v, o) ((v)[o])
|
||||
/* some abbreviations; note that some of these know variable names! */
|
||||
/* do "if I'm here, I can also be there" etc without branches */
|
||||
#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
|
||||
#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
|
||||
#define ISSETBACK(v, n) ((v)[here - (n)])
|
||||
/* function names */
|
||||
#define LNAMES /* flag */
|
||||
|
||||
#include "engine.c"
|
||||
|
||||
/*
|
||||
- regexec - interface for matching
|
||||
= extern int regexec(const regex_t *, const char *, size_t, \
|
||||
= regmatch_t [], int);
|
||||
= #define REG_NOTBOL 00001
|
||||
= #define REG_NOTEOL 00002
|
||||
= #define REG_STARTEND 00004
|
||||
= #define REG_TRACE 00400 // tracing of execution
|
||||
= #define REG_LARGE 01000 // force large representation
|
||||
= #define REG_BACKR 02000 // force use of backref code
|
||||
*
|
||||
* We put this here so we can exploit knowledge of the state representation
|
||||
* when choosing which matcher to call. Also, by this point the matchers
|
||||
* have been prototyped.
|
||||
*/
|
||||
int /* 0 success, REG_NOMATCH failure */
|
||||
pg95_regexec(preg, string, nmatch, pmatch, eflags)
|
||||
const regex_t *preg;
|
||||
const char *string;
|
||||
size_t nmatch;
|
||||
regmatch_t pmatch[];
|
||||
int eflags;
|
||||
{
|
||||
register struct re_guts *g = preg->re_g;
|
||||
#ifdef REDEBUG
|
||||
# define GOODFLAGS(f) (f)
|
||||
#else
|
||||
# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
|
||||
#endif
|
||||
|
||||
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
|
||||
return(REG_BADPAT);
|
||||
assert(!(g->iflags&BAD));
|
||||
if (g->iflags&BAD) /* backstop for no-debug case */
|
||||
return(REG_BADPAT);
|
||||
eflags = GOODFLAGS(eflags);
|
||||
|
||||
if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE))
|
||||
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
else
|
||||
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
}
|
71
src/backend/regex/regexp.h
Normal file
71
src/backend/regex/regexp.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 1986 by University of Toronto.
|
||||
* Copyright (c) 1989, 1993
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley
|
||||
* by Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)regexp.h 8.1 (Berkeley) 6/2/93
|
||||
*/
|
||||
|
||||
#ifndef _REGEXP_H_
|
||||
#define _REGEXP_H_
|
||||
|
||||
/*
|
||||
* Definitions etc. for regexp(3) routines.
|
||||
*
|
||||
* Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
|
||||
* not the System V one.
|
||||
*/
|
||||
#define NSUBEXP 10
|
||||
typedef struct regexp {
|
||||
char *startp[NSUBEXP];
|
||||
char *endp[NSUBEXP];
|
||||
char regstart; /* Internal use only. */
|
||||
char reganch; /* Internal use only. */
|
||||
char *regmust; /* Internal use only. */
|
||||
int regmlen; /* Internal use only. */
|
||||
char program[1]; /* Unwarranted chumminess with compiler. */
|
||||
} regexp;
|
||||
|
||||
/* #include <sys/cdefs.h> */
|
||||
/* since not all systems have cdefs.h, we'll use our own here - jolly */
|
||||
#include "cdefs.h"
|
||||
|
||||
__BEGIN_DECLS
|
||||
regexp *pg95_regcomp __P((const char *));
|
||||
int pg95_regexec __P((const regexp *, const char *));
|
||||
void pg95_regsub __P((const regexp *, const char *, char *));
|
||||
void pg95_regerror __P((const char *));
|
||||
__END_DECLS
|
||||
|
||||
#endif /* !_REGEXP_H_ */
|
80
src/backend/regex/regfree.c
Normal file
80
src/backend/regex/regfree.c
Normal file
@ -0,0 +1,80 @@
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)regfree.c 8.3 (Berkeley) 3/20/94
|
||||
*/
|
||||
|
||||
#if defined(LIBC_SCCS) && !defined(lint)
|
||||
static char sccsid[] = "@(#)regfree.c 8.3 (Berkeley) 3/20/94";
|
||||
#endif /* LIBC_SCCS and not lint */
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
|
||||
/*
|
||||
- regfree - free everything
|
||||
= extern void regfree(regex_t *);
|
||||
*/
|
||||
void
|
||||
pg95_regfree(preg)
|
||||
regex_t *preg;
|
||||
{
|
||||
register struct re_guts *g;
|
||||
|
||||
if (preg->re_magic != MAGIC1) /* oops */
|
||||
return; /* nice to complain, but hard */
|
||||
|
||||
g = preg->re_g;
|
||||
if (g == NULL || g->magic != MAGIC2) /* oops again */
|
||||
return;
|
||||
preg->re_magic = 0; /* mark it invalid */
|
||||
g->magic = 0; /* mark it invalid */
|
||||
|
||||
if (g->strip != NULL)
|
||||
free((char *)g->strip);
|
||||
if (g->sets != NULL)
|
||||
free((char *)g->sets);
|
||||
if (g->setbits != NULL)
|
||||
free((char *)g->setbits);
|
||||
if (g->must != NULL)
|
||||
free(g->must);
|
||||
free((char *)g);
|
||||
}
|
57
src/backend/regex/utils.h
Normal file
57
src/backend/regex/utils.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*-
|
||||
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
|
||||
* Copyright (c) 1992, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to Berkeley by
|
||||
* Henry Spencer.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* @(#)utils.h 8.3 (Berkeley) 3/20/94
|
||||
*/
|
||||
|
||||
/* utility definitions */
|
||||
#define DUPMAX 100000000 /* xxx is this right? */
|
||||
#define INFINITY (DUPMAX + 1)
|
||||
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
||||
typedef unsigned char uch;
|
||||
|
||||
/* switch off assertions (if not already off) if no REDEBUG */
|
||||
#ifndef REDEBUG
|
||||
#ifndef NDEBUG
|
||||
#define NDEBUG /* no assertions please */
|
||||
#endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
/* for old systems with bcopy() but no memmove() */
|
||||
#if defined(PORTNAME_sparc)
|
||||
#define memmove(d, s, c) bcopy(s, d, c)
|
||||
#endif
|
Reference in New Issue
Block a user