mirror of
https://github.com/MariaDB/server.git
synced 2025-07-29 05:21:33 +03:00
merge 10.0-base ->10.0
This commit is contained in:
@ -8584,7 +8584,7 @@ char *re_eprint(int err)
|
||||
{
|
||||
static char epbuf[100];
|
||||
size_t len __attribute__((unused))=
|
||||
regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
|
||||
regerror(err, (regex_t *)NULL, epbuf, sizeof(epbuf));
|
||||
assert(len <= sizeof(epbuf));
|
||||
return(epbuf);
|
||||
}
|
||||
|
@ -4019,4 +4019,25 @@ c1
|
||||
NULL
|
||||
2
|
||||
DROP TABLE t1,t2;
|
||||
#
|
||||
# MDEV-5369: Wrong result (0 instead of NULL) on 2nd execution of
|
||||
# PS with LEFT JOIN, TEMPTABLE view
|
||||
#
|
||||
CREATE TABLE t1 (a INT) ENGINE=MyISAM;
|
||||
INSERT INTO t1 VALUES (0),(8);
|
||||
CREATE TABLE t2 (pk INT PRIMARY KEY) ENGINE=MyISAM;
|
||||
CREATE ALGORITHM=TEMPTABLE VIEW v2 AS SELECT * FROM t2;
|
||||
SELECT SUM(pk) FROM t1 LEFT JOIN v2 ON a = pk;
|
||||
SUM(pk)
|
||||
NULL
|
||||
PREPARE stmt FROM "SELECT SUM(pk) FROM t1 LEFT JOIN v2 ON a = pk";
|
||||
EXECUTE stmt;
|
||||
SUM(pk)
|
||||
NULL
|
||||
EXECUTE stmt;
|
||||
SUM(pk)
|
||||
NULL
|
||||
DEALLOCATE PREPARE stmt;
|
||||
DROP VIEW v2;
|
||||
DROP TABLE t1, t2;
|
||||
# End of 5.3 tests
|
||||
|
@ -219,7 +219,7 @@ drop table t1;
|
||||
#
|
||||
CREATE TABLE t1 (my_primary_key varchar(10) PRIMARY KEY) ENGINE=CASSANDRA
|
||||
thrift_host='localhost' keyspace='mariadbtest2' column_family = 'cf10';
|
||||
ERROR HY000: Internal error: 'target column family has no key_alias defined, PRIMARY KEY column must be named 'rowkey''
|
||||
ERROR HY000: Internal error: target column family has no key_alias defined, PRIMARY KEY column must be named 'rowkey'
|
||||
CREATE TABLE t1 (rowkey varchar(10) PRIMARY KEY) ENGINE=CASSANDRA
|
||||
thrift_host='localhost' keyspace='mariadbtest2' column_family = 'cf10';
|
||||
DROP TABLE t1;
|
||||
@ -365,7 +365,7 @@ drop table t2;
|
||||
CREATE TABLE t2 (rowkey varchar(32) PRIMARY KEY, varint_col varbinary(2)) ENGINE=CASSANDRA
|
||||
thrift_host='localhost' keyspace='mariadbtest2' column_family = 'cf9';
|
||||
select rowkey, hex(varint_col) from t2;
|
||||
ERROR HY000: Internal error: 'Unable to convert value for field `varint_col` from Cassandra's data format. Source data is 4 bytes, 0x12345678'
|
||||
ERROR HY000: Internal error: Unable to convert value for field `varint_col` from Cassandra's data format. Source data is 4 bytes, 0x12345678
|
||||
drop table t2;
|
||||
#
|
||||
# Decimal datatype support
|
||||
@ -557,7 +557,7 @@ delete from t1;
|
||||
drop table t1;
|
||||
CREATE TABLE t1 (rowkey varchar(10) PRIMARY KEY, dyn blob DYNAMIC_COLUMN_STORAGE=yes) ENGINE=CASSANDRA thrift_host='localhost' keyspace='mariadbtest2' column_family = 'cfd1';
|
||||
select * from t1;
|
||||
ERROR HY000: Internal error: 'Unable to convert value for field `dyn` from Cassandra's data format. Name length exceed limit of 16383: 'very_very_very_very_very_very_very_very_very_very_very_very_very_very_very_very_very_v'
|
||||
ERROR HY000: Internal error: Unable to convert value for field `dyn` from Cassandra's data format. Name length exceed limit of 16383: 'very_very_very_very_very_very_very_very_very_very_very_very_very_very_very_very_very_v
|
||||
drop table t1;
|
||||
CREATE TABLE t1 (rowkey int PRIMARY KEY, dyn blob DYNAMIC_COLUMN_STORAGE=yes)
|
||||
ENGINE=CASSANDRA thrift_host='localhost' keyspace='mariadbtest2' column_family = 'cfd2';
|
||||
|
@ -3,10 +3,23 @@
|
||||
# get .result differences from CURRENT_USER().
|
||||
--source include/not_as_root.inc
|
||||
|
||||
# The previous check verifies that the user does not have root permissions.
|
||||
# However in some cases tests are run under a user named 'root',
|
||||
# even though this user does not have real root permissions.
|
||||
# This test should be skipped in this case, since it does not expect
|
||||
# that there are records in mysql.user where user=<username>
|
||||
if ($USER=="root") {
|
||||
skip Cannot be run by user named 'root' even if it does not have all privileges;
|
||||
}
|
||||
|
||||
if (!$AUTH_SOCKET_SO) {
|
||||
skip No auth_socket plugin;
|
||||
}
|
||||
|
||||
if (!$USER) {
|
||||
skip USER variable is undefined;
|
||||
}
|
||||
|
||||
let $plugindir=`SELECT @@global.plugin_dir`;
|
||||
|
||||
eval install plugin unix_socket soname '$AUTH_SOCKET_SO';
|
||||
|
@ -3594,4 +3594,27 @@ EXECUTE stmt;
|
||||
|
||||
DROP TABLE t1,t2;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-5369: Wrong result (0 instead of NULL) on 2nd execution of
|
||||
--echo # PS with LEFT JOIN, TEMPTABLE view
|
||||
--echo #
|
||||
|
||||
|
||||
CREATE TABLE t1 (a INT) ENGINE=MyISAM;
|
||||
INSERT INTO t1 VALUES (0),(8);
|
||||
|
||||
CREATE TABLE t2 (pk INT PRIMARY KEY) ENGINE=MyISAM;
|
||||
CREATE ALGORITHM=TEMPTABLE VIEW v2 AS SELECT * FROM t2;
|
||||
|
||||
SELECT SUM(pk) FROM t1 LEFT JOIN v2 ON a = pk;
|
||||
|
||||
PREPARE stmt FROM "SELECT SUM(pk) FROM t1 LEFT JOIN v2 ON a = pk";
|
||||
EXECUTE stmt;
|
||||
EXECUTE stmt;
|
||||
DEALLOCATE PREPARE stmt;
|
||||
|
||||
DROP VIEW v2;
|
||||
DROP TABLE t1, t2;
|
||||
|
||||
--echo # End of 5.3 tests
|
||||
|
@ -61,6 +61,10 @@
|
||||
# 2012-09-08 ChPe added PCRE32 support
|
||||
# 2012-10-23 PH added support for VALGRIND and GCOV
|
||||
# 2012-12-08 PH added patch from Daniel Richard G to quash some MSVC warnings
|
||||
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
|
||||
# so it has been removed.
|
||||
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
|
||||
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
|
||||
|
||||
PROJECT(PCRE C CXX)
|
||||
|
||||
@ -107,6 +111,9 @@ CHECK_TYPE_SIZE("unsigned long long" UNSIGNED_LONG_LONG)
|
||||
SET(PCRE_LINK_SIZE "2" CACHE STRING
|
||||
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
|
||||
|
||||
SET(PCRE_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||
|
||||
@ -322,6 +329,15 @@ TARGET_LINK_LIBRARIES(pcreposix pcre)
|
||||
|
||||
# Executables
|
||||
|
||||
# Removed by PH (2008-01-23) because pcredemo shouldn't really be built
|
||||
# automatically, and it gave trouble in some environments anyway.
|
||||
# ADD_EXECUTABLE(pcredemo pcredemo.c)
|
||||
# TARGET_LINK_LIBRARIES(pcredemo pcreposix)
|
||||
# IF(NOT BUILD_SHARED_LIBS)
|
||||
# # make sure to not use declspec(dllimport) in static mode on windows
|
||||
# SET_TARGET_PROPERTIES(pcredemo PROPERTIES COMPILE_FLAGS "-DPCRE_STATIC")
|
||||
# ENDIF(NOT BUILD_SHARED_LIBS)
|
||||
|
||||
IF(PCRE_BUILD_PCREGREP)
|
||||
ADD_EXECUTABLE(pcregrep pcregrep.c)
|
||||
SET(targets ${targets} pcregrep)
|
||||
@ -469,17 +485,25 @@ IF(PCRE_SHOW_REPORT)
|
||||
MESSAGE(STATUS "")
|
||||
MESSAGE(STATUS "PCRE configuration summary:")
|
||||
MESSAGE(STATUS "")
|
||||
# MESSAGE(STATUS " Install prefix .................. : ${CMAKE_INSTALL_PREFIX}")
|
||||
MESSAGE(STATUS " C compiler ...................... : ${CMAKE_C_COMPILER}")
|
||||
# MESSAGE(STATUS " C++ compiler .................... : ${CMAKE_CXX_COMPILER}")
|
||||
MESSAGE(STATUS " C compiler flags ................ : ${CMAKE_C_FLAGS}${cfsp}${CMAKE_C_FLAGS_${buildtype}}")
|
||||
# MESSAGE(STATUS " Enable JIT compiling support .... : ${PCRE_SUPPORT_JIT}")
|
||||
MESSAGE(STATUS " Unicode properties .............. : ${PCRE_SUPPORT_UNICODE_PROPERTIES}")
|
||||
MESSAGE(STATUS " Newline char/sequence ........... : ${PCRE_NEWLINE}")
|
||||
MESSAGE(STATUS " \\R matches only ANYCRLF ......... : ${PCRE_SUPPORT_BSR_ANYCRLF}")
|
||||
# MESSAGE(STATUS " EBCDIC coding ................... : ${PCRE_EBCDIC}")
|
||||
# MESSAGE(STATUS " EBCDIC coding with NL=0x25 ...... : ${PCRE_EBCDIC_NL25}")
|
||||
# MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE_REBUILD_CHARTABLES}")
|
||||
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
|
||||
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
|
||||
MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
|
||||
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE_PARENS_NEST_LIMIT}")
|
||||
MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
|
||||
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
|
||||
# MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||
# MESSAGE(STATUS " Build static libs ............... : ${BUILD_STATIC_LIBS}")
|
||||
MESSAGE(STATUS " Build pcregrep .................. : ${PCRE_BUILD_PCREGREP}")
|
||||
# MESSAGE(STATUS " Enable JIT in pcregrep .......... : ${PCRE_SUPPORT_PCREGREP_JIT}")
|
||||
MESSAGE(STATUS " Buffer size for pcregrep ........ : ${PCREGREP_BUFSIZE}")
|
||||
|
210
pcre/ChangeLog
210
pcre/ChangeLog
@ -1,8 +1,216 @@
|
||||
ChangeLog for PCRE
|
||||
------------------
|
||||
|
||||
Version 8.34 15-December-2013
|
||||
-----------------------------
|
||||
|
||||
1. Add pcre[16|32]_jit_free_unused_memory to forcibly free unused JIT
|
||||
executable memory. Patch inspired by Carsten Klein.
|
||||
|
||||
2. ./configure --enable-coverage defined SUPPORT_GCOV in config.h, although
|
||||
this macro is never tested and has no effect, because the work to support
|
||||
coverage involves only compiling and linking options and special targets in
|
||||
the Makefile. The comment in config.h implied that defining the macro would
|
||||
enable coverage support, which is totally false. There was also support for
|
||||
setting this macro in the CMake files (my fault, I just copied it from
|
||||
configure). SUPPORT_GCOV has now been removed.
|
||||
|
||||
3. Make a small performance improvement in strlen16() and strlen32() in
|
||||
pcretest.
|
||||
|
||||
4. Change 36 for 8.33 left some unreachable statements in pcre_exec.c,
|
||||
detected by the Solaris compiler (gcc doesn't seem to be able to diagnose
|
||||
these cases). There was also one in pcretest.c.
|
||||
|
||||
5. Cleaned up a "may be uninitialized" compiler warning in pcre_exec.c.
|
||||
|
||||
6. In UTF mode, the code for checking whether a group could match an empty
|
||||
string (which is used for indefinitely repeated groups to allow for
|
||||
breaking an infinite loop) was broken when the group contained a repeated
|
||||
negated single-character class with a character that occupied more than one
|
||||
data item and had a minimum repetition of zero (for example, [^\x{100}]* in
|
||||
UTF-8 mode). The effect was undefined: the group might or might not be
|
||||
deemed as matching an empty string, or the program might have crashed.
|
||||
|
||||
7. The code for checking whether a group could match an empty string was not
|
||||
recognizing that \h, \H, \v, \V, and \R must match a character.
|
||||
|
||||
8. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||
an empty string. If it can, pcretest shows this in its information output.
|
||||
|
||||
9. Fixed two related bugs that applied to Unicode extended grapheme clusters
|
||||
that were repeated with a maximizing qualifier (e.g. \X* or \X{2,5}) when
|
||||
matched by pcre_exec() without using JIT:
|
||||
|
||||
(a) If the rest of the pattern did not match after a maximal run of
|
||||
grapheme clusters, the code for backing up to try with fewer of them
|
||||
did not always back up over a full grapheme when characters that do not
|
||||
have the modifier quality were involved, e.g. Hangul syllables.
|
||||
|
||||
(b) If the match point in a subject started with a modifier character, and
|
||||
there was no match, the code could incorrectly back up beyond the match
|
||||
point, and potentially beyond the first character in the subject,
|
||||
leading to a segfault or an incorrect match result.
|
||||
|
||||
10. A conditional group with an assertion condition could lead to PCRE
|
||||
recording an incorrect first data item for a match if no other first data
|
||||
item was recorded. For example, the pattern (?(?=ab)ab) recorded "a" as a
|
||||
first data item, and therefore matched "ca" after "c" instead of at the
|
||||
start.
|
||||
|
||||
11. Change 40 for 8.33 (allowing pcregrep to find empty strings) showed up a
|
||||
bug that caused the command "echo a | ./pcregrep -M '|a'" to loop.
|
||||
|
||||
12. The source of pcregrep now includes z/OS-specific code so that it can be
|
||||
compiled for z/OS as part of the special z/OS distribution.
|
||||
|
||||
13. Added the -T and -TM options to pcretest.
|
||||
|
||||
14. The code in pcre_compile.c for creating the table of named capturing groups
|
||||
has been refactored. Instead of creating the table dynamically during the
|
||||
actual compiling pass, the information is remembered during the pre-compile
|
||||
pass (on the stack unless there are more than 20 named groups, in which
|
||||
case malloc() is used) and the whole table is created before the actual
|
||||
compile happens. This has simplified the code (it is now nearly 150 lines
|
||||
shorter) and prepared the way for better handling of references to groups
|
||||
with duplicate names.
|
||||
|
||||
15. A back reference to a named subpattern when there is more than one of the
|
||||
same name now checks them in the order in which they appear in the pattern.
|
||||
The first one that is set is used for the reference. Previously only the
|
||||
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||
|
||||
16. Unicode character properties were updated from Unicode 6.3.0.
|
||||
|
||||
17. The compile-time code for auto-possessification has been refactored, based
|
||||
on a patch by Zoltan Herczeg. It now happens after instead of during
|
||||
compilation. The code is cleaner, and more cases are handled. The option
|
||||
PCRE_NO_AUTO_POSSESS is added for testing purposes, and the -O and /O
|
||||
options in pcretest are provided to set it. It can also be set by
|
||||
(*NO_AUTO_POSSESS) at the start of a pattern.
|
||||
|
||||
18. The character VT has been added to the default ("C" locale) set of
|
||||
characters that match \s and are generally treated as white space,
|
||||
following this same change in Perl 5.18. There is now no difference between
|
||||
"Perl space" and "POSIX space". Whether VT is treated as white space in
|
||||
other locales depends on the locale.
|
||||
|
||||
19. The code for checking named groups as conditions, either for being set or
|
||||
for being recursed, has been refactored (this is related to 14 and 15
|
||||
above). Processing unduplicated named groups should now be as fast as
|
||||
numerical groups, and processing duplicated groups should be faster than
|
||||
before.
|
||||
|
||||
20. Two patches to the CMake build system, by Alexander Barkov:
|
||||
|
||||
(1) Replace the "source" command by "." in CMakeLists.txt because
|
||||
"source" is a bash-ism.
|
||||
|
||||
(2) Add missing HAVE_STDINT_H and HAVE_INTTYPES_H to config-cmake.h.in;
|
||||
without these the CMake build does not work on Solaris.
|
||||
|
||||
21. Perl has changed its handling of \8 and \9. If there is no previously
|
||||
encountered capturing group of those numbers, they are treated as the
|
||||
literal characters 8 and 9 instead of a binary zero followed by the
|
||||
literals. PCRE now does the same.
|
||||
|
||||
22. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||
possible to specify values greater than 0777 and also making them
|
||||
unambiguous.
|
||||
|
||||
23. Perl now gives an error for missing closing braces after \x{... instead of
|
||||
treating the string as literal. PCRE now does the same.
|
||||
|
||||
24. RunTest used to grumble if an inappropriate test was selected explicitly,
|
||||
but just skip it when running all tests. This made it awkward to run ranges
|
||||
of tests when one of them was inappropriate. Now it just skips any
|
||||
inappropriate tests, as it always did when running all tests.
|
||||
|
||||
25. If PCRE_AUTO_CALLOUT and PCRE_UCP were set for a pattern that contained
|
||||
character types such as \d or \w, too many callouts were inserted, and the
|
||||
data that they returned was rubbish.
|
||||
|
||||
26. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||
were matched by \h. The code has now been refactored so that the lists of
|
||||
the horizontal and vertical whitespace characters used for \h and \v (which
|
||||
are defined only in one place) are now also used for \s.
|
||||
|
||||
27. Add JIT support for the 64 bit TileGX architecture.
|
||||
Patch by Jiong Wang (Tilera Corporation).
|
||||
|
||||
28. Possessive quantifiers for classes (both explicit and automatically
|
||||
generated) now use special opcodes instead of wrapping in ONCE brackets.
|
||||
|
||||
29. Whereas an item such as A{4}+ ignored the possessiveness of the quantifier
|
||||
(because it's meaningless), this was not happening when PCRE_CASELESS was
|
||||
set. Not wrong, but inefficient.
|
||||
|
||||
30. Updated perltest.pl to add /u (force Unicode mode) when /W (use Unicode
|
||||
properties for \w, \d, etc) is present in a test regex. Otherwise if the
|
||||
test contains no characters greater than 255, Perl doesn't realise it
|
||||
should be using Unicode semantics.
|
||||
|
||||
31. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||
does in Unicode mode.
|
||||
|
||||
32. Added the "forbid" facility to pcretest so that putting tests into the
|
||||
wrong test files can sometimes be quickly detected.
|
||||
|
||||
33. There is now a limit (default 250) on the depth of nesting of parentheses.
|
||||
This limit is imposed to control the amount of system stack used at compile
|
||||
time. It can be changed at build time by --with-parens-nest-limit=xxx or
|
||||
the equivalent in CMake.
|
||||
|
||||
34. Character classes such as [A-\d] or [a-[:digit:]] now cause compile-time
|
||||
errors. Perl warns for these when in warning mode, but PCRE has no facility
|
||||
for giving warnings.
|
||||
|
||||
35. Change 34 for 8.13 allowed quantifiers on assertions, because Perl does.
|
||||
However, this was not working for (?!) because it is optimized to (*FAIL),
|
||||
for which PCRE does not allow quantifiers. The optimization is now disabled
|
||||
when a quantifier follows (?!). I can't see any use for this, but it makes
|
||||
things uniform.
|
||||
|
||||
36. Perl no longer allows group names to start with digits, so I have made this
|
||||
change also in PCRE. It simplifies the code a bit.
|
||||
|
||||
37. In extended mode, Perl ignores spaces before a + that indicates a
|
||||
possessive quantifier. PCRE allowed a space before the quantifier, but not
|
||||
before the possessive +. It now does.
|
||||
|
||||
38. The use of \K (reset reported match start) within a repeated possessive
|
||||
group such as (a\Kb)*+ was not working.
|
||||
|
||||
40. Document that the same character tables must be used at compile time and
|
||||
run time, and that the facility to pass tables to pcre_exec() and
|
||||
pcre_dfa_exec() is for use only with saved/restored patterns.
|
||||
|
||||
41. Applied Jeff Trawick's patch CMakeLists.txt, which "provides two new
|
||||
features for Builds with MSVC:
|
||||
|
||||
1. Support pcre.rc and/or pcreposix.rc (as is already done for MinGW
|
||||
builds). The .rc files can be used to set FileDescription and many other
|
||||
attributes.
|
||||
|
||||
2. Add an option (-DINSTALL_MSVC_PDB) to enable installation of .pdb files.
|
||||
This allows higher-level build scripts which want .pdb files to avoid
|
||||
hard-coding the exact files needed."
|
||||
|
||||
42. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||
|
||||
43. A minimizing repeat of a class containing codepoints greater than 255 in
|
||||
non-UTF 16-bit or 32-bit modes caused an internal error when PCRE was
|
||||
compiled to use the heap for recursion.
|
||||
|
||||
44. Got rid of some compiler warnings for unused variables when UTF but not UCP
|
||||
is configured.
|
||||
|
||||
|
||||
Version 8.33 28-May-2013
|
||||
--------------------------
|
||||
------------------------
|
||||
|
||||
1. Added 'U' to some constants that are compared to unsigned integers, to
|
||||
avoid compiler signed/unsigned warnings. Added (int) casts to unsigned
|
||||
|
263
pcre/HACKING
263
pcre/HACKING
@ -54,12 +54,12 @@ Support for 16-bit and 32-bit data strings
|
||||
|
||||
From release 8.30, PCRE supports 16-bit as well as 8-bit data strings; and from
|
||||
release 8.32, PCRE supports 32-bit data strings. The library can be compiled
|
||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating different
|
||||
libraries. In the description that follows, the word "short" is
|
||||
used for a 16-bit data quantity, and the word "unit" is used for a quantity
|
||||
that is a byte in 8-bit mode, a short in 16-bit mode and a 32-bit unsigned
|
||||
integer in 32-bit mode. However, so as not to over-complicate the text, the
|
||||
names of PCRE functions are given in 8-bit form only.
|
||||
in any combination of 8-bit, 16-bit or 32-bit modes, creating up to three
|
||||
different libraries. In the description that follows, the word "short" is used
|
||||
for a 16-bit data quantity, and the word "unit" is used for a quantity that is
|
||||
a byte in 8-bit mode, a short in 16-bit mode and a 32-bit word in 32-bit mode.
|
||||
However, so as not to over-complicate the text, the names of PCRE functions are
|
||||
given in 8-bit form only.
|
||||
|
||||
|
||||
Computing the memory requirement: how it was
|
||||
@ -94,6 +94,11 @@ runs more slowly than before (30% or more, depending on the pattern) because it
|
||||
is doing a full analysis of the pattern. My hope was that this would not be a
|
||||
big issue, and in the event, nobody has commented on it.
|
||||
|
||||
At release 8.34, a limit on the nesting depth of parentheses was re-introduced
|
||||
(default 250, settable at build time) so as to put a limit on the amount of
|
||||
system stack used by pcre_compile(). This is a safety feature for environments
|
||||
with small stacks where the patterns are provided by users.
|
||||
|
||||
|
||||
Traditional matching function
|
||||
-----------------------------
|
||||
@ -122,27 +127,28 @@ same way. See the user documentation for details.
|
||||
The algorithm that is used for pcre_dfa_exec() is not a traditional FSM,
|
||||
because it may have a number of states active at one time. More work would be
|
||||
needed at compile time to produce a traditional FSM where only one state is
|
||||
ever active at once. I believe some other regex matchers work this way.
|
||||
ever active at once. I believe some other regex matchers work this way. JIT
|
||||
support is not available for this kind of matching.
|
||||
|
||||
|
||||
Changeable options
|
||||
------------------
|
||||
|
||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL) may
|
||||
change in the middle of patterns. From PCRE 8.13, their processing is handled
|
||||
entirely at compile time by generating different opcodes for the different
|
||||
settings. The runtime functions do not need to keep track of an options state
|
||||
any more.
|
||||
The /i, /m, or /s options (PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and some
|
||||
others) may change in the middle of patterns. From PCRE 8.13, their processing
|
||||
is handled entirely at compile time by generating different opcodes for the
|
||||
different settings. The runtime functions do not need to keep track of an
|
||||
options state any more.
|
||||
|
||||
|
||||
Format of compiled patterns
|
||||
---------------------------
|
||||
|
||||
The compiled form of a pattern is a vector of units (bytes in 8-bit mode, or
|
||||
shorts in 16-bit mode, 32-bit unsigned integers in 32-bit mode), containing
|
||||
items of variable length. The first unit in an item contains an opcode, and
|
||||
the length of the item is either implicit in the opcode or contained in the
|
||||
data that follows it.
|
||||
The compiled form of a pattern is a vector of unsigned units (bytes in 8-bit
|
||||
mode, shorts in 16-bit mode, 32-bit words in 32-bit mode), containing items of
|
||||
variable length. The first unit in an item contains an opcode, and the length
|
||||
of the item is either implicit in the opcode or contained in the data that
|
||||
follows it.
|
||||
|
||||
In many cases listed below, LINK_SIZE data values are specified for offsets
|
||||
within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
|
||||
@ -151,8 +157,10 @@ default value for LINK_SIZE is 2, but PCRE can be compiled to use 3-byte or
|
||||
LINK_SIZE values are available only in 8-bit mode.) Specifying a LINK_SIZE
|
||||
larger than 2 is necessary only when patterns whose compiled length is greater
|
||||
than 64K are going to be processed. In this description, we assume the "normal"
|
||||
compilation options. Data values that are counts (e.g. for quantifiers) are
|
||||
always just two bytes long (one short in 16-bit mode).
|
||||
compilation options. Data values that are counts (e.g. quantifiers) are two
|
||||
bytes long in 8-bit mode (most significant byte first), or one unit in 16-bit
|
||||
and 32-bit modes.
|
||||
|
||||
|
||||
Opcodes with no following data
|
||||
------------------------------
|
||||
@ -162,7 +170,7 @@ These items are all just one unit long
|
||||
OP_END end of pattern
|
||||
OP_ANY match any one character other than newline
|
||||
OP_ALLANY match any one character, including newline
|
||||
OP_ANYBYTE match any single byte, even in UTF-8 mode
|
||||
OP_ANYBYTE match any single unit, even in UTF-8/16 mode
|
||||
OP_SOD match start of data: \A
|
||||
OP_SOM, start of match (subject + offset): \G
|
||||
OP_SET_SOM, set start of match (\K)
|
||||
@ -180,28 +188,33 @@ These items are all just one unit long
|
||||
OP_VSPACE \v
|
||||
OP_NOT_WORDCHAR \W
|
||||
OP_WORDCHAR \w
|
||||
OP_EODN match end of data or \n at end: \Z
|
||||
OP_EODN match end of data or newline at end: \Z
|
||||
OP_EOD match end of data: \z
|
||||
OP_DOLL $ (end of data, or before final newline)
|
||||
OP_DOLLM $ multiline mode (end of data or before newline)
|
||||
OP_EXTUNI match an extended Unicode character
|
||||
OP_EXTUNI match an extended Unicode grapheme cluster
|
||||
OP_ANYNL match any Unicode newline sequence
|
||||
|
||||
OP_ASSERT_ACCEPT )
|
||||
OP_ACCEPT ) These are Perl 5.10's "backtracking control
|
||||
OP_COMMIT ) verbs". If OP_ACCEPT is inside capturing
|
||||
OP_FAIL ) parentheses, it may be preceded by one or more
|
||||
OP_PRUNE ) OP_CLOSE, followed by a 2-byte number,
|
||||
OP_SKIP ) indicating which parentheses must be closed.
|
||||
OP_PRUNE ) OP_CLOSE, each followed by a count that
|
||||
OP_SKIP ) indicates which parentheses must be closed.
|
||||
OP_THEN )
|
||||
|
||||
OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion.
|
||||
This ends the assertion, not the entire pattern match.
|
||||
|
||||
|
||||
Backtracking control verbs with (optional) data
|
||||
-----------------------------------------------
|
||||
Backtracking control verbs with optional data
|
||||
---------------------------------------------
|
||||
|
||||
(*THEN) without an argument generates the opcode OP_THEN and no following data.
|
||||
OP_MARK is followed by the mark name, preceded by a one-unit length, and
|
||||
followed by a binary zero. For (*PRUNE), (*SKIP), and (*THEN) with arguments,
|
||||
the opcodes OP_PRUNE_ARG, OP_SKIP_ARG, and OP_THEN_ARG are used, with the name
|
||||
following in the same format.
|
||||
following in the same format as OP_MARK.
|
||||
|
||||
|
||||
Matching literal characters
|
||||
@ -212,6 +225,10 @@ casefully. For caseless matching, OP_CHARI is used. In UTF-8 or UTF-16 modes,
|
||||
the character may be more than one unit long. In UTF-32 mode, characters
|
||||
are always exactly one unit long.
|
||||
|
||||
If there is only one character in a character class, OP_CHAR or OP_CHARI is
|
||||
used for a positive class, and OP_NOT or OP_NOTI for a negative one (that is,
|
||||
for something like [^a]).
|
||||
|
||||
|
||||
Repeating single characters
|
||||
---------------------------
|
||||
@ -232,10 +249,9 @@ following opcodes, which come in caseful and caseless versions:
|
||||
|
||||
Each opcode is followed by the character that is to be repeated. In ASCII mode,
|
||||
these are two-unit items; in UTF-8 or UTF-16 modes, the length is variable; in
|
||||
UTF-32 mode these are one-unit items.
|
||||
Those with "MIN" in their names are the minimizing versions. Those with "POS"
|
||||
in their names are possessive versions. Other repeats make use of these
|
||||
opcodes:
|
||||
UTF-32 mode these are one-unit items. Those with "MIN" in their names are the
|
||||
minimizing versions. Those with "POS" in their names are possessive versions.
|
||||
Other repeats make use of these opcodes:
|
||||
|
||||
Caseful Caseless
|
||||
OP_UPTO OP_UPTOI
|
||||
@ -243,10 +259,15 @@ opcodes:
|
||||
OP_POSUPTO OP_POSUPTOI
|
||||
OP_EXACT OP_EXACTI
|
||||
|
||||
Each of these is followed by a two-byte (one short) count (most significant
|
||||
byte first in 8-bit mode) and then the repeated character. OP_UPTO matches from
|
||||
0 to the given number. A repeat with a non-zero minimum and a fixed maximum is
|
||||
coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or OPT_POSUPTO).
|
||||
Each of these is followed by a count and then the repeated character. OP_UPTO
|
||||
matches from 0 to the given number. A repeat with a non-zero minimum and a
|
||||
fixed maximum is coded as an OP_EXACT followed by an OP_UPTO (or OP_MINUPTO or
|
||||
OP_POSUPTO).
|
||||
|
||||
Another set of matching repeating opcodes (called OP_NOTSTAR, OP_NOTSTARI,
|
||||
etc.) are used for repeated, negated, single-character classes such as [^a]*.
|
||||
The normal single-character opcodes (OP_STAR, etc.) are used for repeated
|
||||
positive single-character classes.
|
||||
|
||||
|
||||
Repeating character types
|
||||
@ -277,7 +298,10 @@ Match by Unicode property
|
||||
OP_PROP and OP_NOTPROP are used for positive and negative matches of a
|
||||
character by testing its Unicode property (the \p and \P escape sequences).
|
||||
Each is followed by two units that encode the desired property as a type and a
|
||||
value.
|
||||
value. The types are a set of #defines of the form PT_xxx, and the values are
|
||||
enumerations of the form ucp_xx, defined in the ucp.h source file. The value is
|
||||
relevant only for PT_GC (General Category), PT_PC (Particular Category), and
|
||||
PT_SC (Script).
|
||||
|
||||
Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
|
||||
three units: OP_PROP or OP_NOTPROP, and then the desired property type and
|
||||
@ -287,67 +311,88 @@ value.
|
||||
Character classes
|
||||
-----------------
|
||||
|
||||
If there is only one character in the class, OP_CHAR or OP_CHARI is used for a
|
||||
If there is only one character in a class, OP_CHAR or OP_CHARI is used for a
|
||||
positive class, and OP_NOT or OP_NOTI for a negative one (that is, for
|
||||
something like [^a]).
|
||||
|
||||
Another set of 13 repeating opcodes (called OP_NOTSTAR etc.) are used for
|
||||
repeated, negated, single-character classes. The normal single-character
|
||||
opcodes (OP_STAR, etc.) are used for repeated positive single-character
|
||||
classes.
|
||||
A set of repeating opcodes (called OP_NOTSTAR etc.) are used for repeated,
|
||||
negated, single-character classes. The normal single-character opcodes
|
||||
(OP_STAR, etc.) are used for repeated positive single-character classes.
|
||||
|
||||
When there is more than one character in a class and all the characters are
|
||||
When there is more than one character in a class, and all the code points are
|
||||
less than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short)
|
||||
bit map containing a 1 bit for every character that is acceptable. The bits are
|
||||
counted from the least significant end of each unit. In caseless mode, bits for
|
||||
both cases are set.
|
||||
negative one. In either case, the opcode is followed by a 32-byte (16-short,
|
||||
8-word) bit map containing a 1 bit for every character that is acceptable. The
|
||||
bits are counted from the least significant end of each unit. In caseless mode,
|
||||
bits for both cases are set.
|
||||
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32 mode,
|
||||
subject characters with values greater than 255 can be handled correctly. For
|
||||
OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||
The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8/16/32
|
||||
mode, subject characters with values greater than 255 can be handled correctly.
|
||||
For OP_CLASS they do not match, whereas for OP_NCLASS they do.
|
||||
|
||||
For classes containing characters with values greater than 255, OP_XCLASS is
|
||||
used. It optionally uses a bit map (if any characters lie within it), followed
|
||||
by a list of pairs (for a range) and single characters. In caseless mode, both
|
||||
cases are explicitly listed. There is a flag character that indicates whether
|
||||
it is a positive or a negative class.
|
||||
For classes containing characters with values greater than 255 or that contain
|
||||
\p or \P, OP_XCLASS is used. It optionally uses a bit map if any code points
|
||||
are less than 256, followed by a list of pairs (for a range) and single
|
||||
characters. In caseless mode, both cases are explicitly listed.
|
||||
|
||||
OP_XCLASS is followed by a unit containing flag bits: XCL_NOT indicates that
|
||||
this is a negative class, and XCL_MAP indicates that a bit map is present.
|
||||
There follows the bit map, if XCL_MAP is set, and then a sequence of items
|
||||
coded as follows:
|
||||
|
||||
XCL_END marks the end of the list
|
||||
XCL_SINGLE one character follows
|
||||
XCL_RANGE two characters follow
|
||||
XCL_PROP a Unicode property (type, value) follows
|
||||
XCL_NOTPROP a Unicode property (type, value) follows
|
||||
|
||||
If a range starts with a code point less than 256 and ends with one greater
|
||||
than 256, an XCL_RANGE item is used, without setting any bits in the bit map.
|
||||
This means that if no other items in the class set bits in the map, a map is
|
||||
not needed.
|
||||
|
||||
|
||||
Back references
|
||||
---------------
|
||||
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by two bytes (one short)
|
||||
containing the reference number.
|
||||
OP_REF (caseful) or OP_REFI (caseless) is followed by a count containing the
|
||||
reference number if the reference is to a unique capturing group (either by
|
||||
number or by name). When named groups are used, there may be more than one
|
||||
group with the same name. In this case, a reference by name generates OP_DNREF
|
||||
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
|
||||
in the group name table of the first entry for the required name, followed by
|
||||
the number of groups with the same name.
|
||||
|
||||
|
||||
Repeating character classes and back references
|
||||
-----------------------------------------------
|
||||
|
||||
Single-character classes are handled specially (see above). This section
|
||||
applies to OP_CLASS and OP_REF[I]. In both cases, the repeat information
|
||||
follows the base item. The matching code looks at the following opcode to see
|
||||
if it is one of
|
||||
applies to other classes and also to back references. In both cases, the repeat
|
||||
information follows the base item. The matching code looks at the following
|
||||
opcode to see if it is one of
|
||||
|
||||
OP_CRSTAR
|
||||
OP_CRMINSTAR
|
||||
OP_CRPOSSTAR
|
||||
OP_CRPLUS
|
||||
OP_CRMINPLUS
|
||||
OP_CRPOSPLUS
|
||||
OP_CRQUERY
|
||||
OP_CRMINQUERY
|
||||
OP_CRPOSQUERY
|
||||
OP_CRRANGE
|
||||
OP_CRMINRANGE
|
||||
OP_CRPOSRANGE
|
||||
|
||||
All but the last two are just single-unit items. The others are followed by
|
||||
four bytes (two shorts) of data, comprising the minimum and maximum repeat
|
||||
counts. There are no special possessive opcodes for these repeats; a possessive
|
||||
repeat is compiled into an atomic group.
|
||||
All but the last three are single-unit items, with no data. The others are
|
||||
followed by the minimum and maximum repeat counts.
|
||||
|
||||
|
||||
Brackets and alternation
|
||||
------------------------
|
||||
|
||||
A pair of non-capturing (round) brackets is wrapped round each expression at
|
||||
A pair of non-capturing round brackets is wrapped round each expression at
|
||||
compile time, so alternation always happens in the context of brackets.
|
||||
|
||||
[Note for North Americans: "bracket" to some English speakers, including
|
||||
@ -364,13 +409,13 @@ A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
|
||||
next alternative OP_ALT or, if there aren't any branches, to the matching
|
||||
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
|
||||
the next one, or to the OP_KET opcode. For capturing brackets, the bracket
|
||||
number immediately follows the offset, always as a 2-byte (one short) item.
|
||||
number is a count that immediately follows the offset.
|
||||
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, and
|
||||
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
|
||||
maximally respectively (see below for possessive repetitions). All three are
|
||||
followed by LINK_SIZE bytes giving (as a positive number) the offset back to
|
||||
the matching bracket opcode.
|
||||
OP_KET is used for subpatterns that do not repeat indefinitely, and OP_KETRMIN
|
||||
and OP_KETRMAX are used for indefinite repetitions, minimally or maximally
|
||||
respectively (see below for possessive repetitions). All three are followed by
|
||||
LINK_SIZE bytes giving (as a positive number) the offset back to the matching
|
||||
bracket opcode.
|
||||
|
||||
If a subpattern is quantified such that it is permitted to match zero times, it
|
||||
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
|
||||
@ -397,6 +442,7 @@ final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
|
||||
that it needs to check for matching an empty string when it hits OP_KETRMIN or
|
||||
OP_KETRMAX, and if so, to break the loop.
|
||||
|
||||
|
||||
Possessive brackets
|
||||
-------------------
|
||||
|
||||
@ -407,26 +453,34 @@ of OP_SCBRA. The end of such a group is marked by OP_KETRPOS. If the minimum
|
||||
repetition is zero, the group is preceded by OP_BRAPOSZERO.
|
||||
|
||||
|
||||
Once-only (atomic) groups
|
||||
-------------------------
|
||||
|
||||
These are just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE or OP_ONCE_NC. The former is used when there are no capturing brackets
|
||||
within the atomic group; the latter when there are. The distinction is needed
|
||||
for when there is a backtrack to before the group - any captures within the
|
||||
group must be reset, so it is necessary to retain backtracking points inside
|
||||
the group even after it is complete in order to do this. When there are no
|
||||
captures in an atomic group, all the backtracking can be discarded when it is
|
||||
complete. This is more efficient, and also uses less stack.
|
||||
|
||||
The check for matching an empty string in an unbounded repeat is handled
|
||||
entirely at runtime, so there are just these two opcodes for atomic groups.
|
||||
|
||||
|
||||
Assertions
|
||||
----------
|
||||
|
||||
Forward assertions are just like other subpatterns, but starting with one of
|
||||
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
Forward assertions are also just like other subpatterns, but starting with one
|
||||
of the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
|
||||
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
|
||||
is OP_REVERSE, followed by a two byte (one short) count of the number of
|
||||
characters to move back the pointer in the subject string. In ASCII mode, the
|
||||
count is a number of units, but in UTF-8/16 mode each character may occupy more
|
||||
than one unit; in UTF-32 mode each character occupies exactly one unit.
|
||||
A separate count is present in each alternative of a lookbehind
|
||||
assertion, allowing them to have different fixed lengths.
|
||||
|
||||
|
||||
Once-only (atomic) subpatterns
|
||||
------------------------------
|
||||
|
||||
These are also just like other subpatterns, but they start with the opcode
|
||||
OP_ONCE. The check for matching an empty string in an unbounded repeat is
|
||||
handled entirely at runtime, so there is just this one opcode.
|
||||
is OP_REVERSE, followed by a count of the number of characters to move back the
|
||||
pointer in the subject string. In ASCII mode, the count is a number of units,
|
||||
but in UTF-8/16 mode each character may occupy more than one unit; in UTF-32
|
||||
mode each character occupies exactly one unit. A separate count is present in
|
||||
each alternative of a lookbehind assertion, allowing them to have different
|
||||
fixed lengths.
|
||||
|
||||
|
||||
Conditional subpatterns
|
||||
@ -435,28 +489,29 @@ Conditional subpatterns
|
||||
These are like other subpatterns, but they start with the opcode OP_COND, or
|
||||
OP_SCOND for one that might match an empty string in an unbounded repeat. If
|
||||
the condition is a back reference, this is stored at the start of the
|
||||
subpattern using the opcode OP_CREF followed by two bytes (one short)
|
||||
containing the reference number. OP_NCREF is used instead if the reference was
|
||||
generated by name (so that the runtime code knows to check for duplicate
|
||||
names).
|
||||
subpattern using the opcode OP_CREF followed by a count containing the
|
||||
reference number, provided that the reference is to a unique capturing group.
|
||||
If the reference was by name and there is more than one group with that name,
|
||||
OP_DNCREF is used instead. It is followed by two counts: the index in the group
|
||||
names table, and the number of groups with the same name.
|
||||
|
||||
If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
|
||||
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
|
||||
subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
|
||||
zero for "the whole pattern". For a DEFINE condition, just the single unit
|
||||
OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
|
||||
always starts with one of the assertions.
|
||||
subpattern using the opcode OP_RREF (with a value of zero for "the whole
|
||||
pattern") or OP_DNRREF (with data as for OP_DNCREF). For a DEFINE condition,
|
||||
just the single unit OP_DEF is used (it has no associated data). Otherwise, a
|
||||
conditional subpattern always starts with one of the assertions.
|
||||
|
||||
|
||||
Recursion
|
||||
---------
|
||||
|
||||
Recursion either matches the current regex, or some subexpression. The opcode
|
||||
OP_RECURSE is followed by a value which is the offset to the starting bracket
|
||||
from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||
automatically wrapped inside OP_ONCE brackets (because otherwise some patterns
|
||||
broke it). OP_RECURSE is also used for "subroutine" calls, even though they
|
||||
are not strictly a recursion.
|
||||
OP_RECURSE is followed by a LINK_SIZE value that is the offset to the starting
|
||||
bracket from the start of the whole pattern. From release 6.5, OP_RECURSE is
|
||||
automatically wrapped inside OP_ONCE brackets, because otherwise some patterns
|
||||
broke it. OP_RECURSE is also used for "subroutine" calls, even though they are
|
||||
not strictly a recursion.
|
||||
|
||||
|
||||
Callout
|
||||
@ -464,10 +519,10 @@ Callout
|
||||
|
||||
OP_CALLOUT is followed by one unit of data that holds a callout number in the
|
||||
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both
|
||||
cases there follows a two-byte (one short) value giving the offset in the
|
||||
pattern to the start of the following item, and another two-byte (one short)
|
||||
item giving the length of the next item.
|
||||
|
||||
cases there follows a count giving the offset in the pattern string to the
|
||||
start of the following item, and another count giving the length of this item.
|
||||
These values make it possible for pcretest to output useful tracing information
|
||||
using automatic callouts.
|
||||
|
||||
Philip Hazel
|
||||
February 2012
|
||||
November 2013
|
||||
|
47
pcre/NEWS
47
pcre/NEWS
@ -1,6 +1,53 @@
|
||||
News about PCRE releases
|
||||
------------------------
|
||||
|
||||
Release 8.34 15-December-2013
|
||||
-----------------------------
|
||||
|
||||
As well as fixing the inevitable bugs, performance has been improved by
|
||||
refactoring and extending the amount of "auto-possessification" that PCRE does.
|
||||
Other notable changes:
|
||||
|
||||
. Implemented PCRE_INFO_MATCH_EMPTY, which yields 1 if the pattern can match
|
||||
an empty string. If it can, pcretest shows this in its information output.
|
||||
|
||||
. A back reference to a named subpattern when there is more than one of the
|
||||
same name now checks them in the order in which they appear in the pattern.
|
||||
The first one that is set is used for the reference. Previously only the
|
||||
first one was inspected. This change makes PCRE more compatible with Perl.
|
||||
|
||||
. Unicode character properties were updated from Unicode 6.3.0.
|
||||
|
||||
. The character VT has been added to the set of characters that match \s and
|
||||
are generally treated as white space, following this same change in Perl
|
||||
5.18. There is now no difference between "Perl space" and "POSIX space".
|
||||
|
||||
. Perl has changed its handling of \8 and \9. If there is no previously
|
||||
encountered capturing group of those numbers, they are treated as the
|
||||
literal characters 8 and 9 instead of a binary zero followed by the
|
||||
literals. PCRE now does the same.
|
||||
|
||||
. Following Perl, added \o{} to specify codepoints in octal, making it
|
||||
possible to specify values greater than 0777 and also making them
|
||||
unambiguous.
|
||||
|
||||
. In UCP mode, \s was not matching two of the characters that Perl matches,
|
||||
namely NEL (U+0085) and MONGOLIAN VOWEL SEPARATOR (U+180E), though they
|
||||
were matched by \h.
|
||||
|
||||
. Add JIT support for the 64 bit TileGX architecture.
|
||||
|
||||
. Upgraded the handling of the POSIX classes [:graph:], [:print:], and
|
||||
[:punct:] when PCRE_UCP is set so as to include the same characters as Perl
|
||||
does in Unicode mode.
|
||||
|
||||
. Perl no longer allows group names to start with digits, so I have made this
|
||||
change also in PCRE.
|
||||
|
||||
. Added support for [[:<:]] and [[:>:]] as used in the BSD POSIX library to
|
||||
mean "start of word" and "end of word", respectively, as a transition aid.
|
||||
|
||||
|
||||
Release 8.33 28-May-2013
|
||||
--------------------------
|
||||
|
||||
|
@ -171,8 +171,8 @@ can skip ahead to the CMake section.
|
||||
pcre16_version.c
|
||||
pcre16_xclass.c
|
||||
|
||||
(8) If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
||||
or 32-bit libraries) repeat steps 5-6 with the following files:
|
||||
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||
|
||||
pcre32_byte_order.c
|
||||
pcre32_chartables.c
|
||||
|
28
pcre/README
28
pcre/README
@ -9,8 +9,10 @@ from:
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE at
|
||||
pcre-dev@exim.org. You can access the archives and subscribe or manage your
|
||||
subscription here:
|
||||
|
||||
pcre-dev@exim.org
|
||||
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
@ -112,6 +114,11 @@ contributions provided support for compiling PCRE on various flavours of
|
||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||
in the standard distribution, so these contributions have been archived.
|
||||
|
||||
A PCRE user maintains downloadable Windows binaries of the pcregrep and
|
||||
pcretest programs here:
|
||||
|
||||
http://www.rexegg.com/pcregrep-pcretest.html
|
||||
|
||||
|
||||
Building PCRE on non-Unix-like systems
|
||||
--------------------------------------
|
||||
@ -262,9 +269,17 @@ library. They are also documented in the pcrebuild man page.
|
||||
|
||||
on the "configure" command.
|
||||
|
||||
. PCRE has a counter that can be set to limit the amount of resources it uses.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
. PCRE has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
example,
|
||||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
@ -344,7 +359,8 @@ library. They are also documented in the pcrebuild man page.
|
||||
report is generated by running "make coverage". If ccache is installed on
|
||||
your system, it must be disabled when building PCRE for coverage reporting.
|
||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||
running "make" to build PCRE.
|
||||
running "make" to build PCRE. There is more information about coverage
|
||||
reporting in the "pcrebuild" documentation.
|
||||
|
||||
. The pcregrep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||
@ -971,4 +987,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 28 April 2013
|
||||
Last updated: 05 November 2013
|
||||
|
@ -502,6 +502,10 @@ echo "---------------------------- Test 105 -----------------------------" >>tes
|
||||
(cd $srcdir; $valgrind $pcregrep --colour=always "ipsum|" ./testdata/grepinput3) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
echo "---------------------------- Test 106 -----------------------------" >>testtry
|
||||
(cd $srcdir; echo "a" | $valgrind $pcregrep -M "|a" ) >>testtry 2>&1
|
||||
echo "RC=$?" >>testtry
|
||||
|
||||
|
||||
# Now compare the results.
|
||||
|
||||
|
99
pcre/RunTest
99
pcre/RunTest
@ -14,11 +14,11 @@
|
||||
# UTF-8 with the UTF-8 check turned off; for this, studying must also be
|
||||
# disabled with /SS.
|
||||
#
|
||||
# When JIT support is available, all the tests are also run with -s+ to test
|
||||
# (again, almost) everything with studying and the JIT option, unless "nojit"
|
||||
# is given on the command line. There are also two tests for JIT-specific
|
||||
# features, one to be run when JIT support is available (unless "nojit" is
|
||||
# specified), and one when it is not.
|
||||
# When JIT support is available, all appropriate tests are also run with -s+ to
|
||||
# test (again, almost) everything with studying and the JIT option, unless
|
||||
# "nojit" is given on the command line. There are also two tests for
|
||||
# JIT-specific features, one to be run when JIT support is available (unless
|
||||
# "nojit" is specified), and one when it is not.
|
||||
#
|
||||
# Whichever of the 8-, 16- and 32-bit libraries exist are tested. It is also
|
||||
# possible to select which to test by giving "-8", "-16" or "-32" on the
|
||||
@ -30,9 +30,13 @@
|
||||
# runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
|
||||
# except test 10. Whatever order the arguments are in, the tests are always run
|
||||
# in numerical order.
|
||||
|
||||
#
|
||||
# Inappropriate tests are automatically skipped (with a comment to say so): for
|
||||
# example, if JIT support is not compiled, test 12 is skipped, whereas if JIT
|
||||
# support is compiled, test 13 is skipped.
|
||||
#
|
||||
# Other arguments can be one of the words "valgrind", "valgrind-log", or "sim"
|
||||
# followed by an argument to run cross- compiled executables under a simulator,
|
||||
# followed by an argument to run cross-compiled executables under a simulator,
|
||||
# for example:
|
||||
#
|
||||
# RunTest 3 sim "qemu-arm -s 8388608"
|
||||
@ -62,8 +66,8 @@ title8="Test 8: DFA matching main functionality"
|
||||
title9="Test 9: DFA matching with UTF"
|
||||
title10="Test 10: DFA matching with Unicode properties"
|
||||
title11="Test 11: Internal offsets and code size tests"
|
||||
title12="Test 12: JIT-specific features (JIT available)"
|
||||
title13="Test 13: JIT-specific features (JIT not available)"
|
||||
title12="Test 12: JIT-specific features (when JIT is available)"
|
||||
title13="Test 13: JIT-specific features (when JIT is not available)"
|
||||
title14="Test 14: Specials for the basic 8-bit library"
|
||||
title15="Test 15: Specials for the 8-bit library with UTF-8 support"
|
||||
title16="Test 16: Specials for the 8-bit library with Unicode propery support"
|
||||
@ -350,79 +354,6 @@ if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
|
||||
jitopt=-s+
|
||||
fi
|
||||
|
||||
# Handle any explicit skips
|
||||
|
||||
for i in $skip; do eval do$i=no; done
|
||||
|
||||
# If any unsuitable tests were explicitly requested, grumble.
|
||||
|
||||
if [ $utf -eq 0 ] ; then
|
||||
if [ $do4 = yes ] ; then
|
||||
echo "Can't run test 4 because UTF support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do5 = yes ] ; then
|
||||
echo "Can't run test 5 because UTF support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do9 = yes ] ; then
|
||||
echo "Can't run test 8 because UTF support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do15 = yes ] ; then
|
||||
echo "Can't run test 15 because UTF support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do18 = yes ] ; then
|
||||
echo "Can't run test 18 because UTF support is not configured"
|
||||
fi
|
||||
if [ $do22 = yes ] ; then
|
||||
echo "Can't run test 22 because UTF support is not configured"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $ucp -eq 0 ] ; then
|
||||
if [ $do6 = yes ] ; then
|
||||
echo "Can't run test 6 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do7 = yes ] ; then
|
||||
echo "Can't run test 7 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do10 = yes ] ; then
|
||||
echo "Can't run test 10 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do16 = yes ] ; then
|
||||
echo "Can't run test 16 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
if [ $do19 = yes ] ; then
|
||||
echo "Can't run test 19 because Unicode property support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $link_size -ne 2 ] ; then
|
||||
if [ $do11 = yes ] ; then
|
||||
echo "Can't run test 11 because the link size ($link_size) is not 2"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $jit -eq 0 ] ; then
|
||||
if [ $do12 = "yes" ] ; then
|
||||
echo "Can't run test 12 because JIT support is not configured"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
if [ $do13 = "yes" ] ; then
|
||||
echo "Can't run test 13 because JIT support is configured"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# If no specific tests were requested, select all. Those that are not
|
||||
# relevant will be automatically skipped.
|
||||
|
||||
@ -461,8 +392,8 @@ if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a \
|
||||
do26=yes
|
||||
fi
|
||||
|
||||
# Handle any explicit skips (again, so that an argument list may consist only
|
||||
# of explicit skips).
|
||||
# Handle any explicit skips at this stage, so that an argument list may consist
|
||||
# only of explicit skips.
|
||||
|
||||
for i in $skip; do eval do$i=no; done
|
||||
|
||||
|
@ -46,6 +46,7 @@
|
||||
#define NEWLINE @NEWLINE@
|
||||
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
|
||||
#define LINK_SIZE @PCRE_LINK_SIZE@
|
||||
#define PARENS_NEST_LIMIT @PCRE_PARENS_NEST_LIMIT@
|
||||
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
|
||||
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
|
||||
|
@ -9,17 +9,17 @@ dnl The PCRE_PRERELEASE feature is for identifying release candidates. It might
|
||||
dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||
|
||||
m4_define(pcre_major, [8])
|
||||
m4_define(pcre_minor, [33])
|
||||
m4_define(pcre_minor, [34])
|
||||
m4_define(pcre_prerelease, [])
|
||||
m4_define(pcre_date, [2013-05-28])
|
||||
m4_define(pcre_date, [2013-12-15])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
||||
# Libtool shared library interface versions (current:revision:age)
|
||||
m4_define(libpcre_version, [3:1:2])
|
||||
m4_define(libpcre16_version, [2:1:2])
|
||||
m4_define(libpcre32_version, [0:1:0])
|
||||
m4_define(libpcre_version, [3:2:2])
|
||||
m4_define(libpcre16_version, [2:2:2])
|
||||
m4_define(libpcre32_version, [0:2:0])
|
||||
m4_define(libpcreposix_version, [0:2:0])
|
||||
m4_define(libpcrecpp_version, [0:0:0])
|
||||
|
||||
@ -275,6 +275,12 @@ AC_ARG_WITH(link-size,
|
||||
[internal link size (2, 3, or 4 allowed; default=2)]),
|
||||
, with_link_size=2)
|
||||
|
||||
# Handle --with-parens-nest-limit=N
|
||||
AC_ARG_WITH(parens-nest-limit,
|
||||
AS_HELP_STRING([--with-parens-nest-limit=N],
|
||||
[nested parentheses limit (default=250)]),
|
||||
, with_parens_nest_limit=250)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
@ -784,6 +790,11 @@ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
|
||||
faster than using malloc() for each call. The threshold above which
|
||||
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||
The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
stack that is used while compiling a pattern.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
@ -957,7 +968,7 @@ if test "$enable_pcretest_libreadline" = "yes"; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for valgrind
|
||||
# Handle valgrind support
|
||||
|
||||
if test "$enable_valgrind" = "yes"; then
|
||||
m4_ifdef([PKG_CHECK_MODULES],
|
||||
@ -965,7 +976,7 @@ if test "$enable_valgrind" = "yes"; then
|
||||
[AC_MSG_ERROR([pkg-config not supported])])
|
||||
fi
|
||||
|
||||
# test code coverage reporting
|
||||
# Handle code coverage reporting support
|
||||
if test "$enable_coverage" = "yes"; then
|
||||
if test "x$GCC" != "xyes"; then
|
||||
AC_MSG_ERROR([Code coverage reports can only be generated when using GCC])
|
||||
@ -996,11 +1007,7 @@ if test "$enable_coverage" = "yes"; then
|
||||
AC_MSG_ERROR([genhtml not found])
|
||||
fi
|
||||
|
||||
AC_DEFINE([SUPPORT_GCOV],[1], [
|
||||
Define to allow pcretest and pcregrep to be linked with gcov, so that they
|
||||
are able to generate code coverage reports.])
|
||||
|
||||
# And add flags needed for gcov
|
||||
# Set flags needed for gcov
|
||||
GCOV_CFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_CXXFLAGS="-O0 -ggdb3 -fprofile-arcs -ftest-coverage"
|
||||
GCOV_LIBS="-lgcov"
|
||||
@ -1075,6 +1082,7 @@ $PACKAGE-$VERSION configuration summary:
|
||||
Use stack recursion ............. : ${enable_stack_for_recursion}
|
||||
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
|
||||
Internal link size .............. : ${with_link_size}
|
||||
Nested parentheses limit ........ : ${with_parens_nest_limit}
|
||||
Match limit ..................... : ${with_match_limit}
|
||||
Match limit recursion ........... : ${with_match_limit_recursion}
|
||||
Build shared libs ............... : ${enable_shared}
|
||||
|
@ -171,8 +171,8 @@ can skip ahead to the CMake section.
|
||||
pcre16_version.c
|
||||
pcre16_xclass.c
|
||||
|
||||
(8) If you want to build a 16-bit library (as well as, or instead of the 8-bit
|
||||
or 32-bit libraries) repeat steps 5-6 with the following files:
|
||||
(8) If you want to build a 32-bit library (as well as, or instead of the 8-bit
|
||||
or 16-bit libraries) repeat steps 5-6 with the following files:
|
||||
|
||||
pcre32_byte_order.c
|
||||
pcre32_chartables.c
|
||||
|
@ -9,8 +9,10 @@ from:
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE at
|
||||
pcre-dev@exim.org. You can access the archives and subscribe or manage your
|
||||
subscription here:
|
||||
|
||||
pcre-dev@exim.org
|
||||
https://lists.exim.org/mailman/listinfo/pcre-dev
|
||||
|
||||
Please read the NEWS file if you are upgrading from a previous release.
|
||||
The contents of this README file are:
|
||||
@ -112,6 +114,11 @@ contributions provided support for compiling PCRE on various flavours of
|
||||
Windows (I myself do not use Windows). Nowadays there is more Windows support
|
||||
in the standard distribution, so these contributions have been archived.
|
||||
|
||||
A PCRE user maintains downloadable Windows binaries of the pcregrep and
|
||||
pcretest programs here:
|
||||
|
||||
http://www.rexegg.com/pcregrep-pcretest.html
|
||||
|
||||
|
||||
Building PCRE on non-Unix-like systems
|
||||
--------------------------------------
|
||||
@ -262,9 +269,17 @@ library. They are also documented in the pcrebuild man page.
|
||||
|
||||
on the "configure" command.
|
||||
|
||||
. PCRE has a counter that can be set to limit the amount of resources it uses.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
. PCRE has a counter that limits the depth of nesting of parentheses in a
|
||||
pattern. This limits the amount of system stack that a pattern uses when it
|
||||
is compiled. The default is 250, but you can change it by setting, for
|
||||
example,
|
||||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
@ -344,7 +359,8 @@ library. They are also documented in the pcrebuild man page.
|
||||
report is generated by running "make coverage". If ccache is installed on
|
||||
your system, it must be disabled when building PCRE for coverage reporting.
|
||||
You can do this by setting the environment variable CCACHE_DISABLE=1 before
|
||||
running "make" to build PCRE.
|
||||
running "make" to build PCRE. There is more information about coverage
|
||||
reporting in the "pcrebuild" documentation.
|
||||
|
||||
. The pcregrep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE library. It is possible to compile pcregrep to use
|
||||
@ -971,4 +987,4 @@ pcre_xxx, one with the name pcre16_xxx, and a third with the name pcre32_xxx.
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 28 April 2013
|
||||
Last updated: 05 November 2013
|
||||
|
@ -23,8 +23,8 @@ man page, in case the conversion went wrong.
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>pcre-config [--prefix] [--exec-prefix] [--version] [--libs]</b>
|
||||
<b>[--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
|
||||
<b>[--cflags] [--cflags-posix]</b>
|
||||
<b> [--libs16] [--libs32] [--libs-cpp] [--libs-posix]</b>
|
||||
<b> [--cflags] [--cflags-posix]</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
|
@ -62,7 +62,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
|
||||
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||
release 6.2.0.
|
||||
release 6.3.0.
|
||||
</P>
|
||||
<P>
|
||||
In addition to the Perl-compatible matching function, PCRE contains an
|
||||
|
@ -42,126 +42,126 @@ man page, in case the conversion went wrong.
|
||||
<br><a name="SEC1" href="#TOC1">PCRE 16-BIT API BASIC FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_free_study(pcre16_extra *<i>extra</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">PCRE 16-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_free_substring(PCRE_SPTR16 <i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_free_substring_list(PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">PCRE 16-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_jit_stack_free(pcre16_jit_stack *<i>stack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>const unsigned char *pcre16_maketables(void);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_refcount(pcre16 *<i>code</i>, int <i>adjust</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>const char *pcre16_version(void);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">PCRE 16-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>void *(*pcre16_malloc)(size_t);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre16_free)(void *);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void *(*pcre16_stack_malloc)(size_t);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre16_stack_free)(void *);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int (*pcre16_callout)(pcre16_callout_block *);</b>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">PCRE 16-BIT API 16-BIT-ONLY FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||
<b>int <i>keep_boms</i>);</b>
|
||||
<b> PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||
<b> int <i>keep_boms</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">THE PCRE 16-BIT LIBRARY</a><br>
|
||||
<P>
|
||||
|
@ -42,126 +42,125 @@ man page, in case the conversion went wrong.
|
||||
<br><a name="SEC1" href="#TOC1">PCRE 32-BIT API BASIC FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_free_study(pcre32_extra *<i>extra</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">PCRE 32-BIT API STRING EXTRACTION FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b>PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR32 <i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_free_substring(PCRE_SPTR32 <i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_free_substring_list(PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">PCRE 32-BIT API AUXILIARY FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_jit_stack_free(pcre32_jit_stack *<i>stack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||
<b>pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>const unsigned char *pcre32_maketables(void);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_refcount(pcre32 *<i>code</i>, int <i>adjust</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>const char *pcre32_version(void);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||
<b>pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">PCRE 32-BIT API INDIRECTED FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>void *(*pcre32_malloc)(size_t);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre32_free)(void *);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void *(*pcre32_stack_malloc)(size_t);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre32_stack_free)(void *);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int (*pcre32_callout)(pcre32_callout_block *);</b>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">PCRE 32-BIT API 32-BIT-ONLY FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||
<b>int <i>keep_boms</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>byte_order</i>,</b>
|
||||
<b> int <i>keep_boms</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">THE PCRE 32-BIT LIBRARY</a><br>
|
||||
<P>
|
||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
||||
<b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre16_assign_jit_stack(pcre16_extra *<i>extra</i>,</b>
|
||||
<b>pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre16_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre32_assign_jit_stack(pcre32_extra *<i>extra</i>,</b>
|
||||
<b>pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<b> pcre32_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,18 +20,18 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16 *pcre16_compile(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32 *pcre32_compile(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
@ -65,6 +65,7 @@ The option bits are:
|
||||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
@ -73,6 +74,8 @@ The option bits are:
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||
validity (only relevant if
|
||||
PCRE_UTF16 is set)
|
||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16 *pcre16_compile2(PCRE_SPTR16 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32 *pcre32_compile2(PCRE_SPTR32 <i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
@ -69,6 +69,7 @@ The option bits are:
|
||||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
@ -77,6 +78,8 @@ The option bits are:
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||
validity (only relevant if
|
||||
PCRE_UTF16 is set)
|
||||
|
@ -48,6 +48,7 @@ point to an unsigned long integer. The available codes are:
|
||||
target architecture for the JIT compiler,
|
||||
or NULL if there is no JIT support
|
||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
|
||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||
Internal recursion depth limit
|
||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_copy_named_substring(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b>PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b> PCRE_UCHAR16 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_copy_named_substring(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b>PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b> PCRE_UCHAR32 *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,18 +20,18 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_copy_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR16 *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_copy_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, PCRE_UCHAR32 *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_dfa_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_dfa_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,18 +20,18 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_fullinfo(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_fullinfo(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b> const char **<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_named_substring(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR16 <i>stringname</i>,</b>
|
||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_named_substring(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, PCRE_SPTR32 <i>stringname</i>,</b>
|
||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_stringnumber(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_stringnumber(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>name</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>name</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_stringtable_entries(const pcre16 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>name</i>, PCRE_UCHAR16 **<i>first</i>, PCRE_UCHAR16 **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_stringtable_entries(const pcre32 *<i>code</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>name</i>, PCRE_UCHAR32 **<i>first</i>, PCRE_UCHAR32 **<i>last</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,18 +20,18 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> const char **<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_substring(PCRE_SPTR16 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> PCRE_SPTR16 *<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_substring(PCRE_SPTR32 <i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> PCRE_SPTR32 *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_get_substring_list(PCRE_SPTR16 <i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR16 **<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_get_substring_list(PCRE_SPTR32 <i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, PCRE_SPTR32 **<i>listptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,21 +20,21 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_jit_exec(const pcre16 *<i>code</i>, const pcre16_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> PCRE_SPTR16 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_jit_exec(const pcre32 *<i>code</i>, const pcre32_extra *<i>extra</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||
<b>int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>maxsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16_jit_stack *pcre16_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||
<b>int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>maxsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32_jit_stack *pcre32_jit_stack_alloc(int <i>startsize</i>,</b>
|
||||
<b>int <i>maxsize</i>);</b>
|
||||
<b> int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
||||
<b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre16_pattern_to_host_byte_order(pcre16 *<i>code</i>,</b>
|
||||
<b>pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre16_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre32_pattern_to_host_byte_order(pcre32 *<i>code</i>,</b>
|
||||
<b>pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<b> pcre32_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,15 +20,15 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre16_extra *pcre16_study(const pcre16 *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre32_extra *pcre32_study(const pcre32 *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,8 +20,8 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *<i>output</i>,</b>
|
||||
<b>PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||
<b>int <i>keep_boms</i>);</b>
|
||||
<b> PCRE_SPTR16 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||
<b> int <i>keep_boms</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -20,8 +20,8 @@ SYNOPSIS
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *<i>output</i>,</b>
|
||||
<b>PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||
<b>int <i>keep_boms</i>);</b>
|
||||
<b> PCRE_SPTR32 <i>input</i>, int <i>length</i>, int *<i>host_byte_order</i>,</b>
|
||||
<b> int <i>keep_boms</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
|
@ -46,125 +46,125 @@ man page, in case the conversion went wrong.
|
||||
<br><a name="SEC1" href="#TOC1">PCRE NATIVE API BASIC FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre_free_study(pcre_extra *<i>extra</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">PCRE NATIVE API STRING EXTRACTION FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b> const char **<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> const char **<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">PCRE NATIVE API AUXILIARY FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre_jit_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>pcre_jit_stack *<i>jstack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> pcre_jit_stack *<i>jstack</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre_jit_stack *pcre_jit_stack_alloc(int <i>startsize</i>, int <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre_jit_stack_free(pcre_jit_stack *<i>stack</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void pcre_assign_jit_stack(pcre_extra *<i>extra</i>,</b>
|
||||
<b>pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> pcre_jit_callback <i>callback</i>, void *<i>data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>const unsigned char *pcre_maketables(void);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>const char *pcre_version(void);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_pattern_to_host_byte_order(pcre *<i>code</i>,</b>
|
||||
<b>pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
<b> pcre_extra *<i>extra</i>, const unsigned char *<i>tables</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">PCRE NATIVE API INDIRECTED FUNCTIONS</a><br>
|
||||
<P>
|
||||
<b>void *(*pcre_malloc)(size_t);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre_free)(void *);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void *(*pcre_stack_malloc)(size_t);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>void (*pcre_stack_free)(void *);</b>
|
||||
</P>
|
||||
<P>
|
||||
<br>
|
||||
<br>
|
||||
<b>int (*pcre_callout)(pcre_callout_block *);</b>
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
||||
@ -483,6 +483,13 @@ interface uses <b>malloc()</b> for output vectors. Further details are given in
|
||||
the
|
||||
<a href="pcreposix.html"><b>pcreposix</b></a>
|
||||
documentation.
|
||||
<pre>
|
||||
PCRE_CONFIG_PARENS_LIMIT
|
||||
</pre>
|
||||
The output is a long integer that gives the maximum depth of nesting of
|
||||
parentheses (of any kind) in a pattern. This limit is imposed to cap the amount
|
||||
of system stack used when a pattern is compiled. It is specified when PCRE is
|
||||
built; the default is 250.
|
||||
<pre>
|
||||
PCRE_CONFIG_MATCH_LIMIT
|
||||
</pre>
|
||||
@ -509,12 +516,14 @@ avoiding the use of the stack.
|
||||
<br><a name="SEC11" href="#TOC1">COMPILING A PATTERN</a><br>
|
||||
<P>
|
||||
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
|
||||
<b>int *<i>errorcodeptr</i>,</b>
|
||||
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b>const unsigned char *<i>tableptr</i>);</b>
|
||||
<b> int *<i>errorcodeptr</i>,</b>
|
||||
<b> const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
|
||||
<b> const unsigned char *<i>tableptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
Either of the functions <b>pcre_compile()</b> or <b>pcre_compile2()</b> can be
|
||||
@ -580,8 +589,9 @@ If the final argument, <i>tableptr</i>, is NULL, PCRE uses a default set of
|
||||
character tables that are built when PCRE is compiled, using the default C
|
||||
locale. Otherwise, <i>tableptr</i> must be an address that is the result of a
|
||||
call to <b>pcre_maketables()</b>. This value is stored with the compiled
|
||||
pattern, and used again by <b>pcre_exec()</b>, unless another table pointer is
|
||||
passed to it. For more discussion, see the section on locale support below.
|
||||
pattern, and used again by <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> when the
|
||||
pattern is matched. For more discussion, see the section on locale support
|
||||
below.
|
||||
</P>
|
||||
<P>
|
||||
This code fragment shows a typical straightforward call to <b>pcre_compile()</b>:
|
||||
@ -666,12 +676,24 @@ documentation.
|
||||
<pre>
|
||||
PCRE_EXTENDED
|
||||
</pre>
|
||||
If this bit is set, white space data characters in the pattern are totally
|
||||
ignored except when escaped or inside a character class. White space does not
|
||||
include the VT character (code 11). In addition, characters between an
|
||||
unescaped # outside a character class and the next newline, inclusive, are also
|
||||
ignored. This is equivalent to Perl's /x option, and it can be changed within a
|
||||
pattern by a (?x) option setting.
|
||||
If this bit is set, most white space characters in the pattern are totally
|
||||
ignored except when escaped or inside a character class. However, white space
|
||||
is not allowed within sequences such as (?> that introduce various
|
||||
parenthesized subpatterns, nor within a numerical quantifier such as {1,3}.
|
||||
However, ignorable white space is permitted between an item and a following
|
||||
quantifier and between a quantifier and a following + that indicates
|
||||
possessiveness.
|
||||
</P>
|
||||
<P>
|
||||
White space formerly did not include the VT character (code 11), because Perl
|
||||
did not treat this character as white space. However, Perl changed at release
|
||||
5.18, so PCRE followed at release 8.34, and VT is now treated as white space.
|
||||
</P>
|
||||
<P>
|
||||
PCRE_EXTENDED also causes characters between an unescaped # outside a character
|
||||
class and the next newline, inclusive, to be ignored. PCRE_EXTENDED is
|
||||
equivalent to Perl's /x option, and it can be changed within a pattern by a
|
||||
(?x) option setting.
|
||||
</P>
|
||||
<P>
|
||||
Which characters are interpreted as newlines is controlled by the options
|
||||
@ -824,6 +846,15 @@ the pattern. Any opening parenthesis that is not followed by ? behaves as if it
|
||||
were followed by ?: but named parentheses can still be used for capturing (and
|
||||
they acquire numbers in the usual way). There is no equivalent of this option
|
||||
in Perl.
|
||||
<pre>
|
||||
PCRE_NO_AUTO_POSSESS
|
||||
</pre>
|
||||
If this option is set, it disables "auto-possessification". This is an
|
||||
optimization that, for example, turns a+b into a++b in order to avoid
|
||||
backtracks into a+ that can never be successful. However, if callouts are in
|
||||
use, auto-possessification means that some of them are never taken. You can set
|
||||
this option if you want the matching functions to do a full unoptimized search
|
||||
and run all the callouts, but it is mainly provided for testing purposes.
|
||||
<pre>
|
||||
PCRE_NO_START_OPTIMIZE
|
||||
</pre>
|
||||
@ -875,10 +906,10 @@ page. If an invalid UTF-8 sequence is found, <b>pcre_compile()</b> returns an
|
||||
error. If you already know that your pattern is valid, and you want to skip
|
||||
this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
|
||||
When it is set, the effect of passing an invalid UTF-8 string as a pattern is
|
||||
undefined. It may cause your program to crash. Note that this option can also
|
||||
be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress the
|
||||
validity checking of subject strings only. If the same string is being matched
|
||||
many times, the option can be safely set for the second and subsequent
|
||||
undefined. It may cause your program to crash or loop. Note that this option
|
||||
can also be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress
|
||||
the validity checking of subject strings only. If the same string is being
|
||||
matched many times, the option can be safely set for the second and subsequent
|
||||
matchings to improve performance.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||
@ -923,7 +954,7 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
||||
31 POSIX collating elements are not supported
|
||||
32 this version of PCRE is compiled without UTF support
|
||||
33 [this code is not in use]
|
||||
34 character value in \x{...} sequence is too large
|
||||
34 character value in \x{} or \o{} is too large
|
||||
35 invalid condition (?(0)
|
||||
36 \C not allowed in lookbehind assertion
|
||||
37 PCRE does not support \L, \l, \N{name}, \U, or \u
|
||||
@ -971,14 +1002,20 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
||||
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||
76 character value in \u.... sequence is too large
|
||||
77 invalid UTF-32 string (specifically UTF-32)
|
||||
78 setting UTF is disabled by the application
|
||||
79 non-hex character in \x{} (closing brace missing?)
|
||||
80 non-octal character in \o{} (closing brace missing?)
|
||||
81 missing opening brace after \o
|
||||
82 parentheses are too deeply nested
|
||||
83 invalid range in character class
|
||||
</pre>
|
||||
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
|
||||
be used if the limits were changed when PCRE was built.
|
||||
<a name="studyingapattern"></a></P>
|
||||
<br><a name="SEC13" href="#TOC1">STUDYING A PATTERN</a><br>
|
||||
<P>
|
||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i></b>
|
||||
<b>const char **<i>errptr</i>);</b>
|
||||
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
|
||||
<b> const char **<i>errptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
If a compiled pattern is going to be used several times, it is worth spending
|
||||
@ -1101,15 +1138,18 @@ There is a longer discussion of PCRE_NO_START_OPTIMIZE
|
||||
<P>
|
||||
PCRE handles caseless matching, and determines whether characters are letters,
|
||||
digits, or whatever, by reference to a set of tables, indexed by character
|
||||
value. When running in UTF-8 mode, this applies only to characters
|
||||
with codes less than 128. By default, higher-valued codes never match escapes
|
||||
such as \w or \d, but they can be tested with \p if PCRE is built with
|
||||
Unicode character property support. Alternatively, the PCRE_UCP option can be
|
||||
set at compile time; this causes \w and friends to use Unicode property
|
||||
support instead of built-in tables. The use of locales with Unicode is
|
||||
discouraged. If you are handling characters with codes greater than 128, you
|
||||
should either use UTF-8 and Unicode, or use locales, but not try to mix the
|
||||
two.
|
||||
code point. When running in UTF-8 mode, or in the 16- or 32-bit libraries, this
|
||||
applies only to characters with code points less than 256. By default,
|
||||
higher-valued code points never match escapes such as \w or \d. However, if
|
||||
PCRE is built with Unicode property support, all characters can be tested with
|
||||
\p and \P, or, alternatively, the PCRE_UCP option can be set when a pattern
|
||||
is compiled; this causes \w and friends to use Unicode property support
|
||||
instead of the built-in tables.
|
||||
</P>
|
||||
<P>
|
||||
The use of locales with Unicode is discouraged. If you are handling characters
|
||||
with code points greater than 128, you should either use Unicode support, or
|
||||
use locales, but not try to mix the two.
|
||||
</P>
|
||||
<P>
|
||||
PCRE contains an internal set of tables that are used when the final argument
|
||||
@ -1127,10 +1167,10 @@ for this locale support is expected to die away.
|
||||
<P>
|
||||
External tables are built by calling the <b>pcre_maketables()</b> function,
|
||||
which has no arguments, in the relevant locale. The result can then be passed
|
||||
to <b>pcre_compile()</b> or <b>pcre_exec()</b> as often as necessary. For
|
||||
example, to build and use tables that are appropriate for the French locale
|
||||
(where accented characters with values greater than 128 are treated as letters),
|
||||
the following code could be used:
|
||||
to <b>pcre_compile()</b> as often as necessary. For example, to build and use
|
||||
tables that are appropriate for the French locale (where accented characters
|
||||
with values greater than 128 are treated as letters), the following code could
|
||||
be used:
|
||||
<pre>
|
||||
setlocale(LC_CTYPE, "fr_FR");
|
||||
tables = pcre_maketables();
|
||||
@ -1148,21 +1188,25 @@ needed.
|
||||
<P>
|
||||
The pointer that is passed to <b>pcre_compile()</b> is saved with the compiled
|
||||
pattern, and the same tables are used via this pointer by <b>pcre_study()</b>
|
||||
and normally also by <b>pcre_exec()</b>. Thus, by default, for any single
|
||||
and also by <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>. Thus, for any single
|
||||
pattern, compilation, studying and matching all happen in the same locale, but
|
||||
different patterns can be compiled in different locales.
|
||||
different patterns can be processed in different locales.
|
||||
</P>
|
||||
<P>
|
||||
It is possible to pass a table pointer or NULL (indicating the use of the
|
||||
internal tables) to <b>pcre_exec()</b>. Although not intended for this purpose,
|
||||
this facility could be used to match a pattern in a different locale from the
|
||||
one in which it was compiled. Passing table pointers at run time is discussed
|
||||
below in the section on matching a pattern.
|
||||
internal tables) to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> (see the
|
||||
discussion below in the section on matching a pattern). This facility is
|
||||
provided for use with pre-compiled patterns that have been saved and reloaded.
|
||||
Character tables are not saved with patterns, so if a non-standard table was
|
||||
used at compile time, it must be provided again when the reloaded pattern is
|
||||
matched. Attempting to use this facility to match a pattern in a different
|
||||
locale from the one in which it was compiled is likely to lead to anomalous
|
||||
(usually incorrect) results.
|
||||
<a name="infoaboutpattern"></a></P>
|
||||
<br><a name="SEC15" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>
|
||||
<P>
|
||||
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>int <i>what</i>, void *<i>where</i>);</b>
|
||||
<b> int <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre_fullinfo()</b> function returns information about a compiled
|
||||
@ -1303,9 +1347,14 @@ is -1.
|
||||
</P>
|
||||
<P>
|
||||
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
||||
to return the full 32-bit range of the character, this value is deprecated;
|
||||
to return the full 32-bit range of characters, this value is deprecated;
|
||||
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
|
||||
be used.
|
||||
<pre>
|
||||
PCRE_INFO_MATCH_EMPTY
|
||||
</pre>
|
||||
Return 1 if the pattern can match an empty string, otherwise 0. The fourth
|
||||
argument should point to an <b>int</b> variable.
|
||||
<pre>
|
||||
PCRE_INFO_MATCHLIMIT
|
||||
</pre>
|
||||
@ -1364,16 +1413,18 @@ contains the parenthesis number. The rest of the entry is the corresponding
|
||||
name, zero terminated.
|
||||
</P>
|
||||
<P>
|
||||
The names are in alphabetical order. Duplicate names may appear if (?| is used
|
||||
to create multiple groups with the same number, as described in the
|
||||
The names are in alphabetical order. If (?| is used to create multiple groups
|
||||
with the same number, as described in the
|
||||
<a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
|
||||
in the
|
||||
<a href="pcrepattern.html"><b>pcrepattern</b></a>
|
||||
page. Duplicate names for subpatterns with different numbers are permitted only
|
||||
if PCRE_DUPNAMES is set. In all cases of duplicate names, they appear in the
|
||||
table in the order in which they were found in the pattern. In the absence of
|
||||
(?| this is the order of increasing number; when (?| is used this is not
|
||||
necessarily the case because later subpatterns may have lower numbers.
|
||||
page, the groups may be given the same name, but there is only one entry in the
|
||||
table. Different names for groups of the same number are not permitted.
|
||||
Duplicate names for subpatterns with different numbers are permitted,
|
||||
but only if PCRE_DUPNAMES is set. They appear in the table in the order in
|
||||
which they were found in the pattern. In the absence of (?| this is the order
|
||||
of increasing number; when (?| is used this is not necessarily the case because
|
||||
later subpatterns may have lower numbers.
|
||||
</P>
|
||||
<P>
|
||||
As a simple example of the name/number table, consider the following pattern
|
||||
@ -1487,30 +1538,14 @@ returned. For anchored patterns, 0 is returned.
|
||||
<pre>
|
||||
PCRE_INFO_FIRSTCHARACTER
|
||||
</pre>
|
||||
Return the fixed first character value, if PCRE_INFO_FIRSTCHARACTERFLAGS
|
||||
returned 1; otherwise returns 0. The fourth argument should point to an
|
||||
<b>uint_t</b> variable.
|
||||
Return the fixed first character value in the situation where
|
||||
PCRE_INFO_FIRSTCHARACTERFLAGS returns 1; otherwise return 0. The fourth
|
||||
argument should point to an <b>uint_t</b> variable.
|
||||
</P>
|
||||
<P>
|
||||
In the 8-bit library, the value is always less than 256. In the 16-bit library
|
||||
the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
|
||||
can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
|
||||
</P>
|
||||
<P>
|
||||
If there is no fixed first value, and if either
|
||||
<br>
|
||||
<br>
|
||||
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
|
||||
starts with "^", or
|
||||
<br>
|
||||
<br>
|
||||
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
|
||||
(if it were set, the pattern would be anchored),
|
||||
<br>
|
||||
<br>
|
||||
-1 is returned, indicating that the pattern matches only at the start of a
|
||||
subject string or after any newline within the string. Otherwise -2 is
|
||||
returned. For anchored patterns, -2 is returned.
|
||||
<pre>
|
||||
PCRE_INFO_REQUIREDCHARFLAGS
|
||||
</pre>
|
||||
@ -1559,8 +1594,8 @@ is different. (This seems a highly unlikely scenario.)
|
||||
<br><a name="SEC17" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
The function <b>pcre_exec()</b> is called to match a subject string against a
|
||||
@ -1723,17 +1758,22 @@ and is described in the
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
The <i>tables</i> field is used to pass a character tables pointer to
|
||||
<b>pcre_exec()</b>; this overrides the value that is stored with the compiled
|
||||
pattern. A non-NULL value is stored with the compiled pattern only if custom
|
||||
tables were supplied to <b>pcre_compile()</b> via its <i>tableptr</i> argument.
|
||||
If NULL is passed to <b>pcre_exec()</b> using this mechanism, it forces PCRE's
|
||||
internal tables to be used. This facility is helpful when re-using patterns
|
||||
that have been saved after compiling with an external set of tables, because
|
||||
the external tables might be at a different address when <b>pcre_exec()</b> is
|
||||
called. See the
|
||||
The <i>tables</i> field is provided for use with patterns that have been
|
||||
pre-compiled using custom character tables, saved to disc or elsewhere, and
|
||||
then reloaded, because the tables that were used to compile a pattern are not
|
||||
saved with it. See the
|
||||
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
|
||||
documentation for a discussion of saving compiled patterns for later use.
|
||||
documentation for a discussion of saving compiled patterns for later use. If
|
||||
NULL is passed using this mechanism, it forces PCRE's internal tables to be
|
||||
used.
|
||||
</P>
|
||||
<P>
|
||||
<b>Warning:</b> The tables that <b>pcre_exec()</b> uses must be the same as those
|
||||
that were used when the pattern was compiled. If this is not the case, the
|
||||
behaviour of <b>pcre_exec()</b> is undefined. Therefore, when a pattern is
|
||||
compiled and matched in the same process, this field should never be set. In
|
||||
this (the most common) case, the correct table pointer is automatically passed
|
||||
with the compiled pattern from <b>pcre_compile()</b> to <b>pcre_exec()</b>.
|
||||
</P>
|
||||
<P>
|
||||
If PCRE_EXTRA_MARK is set in the <i>flags</i> field, the <i>mark</i> field must
|
||||
@ -1951,7 +1991,7 @@ all the matches in a single subject string. However, you should be sure that
|
||||
the value of <i>startoffset</i> points to the start of a character (or the end
|
||||
of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
|
||||
invalid string as a subject or an invalid value of <i>startoffset</i> is
|
||||
undefined. Your program may crash.
|
||||
undefined. Your program may crash or loop.
|
||||
<pre>
|
||||
PCRE_PARTIAL_HARD
|
||||
PCRE_PARTIAL_SOFT
|
||||
@ -2413,17 +2453,17 @@ no longer in use and is never returned.
|
||||
<br><a name="SEC18" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||
<P>
|
||||
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b>int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
|
||||
<b> int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>stringcount</i>, int <i>stringnumber</i>,</b>
|
||||
<b> const char **<i>stringptr</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
|
||||
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
<b> int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
Captured substrings can be accessed directly by using the offsets returned by
|
||||
@ -2508,19 +2548,19 @@ provided.
|
||||
<br><a name="SEC19" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<P>
|
||||
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>name</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b> char *<i>buffer</i>, int <i>buffersize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b>const char **<i>stringptr</i>);</b>
|
||||
<b> const char *<i>subject</i>, int *<i>ovector</i>,</b>
|
||||
<b> int <i>stringcount</i>, const char *<i>stringname</i>,</b>
|
||||
<b> const char **<i>stringptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
To extract a substring by name, you first have to find the associated number.
|
||||
@ -2572,7 +2612,7 @@ same number causes an error at compile time.
|
||||
<br><a name="SEC20" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||
<P>
|
||||
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
|
||||
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
<b> const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
|
||||
@ -2653,9 +2693,9 @@ the value returned is the size of each block that is obtained from the heap.
|
||||
<br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
|
||||
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
<b> const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
|
||||
<b> int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
|
||||
<b> int *<i>workspace</i>, int <i>wscount</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
The function <b>pcre_dfa_exec()</b> is called to match a subject string against
|
||||
@ -2784,6 +2824,15 @@ matching string is given first. If there were too many matches to fit into
|
||||
the longest matches. Unlike <b>pcre_exec()</b>, <b>pcre_dfa_exec()</b> can use
|
||||
the entire <i>ovector</i> for returning matched strings.
|
||||
</P>
|
||||
<P>
|
||||
NOTE: PCRE's "auto-possessification" optimization usually applies to character
|
||||
repeats at the end of a pattern (as well as internally). For example, the
|
||||
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
|
||||
even considering the possibility of backtracking into the repeated digits. For
|
||||
DFA matching, this means that only one possible match is found. If you really
|
||||
do want multiple matches in such cases, either use an ungreedy repeat
|
||||
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||
</P>
|
||||
<br><b>
|
||||
Error returns from <b>pcre_dfa_exec()</b>
|
||||
</b><br>
|
||||
@ -2850,7 +2899,7 @@ Cambridge CB2 3QH, England.
|
||||
</P>
|
||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 12 May 2013
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
|
@ -77,15 +77,50 @@ independent groups).
|
||||
Automatic callouts can be used for tracking the progress of pattern matching.
|
||||
The
|
||||
<a href="pcretest.html"><b>pcretest</b></a>
|
||||
command has an option that sets automatic callouts; when it is used, the output
|
||||
indicates how the pattern is matched. This is useful information when you are
|
||||
trying to optimize the performance of a particular pattern.
|
||||
program has a pattern qualifier (/C) that sets automatic callouts; when it is
|
||||
used, the output indicates how the pattern is being matched. This is useful
|
||||
information when you are trying to optimize the performance of a particular
|
||||
pattern.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
||||
<P>
|
||||
You should be aware that, because of optimizations in the way PCRE matches
|
||||
patterns by default, callouts sometimes do not happen. For example, if the
|
||||
pattern is
|
||||
You should be aware that, because of optimizations in the way PCRE compiles and
|
||||
matches patterns, callouts sometimes do not happen exactly as you might expect.
|
||||
</P>
|
||||
<P>
|
||||
At compile time, PCRE "auto-possessifies" repeated items when it knows that
|
||||
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||
if it were a++[bc]. The <b>pcretest</b> output when this pattern is anchored and
|
||||
then applied with automatic callouts to the string "aaaa" is:
|
||||
<pre>
|
||||
--->aaaa
|
||||
+0 ^ ^
|
||||
+1 ^ a+
|
||||
+3 ^ ^ [bc]
|
||||
No match
|
||||
</pre>
|
||||
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
|
||||
to <b>pcre_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). If
|
||||
this is done in <b>pcretest</b> (using the /O qualifier), the output changes to
|
||||
this:
|
||||
<pre>
|
||||
--->aaaa
|
||||
+0 ^ ^
|
||||
+1 ^ a+
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^^ [bc]
|
||||
No match
|
||||
</pre>
|
||||
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||
again, repeatedly, until a+ itself fails.
|
||||
</P>
|
||||
<P>
|
||||
Other optimizations that provide fast "no match" results also affect callouts.
|
||||
For example, if the pattern is
|
||||
<pre>
|
||||
ab(?C4)cd
|
||||
</pre>
|
||||
@ -109,11 +144,11 @@ callouts such as the example above are obeyed.
|
||||
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
||||
<P>
|
||||
During matching, when PCRE reaches a callout point, the external function
|
||||
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called
|
||||
(if it is set). This applies to both normal and DFA matching. The only
|
||||
argument to the callout function is a pointer to a <b>pcre_callout</b>
|
||||
or <b>pcre[16|32]_callout</b> block.
|
||||
These structures contain the following fields:
|
||||
defined by <i>pcre_callout</i> or <i>pcre[16|32]_callout</i> is called (if it is
|
||||
set). This applies to both normal and DFA matching. The only argument to the
|
||||
callout function is a pointer to a <b>pcre_callout</b> or
|
||||
<b>pcre[16|32]_callout</b> block. These structures contain the following
|
||||
fields:
|
||||
<pre>
|
||||
int <i>version</i>;
|
||||
int <i>callout_number</i>;
|
||||
@ -242,7 +277,7 @@ Cambridge CB2 3QH, England.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 March 2013
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
|
@ -138,18 +138,24 @@ an error is given at compile time.
|
||||
<P>
|
||||
15. Perl recognizes comments in some places that PCRE does not, for example,
|
||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||
Perl allows white space between ( and ? but PCRE never does, even if the
|
||||
PCRE_EXTENDED option is set.
|
||||
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
|
||||
</P>
|
||||
<P>
|
||||
16. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||
16. Perl, when in warning mode, gives warnings for character classes such as
|
||||
[A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
|
||||
warning features, so it gives an error in these cases because they are almost
|
||||
certainly user mistakes.
|
||||
</P>
|
||||
<P>
|
||||
17. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||
affected when case-independent matching is specified. For example, \p{Lu}
|
||||
always matches an upper case letter. I think Perl has changed in this respect;
|
||||
in the release at the time of writing (5.16), \p{Lu} and \p{Ll} match all
|
||||
letters, regardless of case, when case independence is specified.
|
||||
</P>
|
||||
<P>
|
||||
17. PCRE provides some extensions to the Perl regular expression facilities.
|
||||
18. PCRE provides some extensions to the Perl regular expression facilities.
|
||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||
of which (such as named parentheses) have been in PCRE for some time. This list
|
||||
is with respect to Perl 5.10:
|
||||
@ -220,7 +226,7 @@ Cambridge CB2 3QH, England.
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 19 March 2013
|
||||
Last updated: 10 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
|
@ -21,9 +21,10 @@ practice be relevant.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
||||
for the 8-bit library, 32-bit units for the 32-bit library, and 32-bit units for
|
||||
the 32-bit library) if PCRE is compiled with the default internal linkage size
|
||||
of 2 bytes. If you want to process regular expressions that are truly enormous,
|
||||
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
|
||||
the 32-bit library) if PCRE is compiled with the default internal linkage size,
|
||||
which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit
|
||||
library. If you want to process regular expressions that are truly enormous,
|
||||
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
||||
16-bit or 32-bit library, 3 is rounded up to 4). See the <b>README</b> file in
|
||||
the source distribution and the
|
||||
@ -36,7 +37,10 @@ All values in repeating quantifiers must be less than 65536.
|
||||
</P>
|
||||
<P>
|
||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||
no more than 65535 capturing subpatterns.
|
||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||
order to limit the amount of system stack used at compile time. The limit can
|
||||
be specified when PCRE is built; the default is 250.
|
||||
</P>
|
||||
<P>
|
||||
There is a limit to the number of forward references to subsequent subpatterns
|
||||
@ -50,7 +54,7 @@ maximum number of named subpatterns is 10000.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit library.
|
||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a subject string is the largest positive number that an
|
||||
@ -77,9 +81,9 @@ Cambridge CB2 3QH, England.
|
||||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 04 May 2012
|
||||
Last updated: 05 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
@ -126,6 +126,15 @@ character of the subject. The algorithm does not automatically move on to find
|
||||
matches that start at later positions.
|
||||
</P>
|
||||
<P>
|
||||
PCRE's "auto-possessification" optimization usually applies to character
|
||||
repeats at the end of a pattern (as well as internally). For example, the
|
||||
pattern "a\d+" is compiled as if it were "a\d++" because there is no point
|
||||
even considering the possibility of backtracking into the repeated digits. For
|
||||
DFA matching, this means that only one possible match is found. If you really
|
||||
do want multiple matches in such cases, either use an ungreedy repeat
|
||||
("a\d+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||
</P>
|
||||
<P>
|
||||
There are a number of features of PCRE regular expressions that are not
|
||||
supported by the alternative matching algorithm. They are as follows:
|
||||
</P>
|
||||
@ -224,7 +233,7 @@ Cambridge CB2 3QH, England.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 08 January 2012
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
<br>
|
||||
|
@ -306,6 +306,16 @@ not retain the previously partially-matched string. It is up to the calling
|
||||
program to do that if it needs to.
|
||||
</P>
|
||||
<P>
|
||||
That means that, for an unanchored pattern, if a continued match fails, it is
|
||||
not possible to try again at a new starting point. All this facility is capable
|
||||
of doing is continuing with the previous match attempt. In the previous
|
||||
example, if the second set of data is "ug23" the result is no match, even
|
||||
though there would be a match for "aug23" if the entire string were given at
|
||||
once. Depending on the application, this may or may not be what you want.
|
||||
The only way to allow for starting again at the next character is to retain the
|
||||
matched part of the subject and try a new complete match.
|
||||
</P>
|
||||
<P>
|
||||
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
||||
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
||||
facility can be used to pass very long subject strings to the DFA matching
|
||||
@ -490,7 +500,7 @@ Cambridge CB2 3QH, England.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 20 February 2013
|
||||
Last updated: 02 July 2013
|
||||
<br>
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
|
@ -23,25 +23,26 @@ man page, in case the conversion went wrong.
|
||||
<li><a name="TOC8" href="#SEC8">MATCHING A SINGLE DATA UNIT</a>
|
||||
<li><a name="TOC9" href="#SEC9">SQUARE BRACKETS AND CHARACTER CLASSES</a>
|
||||
<li><a name="TOC10" href="#SEC10">POSIX CHARACTER CLASSES</a>
|
||||
<li><a name="TOC11" href="#SEC11">VERTICAL BAR</a>
|
||||
<li><a name="TOC12" href="#SEC12">INTERNAL OPTION SETTING</a>
|
||||
<li><a name="TOC13" href="#SEC13">SUBPATTERNS</a>
|
||||
<li><a name="TOC14" href="#SEC14">DUPLICATE SUBPATTERN NUMBERS</a>
|
||||
<li><a name="TOC15" href="#SEC15">NAMED SUBPATTERNS</a>
|
||||
<li><a name="TOC16" href="#SEC16">REPETITION</a>
|
||||
<li><a name="TOC17" href="#SEC17">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
|
||||
<li><a name="TOC18" href="#SEC18">BACK REFERENCES</a>
|
||||
<li><a name="TOC19" href="#SEC19">ASSERTIONS</a>
|
||||
<li><a name="TOC20" href="#SEC20">CONDITIONAL SUBPATTERNS</a>
|
||||
<li><a name="TOC21" href="#SEC21">COMMENTS</a>
|
||||
<li><a name="TOC22" href="#SEC22">RECURSIVE PATTERNS</a>
|
||||
<li><a name="TOC23" href="#SEC23">SUBPATTERNS AS SUBROUTINES</a>
|
||||
<li><a name="TOC24" href="#SEC24">ONIGURUMA SUBROUTINE SYNTAX</a>
|
||||
<li><a name="TOC25" href="#SEC25">CALLOUTS</a>
|
||||
<li><a name="TOC26" href="#SEC26">BACKTRACKING CONTROL</a>
|
||||
<li><a name="TOC27" href="#SEC27">SEE ALSO</a>
|
||||
<li><a name="TOC28" href="#SEC28">AUTHOR</a>
|
||||
<li><a name="TOC29" href="#SEC29">REVISION</a>
|
||||
<li><a name="TOC11" href="#SEC11">COMPATIBILITY FEATURE FOR WORD BOUNDARIES</a>
|
||||
<li><a name="TOC12" href="#SEC12">VERTICAL BAR</a>
|
||||
<li><a name="TOC13" href="#SEC13">INTERNAL OPTION SETTING</a>
|
||||
<li><a name="TOC14" href="#SEC14">SUBPATTERNS</a>
|
||||
<li><a name="TOC15" href="#SEC15">DUPLICATE SUBPATTERN NUMBERS</a>
|
||||
<li><a name="TOC16" href="#SEC16">NAMED SUBPATTERNS</a>
|
||||
<li><a name="TOC17" href="#SEC17">REPETITION</a>
|
||||
<li><a name="TOC18" href="#SEC18">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
|
||||
<li><a name="TOC19" href="#SEC19">BACK REFERENCES</a>
|
||||
<li><a name="TOC20" href="#SEC20">ASSERTIONS</a>
|
||||
<li><a name="TOC21" href="#SEC21">CONDITIONAL SUBPATTERNS</a>
|
||||
<li><a name="TOC22" href="#SEC22">COMMENTS</a>
|
||||
<li><a name="TOC23" href="#SEC23">RECURSIVE PATTERNS</a>
|
||||
<li><a name="TOC24" href="#SEC24">SUBPATTERNS AS SUBROUTINES</a>
|
||||
<li><a name="TOC25" href="#SEC25">ONIGURUMA SUBROUTINE SYNTAX</a>
|
||||
<li><a name="TOC26" href="#SEC26">CALLOUTS</a>
|
||||
<li><a name="TOC27" href="#SEC27">BACKTRACKING CONTROL</a>
|
||||
<li><a name="TOC28" href="#SEC28">SEE ALSO</a>
|
||||
<li><a name="TOC29" href="#SEC29">AUTHOR</a>
|
||||
<li><a name="TOC30" href="#SEC30">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION DETAILS</a><br>
|
||||
<P>
|
||||
@ -116,21 +117,33 @@ appearance causes an error.
|
||||
Unicode property support
|
||||
</b><br>
|
||||
<P>
|
||||
Another special sequence that may appear at the start of a pattern is
|
||||
<pre>
|
||||
(*UCP)
|
||||
</pre>
|
||||
Another special sequence that may appear at the start of a pattern is (*UCP).
|
||||
This has the same effect as setting the PCRE_UCP option: it causes sequences
|
||||
such as \d and \w to use Unicode properties to determine character types,
|
||||
instead of recognizing only characters with codes less than 128 via a lookup
|
||||
table.
|
||||
</P>
|
||||
<br><b>
|
||||
Disabling auto-possessification
|
||||
</b><br>
|
||||
<P>
|
||||
If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting
|
||||
the PCRE_NO_AUTO_POSSESS option at compile time. This stops PCRE from making
|
||||
quantifiers possessive when what follows cannot match the repeated item. For
|
||||
example, by default a+b is treated as a++b. For more details, see the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><b>
|
||||
Disabling start-up optimizations
|
||||
</b><br>
|
||||
<P>
|
||||
If a pattern starts with (*NO_START_OPT), it has the same effect as setting the
|
||||
PCRE_NO_START_OPTIMIZE option either at compile or matching time.
|
||||
PCRE_NO_START_OPTIMIZE option either at compile or matching time. This disables
|
||||
several optimizations for quickly reaching "no match" results. For more
|
||||
details, see the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
documentation.
|
||||
<a name="newlines"></a></P>
|
||||
<br><b>
|
||||
Newline conventions
|
||||
@ -193,10 +206,10 @@ pattern of the form
|
||||
(*LIMIT_RECURSION=d)
|
||||
</pre>
|
||||
where d is any number of decimal digits. However, the value of the setting must
|
||||
be less than the value set by the caller of <b>pcre_exec()</b> for it to have
|
||||
any effect. In other words, the pattern writer can lower the limit set by the
|
||||
programmer, but not raise it. If there is more than one setting of one of these
|
||||
limits, the lower value is used.
|
||||
be less than the value set (or defaulted) by the caller of <b>pcre_exec()</b>
|
||||
for it to have any effect. In other words, the pattern writer can lower the
|
||||
limits set by the programmer, but not raise them. If there is more than one
|
||||
setting of one of these limits, the lower value is used.
|
||||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">EBCDIC CHARACTER CODES</a><br>
|
||||
<P>
|
||||
@ -283,10 +296,11 @@ backslash. All other characters (in particular, those whose codepoints are
|
||||
greater than 127) are treated as literals.
|
||||
</P>
|
||||
<P>
|
||||
If a pattern is compiled with the PCRE_EXTENDED option, white space in the
|
||||
pattern (other than in a character class) and characters between a # outside
|
||||
a character class and the next newline are ignored. An escaping backslash can
|
||||
be used to include a white space or # character as part of the pattern.
|
||||
If a pattern is compiled with the PCRE_EXTENDED option, most white space in the
|
||||
pattern (other than in a character class), and characters between a # outside a
|
||||
character class and the next newline, inclusive, are ignored. An escaping
|
||||
backslash can be used to include a white space or # character as part of the
|
||||
pattern.
|
||||
</P>
|
||||
<P>
|
||||
If you want to remove the special meaning from a sequence of characters, you
|
||||
@ -324,7 +338,9 @@ one of the following escape sequences than the binary character it represents:
|
||||
\n linefeed (hex 0A)
|
||||
\r carriage return (hex 0D)
|
||||
\t tab (hex 09)
|
||||
\0dd character with octal code 0dd
|
||||
\ddd character with octal code ddd, or back reference
|
||||
\o{ddd..} character with octal code ddd..
|
||||
\xhh character with hex code hh
|
||||
\x{hhh..} character with hex code hhh.. (non-JavaScript mode)
|
||||
\uhhhh character with hex code hhhh (JavaScript mode only)
|
||||
@ -347,42 +363,6 @@ the EBCDIC letters are disjoint, \cZ becomes hex 29 (Z is E9), and other
|
||||
characters also generate different values.
|
||||
</P>
|
||||
<P>
|
||||
By default, after \x, from zero to two hexadecimal digits are read (letters
|
||||
can be in upper or lower case). Any number of hexadecimal digits may appear
|
||||
between \x{ and }, but the character code is constrained as follows:
|
||||
<pre>
|
||||
8-bit non-UTF mode less than 0x100
|
||||
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
|
||||
16-bit non-UTF mode less than 0x10000
|
||||
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
|
||||
32-bit non-UTF mode less than 0x80000000
|
||||
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
||||
</pre>
|
||||
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
||||
"surrogate" codepoints), and 0xffef.
|
||||
</P>
|
||||
<P>
|
||||
If characters other than hexadecimal digits appear between \x{ and }, or if
|
||||
there is no terminating }, this form of escape is not recognized. Instead, the
|
||||
initial \x will be interpreted as a basic hexadecimal escape, with no
|
||||
following digits, giving a character whose value is zero.
|
||||
</P>
|
||||
<P>
|
||||
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \x is
|
||||
as just described only when it is followed by two hexadecimal digits.
|
||||
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
|
||||
code points greater than 256 is provided by \u, which must be followed by
|
||||
four hexadecimal digits; otherwise it matches a literal "u" character.
|
||||
Character codes specified by \u in JavaScript mode are constrained in the same
|
||||
way as those specified by \x in non-JavaScript mode.
|
||||
</P>
|
||||
<P>
|
||||
Characters whose value is less than 256 can be defined by either of the two
|
||||
syntaxes for \x (or by \u in JavaScript mode). There is no difference in the
|
||||
way they are handled. For example, \xdc is exactly the same as \x{dc} (or
|
||||
\u00dc in JavaScript mode).
|
||||
</P>
|
||||
<P>
|
||||
After \0 up to two further octal digits are read. If there are fewer than two
|
||||
digits, just those that are present are used. Thus the sequence \0\x\07
|
||||
specifies two binary zeros followed by a BEL character (code value 7). Make
|
||||
@ -390,9 +370,23 @@ sure you supply two digits after the initial zero if the pattern character that
|
||||
follows is itself an octal digit.
|
||||
</P>
|
||||
<P>
|
||||
The handling of a backslash followed by a digit other than 0 is complicated.
|
||||
Outside a character class, PCRE reads it and any following digits as a decimal
|
||||
number. If the number is less than 10, or if there have been at least that many
|
||||
The escape \o must be followed by a sequence of octal digits, enclosed in
|
||||
braces. An error occurs if this is not the case. This escape is a recent
|
||||
addition to Perl; it provides a way of specifying character code points as octal
|
||||
numbers greater than 0777, and it also allows octal numbers and back references
|
||||
to be unambiguously specified.
|
||||
</P>
|
||||
<P>
|
||||
For greater clarity and unambiguity, it is best to avoid following \ by a
|
||||
digit greater than zero. Instead, use \o{} or \x{} to specify character
|
||||
numbers, and \g{} to specify back references. The following paragraphs
|
||||
describe the old, ambiguous syntax.
|
||||
</P>
|
||||
<P>
|
||||
The handling of a backslash followed by a digit other than 0 is complicated,
|
||||
and Perl has changed in recent releases, causing PCRE also to change. Outside a
|
||||
character class, PCRE reads the digit and any following digits as a decimal
|
||||
number. If the number is less than 8, or if there have been at least that many
|
||||
previous capturing left parentheses in the expression, the entire sequence is
|
||||
taken as a <i>back reference</i>. A description of how this works is given
|
||||
<a href="#backreferences">later,</a>
|
||||
@ -400,12 +394,11 @@ following the discussion of
|
||||
<a href="#subpattern">parenthesized subpatterns.</a>
|
||||
</P>
|
||||
<P>
|
||||
Inside a character class, or if the decimal number is greater than 9 and there
|
||||
have not been that many capturing subpatterns, PCRE re-reads up to three octal
|
||||
digits following the backslash, and uses them to generate a data character. Any
|
||||
subsequent digits stand for themselves. The value of the character is
|
||||
constrained in the same way as characters specified in hexadecimal.
|
||||
For example:
|
||||
Inside a character class, or if the decimal number following \ is greater than
|
||||
7 and there have not been that many capturing subpatterns, PCRE handles \8 and
|
||||
\9 as the literal characters "8" and "9", and otherwise re-reads up to three
|
||||
octal digits following the backslash, using them to generate a data character.
|
||||
Any subsequent digits stand for themselves. For example:
|
||||
<pre>
|
||||
\040 is another way of writing an ASCII space
|
||||
\40 is the same, provided there are fewer than 40 previous capturing subpatterns
|
||||
@ -415,12 +408,53 @@ For example:
|
||||
\0113 is a tab followed by the character "3"
|
||||
\113 might be a back reference, otherwise the character with octal code 113
|
||||
\377 might be a back reference, otherwise the value 255 (decimal)
|
||||
\81 is either a back reference, or a binary zero followed by the two characters "8" and "1"
|
||||
\81 is either a back reference, or the two characters "8" and "1"
|
||||
</pre>
|
||||
Note that octal values of 100 or greater must not be introduced by a leading
|
||||
zero, because no more than three octal digits are ever read.
|
||||
Note that octal values of 100 or greater that are specified using this syntax
|
||||
must not be introduced by a leading zero, because no more than three octal
|
||||
digits are ever read.
|
||||
</P>
|
||||
<P>
|
||||
By default, after \x that is not followed by {, from zero to two hexadecimal
|
||||
digits are read (letters can be in upper or lower case). Any number of
|
||||
hexadecimal digits may appear between \x{ and }. If a character other than
|
||||
a hexadecimal digit appears between \x{ and }, or if there is no terminating
|
||||
}, an error occurs.
|
||||
</P>
|
||||
<P>
|
||||
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \x is
|
||||
as just described only when it is followed by two hexadecimal digits.
|
||||
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
|
||||
code points greater than 256 is provided by \u, which must be followed by
|
||||
four hexadecimal digits; otherwise it matches a literal "u" character.
|
||||
</P>
|
||||
<P>
|
||||
Characters whose value is less than 256 can be defined by either of the two
|
||||
syntaxes for \x (or by \u in JavaScript mode). There is no difference in the
|
||||
way they are handled. For example, \xdc is exactly the same as \x{dc} (or
|
||||
\u00dc in JavaScript mode).
|
||||
</P>
|
||||
<br><b>
|
||||
Constraints on character values
|
||||
</b><br>
|
||||
<P>
|
||||
Characters that are specified using octal or hexadecimal numbers are
|
||||
limited to certain values, as follows:
|
||||
<pre>
|
||||
8-bit non-UTF mode less than 0x100
|
||||
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
|
||||
16-bit non-UTF mode less than 0x10000
|
||||
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
|
||||
32-bit non-UTF mode less than 0x100000000
|
||||
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
||||
</pre>
|
||||
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
||||
"surrogate" codepoints), and 0xffef.
|
||||
</P>
|
||||
<br><b>
|
||||
Escape sequences in character classes
|
||||
</b><br>
|
||||
<P>
|
||||
All the sequences that define a single character value can be used both inside
|
||||
and outside character classes. In addition, inside a character class, \b is
|
||||
interpreted as the backspace character (hex 08).
|
||||
@ -498,11 +532,14 @@ matching point is at the end of the subject string, all of them fail, because
|
||||
there is no character to match.
|
||||
</P>
|
||||
<P>
|
||||
For compatibility with Perl, \s does not match the VT character (code 11).
|
||||
This makes it different from the the POSIX "space" class. The \s characters
|
||||
are HT (9), LF (10), FF (12), CR (13), and space (32). If "use locale;" is
|
||||
included in a Perl script, \s may match the VT character. In PCRE, it never
|
||||
does.
|
||||
For compatibility with Perl, \s did not use to match the VT character (code
|
||||
11), which made it different from the POSIX "space" class. However, Perl
|
||||
added VT at release 5.18, and PCRE followed suit at release 8.34. The default
|
||||
\s characters are now HT (9), LF (10), VT (11), FF (12), CR (13), and space
|
||||
(32), which are defined as white space in the "C" locale. This list may vary if
|
||||
locale-specific matching is taking place. For example, in some locales the
|
||||
"non-breaking space" character (\xA0) is recognized as white space, and in
|
||||
others the VT character is not.
|
||||
</P>
|
||||
<P>
|
||||
A "word" character is an underscore or any character that is a letter or digit.
|
||||
@ -513,21 +550,23 @@ place (see
|
||||
in the
|
||||
<a href="pcreapi.html"><b>pcreapi</b></a>
|
||||
page). For example, in a French locale such as "fr_FR" in Unix-like systems,
|
||||
or "french" in Windows, some character codes greater than 128 are used for
|
||||
or "french" in Windows, some character codes greater than 127 are used for
|
||||
accented letters, and these are then matched by \w. The use of locales with
|
||||
Unicode is discouraged.
|
||||
</P>
|
||||
<P>
|
||||
By default, in a UTF mode, characters with values greater than 128 never match
|
||||
\d, \s, or \w, and always match \D, \S, and \W. These sequences retain
|
||||
their original meanings from before UTF support was available, mainly for
|
||||
efficiency reasons. However, if PCRE is compiled with Unicode property support,
|
||||
and the PCRE_UCP option is set, the behaviour is changed so that Unicode
|
||||
properties are used to determine character types, as follows:
|
||||
By default, characters whose code points are greater than 127 never match \d,
|
||||
\s, or \w, and always match \D, \S, and \W, although this may vary for
|
||||
characters in the range 128-255 when locale-specific matching is happening.
|
||||
These escape sequences retain their original meanings from before Unicode
|
||||
support was available, mainly for efficiency reasons. If PCRE is compiled with
|
||||
Unicode property support, and the PCRE_UCP option is set, the behaviour is
|
||||
changed so that Unicode properties are used to determine character types, as
|
||||
follows:
|
||||
<pre>
|
||||
\d any character that \p{Nd} matches (decimal digit)
|
||||
\s any character that \p{Z} matches, plus HT, LF, FF, CR
|
||||
\w any character that \p{L} or \p{N} matches, plus underscore
|
||||
\d any character that matches \p{Nd} (decimal digit)
|
||||
\s any character that matches \p{Z} or \h or \v
|
||||
\w any character that matches \p{L} or \p{N}, plus underscore
|
||||
</pre>
|
||||
The upper case escapes match the inverse sets of characters. Note that \d
|
||||
matches only decimal digits, whereas \w matches any Unicode digit, as well as
|
||||
@ -538,7 +577,7 @@ is noticeably slower when PCRE_UCP is set.
|
||||
<P>
|
||||
The sequences \h, \H, \v, and \V are features that were added to Perl at
|
||||
release 5.10. In contrast to the other sequences, which match only ASCII
|
||||
characters by default, these always match certain high-valued codepoints,
|
||||
characters by default, these always match certain high-valued code points,
|
||||
whether or not PCRE_UCP is set. The horizontal space characters are:
|
||||
<pre>
|
||||
U+0009 Horizontal tab (HT)
|
||||
@ -913,9 +952,9 @@ PCRE's additional properties
|
||||
<P>
|
||||
As well as the standard Unicode properties described above, PCRE supports four
|
||||
more that make it possible to convert traditional escape sequences such as \w
|
||||
and \s and POSIX character classes to use Unicode properties. PCRE uses these
|
||||
non-standard, non-Perl properties internally when PCRE_UCP is set. However,
|
||||
they may also be used explicitly. These properties are:
|
||||
and \s to use Unicode properties. PCRE uses these non-standard, non-Perl
|
||||
properties internally when PCRE_UCP is set. However, they may also be used
|
||||
explicitly. These properties are:
|
||||
<pre>
|
||||
Xan Any alphanumeric character
|
||||
Xps Any POSIX space character
|
||||
@ -925,8 +964,9 @@ they may also be used explicitly. These properties are:
|
||||
Xan matches characters that have either the L (letter) or the N (number)
|
||||
property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
|
||||
carriage return, and any other character that has the Z (separator) property.
|
||||
Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the
|
||||
same characters as Xan, plus underscore.
|
||||
Xsp is the same as Xps; it used to exclude vertical tab, for Perl
|
||||
compatibility, but Perl changed, and so PCRE followed at release 8.34. Xwd
|
||||
matches the same characters as Xan, plus underscore.
|
||||
</P>
|
||||
<P>
|
||||
There is another non-standard property, Xuc, which matches any character that
|
||||
@ -1218,7 +1258,9 @@ The minus (hyphen) character can be used to specify a range of characters in a
|
||||
character class. For example, [d-m] matches any letter between d and m,
|
||||
inclusive. If a minus character is required in a class, it must be escaped with
|
||||
a backslash or appear in a position where it cannot be interpreted as
|
||||
indicating a range, typically as the first or last character in the class.
|
||||
indicating a range, typically as the first or last character in the class, or
|
||||
immediately after a range. For example, [b-d-z] matches letters in the range b
|
||||
to d, a hyphen character, or z.
|
||||
</P>
|
||||
<P>
|
||||
It is not possible to have the literal character "]" as the end character of a
|
||||
@ -1230,6 +1272,12 @@ followed by two other characters. The octal or hexadecimal representation of
|
||||
"]" can also be used to end a range.
|
||||
</P>
|
||||
<P>
|
||||
An error is generated if a POSIX character class (see below) or an escape
|
||||
sequence other than one that defines a single character appears at a point
|
||||
where a range ending character is expected. For example, [z-\xff] is valid,
|
||||
but [A-\d] and [A-[:digit:]] are not.
|
||||
</P>
|
||||
<P>
|
||||
Ranges operate in the collating sequence of character values. They can also be
|
||||
used for characters specified numerically, for example [\000-\037]. Ranges
|
||||
can include any characters that are valid for the current mode.
|
||||
@ -1269,9 +1317,9 @@ something AND NOT ...".
|
||||
The only metacharacters that are recognized in character classes are backslash,
|
||||
hyphen (only where it can be interpreted as specifying a range), circumflex
|
||||
(only at the start), opening square bracket (only when it can be interpreted as
|
||||
introducing a POSIX class name - see the next section), and the terminating
|
||||
closing square bracket. However, escaping other non-alphanumeric characters
|
||||
does no harm.
|
||||
introducing a POSIX class name, or for a special compatibility feature - see
|
||||
the next two sections), and the terminating closing square bracket. However,
|
||||
escaping other non-alphanumeric characters does no harm.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">POSIX CHARACTER CLASSES</a><br>
|
||||
<P>
|
||||
@ -1294,15 +1342,17 @@ are:
|
||||
lower lower case letters
|
||||
print printing characters, including space
|
||||
punct printing characters, excluding letters and digits and space
|
||||
space white space (not quite the same as \s)
|
||||
space white space (the same as \s from PCRE 8.34)
|
||||
upper upper case letters
|
||||
word "word" characters (same as \w)
|
||||
xdigit hexadecimal digits
|
||||
</pre>
|
||||
The "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), and
|
||||
space (32). Notice that this list includes the VT character (code 11). This
|
||||
makes "space" different to \s, which does not include VT (for Perl
|
||||
compatibility).
|
||||
The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13),
|
||||
and space (32). If locale-specific matching is taking place, the list of space
|
||||
characters may be different; there may be fewer or more of them. "Space" used
|
||||
to be different to \s, which did not include VT, for Perl compatibility.
|
||||
However, Perl changed at release 5.18, and PCRE followed at release 8.34.
|
||||
"Space" and \s now match the same set of characters.
|
||||
</P>
|
||||
<P>
|
||||
The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
|
||||
@ -1316,11 +1366,11 @@ syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
|
||||
supported, and an error is given if they are encountered.
|
||||
</P>
|
||||
<P>
|
||||
By default, in UTF modes, characters with values greater than 128 do not match
|
||||
any of the POSIX character classes. However, if the PCRE_UCP option is passed
|
||||
to <b>pcre_compile()</b>, some of the classes are changed so that Unicode
|
||||
character properties are used. This is achieved by replacing the POSIX classes
|
||||
by other sequences, as follows:
|
||||
By default, characters with values greater than 127 do not match any of the
|
||||
POSIX character classes. However, if the PCRE_UCP option is passed to
|
||||
<b>pcre_compile()</b>, some of the classes are changed so that Unicode character
|
||||
properties are used. This is achieved by replacing certain POSIX classes by
|
||||
other sequences, as follows:
|
||||
<pre>
|
||||
[:alnum:] becomes \p{Xan}
|
||||
[:alpha:] becomes \p{L}
|
||||
@ -1331,11 +1381,56 @@ by other sequences, as follows:
|
||||
[:upper:] becomes \p{Lu}
|
||||
[:word:] becomes \p{Xwd}
|
||||
</pre>
|
||||
Negated versions, such as [:^alpha:] use \P instead of \p. The other POSIX
|
||||
classes are unchanged, and match only characters with code points less than
|
||||
128.
|
||||
Negated versions, such as [:^alpha:] use \P instead of \p. Three other POSIX
|
||||
classes are handled specially in UCP mode:
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">VERTICAL BAR</a><br>
|
||||
<P>
|
||||
[:graph:]
|
||||
This matches characters that have glyphs that mark the page when printed. In
|
||||
Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf
|
||||
properties, except for:
|
||||
<pre>
|
||||
U+061C Arabic Letter Mark
|
||||
U+180E Mongolian Vowel Separator
|
||||
U+2066 - U+2069 Various "isolate"s
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
[:print:]
|
||||
This matches the same characters as [:graph:] plus space characters that are
|
||||
not controls, that is, characters with the Zs property.
|
||||
</P>
|
||||
<P>
|
||||
[:punct:]
|
||||
This matches all characters that have the Unicode P (punctuation) property,
|
||||
plus those characters whose code points are less than 128 that have the S
|
||||
(Symbol) property.
|
||||
</P>
|
||||
<P>
|
||||
The other POSIX classes are unchanged, and match only characters with code
|
||||
points less than 128.
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">COMPATIBILITY FEATURE FOR WORD BOUNDARIES</a><br>
|
||||
<P>
|
||||
In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly
|
||||
syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of
|
||||
word". PCRE treats these items as follows:
|
||||
<pre>
|
||||
[[:<:]] is converted to \b(?=\w)
|
||||
[[:>:]] is converted to \b(?<=\w)
|
||||
</pre>
|
||||
Only these exact character sequences are recognized. A sequence such as
|
||||
[a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is
|
||||
not compatible with Perl. It is provided to help migrations from other
|
||||
environments, and is best not used in any new patterns. Note that \b matches
|
||||
at the start and the end of a word (see
|
||||
<a href="#smallassertions">"Simple assertions"</a>
|
||||
above), and in a Perl-style pattern the preceding or following character
|
||||
normally shows which is wanted, without the need for the assertions that are
|
||||
used above in order to give exactly the POSIX behaviour.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">VERTICAL BAR</a><br>
|
||||
<P>
|
||||
Vertical bar characters are used to separate alternative patterns. For example,
|
||||
the pattern
|
||||
@ -1350,7 +1445,7 @@ that succeeds is used. If the alternatives are within a subpattern
|
||||
"succeeds" means matching the rest of the main pattern as well as the
|
||||
alternative in the subpattern.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">INTERNAL OPTION SETTING</a><br>
|
||||
<br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
|
||||
<P>
|
||||
The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
|
||||
PCRE_EXTENDED options (which are Perl-compatible) can be changed from within
|
||||
@ -1413,7 +1508,7 @@ options, respectively. The (*UTF) sequence is a generic version that can be
|
||||
used with any of the libraries. However, the application can set the
|
||||
PCRE_NEVER_UTF option, which locks out the use of the (*UTF) sequences.
|
||||
<a name="subpattern"></a></P>
|
||||
<br><a name="SEC13" href="#TOC1">SUBPATTERNS</a><br>
|
||||
<br><a name="SEC14" href="#TOC1">SUBPATTERNS</a><br>
|
||||
<P>
|
||||
Subpatterns are delimited by parentheses (round brackets), which can be nested.
|
||||
Turning part of a pattern into a subpattern does two things:
|
||||
@ -1469,7 +1564,7 @@ from left to right, and options are not reset until the end of the subpattern
|
||||
is reached, an option setting in one branch does affect subsequent branches, so
|
||||
the above patterns match "SUNDAY" as well as "Saturday".
|
||||
<a name="dupsubpatternnumber"></a></P>
|
||||
<br><a name="SEC14" href="#TOC1">DUPLICATE SUBPATTERN NUMBERS</a><br>
|
||||
<br><a name="SEC15" href="#TOC1">DUPLICATE SUBPATTERN NUMBERS</a><br>
|
||||
<P>
|
||||
Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
|
||||
the same numbers for its capturing parentheses. Such a subpattern starts with
|
||||
@ -1513,7 +1608,7 @@ true if any of the subpatterns of that number have matched.
|
||||
An alternative approach to using this "branch reset" feature is to use
|
||||
duplicate named subpatterns, as described in the next section.
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
||||
<br><a name="SEC16" href="#TOC1">NAMED SUBPATTERNS</a><br>
|
||||
<P>
|
||||
Identifying capturing parentheses by number is simple, but it can be very hard
|
||||
to keep track of the numbers in complicated regular expressions. Furthermore,
|
||||
@ -1535,11 +1630,12 @@ and
|
||||
can be made by name as well as by number.
|
||||
</P>
|
||||
<P>
|
||||
Names consist of up to 32 alphanumeric characters and underscores. Named
|
||||
capturing parentheses are still allocated numbers as well as names, exactly as
|
||||
if the names were not present. The PCRE API provides function calls for
|
||||
extracting the name-to-number translation table from a compiled pattern. There
|
||||
is also a convenience function for extracting a captured substring by name.
|
||||
Names consist of up to 32 alphanumeric characters and underscores, but must
|
||||
start with a non-digit. Named capturing parentheses are still allocated numbers
|
||||
as well as names, exactly as if the names were not present. The PCRE API
|
||||
provides function calls for extracting the name-to-number translation table
|
||||
from a compiled pattern. There is also a convenience function for extracting a
|
||||
captured substring by name.
|
||||
</P>
|
||||
<P>
|
||||
By default, a name must be unique within a pattern, but it is possible to relax
|
||||
@ -1568,9 +1664,23 @@ matched. This saves searching to find which numbered subpattern it was.
|
||||
</P>
|
||||
<P>
|
||||
If you make a back reference to a non-unique named subpattern from elsewhere in
|
||||
the pattern, the one that corresponds to the first occurrence of the name is
|
||||
used. In the absence of duplicate numbers (see the previous section) this is
|
||||
the one with the lowest number. If you use a named reference in a condition
|
||||
the pattern, the subpatterns to which the name refers are checked in the order
|
||||
in which they appear in the overall pattern. The first one that is set is used
|
||||
for the reference. For example, this pattern matches both "foofoo" and
|
||||
"barbar" but not "foobar" or "barfoo":
|
||||
<pre>
|
||||
(?:(?<n>foo)|(?<n>bar))\k<n>
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
If you make a subroutine call to a non-unique named subpattern, the one that
|
||||
corresponds to the first occurrence of the name is used. In the absence of
|
||||
duplicate numbers (see the previous section) this is the one with the lowest
|
||||
number.
|
||||
</P>
|
||||
<P>
|
||||
If you use a named reference in a condition
|
||||
test (see the
|
||||
<a href="#conditions">section about conditions</a>
|
||||
below), either to check whether a subpattern has matched, or to check for
|
||||
@ -1585,10 +1695,11 @@ documentation.
|
||||
<b>Warning:</b> You cannot use different names to distinguish between two
|
||||
subpatterns with the same number because PCRE uses only the numbers when
|
||||
matching. For this reason, an error is given at compile time if different names
|
||||
are given to subpatterns with the same number. However, you can give the same
|
||||
name to subpatterns with the same number, even when PCRE_DUPNAMES is not set.
|
||||
are given to subpatterns with the same number. However, you can always give the
|
||||
same name to subpatterns with the same number, even when PCRE_DUPNAMES is not
|
||||
set.
|
||||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">REPETITION</a><br>
|
||||
<br><a name="SEC17" href="#TOC1">REPETITION</a><br>
|
||||
<P>
|
||||
Repetition is specified by quantifiers, which can follow any of the following
|
||||
items:
|
||||
@ -1756,7 +1867,7 @@ example, after
|
||||
</pre>
|
||||
matches "aba" the value of the second captured substring is "b".
|
||||
<a name="atomicgroup"></a></P>
|
||||
<br><a name="SEC17" href="#TOC1">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a><br>
|
||||
<br><a name="SEC18" href="#TOC1">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a><br>
|
||||
<P>
|
||||
With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
|
||||
repetition, failure of what follows normally causes the repeated item to be
|
||||
@ -1860,7 +1971,7 @@ an atomic group, like this:
|
||||
</pre>
|
||||
sequences of non-digits cannot be broken, and failure happens quickly.
|
||||
<a name="backreferences"></a></P>
|
||||
<br><a name="SEC18" href="#TOC1">BACK REFERENCES</a><br>
|
||||
<br><a name="SEC19" href="#TOC1">BACK REFERENCES</a><br>
|
||||
<P>
|
||||
Outside a character class, a backslash followed by a digit greater than 0 (and
|
||||
possibly further digits) is a back reference to a capturing subpattern earlier
|
||||
@ -1988,7 +2099,7 @@ as an
|
||||
Once the whole group has been matched, a subsequent matching failure cannot
|
||||
cause backtracking into the middle of the group.
|
||||
<a name="bigassertions"></a></P>
|
||||
<br><a name="SEC19" href="#TOC1">ASSERTIONS</a><br>
|
||||
<br><a name="SEC20" href="#TOC1">ASSERTIONS</a><br>
|
||||
<P>
|
||||
An assertion is a test on the characters following or preceding the current
|
||||
matching point that does not actually consume any characters. The simple
|
||||
@ -2178,7 +2289,7 @@ preceded by "foo", while
|
||||
is another pattern that matches "foo" preceded by three digits and any three
|
||||
characters that are not "999".
|
||||
<a name="conditions"></a></P>
|
||||
<br><a name="SEC20" href="#TOC1">CONDITIONAL SUBPATTERNS</a><br>
|
||||
<br><a name="SEC21" href="#TOC1">CONDITIONAL SUBPATTERNS</a><br>
|
||||
<P>
|
||||
It is possible to cause the matching process to obey a subpattern
|
||||
conditionally or to choose between two alternative subpatterns, depending on
|
||||
@ -2252,12 +2363,7 @@ Checking for a used subpattern by name
|
||||
<P>
|
||||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
||||
subpattern by name. For compatibility with earlier versions of PCRE, which had
|
||||
this facility before Perl, the syntax (?(name)...) is also recognized. However,
|
||||
there is a possible ambiguity with this syntax, because subpattern names may
|
||||
consist entirely of digits. PCRE looks first for a named subpattern; if it
|
||||
cannot find one and the name consists entirely of digits, PCRE looks for a
|
||||
subpattern of that number, which must be greater than zero. Using subpattern
|
||||
names that consist entirely of digits is not recommended.
|
||||
this facility before Perl, the syntax (?(name)...) is also recognized.
|
||||
</P>
|
||||
<P>
|
||||
Rewriting the above example to use a named subpattern gives this:
|
||||
@ -2333,7 +2439,7 @@ subject is matched against the first alternative; otherwise it is matched
|
||||
against the second. This pattern matches strings in one of the two forms
|
||||
dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
|
||||
<a name="comments"></a></P>
|
||||
<br><a name="SEC21" href="#TOC1">COMMENTS</a><br>
|
||||
<br><a name="SEC22" href="#TOC1">COMMENTS</a><br>
|
||||
<P>
|
||||
There are two ways of including comments in patterns that are processed by
|
||||
PCRE. In both cases, the start of the comment must not be in a character class,
|
||||
@ -2362,7 +2468,7 @@ a newline in the pattern. The sequence \n is still literal at this stage, so
|
||||
it does not terminate the comment. Only an actual character with the code value
|
||||
0x0a (the default newline) does so.
|
||||
<a name="recursion"></a></P>
|
||||
<br><a name="SEC22" href="#TOC1">RECURSIVE PATTERNS</a><br>
|
||||
<br><a name="SEC23" href="#TOC1">RECURSIVE PATTERNS</a><br>
|
||||
<P>
|
||||
Consider the problem of matching a string in parentheses, allowing for
|
||||
unlimited nested parentheses. Without the use of recursion, the best that can
|
||||
@ -2577,7 +2683,7 @@ now match "b" and so the whole match succeeds. In Perl, the pattern fails to
|
||||
match because inside the recursive call \1 cannot access the externally set
|
||||
value.
|
||||
<a name="subpatternsassubroutines"></a></P>
|
||||
<br><a name="SEC23" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
|
||||
<br><a name="SEC24" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
|
||||
<P>
|
||||
If the syntax for a recursive subpattern call (either by number or by
|
||||
name) is used outside the parentheses to which it refers, it operates like a
|
||||
@ -2618,7 +2724,7 @@ different calls. For example, consider this pattern:
|
||||
It matches "abcabc". It does not match "abcABC" because the change of
|
||||
processing option does not affect the called subpattern.
|
||||
<a name="onigurumasubroutines"></a></P>
|
||||
<br><a name="SEC24" href="#TOC1">ONIGURUMA SUBROUTINE SYNTAX</a><br>
|
||||
<br><a name="SEC25" href="#TOC1">ONIGURUMA SUBROUTINE SYNTAX</a><br>
|
||||
<P>
|
||||
For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or
|
||||
a number enclosed either in angle brackets or single quotes, is an alternative
|
||||
@ -2636,7 +2742,7 @@ plus or a minus sign it is taken as a relative reference. For example:
|
||||
Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are <i>not</i>
|
||||
synonymous. The former is a back reference; the latter is a subroutine call.
|
||||
</P>
|
||||
<br><a name="SEC25" href="#TOC1">CALLOUTS</a><br>
|
||||
<br><a name="SEC26" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl
|
||||
code to be obeyed in the middle of matching a regular expression. This makes it
|
||||
@ -2674,12 +2780,18 @@ During matching, when PCRE reaches a callout point, the external function is
|
||||
called. It is provided with the number of the callout, the position in the
|
||||
pattern, and, optionally, one item of data originally supplied by the caller of
|
||||
the matching function. The callout function may cause matching to proceed, to
|
||||
backtrack, or to fail altogether. A complete description of the interface to
|
||||
the callout function is given in the
|
||||
backtrack, or to fail altogether.
|
||||
</P>
|
||||
<P>
|
||||
By default, PCRE implements a number of optimizations at compile time and
|
||||
matching time, and one side-effect is that sometimes callouts are skipped. If
|
||||
you need all possible callouts to happen, you need to set options that disable
|
||||
the relevant optimizations. More details, and a complete description of the
|
||||
interface to the callout function, are given in the
|
||||
<a href="pcrecallout.html"><b>pcrecallout</b></a>
|
||||
documentation.
|
||||
<a name="backtrackcontrol"></a></P>
|
||||
<br><a name="SEC26" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||
<br><a name="SEC27" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||
<P>
|
||||
Perl 5.10 introduced a number of "Special Backtracking Control Verbs", which
|
||||
are still described in the Perl documentation as "experimental and subject to
|
||||
@ -3026,7 +3138,7 @@ example:
|
||||
<pre>
|
||||
...(*COMMIT)(*PRUNE)...
|
||||
</pre>
|
||||
If there is a matching failure to the right, backtracking onto (*PRUNE) cases
|
||||
If there is a matching failure to the right, backtracking onto (*PRUNE) causes
|
||||
it to be triggered, and its action is taken. There can never be a backtrack
|
||||
onto (*COMMIT).
|
||||
<a name="btrepeat"></a></P>
|
||||
@ -3093,12 +3205,12 @@ the subroutine match to fail.
|
||||
the subpattern that has alternatives. If there is no such group within the
|
||||
subpattern, (*THEN) causes the subroutine match to fail.
|
||||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC28" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcreapi</b>(3), <b>pcrecallout</b>(3), <b>pcrematching</b>(3),
|
||||
<b>pcresyntax</b>(3), <b>pcre</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC28" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC29" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
@ -3107,9 +3219,9 @@ University Computing Service
|
||||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC29" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 26 April 2013
|
||||
Last updated: 03 December 2013
|
||||
<br>
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
|
@ -13,7 +13,7 @@ from the original man page. If there is any nonsense in it, please consult the
|
||||
man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<ul>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF POSIX API</a>
|
||||
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
|
||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
|
||||
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
|
||||
@ -23,23 +23,21 @@ man page, in case the conversion went wrong.
|
||||
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
|
||||
<li><a name="TOC9" href="#SEC9">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF POSIX API</a><br>
|
||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||
<P>
|
||||
<b>#include <pcreposix.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
|
||||
<b>int <i>cflags</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> int <i>cflags</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
|
||||
<b>size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
||||
<b>char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
<b> size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
|
||||
<b> size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
|
||||
<b> char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>void regfree(regex_t *<i>preg</i>);</b>
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||
|
@ -102,8 +102,8 @@ study data.
|
||||
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
|
||||
<P>
|
||||
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
||||
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary,
|
||||
you pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
||||
memory, called <b>pcre[16|32]_pattern_to_host_byte_order()</b> if necessary, you
|
||||
pass its pointer to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> in
|
||||
the usual way.
|
||||
</P>
|
||||
<P>
|
||||
@ -119,6 +119,11 @@ in the
|
||||
documentation.
|
||||
</P>
|
||||
<P>
|
||||
<b>Warning:</b> The tables that <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> use
|
||||
must be the same as those that were used when the pattern was compiled. If this
|
||||
is not the case, the behaviour is undefined.
|
||||
</P>
|
||||
<P>
|
||||
If you did not provide custom character tables when the pattern was compiled,
|
||||
the pointer in the compiled pattern is NULL, which causes the matching
|
||||
functions to use PCRE's internal tables. Thus, you do not need to take any
|
||||
@ -126,9 +131,9 @@ special action at run time in this case.
|
||||
</P>
|
||||
<P>
|
||||
If you saved study data with the compiled pattern, you need to create your own
|
||||
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point to the
|
||||
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
||||
<i>flags</i> field to indicate that study data is present. Then pass the
|
||||
<b>pcre[16|32]_extra</b> data block and set the <i>study_data</i> field to point
|
||||
to the reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in
|
||||
the <i>flags</i> field to indicate that study data is present. Then pass the
|
||||
<b>pcre[16|32]_extra</b> block to the matching function in the usual way. If the
|
||||
pattern was studied for just-in-time optimization, that data cannot be saved,
|
||||
and so is lost by a save/restore cycle.
|
||||
@ -149,9 +154,9 @@ Cambridge CB2 3QH, England.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 24 June 2012
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2012 University of Cambridge.
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE index page</a>.
|
||||
|
@ -65,10 +65,14 @@ documentation. This document contains a quick-reference summary of the syntax.
|
||||
\n newline (hex 0A)
|
||||
\r carriage return (hex 0D)
|
||||
\t tab (hex 09)
|
||||
\0dd character with octal code 0dd
|
||||
\ddd character with octal code ddd, or backreference
|
||||
\o{ddd..} character with octal code ddd..
|
||||
\xhh character with hex code hh
|
||||
\x{hhh..} character with hex code hhh..
|
||||
</PRE>
|
||||
</pre>
|
||||
Note that \0dd is always an octal code, and that \8 and \9 are the literal
|
||||
characters "8" and "9".
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
|
||||
<P>
|
||||
@ -92,9 +96,11 @@ documentation. This document contains a quick-reference summary of the syntax.
|
||||
\W a "non-word" character
|
||||
\X a Unicode extended grapheme cluster
|
||||
</pre>
|
||||
In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
|
||||
characters, even in a UTF mode. However, this can be changed by setting the
|
||||
PCRE_UCP option.
|
||||
By default, \d, \s, and \w match only ASCII characters, even in UTF-8 mode
|
||||
or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
|
||||
happening, \s and \w may also match characters with code points in the range
|
||||
128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences
|
||||
is changed to use Unicode properties and they match many more characters.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
|
||||
<P>
|
||||
@ -150,11 +156,13 @@ PCRE_UCP option.
|
||||
<pre>
|
||||
Xan Alphanumeric: union of properties L and N
|
||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||
Xuc Universally-named character: one that can be
|
||||
represented by a Universal Character Name
|
||||
Xwd Perl word: property Xan or underscore
|
||||
</PRE>
|
||||
</pre>
|
||||
Perl and POSIX space are now the same. Perl added VT to its space character set
|
||||
at release 5.18 and PCRE changed at release 8.34.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
|
||||
<P>
|
||||
@ -385,7 +393,9 @@ newline-setting options with similar syntax:
|
||||
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
||||
(*UTF) set appropriate UTF mode for the library in use
|
||||
(*UCP) set PCRE_UCP (use Unicode properties for \d etc)
|
||||
</PRE>
|
||||
</pre>
|
||||
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||
limits set by the caller of pcre_exec(), not increase them.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
|
||||
<P>
|
||||
@ -516,7 +526,7 @@ Cambridge CB2 3QH, England.
|
||||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 26 April 2013
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
|
@ -187,6 +187,11 @@ equivalent to adding <b>/M</b> to each regular expression. The size is given in
|
||||
bytes for both libraries.
|
||||
</P>
|
||||
<P>
|
||||
<b>-O</b>
|
||||
Behave as if each pattern has the <b>/O</b> modifier, that is, disable
|
||||
auto-possessification for all patterns.
|
||||
</P>
|
||||
<P>
|
||||
<b>-o</b> <i>osize</i>
|
||||
Set the number of elements in the output vector that is used when calling
|
||||
<b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b> to be <i>osize</i>. The
|
||||
@ -256,19 +261,24 @@ should never be studied (see the <b>/S</b> pattern modifier below).
|
||||
</P>
|
||||
<P>
|
||||
<b>-t</b>
|
||||
Run each compile, study, and match many times with a timer, and output
|
||||
resulting time per compile or match (in milliseconds). Do not set <b>-m</b> with
|
||||
<b>-t</b>, because you will then get the size output a zillion times, and the
|
||||
timing will be distorted. You can control the number of iterations that are
|
||||
used for timing by following <b>-t</b> with a number (as a separate item on the
|
||||
command line). For example, "-t 1000" would iterate 1000 times. The default is
|
||||
to iterate 500000 times.
|
||||
Run each compile, study, and match many times with a timer, and output the
|
||||
resulting times per compile, study, or match (in milliseconds). Do not set
|
||||
<b>-m</b> with <b>-t</b>, because you will then get the size output a zillion
|
||||
times, and the timing will be distorted. You can control the number of
|
||||
iterations that are used for timing by following <b>-t</b> with a number (as a
|
||||
separate item on the command line). For example, "-t 1000" iterates 1000 times.
|
||||
The default is to iterate 500000 times.
|
||||
</P>
|
||||
<P>
|
||||
<b>-tm</b>
|
||||
This is like <b>-t</b> except that it times only the matching phase, not the
|
||||
compile or study phases.
|
||||
</P>
|
||||
<P>
|
||||
<b>-T</b> <b>-TM</b>
|
||||
These behave like <b>-t</b> and <b>-tm</b>, but in addition, at the end of a run,
|
||||
the total times for all compiles, studies, and matches are output.
|
||||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
If <b>pcretest</b> is given two filename arguments, it reads from the first and
|
||||
@ -287,7 +297,7 @@ option states whether or not <b>readline()</b> will be used.
|
||||
<P>
|
||||
The program handles any number of sets of input on a single input file. Each
|
||||
set starts with a regular expression, and continues with any number of data
|
||||
lines to be matched against the pattern.
|
||||
lines to be matched against that pattern.
|
||||
</P>
|
||||
<P>
|
||||
Each data line is matched separately and independently. If you want to do
|
||||
@ -361,6 +371,7 @@ sections.
|
||||
<b>/M</b> show compiled memory size
|
||||
<b>/m</b> set PCRE_MULTILINE
|
||||
<b>/N</b> set PCRE_NO_AUTO_CAPTURE
|
||||
<b>/O</b> set PCRE_NO_AUTO_POSSESS
|
||||
<b>/P</b> use the POSIX wrapper
|
||||
<b>/S</b> study the pattern after compilation
|
||||
<b>/s</b> set PCRE_DOTALL
|
||||
@ -419,6 +430,7 @@ options that do not correspond to anything in Perl:
|
||||
<b>/f</b> PCRE_FIRSTLINE
|
||||
<b>/J</b> PCRE_DUPNAMES
|
||||
<b>/N</b> PCRE_NO_AUTO_CAPTURE
|
||||
<b>/O</b> PCRE_NO_AUTO_POSSESS
|
||||
<b>/U</b> PCRE_UNGREEDY
|
||||
<b>/W</b> PCRE_UCP
|
||||
<b>/X</b> PCRE_EXTRA
|
||||
@ -562,8 +574,8 @@ matched. There are a number of qualifying characters that may follow <b>/S</b>.
|
||||
They may appear in any order.
|
||||
</P>
|
||||
<P>
|
||||
If <b>S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is called
|
||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||
If <b>/S</b> is followed by an exclamation mark, <b>pcre[16|32]_study()</b> is
|
||||
called with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||
<b>pcre_extra</b> block, even when studying discovers no useful information.
|
||||
</P>
|
||||
<P>
|
||||
@ -642,6 +654,37 @@ function:
|
||||
The <b>/+</b> modifier works as described above. All other modifiers are
|
||||
ignored.
|
||||
</P>
|
||||
<br><b>
|
||||
Locking out certain modifiers
|
||||
</b><br>
|
||||
<P>
|
||||
PCRE can be compiled with or without support for certain features such as
|
||||
UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
|
||||
into a number of different files that are selected for running depending on
|
||||
which features are available. When updating the tests, it is all too easy to
|
||||
put a new test into the wrong file by mistake; for example, to put a test that
|
||||
requires UTF support into a file that is used when it is not available. To help
|
||||
detect such mistakes as early as possible, there is a facility for locking out
|
||||
specific modifiers. If an input line for <b>pcretest</b> starts with the string
|
||||
"< forbid " the following sequence of characters is taken as a list of
|
||||
forbidden modifiers. For example, in the test files that must not use UTF or
|
||||
Unicode property support, this line appears:
|
||||
<pre>
|
||||
< forbid 8W
|
||||
</pre>
|
||||
This locks out the /8 and /W modifiers. An immediate error is given if they are
|
||||
subsequently encountered. If the character string contains < but not >, all the
|
||||
multi-character modifiers that begin with < are locked out. Otherwise, such
|
||||
modifiers must be explicitly listed, for example:
|
||||
<pre>
|
||||
< forbid <JS><cr>
|
||||
</pre>
|
||||
There must be a single space between < and "forbid" for this feature to be
|
||||
recognised. If there is not, the line is interpreted either as a request to
|
||||
re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
|
||||
below) or, if there is another < character, as a pattern that uses < as its
|
||||
delimiter.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">DATA LINES</a><br>
|
||||
<P>
|
||||
Before each data line is passed to <b>pcre[16|32]_exec()</b>, leading and trailing
|
||||
@ -662,6 +705,7 @@ recognized:
|
||||
\v vertical tab (\x0b)
|
||||
\nnn octal character (up to 3 octal digits); always
|
||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||
\o{dd...} octal character (any number of octal digits)
|
||||
\xhh hexadecimal byte (up to 2 hex digits)
|
||||
\x{hh...} hexadecimal character (any number of hex digits)
|
||||
\A pass the PCRE_ANCHORED option to <b>pcre[16|32]_exec()</b> or <b>pcre[16|32]_dfa_exec()</b>
|
||||
@ -1031,10 +1075,9 @@ writing the file, <b>pcretest</b> expects to read a new pattern.
|
||||
</P>
|
||||
<P>
|
||||
A saved pattern can be reloaded into <b>pcretest</b> by specifying < and a file
|
||||
name instead of a pattern. The name of the file must not contain a < character,
|
||||
as otherwise <b>pcretest</b> will interpret the line as a pattern delimited by <
|
||||
characters.
|
||||
For example:
|
||||
name instead of a pattern. There must be no space between < and the file name,
|
||||
which must not contain a < character, as otherwise <b>pcretest</b> will
|
||||
interpret the line as a pattern delimited by < characters. For example:
|
||||
<pre>
|
||||
re> </some/file
|
||||
Compiled pattern loaded from /some/file
|
||||
@ -1091,7 +1134,7 @@ Cambridge CB2 3QH, England.
|
||||
</P>
|
||||
<br><a name="SEC17" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 26 April 2013
|
||||
Last updated: 12 November 2013
|
||||
<br>
|
||||
Copyright © 1997-2013 University of Cambridge.
|
||||
<br>
|
||||
|
@ -4,11 +4,11 @@ pcre-config - program to return PCRE configuration
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||
.ti +5n
|
||||
.B [--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
||||
.ti +5n
|
||||
.B [--cflags] [--cflags-posix]
|
||||
.B " [--libs16] [--libs32] [--libs-cpp] [--libs-posix]"
|
||||
.B " [--cflags] [--cflags-posix]"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
|
@ -8,8 +8,8 @@ NAME
|
||||
SYNOPSIS
|
||||
|
||||
pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
|
||||
[--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
||||
[--cflags] [--cflags-posix]
|
||||
[--libs16] [--libs32] [--libs-cpp] [--libs-posix]
|
||||
[--cflags] [--cflags-posix]
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRE 3 "13 May 2013" "PCRE 8.33"
|
||||
.TH PCRE 3 "01 Oct 2013" "PCRE 8.33"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH INTRODUCTION
|
||||
@ -44,7 +44,7 @@ The current implementation of PCRE corresponds approximately with Perl 5.12,
|
||||
including support for UTF-8/16/32 encoded strings and Unicode general category
|
||||
properties. However, UTF-8/16/32 and Unicode support has to be explicitly
|
||||
enabled; it is not the default. The Unicode tables correspond to Unicode
|
||||
release 6.2.0.
|
||||
release 6.3.0.
|
||||
.P
|
||||
In addition to the Perl-compatible matching function, PCRE contains an
|
||||
alternative function that matches the same compiled patterns in a different
|
||||
|
2291
pcre/doc/pcre.txt
2291
pcre/doc/pcre.txt
File diff suppressed because it is too large
Load Diff
@ -8,140 +8,120 @@ PCRE - Perl-compatible regular expressions
|
||||
.SH "PCRE 16-BIT API BASIC FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.SM
|
||||
.nf
|
||||
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B void pcre16_free_study(pcre16_extra *\fIextra\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 16-BIT API STRING EXTRACTION FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIname\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIname\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
|
||||
.sp
|
||||
.B void pcre16_free_substring(PCRE_SPTR16 \fIstringptr\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre16_free_substring_list(PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 16-BIT API AUXILIARY FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre16_jit_stack_free(pcre16_jit_stack *\fIstack\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B const unsigned char *pcre16_maketables(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre16_refcount(pcre16 *\fIcode\fP, int \fIadjust\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre16_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B const char *pcre16_version(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 16-BIT API INDIRECTED FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B void *(*pcre16_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre16_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B void *(*pcre16_stack_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre16_stack_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B int (*pcre16_callout)(pcre16_callout_block *);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 16-BIT API 16-BIT-ONLY FUNCTION"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
||||
.ti +5n
|
||||
.B int \fIkeep_boms\fP);
|
||||
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
|
||||
.B " int \fIkeep_boms\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "THE PCRE 16-BIT LIBRARY"
|
||||
|
@ -8,140 +8,119 @@ PCRE - Perl-compatible regular expressions
|
||||
.SH "PCRE 32-BIT API BASIC FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.SM
|
||||
.nf
|
||||
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B void pcre32_free_study(pcre32_extra *\fIextra\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 32-BIT API STRING EXTRACTION FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIname\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIname\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
|
||||
.sp
|
||||
.B void pcre32_free_substring(PCRE_SPTR32 \fIstringptr\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre32_free_substring_list(PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 32-BIT API AUXILIARY FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre32_jit_stack_free(pcre32_jit_stack *\fIstack\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B const unsigned char *pcre32_maketables(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre32_refcount(pcre32 *\fIcode\fP, int \fIadjust\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre32_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B const char *pcre32_version(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 32-BIT API INDIRECTED FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B void *(*pcre32_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre32_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B void *(*pcre32_stack_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre32_stack_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B int (*pcre32_callout)(pcre32_callout_block *);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 32-BIT API 32-BIT-ONLY FUNCTION"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,
|
||||
.ti +5n
|
||||
.B int \fIkeep_boms\fP);
|
||||
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIbyte_order\fP,"
|
||||
.B " int \fIkeep_boms\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "THE PCRE 32-BIT LIBRARY"
|
||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B void pcre16_assign_jit_stack(pcre16_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre16_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B void pcre32_assign_jit_stack(pcre32_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.B " pcre32_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRE_COMPILE 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRE_COMPILE 3 "01 October 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre16 *pcre16_compile(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre32 *pcre32_compile(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
@ -56,6 +51,7 @@ The option bits are:
|
||||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
@ -64,6 +60,8 @@ The option bits are:
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||
validity (only relevant if
|
||||
PCRE_UTF16 is set)
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRE_COMPILE2 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRE_COMPILE2 3 "01 October 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre16 *pcre16_compile2(PCRE_SPTR16 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre32 *pcre32_compile2(PCRE_SPTR32 \fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
@ -64,6 +56,7 @@ The option bits are:
|
||||
PCRE_FIRSTLINE Force matching to be before newline
|
||||
PCRE_JAVASCRIPT_COMPAT JavaScript compatibility
|
||||
PCRE_MULTILINE ^ and $ match newlines within data
|
||||
PCRE_NEVER_UTF Lock out UTF, e.g. via (*UTF)
|
||||
PCRE_NEWLINE_ANY Recognize any Unicode newline sequence
|
||||
PCRE_NEWLINE_ANYCRLF Recognize CR, LF, and CRLF as newline
|
||||
sequences
|
||||
@ -72,6 +65,8 @@ The option bits are:
|
||||
PCRE_NEWLINE_LF Set LF as the newline sequence
|
||||
PCRE_NO_AUTO_CAPTURE Disable numbered capturing paren-
|
||||
theses (named ones available)
|
||||
PCRE_NO_AUTO_POSSESS Disable auto-possessification
|
||||
PCRE_NO_START_OPTIMIZE Disable match-time start optimizations
|
||||
PCRE_NO_UTF16_CHECK Do not check the pattern for UTF-16
|
||||
validity (only relevant if
|
||||
PCRE_UTF16 is set)
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRE_CONFIG 3 "05 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
@ -33,6 +33,7 @@ point to an unsigned long integer. The available codes are:
|
||||
target architecture for the JIT compiler,
|
||||
or NULL if there is no JIT support
|
||||
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
|
||||
PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
|
||||
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
|
||||
PCRE_CONFIG_MATCH_LIMIT_RECURSION
|
||||
Internal recursion depth limit
|
||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre16_copy_named_substring(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||
.B " PCRE_UCHAR16 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre32_copy_named_substring(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||
.B " PCRE_UCHAR32 *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre16_copy_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR16 *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre32_copy_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, PCRE_UCHAR32 *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.sp
|
||||
.B int pcre16_dfa_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.sp
|
||||
.B int pcre32_dfa_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre16_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre32_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre16_fullinfo(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre32_fullinfo(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||
.B " const char **\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_named_substring(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR16 \fIstringname\fP,"
|
||||
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_named_substring(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, PCRE_SPTR32 \fIstringname\fP,"
|
||||
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP);
|
||||
.PP
|
||||
.B " const char *\fIname\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_stringnumber(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIname\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIname\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_stringnumber(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIname\fP);
|
||||
.B " PCRE_SPTR32 \fIname\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
||||
.PP
|
||||
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_stringtable_entries(const pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIname\fP, PCRE_UCHAR16 **\fIfirst\fP, PCRE_UCHAR16 **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_stringtable_entries(const pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);
|
||||
.B " PCRE_SPTR32 \fIname\fP, PCRE_UCHAR32 **\fIfirst\fP, PCRE_UCHAR32 **\fIlast\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,24 +6,19 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " const char **\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_substring(PCRE_SPTR16 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 *\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " PCRE_SPTR16 *\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_substring(PCRE_SPTR32 \fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 *\fIstringptr\fP);
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " PCRE_SPTR32 *\fIstringptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
|
||||
.sp
|
||||
.B int pcre16_get_substring_list(PCRE_SPTR16 \fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR16 **\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR16 **\fIlistptr\fP);"
|
||||
.sp
|
||||
.B int pcre32_get_substring_list(PCRE_SPTR32 \fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "PCRE_SPTR32 **\fIlistptr\fP);"
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, PCRE_SPTR32 **\fIlistptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,30 +6,22 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_stack *\fIjstack\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||
.sp
|
||||
.B int pcre16_jit_exec(const pcre16 *\fIcode\fP, "const pcre16_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR16 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_stack *\fIjstack\fP);
|
||||
.PP
|
||||
.B " PCRE_SPTR16 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||
.sp
|
||||
.B int pcre32_jit_exec(const pcre32 *\fIcode\fP, "const pcre32_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "PCRE_SPTR32 \fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_stack *\fIjstack\fP);
|
||||
.B " PCRE_SPTR32 \fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP,
|
||||
.ti +5n
|
||||
.B int \fImaxsize\fP);
|
||||
.PP
|
||||
.B " int \fImaxsize\fP);"
|
||||
.sp
|
||||
.B pcre16_jit_stack *pcre16_jit_stack_alloc(int \fIstartsize\fP,
|
||||
.ti +5n
|
||||
.B int \fImaxsize\fP);
|
||||
.PP
|
||||
.B " int \fImaxsize\fP);"
|
||||
.sp
|
||||
.B pcre32_jit_stack *pcre32_jit_stack_alloc(int \fIstartsize\fP,
|
||||
.ti +5n
|
||||
.B int \fImaxsize\fP);
|
||||
.B " int \fImaxsize\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.PP
|
||||
.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.sp
|
||||
.B int pcre16_pattern_to_host_byte_order(pcre16 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.PP
|
||||
.B " pcre16_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.sp
|
||||
.B int pcre32_pattern_to_host_byte_order(pcre32 *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.B " pcre32_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,18 +6,16 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B pcre16_extra *pcre16_study(const pcre16 *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B pcre32_extra *pcre32_study(const pcre32 *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *\fIoutput\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
|
||||
.ti +5n
|
||||
.B int \fIkeep_boms\fP);
|
||||
.B " PCRE_SPTR16 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
|
||||
.B " int \fIkeep_boms\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
|
@ -6,12 +6,11 @@ PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
.B #include <pcre.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int pcre32_utf32_to_host_byte_order(PCRE_UCHAR32 *\fIoutput\fP,
|
||||
.ti +5n
|
||||
.B PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,
|
||||
.ti +5n
|
||||
.B int \fIkeep_boms\fP);
|
||||
.B " PCRE_SPTR32 \fIinput\fP, int \fIlength\fP, int *\fIhost_byte_order\fP,"
|
||||
.B " int \fIkeep_boms\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCREAPI 3 "12 May 2013" "PCRE 8.33"
|
||||
.TH PCREAPI 3 "12 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.sp
|
||||
@ -8,138 +8,115 @@ PCRE - Perl-compatible regular expressions
|
||||
.SH "PCRE NATIVE API BASIC FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.SM
|
||||
.nf
|
||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.PP
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.PP
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.sp
|
||||
.B void pcre_free_study(pcre_extra *\fIextra\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.sp
|
||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE NATIVE API STRING EXTRACTION FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||
.B " const char **\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP);
|
||||
.PP
|
||||
.B " const char *\fIname\fP);"
|
||||
.sp
|
||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
||||
.PP
|
||||
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
|
||||
.sp
|
||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " const char **\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
||||
.PP
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
|
||||
.sp
|
||||
.B void pcre_free_substring(const char *\fIstringptr\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE NATIVE API AUXILIARY FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre_jit_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_stack *\fIjstack\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " pcre_jit_stack *\fIjstack\fP);"
|
||||
.sp
|
||||
.B pcre_jit_stack *pcre_jit_stack_alloc(int \fIstartsize\fP, int \fImaxsize\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre_jit_stack_free(pcre_jit_stack *\fIstack\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B void pcre_assign_jit_stack(pcre_extra *\fIextra\fP,
|
||||
.ti +5n
|
||||
.B pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);
|
||||
.PP
|
||||
.B " pcre_jit_callback \fIcallback\fP, void *\fIdata\fP);"
|
||||
.sp
|
||||
.B const unsigned char *pcre_maketables(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.PP
|
||||
.sp
|
||||
.B const char *pcre_version(void);
|
||||
.PP
|
||||
.sp
|
||||
.B int pcre_pattern_to_host_byte_order(pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);
|
||||
.B " pcre_extra *\fIextra\fP, const unsigned char *\fItables\fP);"
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE NATIVE API INDIRECTED FUNCTIONS"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B void *(*pcre_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B void *(*pcre_stack_malloc)(size_t);
|
||||
.PP
|
||||
.sp
|
||||
.B void (*pcre_stack_free)(void *);
|
||||
.PP
|
||||
.sp
|
||||
.B int (*pcre_callout)(pcre_callout_block *);
|
||||
.fi
|
||||
.
|
||||
.
|
||||
.SH "PCRE 8-BIT, 16-BIT, AND 32-BIT LIBRARIES"
|
||||
@ -482,6 +459,13 @@ the
|
||||
\fBpcreposix\fP
|
||||
.\"
|
||||
documentation.
|
||||
.sp
|
||||
PCRE_CONFIG_PARENS_LIMIT
|
||||
.sp
|
||||
The output is a long integer that gives the maximum depth of nesting of
|
||||
parentheses (of any kind) in a pattern. This limit is imposed to cap the amount
|
||||
of system stack used when a pattern is compiled. It is specified when PCRE is
|
||||
built; the default is 250.
|
||||
.sp
|
||||
PCRE_CONFIG_MATCH_LIMIT
|
||||
.sp
|
||||
@ -509,19 +493,16 @@ avoiding the use of the stack.
|
||||
.SH "COMPILING A PATTERN"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.sp
|
||||
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
|
||||
.ti +5n
|
||||
.B int *\fIerrorcodeptr\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
|
||||
.ti +5n
|
||||
.B const unsigned char *\fItableptr\fP);
|
||||
.B " int *\fIerrorcodeptr\fP,"
|
||||
.B " const char **\fIerrptr\fP, int *\fIerroffset\fP,"
|
||||
.B " const unsigned char *\fItableptr\fP);"
|
||||
.fi
|
||||
.P
|
||||
Either of the functions \fBpcre_compile()\fP or \fBpcre_compile2()\fP can be
|
||||
called to compile a pattern into an internal form. The only difference between
|
||||
@ -581,8 +562,9 @@ If the final argument, \fItableptr\fP, is NULL, PCRE uses a default set of
|
||||
character tables that are built when PCRE is compiled, using the default C
|
||||
locale. Otherwise, \fItableptr\fP must be an address that is the result of a
|
||||
call to \fBpcre_maketables()\fP. This value is stored with the compiled
|
||||
pattern, and used again by \fBpcre_exec()\fP, unless another table pointer is
|
||||
passed to it. For more discussion, see the section on locale support below.
|
||||
pattern, and used again by \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP when the
|
||||
pattern is matched. For more discussion, see the section on locale support
|
||||
below.
|
||||
.P
|
||||
This code fragment shows a typical straightforward call to \fBpcre_compile()\fP:
|
||||
.sp
|
||||
@ -670,12 +652,22 @@ documentation.
|
||||
.sp
|
||||
PCRE_EXTENDED
|
||||
.sp
|
||||
If this bit is set, white space data characters in the pattern are totally
|
||||
ignored except when escaped or inside a character class. White space does not
|
||||
include the VT character (code 11). In addition, characters between an
|
||||
unescaped # outside a character class and the next newline, inclusive, are also
|
||||
ignored. This is equivalent to Perl's /x option, and it can be changed within a
|
||||
pattern by a (?x) option setting.
|
||||
If this bit is set, most white space characters in the pattern are totally
|
||||
ignored except when escaped or inside a character class. However, white space
|
||||
is not allowed within sequences such as (?> that introduce various
|
||||
parenthesized subpatterns, nor within a numerical quantifier such as {1,3}.
|
||||
However, ignorable white space is permitted between an item and a following
|
||||
quantifier and between a quantifier and a following + that indicates
|
||||
possessiveness.
|
||||
.P
|
||||
White space formerly did not include the VT character (code 11), because Perl
|
||||
did not treat this character as white space. However, Perl changed at release
|
||||
5.18, so PCRE followed at release 8.34, and VT is now treated as white space.
|
||||
.P
|
||||
PCRE_EXTENDED also causes characters between an unescaped # outside a character
|
||||
class and the next newline, inclusive, to be ignored. PCRE_EXTENDED is
|
||||
equivalent to Perl's /x option, and it can be changed within a pattern by a
|
||||
(?x) option setting.
|
||||
.P
|
||||
Which characters are interpreted as newlines is controlled by the options
|
||||
passed to \fBpcre_compile()\fP or by a special sequence at the start of the
|
||||
@ -820,6 +812,15 @@ the pattern. Any opening parenthesis that is not followed by ? behaves as if it
|
||||
were followed by ?: but named parentheses can still be used for capturing (and
|
||||
they acquire numbers in the usual way). There is no equivalent of this option
|
||||
in Perl.
|
||||
.sp
|
||||
PCRE_NO_AUTO_POSSESS
|
||||
.sp
|
||||
If this option is set, it disables "auto-possessification". This is an
|
||||
optimization that, for example, turns a+b into a++b in order to avoid
|
||||
backtracks into a+ that can never be successful. However, if callouts are in
|
||||
use, auto-possessification means that some of them are never taken. You can set
|
||||
this option if you want the matching functions to do a full unoptimized search
|
||||
and run all the callouts, but it is mainly provided for testing purposes.
|
||||
.sp
|
||||
PCRE_NO_START_OPTIMIZE
|
||||
.sp
|
||||
@ -886,10 +887,10 @@ page. If an invalid UTF-8 sequence is found, \fBpcre_compile()\fP returns an
|
||||
error. If you already know that your pattern is valid, and you want to skip
|
||||
this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK option.
|
||||
When it is set, the effect of passing an invalid UTF-8 string as a pattern is
|
||||
undefined. It may cause your program to crash. Note that this option can also
|
||||
be passed to \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, to suppress the
|
||||
validity checking of subject strings only. If the same string is being matched
|
||||
many times, the option can be safely set for the second and subsequent
|
||||
undefined. It may cause your program to crash or loop. Note that this option
|
||||
can also be passed to \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, to suppress
|
||||
the validity checking of subject strings only. If the same string is being
|
||||
matched many times, the option can be safely set for the second and subsequent
|
||||
matchings to improve performance.
|
||||
.
|
||||
.
|
||||
@ -936,7 +937,7 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
||||
31 POSIX collating elements are not supported
|
||||
32 this version of PCRE is compiled without UTF support
|
||||
33 [this code is not in use]
|
||||
34 character value in \ex{...} sequence is too large
|
||||
34 character value in \ex{} or \eo{} is too large
|
||||
35 invalid condition (?(0)
|
||||
36 \eC not allowed in lookbehind assertion
|
||||
37 PCRE does not support \eL, \el, \eN{name}, \eU, or \eu
|
||||
@ -984,6 +985,12 @@ have fallen out of use. To avoid confusion, they have not been re-used.
|
||||
75 name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||
76 character value in \eu.... sequence is too large
|
||||
77 invalid UTF-32 string (specifically UTF-32)
|
||||
78 setting UTF is disabled by the application
|
||||
79 non-hex character in \ex{} (closing brace missing?)
|
||||
80 non-octal character in \eo{} (closing brace missing?)
|
||||
81 missing opening brace after \eo
|
||||
82 parentheses are too deeply nested
|
||||
83 invalid range in character class
|
||||
.sp
|
||||
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
|
||||
be used if the limits were changed when PCRE was built.
|
||||
@ -993,9 +1000,10 @@ be used if the limits were changed when PCRE was built.
|
||||
.SH "STUDYING A PATTERN"
|
||||
.rs
|
||||
.sp
|
||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP
|
||||
.ti +5n
|
||||
.B const char **\fIerrptr\fP);
|
||||
.nf
|
||||
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
|
||||
.B " const char **\fIerrptr\fP);"
|
||||
.fi
|
||||
.PP
|
||||
If a compiled pattern is going to be used several times, it is worth spending
|
||||
more time analyzing it in order to speed up the time taken for matching. The
|
||||
@ -1117,15 +1125,17 @@ below.
|
||||
.sp
|
||||
PCRE handles caseless matching, and determines whether characters are letters,
|
||||
digits, or whatever, by reference to a set of tables, indexed by character
|
||||
value. When running in UTF-8 mode, this applies only to characters
|
||||
with codes less than 128. By default, higher-valued codes never match escapes
|
||||
such as \ew or \ed, but they can be tested with \ep if PCRE is built with
|
||||
Unicode character property support. Alternatively, the PCRE_UCP option can be
|
||||
set at compile time; this causes \ew and friends to use Unicode property
|
||||
support instead of built-in tables. The use of locales with Unicode is
|
||||
discouraged. If you are handling characters with codes greater than 128, you
|
||||
should either use UTF-8 and Unicode, or use locales, but not try to mix the
|
||||
two.
|
||||
code point. When running in UTF-8 mode, or in the 16- or 32-bit libraries, this
|
||||
applies only to characters with code points less than 256. By default,
|
||||
higher-valued code points never match escapes such as \ew or \ed. However, if
|
||||
PCRE is built with Unicode property support, all characters can be tested with
|
||||
\ep and \eP, or, alternatively, the PCRE_UCP option can be set when a pattern
|
||||
is compiled; this causes \ew and friends to use Unicode property support
|
||||
instead of the built-in tables.
|
||||
.P
|
||||
The use of locales with Unicode is discouraged. If you are handling characters
|
||||
with code points greater than 127, you should either use Unicode support, or
|
||||
use locales, but not try to mix the two.
|
||||
.P
|
||||
PCRE contains an internal set of tables that are used when the final argument
|
||||
of \fBpcre_compile()\fP is NULL. These are sufficient for many applications.
|
||||
@ -1140,10 +1150,10 @@ for this locale support is expected to die away.
|
||||
.P
|
||||
External tables are built by calling the \fBpcre_maketables()\fP function,
|
||||
which has no arguments, in the relevant locale. The result can then be passed
|
||||
to \fBpcre_compile()\fP or \fBpcre_exec()\fP as often as necessary. For
|
||||
example, to build and use tables that are appropriate for the French locale
|
||||
(where accented characters with values greater than 128 are treated as letters),
|
||||
the following code could be used:
|
||||
to \fBpcre_compile()\fP as often as necessary. For example, to build and use
|
||||
tables that are appropriate for the French locale (where accented characters
|
||||
with values greater than 128 are treated as letters), the following code could
|
||||
be used:
|
||||
.sp
|
||||
setlocale(LC_CTYPE, "fr_FR");
|
||||
tables = pcre_maketables();
|
||||
@ -1159,24 +1169,29 @@ needed.
|
||||
.P
|
||||
The pointer that is passed to \fBpcre_compile()\fP is saved with the compiled
|
||||
pattern, and the same tables are used via this pointer by \fBpcre_study()\fP
|
||||
and normally also by \fBpcre_exec()\fP. Thus, by default, for any single
|
||||
and also by \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP. Thus, for any single
|
||||
pattern, compilation, studying and matching all happen in the same locale, but
|
||||
different patterns can be compiled in different locales.
|
||||
different patterns can be processed in different locales.
|
||||
.P
|
||||
It is possible to pass a table pointer or NULL (indicating the use of the
|
||||
internal tables) to \fBpcre_exec()\fP. Although not intended for this purpose,
|
||||
this facility could be used to match a pattern in a different locale from the
|
||||
one in which it was compiled. Passing table pointers at run time is discussed
|
||||
below in the section on matching a pattern.
|
||||
internal tables) to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP (see the
|
||||
discussion below in the section on matching a pattern). This facility is
|
||||
provided for use with pre-compiled patterns that have been saved and reloaded.
|
||||
Character tables are not saved with patterns, so if a non-standard table was
|
||||
used at compile time, it must be provided again when the reloaded pattern is
|
||||
matched. Attempting to use this facility to match a pattern in a different
|
||||
locale from the one in which it was compiled is likely to lead to anomalous
|
||||
(usually incorrect) results.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="infoaboutpattern"></a>
|
||||
.SH "INFORMATION ABOUT A PATTERN"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B int \fIwhat\fP, void *\fIwhere\fP);
|
||||
.B " int \fIwhat\fP, void *\fIwhere\fP);"
|
||||
.fi
|
||||
.PP
|
||||
The \fBpcre_fullinfo()\fP function returns information about a compiled
|
||||
pattern. It replaces the \fBpcre_info()\fP function, which was removed from the
|
||||
@ -1310,9 +1325,14 @@ only if it follows something of variable length. For example, for the pattern
|
||||
is -1.
|
||||
.P
|
||||
Since for the 32-bit library using the non-UTF-32 mode, this function is unable
|
||||
to return the full 32-bit range of the character, this value is deprecated;
|
||||
to return the full 32-bit range of characters, this value is deprecated;
|
||||
instead the PCRE_INFO_REQUIREDCHARFLAGS and PCRE_INFO_REQUIREDCHAR values should
|
||||
be used.
|
||||
.sp
|
||||
PCRE_INFO_MATCH_EMPTY
|
||||
.sp
|
||||
Return 1 if the pattern can match an empty string, otherwise 0. The fourth
|
||||
argument should point to an \fBint\fP variable.
|
||||
.sp
|
||||
PCRE_INFO_MATCHLIMIT
|
||||
.sp
|
||||
@ -1369,8 +1389,8 @@ most significant byte first. In the 16-bit library, the pointer points to
|
||||
contains the parenthesis number. The rest of the entry is the corresponding
|
||||
name, zero terminated.
|
||||
.P
|
||||
The names are in alphabetical order. Duplicate names may appear if (?| is used
|
||||
to create multiple groups with the same number, as described in the
|
||||
The names are in alphabetical order. If (?| is used to create multiple groups
|
||||
with the same number, as described in the
|
||||
.\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
|
||||
.\" </a>
|
||||
section on duplicate subpattern numbers
|
||||
@ -1379,11 +1399,13 @@ in the
|
||||
.\" HREF
|
||||
\fBpcrepattern\fP
|
||||
.\"
|
||||
page. Duplicate names for subpatterns with different numbers are permitted only
|
||||
if PCRE_DUPNAMES is set. In all cases of duplicate names, they appear in the
|
||||
table in the order in which they were found in the pattern. In the absence of
|
||||
(?| this is the order of increasing number; when (?| is used this is not
|
||||
necessarily the case because later subpatterns may have lower numbers.
|
||||
page, the groups may be given the same name, but there is only one entry in the
|
||||
table. Different names for groups of the same number are not permitted.
|
||||
Duplicate names for subpatterns with different numbers are permitted,
|
||||
but only if PCRE_DUPNAMES is set. They appear in the table in the order in
|
||||
which they were found in the pattern. In the absence of (?| this is the order
|
||||
of increasing number; when (?| is used this is not necessarily the case because
|
||||
later subpatterns may have lower numbers.
|
||||
.P
|
||||
As a simple example of the name/number table, consider the following pattern
|
||||
after compilation by the 8-bit library (assume PCRE_EXTENDED is set, so white
|
||||
@ -1501,25 +1523,13 @@ returned. For anchored patterns, 0 is returned.
|
||||
.sp
|
||||
PCRE_INFO_FIRSTCHARACTER
|
||||
.sp
|
||||
Return the fixed first character value, if PCRE_INFO_FIRSTCHARACTERFLAGS
|
||||
returned 1; otherwise returns 0. The fourth argument should point to an
|
||||
\fBuint_t\fP variable.
|
||||
Return the fixed first character value in the situation where
|
||||
PCRE_INFO_FIRSTCHARACTERFLAGS returns 1; otherwise return 0. The fourth
|
||||
argument should point to an \fBuint_t\fP variable.
|
||||
.P
|
||||
In the 8-bit library, the value is always less than 256. In the 16-bit library
|
||||
the value can be up to 0xffff. In the 32-bit library in UTF-32 mode the value
|
||||
can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32 mode.
|
||||
.P
|
||||
If there is no fixed first value, and if either
|
||||
.sp
|
||||
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
|
||||
starts with "^", or
|
||||
.sp
|
||||
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
|
||||
(if it were set, the pattern would be anchored),
|
||||
.sp
|
||||
-1 is returned, indicating that the pattern matches only at the start of a
|
||||
subject string or after any newline within the string. Otherwise -2 is
|
||||
returned. For anchored patterns, -2 is returned.
|
||||
.sp
|
||||
PCRE_INFO_REQUIREDCHARFLAGS
|
||||
.sp
|
||||
@ -1567,11 +1577,11 @@ is different. (This seems a highly unlikely scenario.)
|
||||
.SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
|
||||
.B " const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);"
|
||||
.fi
|
||||
.P
|
||||
The function \fBpcre_exec()\fP is called to match a subject string against a
|
||||
compiled pattern, which is passed in the \fIcode\fP argument. If the
|
||||
@ -1724,19 +1734,23 @@ and is described in the
|
||||
.\"
|
||||
documentation.
|
||||
.P
|
||||
The \fItables\fP field is used to pass a character tables pointer to
|
||||
\fBpcre_exec()\fP; this overrides the value that is stored with the compiled
|
||||
pattern. A non-NULL value is stored with the compiled pattern only if custom
|
||||
tables were supplied to \fBpcre_compile()\fP via its \fItableptr\fP argument.
|
||||
If NULL is passed to \fBpcre_exec()\fP using this mechanism, it forces PCRE's
|
||||
internal tables to be used. This facility is helpful when re-using patterns
|
||||
that have been saved after compiling with an external set of tables, because
|
||||
the external tables might be at a different address when \fBpcre_exec()\fP is
|
||||
called. See the
|
||||
The \fItables\fP field is provided for use with patterns that have been
|
||||
pre-compiled using custom character tables, saved to disc or elsewhere, and
|
||||
then reloaded, because the tables that were used to compile a pattern are not
|
||||
saved with it. See the
|
||||
.\" HREF
|
||||
\fBpcreprecompile\fP
|
||||
.\"
|
||||
documentation for a discussion of saving compiled patterns for later use.
|
||||
documentation for a discussion of saving compiled patterns for later use. If
|
||||
NULL is passed using this mechanism, it forces PCRE's internal tables to be
|
||||
used.
|
||||
.P
|
||||
\fBWarning:\fP The tables that \fBpcre_exec()\fP uses must be the same as those
|
||||
that were used when the pattern was compiled. If this is not the case, the
|
||||
behaviour of \fBpcre_exec()\fP is undefined. Therefore, when a pattern is
|
||||
compiled and matched in the same process, this field should never be set. In
|
||||
this (the most common) case, the correct table pointer is automatically passed
|
||||
with the compiled pattern from \fBpcre_compile()\fP to \fBpcre_exec()\fP.
|
||||
.P
|
||||
If PCRE_EXTRA_MARK is set in the \fIflags\fP field, the \fImark\fP field must
|
||||
be set to point to a suitable variable. If the pattern contains any
|
||||
@ -1960,7 +1974,7 @@ all the matches in a single subject string. However, you should be sure that
|
||||
the value of \fIstartoffset\fP points to the start of a character (or the end
|
||||
of the subject). When PCRE_NO_UTF8_CHECK is set, the effect of passing an
|
||||
invalid string as a subject or an invalid value of \fIstartoffset\fP is
|
||||
undefined. Your program may crash.
|
||||
undefined. Your program may crash or loop.
|
||||
.sp
|
||||
PCRE_PARTIAL_HARD
|
||||
PCRE_PARTIAL_SOFT
|
||||
@ -2423,21 +2437,18 @@ no longer in use and is never returned.
|
||||
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
|
||||
.ti +5n
|
||||
.B int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,"
|
||||
.B " int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, int \fIstringnumber\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.PP
|
||||
.B " int \fIstringcount\fP, int \fIstringnumber\fP,"
|
||||
.B " const char **\fIstringptr\fP);"
|
||||
.sp
|
||||
.B int pcre_get_substring_list(const char *\fIsubject\fP,
|
||||
.ti +5n
|
||||
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
|
||||
.B " int *\fIovector\fP, int \fIstringcount\fP, const char ***\fIlistptr\fP);"
|
||||
.fi
|
||||
.PP
|
||||
Captured substrings can be accessed directly by using the offsets returned by
|
||||
\fBpcre_exec()\fP in \fIovector\fP. For convenience, the functions
|
||||
@ -2516,25 +2527,20 @@ provided.
|
||||
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP);
|
||||
.PP
|
||||
.B " const char *\fIname\fP);"
|
||||
.sp
|
||||
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
|
||||
.PP
|
||||
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||
.B " char *\fIbuffer\fP, int \fIbuffersize\fP);"
|
||||
.sp
|
||||
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIsubject\fP, int *\fIovector\fP,
|
||||
.ti +5n
|
||||
.B int \fIstringcount\fP, const char *\fIstringname\fP,
|
||||
.ti +5n
|
||||
.B const char **\fIstringptr\fP);
|
||||
.B " const char *\fIsubject\fP, int *\fIovector\fP,"
|
||||
.B " int \fIstringcount\fP, const char *\fIstringname\fP,"
|
||||
.B " const char **\fIstringptr\fP);"
|
||||
.fi
|
||||
.PP
|
||||
To extract a substring by name, you first have to find the associated number.
|
||||
For example, for this pattern
|
||||
@ -2586,9 +2592,10 @@ same number causes an error at compile time.
|
||||
.SH "DUPLICATE SUBPATTERN NAMES"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
|
||||
.ti +5n
|
||||
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
|
||||
.B " const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);"
|
||||
.fi
|
||||
.PP
|
||||
When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
|
||||
are not required to be unique. (Duplicate names are always allowed for
|
||||
@ -2677,13 +2684,12 @@ the value returned is the size of each block that is obtained from the heap.
|
||||
.SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
|
||||
.ti +5n
|
||||
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
|
||||
.ti +5n
|
||||
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
|
||||
.ti +5n
|
||||
.B int *\fIworkspace\fP, int \fIwscount\fP);
|
||||
.B " const char *\fIsubject\fP, int \fIlength\fP, int \fIstartoffset\fP,"
|
||||
.B " int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,"
|
||||
.B " int *\fIworkspace\fP, int \fIwscount\fP);"
|
||||
.fi
|
||||
.P
|
||||
The function \fBpcre_dfa_exec()\fP is called to match a subject string against
|
||||
a compiled pattern, using a matching algorithm that scans the subject string
|
||||
@ -2810,6 +2816,14 @@ matching string is given first. If there were too many matches to fit into
|
||||
\fIovector\fP, the yield of the function is zero, and the vector is filled with
|
||||
the longest matches. Unlike \fBpcre_exec()\fP, \fBpcre_dfa_exec()\fP can use
|
||||
the entire \fIovector\fP for returning matched strings.
|
||||
.P
|
||||
NOTE: PCRE's "auto-possessification" optimization usually applies to character
|
||||
repeats at the end of a pattern (as well as internally). For example, the
|
||||
pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point
|
||||
even considering the possibility of backtracking into the repeated digits. For
|
||||
DFA matching, this means that only one possible match is found. If you really
|
||||
do want multiple matches in such cases, either use an ungreedy repeat
|
||||
("a\ed+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||
.
|
||||
.
|
||||
.SS "Error returns from \fBpcre_dfa_exec()\fP"
|
||||
@ -2886,6 +2900,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 12 May 2013
|
||||
Last updated: 12 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRECALLOUT 3 "03 March 2013" "PCRE 8.33"
|
||||
.TH PCRECALLOUT 3 "12 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH SYNOPSIS
|
||||
@ -55,17 +55,50 @@ The
|
||||
.\" HREF
|
||||
\fBpcretest\fP
|
||||
.\"
|
||||
command has an option that sets automatic callouts; when it is used, the output
|
||||
indicates how the pattern is matched. This is useful information when you are
|
||||
trying to optimize the performance of a particular pattern.
|
||||
program has a pattern qualifier (/C) that sets automatic callouts; when it is
|
||||
used, the output indicates how the pattern is being matched. This is useful
|
||||
information when you are trying to optimize the performance of a particular
|
||||
pattern.
|
||||
.
|
||||
.
|
||||
.SH "MISSING CALLOUTS"
|
||||
.rs
|
||||
.sp
|
||||
You should be aware that, because of optimizations in the way PCRE matches
|
||||
patterns by default, callouts sometimes do not happen. For example, if the
|
||||
pattern is
|
||||
You should be aware that, because of optimizations in the way PCRE compiles and
|
||||
matches patterns, callouts sometimes do not happen exactly as you might expect.
|
||||
.P
|
||||
At compile time, PCRE "auto-possessifies" repeated items when it knows that
|
||||
what follows cannot be part of the repeat. For example, a+[bc] is compiled as
|
||||
if it were a++[bc]. The \fBpcretest\fP output when this pattern is anchored and
|
||||
then applied with automatic callouts to the string "aaaa" is:
|
||||
.sp
|
||||
--->aaaa
|
||||
+0 ^ ^
|
||||
+1 ^ a+
|
||||
+3 ^ ^ [bc]
|
||||
No match
|
||||
.sp
|
||||
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||
You can disable the auto-possessify feature by passing PCRE_NO_AUTO_POSSESS
|
||||
to \fBpcre_compile()\fP, or starting the pattern with (*NO_AUTO_POSSESS). If
|
||||
this is done in \fBpcretest\fP (using the /O qualifier), the output changes to
|
||||
this:
|
||||
.sp
|
||||
--->aaaa
|
||||
+0 ^ ^
|
||||
+1 ^ a+
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^ ^ [bc]
|
||||
+3 ^^ [bc]
|
||||
No match
|
||||
.sp
|
||||
This time, when matching [bc] fails, the matcher backtracks into a+ and tries
|
||||
again, repeatedly, until a+ itself fails.
|
||||
.P
|
||||
Other optimizations that provide fast "no match" results also affect callouts.
|
||||
For example, if the pattern is
|
||||
.sp
|
||||
ab(?C4)cd
|
||||
.sp
|
||||
@ -89,11 +122,11 @@ callouts such as the example above are obeyed.
|
||||
.rs
|
||||
.sp
|
||||
During matching, when PCRE reaches a callout point, the external function
|
||||
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called
|
||||
(if it is set). This applies to both normal and DFA matching. The only
|
||||
argument to the callout function is a pointer to a \fBpcre_callout\fP
|
||||
or \fBpcre[16|32]_callout\fP block.
|
||||
These structures contains the following fields:
|
||||
defined by \fIpcre_callout\fP or \fIpcre[16|32]_callout\fP is called (if it is
|
||||
set). This applies to both normal and DFA matching. The only argument to the
|
||||
callout function is a pointer to a \fBpcre_callout\fP or
|
||||
\fBpcre[16|32]_callout\fP block. These structures contain the following
|
||||
fields:
|
||||
.sp
|
||||
int \fIversion\fP;
|
||||
int \fIcallout_number\fP;
|
||||
@ -217,6 +250,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 March 2013
|
||||
Last updated: 12 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRECOMPAT 3 "19 March 2013" "PCRE 8.33"
|
||||
.TH PCRECOMPAT 3 "10 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH "DIFFERENCES BETWEEN PCRE AND PERL"
|
||||
@ -122,16 +122,21 @@ an error is given at compile time.
|
||||
.P
|
||||
15. Perl recognizes comments in some places that PCRE does not, for example,
|
||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||
Perl allows white space between ( and ? but PCRE never does, even if the
|
||||
PCRE_EXTENDED option is set.
|
||||
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||
deprecated) but PCRE never does, even if the PCRE_EXTENDED option is set.
|
||||
.P
|
||||
16. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||
16. Perl, when in warning mode, gives warnings for character classes such as
|
||||
[A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE has no
|
||||
warning features, so it gives an error in these cases because they are almost
|
||||
certainly user mistakes.
|
||||
.P
|
||||
17. In PCRE, the upper/lower case character properties Lu and Ll are not
|
||||
affected when case-independent matching is specified. For example, \ep{Lu}
|
||||
always matches an upper case letter. I think Perl has changed in this respect;
|
||||
in the release at the time of writing (5.16), \ep{Lu} and \ep{Ll} match all
|
||||
letters, regardless of case, when case independence is specified.
|
||||
.P
|
||||
17. PCRE provides some extensions to the Perl regular expression facilities.
|
||||
18. PCRE provides some extensions to the Perl regular expression facilities.
|
||||
Perl 5.10 includes new features that are not in earlier versions of Perl, some
|
||||
of which (such as named parentheses) have been in PCRE for some time. This list
|
||||
is with respect to Perl 5.10:
|
||||
@ -190,6 +195,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 19 March 2013
|
||||
Last updated: 10 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRELIMITS 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCRELIMITS 3 "05 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH "SIZE AND OTHER LIMITATIONS"
|
||||
@ -8,9 +8,10 @@ There are some size limitations in PCRE but it is hoped that they will never in
|
||||
practice be relevant.
|
||||
.P
|
||||
The maximum length of a compiled pattern is approximately 64K data units (bytes
|
||||
for the 8-bit library, 32-bit units for the 32-bit library, and 32-bit units for
|
||||
the 32-bit library) if PCRE is compiled with the default internal linkage size
|
||||
of 2 bytes. If you want to process regular expressions that are truly enormous,
|
||||
for the 8-bit library, 16-bit units for the 16-bit library, and 32-bit units for
|
||||
the 32-bit library) if PCRE is compiled with the default internal linkage size,
|
||||
which is 2 bytes for the 8-bit and 16-bit libraries, and 4 bytes for the 32-bit
|
||||
library. If you want to process regular expressions that are truly enormous,
|
||||
you can compile PCRE with an internal linkage size of 3 or 4 (when building the
|
||||
16-bit or 32-bit library, 3 is rounded up to 4). See the \fBREADME\fP file in
|
||||
the source distribution and the
|
||||
@ -23,7 +24,10 @@ However, the speed of execution is slower.
|
||||
All values in repeating quantifiers must be less than 65536.
|
||||
.P
|
||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||
no more than 65535 capturing subpatterns.
|
||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||
order to limit the amount of system stack used at compile time. The limit can
|
||||
be specified when PCRE is built; the default is 250.
|
||||
.P
|
||||
There is a limit to the number of forward references to subsequent subpatterns
|
||||
of around 200,000. Repeated forward references with fixed upper limits, for
|
||||
@ -34,7 +38,7 @@ The maximum length of name for a named subpattern is 32 characters, and the
|
||||
maximum number of named subpatterns is 10000.
|
||||
.P
|
||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit library.
|
||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||
.P
|
||||
The maximum length of a subject string is the largest positive number that an
|
||||
integer variable can hold. However, when using the traditional matching
|
||||
@ -62,6 +66,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 04 May 2012
|
||||
Copyright (c) 1997-2012 University of Cambridge.
|
||||
Last updated: 05 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCREMATCHING 3 "08 January 2012" "PCRE 8.30"
|
||||
.TH PCREMATCHING 3 "12 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH "PCRE MATCHING ALGORITHMS"
|
||||
@ -106,6 +106,14 @@ the three strings "caterpillar", "cater", and "cat" that start at the fifth
|
||||
character of the subject. The algorithm does not automatically move on to find
|
||||
matches that start at later positions.
|
||||
.P
|
||||
PCRE's "auto-possessification" optimization usually applies to character
|
||||
repeats at the end of a pattern (as well as internally). For example, the
|
||||
pattern "a\ed+" is compiled as if it were "a\ed++" because there is no point
|
||||
even considering the possibility of backtracking into the repeated digits. For
|
||||
DFA matching, this means that only one possible match is found. If you really
|
||||
do want multiple matches in such cases, either use an ungreedy repeat
|
||||
("a\ed+?") or set the PCRE_NO_AUTO_POSSESS option when compiling.
|
||||
.P
|
||||
There are a number of features of PCRE regular expressions that are not
|
||||
supported by the alternative matching algorithm. They are as follows:
|
||||
.P
|
||||
@ -201,6 +209,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 08 January 2012
|
||||
Last updated: 12 November 2013
|
||||
Copyright (c) 1997-2012 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCREPARTIAL 3 "20 February 2013" "PCRE 8.33"
|
||||
.TH PCREPARTIAL 3 "02 July 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH "PARTIAL MATCHING IN PCRE"
|
||||
@ -280,6 +280,15 @@ Notice that when the match is complete, only the last part is shown; PCRE does
|
||||
not retain the previously partially-matched string. It is up to the calling
|
||||
program to do that if it needs to.
|
||||
.P
|
||||
That means that, for an unanchored pattern, if a continued match fails, it is
|
||||
not possible to try again at a new starting point. All this facility is capable
|
||||
of doing is continuing with the previous match attempt. In the previous
|
||||
example, if the second set of data is "ug23" the result is no match, even
|
||||
though there would be a match for "aug23" if the entire string were given at
|
||||
once. Depending on the application, this may or may not be what you want.
|
||||
The only way to allow for starting again at the next character is to retain the
|
||||
matched part of the subject and try a new complete match.
|
||||
.P
|
||||
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
|
||||
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
|
||||
facility can be used to pass very long subject strings to the DFA matching
|
||||
@ -462,6 +471,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 20 February 2013
|
||||
Last updated: 02 July 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCREPATTERN 3 "26 April 2013" "PCRE 8.33"
|
||||
.TH PCREPATTERN 3 "03 December 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH "PCRE REGULAR EXPRESSION DETAILS"
|
||||
@ -80,21 +80,37 @@ appearance causes an error.
|
||||
.SS "Unicode property support"
|
||||
.rs
|
||||
.sp
|
||||
Another special sequence that may appear at the start of a pattern is
|
||||
.sp
|
||||
(*UCP)
|
||||
.sp
|
||||
Another special sequence that may appear at the start of a pattern is (*UCP).
|
||||
This has the same effect as setting the PCRE_UCP option: it causes sequences
|
||||
such as \ed and \ew to use Unicode properties to determine character types,
|
||||
instead of recognizing only characters with codes less than 128 via a lookup
|
||||
table.
|
||||
.
|
||||
.
|
||||
.SS "Disabling auto-possessification"
|
||||
.rs
|
||||
.sp
|
||||
If a pattern starts with (*NO_AUTO_POSSESS), it has the same effect as setting
|
||||
the PCRE_NO_AUTO_POSSESS option at compile time. This stops PCRE from making
|
||||
quantifiers possessive when what follows cannot match the repeated item. For
|
||||
example, by default a+b is treated as a++b. For more details, see the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
documentation.
|
||||
.
|
||||
.
|
||||
.SS "Disabling start-up optimizations"
|
||||
.rs
|
||||
.sp
|
||||
If a pattern starts with (*NO_START_OPT), it has the same effect as setting the
|
||||
PCRE_NO_START_OPTIMIZE option either at compile or matching time.
|
||||
PCRE_NO_START_OPTIMIZE option either at compile or matching time. This disables
|
||||
several optimizations for quickly reaching "no match" results. For more
|
||||
details, see the
|
||||
.\" HREF
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
documentation.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="newlines"></a>
|
||||
@ -164,10 +180,10 @@ pattern of the form
|
||||
(*LIMIT_RECURSION=d)
|
||||
.sp
|
||||
where d is any number of decimal digits. However, the value of the setting must
|
||||
be less than the value set by the caller of \fBpcre_exec()\fP for it to have
|
||||
any effect. In other words, the pattern writer can lower the limit set by the
|
||||
programmer, but not raise it. If there is more than one setting of one of these
|
||||
limits, the lower value is used.
|
||||
be less than the value set (or defaulted) by the caller of \fBpcre_exec()\fP
|
||||
for it to have any effect. In other words, the pattern writer can lower the
|
||||
limits set by the programmer, but not raise them. If there is more than one
|
||||
setting of one of these limits, the lower value is used.
|
||||
.
|
||||
.
|
||||
.SH "EBCDIC CHARACTER CODES"
|
||||
@ -257,10 +273,11 @@ In a UTF mode, only ASCII numbers and letters have any special meaning after a
|
||||
backslash. All other characters (in particular, those whose codepoints are
|
||||
greater than 127) are treated as literals.
|
||||
.P
|
||||
If a pattern is compiled with the PCRE_EXTENDED option, white space in the
|
||||
pattern (other than in a character class) and characters between a # outside
|
||||
a character class and the next newline are ignored. An escaping backslash can
|
||||
be used to include a white space or # character as part of the pattern.
|
||||
If a pattern is compiled with the PCRE_EXTENDED option, most white space in the
|
||||
pattern (other than in a character class), and characters between a # outside a
|
||||
character class and the next newline, inclusive, are ignored. An escaping
|
||||
backslash can be used to include a white space or # character as part of the
|
||||
pattern.
|
||||
.P
|
||||
If you want to remove the special meaning from a sequence of characters, you
|
||||
can do so by putting them between \eQ and \eE. This is different from Perl in
|
||||
@ -300,7 +317,9 @@ one of the following escape sequences than the binary character it represents:
|
||||
\en linefeed (hex 0A)
|
||||
\er carriage return (hex 0D)
|
||||
\et tab (hex 09)
|
||||
\e0dd character with octal code 0dd
|
||||
\eddd character with octal code ddd, or back reference
|
||||
\eo{ddd..} character with octal code ddd..
|
||||
\exhh character with hex code hh
|
||||
\ex{hhh..} character with hex code hhh.. (non-JavaScript mode)
|
||||
\euhhhh character with hex code hhhh (JavaScript mode only)
|
||||
@ -321,47 +340,27 @@ byte are inverted. Thus \ecA becomes hex 01, as in ASCII (A is C1), but because
|
||||
the EBCDIC letters are disjoint, \ecZ becomes hex 29 (Z is E9), and other
|
||||
characters also generate different values.
|
||||
.P
|
||||
By default, after \ex, from zero to two hexadecimal digits are read (letters
|
||||
can be in upper or lower case). Any number of hexadecimal digits may appear
|
||||
between \ex{ and }, but the character code is constrained as follows:
|
||||
.sp
|
||||
8-bit non-UTF mode less than 0x100
|
||||
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
|
||||
16-bit non-UTF mode less than 0x10000
|
||||
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
|
||||
32-bit non-UTF mode less than 0x80000000
|
||||
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
||||
.sp
|
||||
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
||||
"surrogate" codepoints), and 0xffef.
|
||||
.P
|
||||
If characters other than hexadecimal digits appear between \ex{ and }, or if
|
||||
there is no terminating }, this form of escape is not recognized. Instead, the
|
||||
initial \ex will be interpreted as a basic hexadecimal escape, with no
|
||||
following digits, giving a character whose value is zero.
|
||||
.P
|
||||
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \ex is
|
||||
as just described only when it is followed by two hexadecimal digits.
|
||||
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
|
||||
code points greater than 256 is provided by \eu, which must be followed by
|
||||
four hexadecimal digits; otherwise it matches a literal "u" character.
|
||||
Character codes specified by \eu in JavaScript mode are constrained in the same
|
||||
way as those specified by \ex in non-JavaScript mode.
|
||||
.P
|
||||
Characters whose value is less than 256 can be defined by either of the two
|
||||
syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the
|
||||
way they are handled. For example, \exdc is exactly the same as \ex{dc} (or
|
||||
\eu00dc in JavaScript mode).
|
||||
.P
|
||||
After \e0 up to two further octal digits are read. If there are fewer than two
|
||||
digits, just those that are present are used. Thus the sequence \e0\ex\e07
|
||||
specifies two binary zeros followed by a BEL character (code value 7). Make
|
||||
sure you supply two digits after the initial zero if the pattern character that
|
||||
follows is itself an octal digit.
|
||||
.P
|
||||
The handling of a backslash followed by a digit other than 0 is complicated.
|
||||
Outside a character class, PCRE reads it and any following digits as a decimal
|
||||
number. If the number is less than 10, or if there have been at least that many
|
||||
The escape \eo must be followed by a sequence of octal digits, enclosed in
|
||||
braces. An error occurs if this is not the case. This escape is a recent
|
||||
addition to Perl; it provides a way of specifying character code points as octal
|
||||
numbers greater than 0777, and it also allows octal numbers and back references
|
||||
to be unambiguously specified.
|
||||
.P
|
||||
For greater clarity and unambiguity, it is best to avoid following \e by a
|
||||
digit greater than zero. Instead, use \eo{} or \ex{} to specify character
|
||||
numbers, and \eg{} to specify back references. The following paragraphs
|
||||
describe the old, ambiguous syntax.
|
||||
.P
|
||||
The handling of a backslash followed by a digit other than 0 is complicated,
|
||||
and Perl has changed in recent releases, causing PCRE also to change. Outside a
|
||||
character class, PCRE reads the digit and any following digits as a decimal
|
||||
number. If the number is less than 8, or if there have been at least that many
|
||||
previous capturing left parentheses in the expression, the entire sequence is
|
||||
taken as a \fIback reference\fP. A description of how this works is given
|
||||
.\" HTML <a href="#backreferences">
|
||||
@ -374,12 +373,11 @@ following the discussion of
|
||||
parenthesized subpatterns.
|
||||
.\"
|
||||
.P
|
||||
Inside a character class, or if the decimal number is greater than 9 and there
|
||||
have not been that many capturing subpatterns, PCRE re-reads up to three octal
|
||||
digits following the backslash, and uses them to generate a data character. Any
|
||||
subsequent digits stand for themselves. The value of the character is
|
||||
constrained in the same way as characters specified in hexadecimal.
|
||||
For example:
|
||||
Inside a character class, or if the decimal number following \e is greater than
|
||||
7 and there have not been that many capturing subpatterns, PCRE handles \e8 and
|
||||
\e9 as the literal characters "8" and "9", and otherwise re-reads up to three
|
||||
octal digits following the backslash, using them to generate a data character.
|
||||
Any subsequent digits stand for themselves. For example:
|
||||
.sp
|
||||
\e040 is another way of writing an ASCII space
|
||||
.\" JOIN
|
||||
@ -398,12 +396,51 @@ For example:
|
||||
\e377 might be a back reference, otherwise
|
||||
the value 255 (decimal)
|
||||
.\" JOIN
|
||||
\e81 is either a back reference, or a binary zero
|
||||
followed by the two characters "8" and "1"
|
||||
\e81 is either a back reference, or the two
|
||||
characters "8" and "1"
|
||||
.sp
|
||||
Note that octal values of 100 or greater must not be introduced by a leading
|
||||
zero, because no more than three octal digits are ever read.
|
||||
Note that octal values of 100 or greater that are specified using this syntax
|
||||
must not be introduced by a leading zero, because no more than three octal
|
||||
digits are ever read.
|
||||
.P
|
||||
By default, after \ex that is not followed by {, from zero to two hexadecimal
|
||||
digits are read (letters can be in upper or lower case). Any number of
|
||||
hexadecimal digits may appear between \ex{ and }. If a character other than
|
||||
a hexadecimal digit appears between \ex{ and }, or if there is no terminating
|
||||
}, an error occurs.
|
||||
.P
|
||||
If the PCRE_JAVASCRIPT_COMPAT option is set, the interpretation of \ex is
|
||||
as just described only when it is followed by two hexadecimal digits.
|
||||
Otherwise, it matches a literal "x" character. In JavaScript mode, support for
|
||||
code points greater than 255 is provided by \eu, which must be followed by
|
||||
four hexadecimal digits; otherwise it matches a literal "u" character.
|
||||
.P
|
||||
Characters whose value is less than 256 can be defined by either of the two
|
||||
syntaxes for \ex (or by \eu in JavaScript mode). There is no difference in the
|
||||
way they are handled. For example, \exdc is exactly the same as \ex{dc} (or
|
||||
\eu00dc in JavaScript mode).
|
||||
.
|
||||
.
|
||||
.SS "Constraints on character values"
|
||||
.rs
|
||||
.sp
|
||||
Characters that are specified using octal or hexadecimal numbers are
|
||||
limited to certain values, as follows:
|
||||
.sp
|
||||
8-bit non-UTF mode less than 0x100
|
||||
8-bit UTF-8 mode less than 0x10ffff and a valid codepoint
|
||||
16-bit non-UTF mode less than 0x10000
|
||||
16-bit UTF-16 mode less than 0x10ffff and a valid codepoint
|
||||
32-bit non-UTF mode less than 0x100000000
|
||||
32-bit UTF-32 mode less than 0x10ffff and a valid codepoint
|
||||
.sp
|
||||
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
||||
"surrogate" codepoints), and 0xffef.
|
||||
.
|
||||
.
|
||||
.SS "Escape sequences in character classes"
|
||||
.rs
|
||||
.sp
|
||||
All the sequences that define a single character value can be used both inside
|
||||
and outside character classes. In addition, inside a character class, \eb is
|
||||
interpreted as the backspace character (hex 08).
|
||||
@ -494,11 +531,14 @@ classes. They each match one character of the appropriate type. If the current
|
||||
matching point is at the end of the subject string, all of them fail, because
|
||||
there is no character to match.
|
||||
.P
|
||||
For compatibility with Perl, \es does not match the VT character (code 11).
|
||||
This makes it different from the POSIX "space" class. The \es characters
|
||||
are HT (9), LF (10), FF (12), CR (13), and space (32). If "use locale;" is
|
||||
included in a Perl script, \es may match the VT character. In PCRE, it never
|
||||
does.
|
||||
For compatibility with Perl, \es did not use to match the VT character (code
|
||||
11), which made it different from the POSIX "space" class. However, Perl
|
||||
added VT at release 5.18, and PCRE followed suit at release 8.34. The default
|
||||
\es characters are now HT (9), LF (10), VT (11), FF (12), CR (13), and space
|
||||
(32), which are defined as white space in the "C" locale. This list may vary if
|
||||
locale-specific matching is taking place. For example, in some locales the
|
||||
"non-breaking space" character (\exA0) is recognized as white space, and in
|
||||
others the VT character is not.
|
||||
.P
|
||||
A "word" character is an underscore or any character that is a letter or digit.
|
||||
By default, the definition of letters and digits is controlled by PCRE's
|
||||
@ -513,20 +553,22 @@ in the
|
||||
\fBpcreapi\fP
|
||||
.\"
|
||||
page). For example, in a French locale such as "fr_FR" in Unix-like systems,
|
||||
or "french" in Windows, some character codes greater than 128 are used for
|
||||
or "french" in Windows, some character codes greater than 127 are used for
|
||||
accented letters, and these are then matched by \ew. The use of locales with
|
||||
Unicode is discouraged.
|
||||
.P
|
||||
By default, in a UTF mode, characters with values greater than 128 never match
|
||||
\ed, \es, or \ew, and always match \eD, \eS, and \eW. These sequences retain
|
||||
their original meanings from before UTF support was available, mainly for
|
||||
efficiency reasons. However, if PCRE is compiled with Unicode property support,
|
||||
and the PCRE_UCP option is set, the behaviour is changed so that Unicode
|
||||
properties are used to determine character types, as follows:
|
||||
By default, characters whose code points are greater than 127 never match \ed,
|
||||
\es, or \ew, and always match \eD, \eS, and \eW, although this may vary for
|
||||
characters in the range 128-255 when locale-specific matching is happening.
|
||||
These escape sequences retain their original meanings from before Unicode
|
||||
support was available, mainly for efficiency reasons. If PCRE is compiled with
|
||||
Unicode property support, and the PCRE_UCP option is set, the behaviour is
|
||||
changed so that Unicode properties are used to determine character types, as
|
||||
follows:
|
||||
.sp
|
||||
\ed any character that \ep{Nd} matches (decimal digit)
|
||||
\es any character that \ep{Z} matches, plus HT, LF, FF, CR
|
||||
\ew any character that \ep{L} or \ep{N} matches, plus underscore
|
||||
\ed any character that matches \ep{Nd} (decimal digit)
|
||||
\es any character that matches \ep{Z} or \eh or \ev
|
||||
\ew any character that matches \ep{L} or \ep{N}, plus underscore
|
||||
.sp
|
||||
The upper case escapes match the inverse sets of characters. Note that \ed
|
||||
matches only decimal digits, whereas \ew matches any Unicode digit, as well as
|
||||
@ -536,7 +578,7 @@ is noticeably slower when PCRE_UCP is set.
|
||||
.P
|
||||
The sequences \eh, \eH, \ev, and \eV are features that were added to Perl at
|
||||
release 5.10. In contrast to the other sequences, which match only ASCII
|
||||
characters by default, these always match certain high-valued codepoints,
|
||||
characters by default, these always match certain high-valued code points,
|
||||
whether or not PCRE_UCP is set. The horizontal space characters are:
|
||||
.sp
|
||||
U+0009 Horizontal tab (HT)
|
||||
@ -906,9 +948,9 @@ the "mark" property always have the "extend" grapheme breaking property.
|
||||
.sp
|
||||
As well as the standard Unicode properties described above, PCRE supports four
|
||||
more that make it possible to convert traditional escape sequences such as \ew
|
||||
and \es and POSIX character classes to use Unicode properties. PCRE uses these
|
||||
non-standard, non-Perl properties internally when PCRE_UCP is set. However,
|
||||
they may also be used explicitly. These properties are:
|
||||
and \es to use Unicode properties. PCRE uses these non-standard, non-Perl
|
||||
properties internally when PCRE_UCP is set. However, they may also be used
|
||||
explicitly. These properties are:
|
||||
.sp
|
||||
Xan Any alphanumeric character
|
||||
Xps Any POSIX space character
|
||||
@ -918,8 +960,9 @@ they may also be used explicitly. These properties are:
|
||||
Xan matches characters that have either the L (letter) or the N (number)
|
||||
property. Xps matches the characters tab, linefeed, vertical tab, form feed, or
|
||||
carriage return, and any other character that has the Z (separator) property.
|
||||
Xsp is the same as Xps, except that vertical tab is excluded. Xwd matches the
|
||||
same characters as Xan, plus underscore.
|
||||
Xsp is the same as Xps; it used to exclude vertical tab, for Perl
|
||||
compatibility, but Perl changed, and so PCRE followed at release 8.34. Xwd
|
||||
matches the same characters as Xan, plus underscore.
|
||||
.P
|
||||
There is another non-standard property, Xuc, which matches any character that
|
||||
can be represented by a Universal Character Name in C++ and other programming
|
||||
@ -1215,7 +1258,9 @@ The minus (hyphen) character can be used to specify a range of characters in a
|
||||
character class. For example, [d-m] matches any letter between d and m,
|
||||
inclusive. If a minus character is required in a class, it must be escaped with
|
||||
a backslash or appear in a position where it cannot be interpreted as
|
||||
indicating a range, typically as the first or last character in the class.
|
||||
indicating a range, typically as the first or last character in the class, or
|
||||
immediately after a range. For example, [b-d-z] matches letters in the range b
|
||||
to d, a hyphen character, or z.
|
||||
.P
|
||||
It is not possible to have the literal character "]" as the end character of a
|
||||
range. A pattern such as [W-]46] is interpreted as a class of two characters
|
||||
@ -1225,6 +1270,11 @@ the end of range, so [W-\e]46] is interpreted as a class containing a range
|
||||
followed by two other characters. The octal or hexadecimal representation of
|
||||
"]" can also be used to end a range.
|
||||
.P
|
||||
An error is generated if a POSIX character class (see below) or an escape
|
||||
sequence other than one that defines a single character appears at a point
|
||||
where a range ending character is expected. For example, [z-\exff] is valid,
|
||||
but [A-\ed] and [A-[:digit:]] are not.
|
||||
.P
|
||||
Ranges operate in the collating sequence of character values. They can also be
|
||||
used for characters specified numerically, for example [\e000-\e037]. Ranges
|
||||
can include any characters that are valid for the current mode.
|
||||
@ -1263,9 +1313,9 @@ something AND NOT ...".
|
||||
The only metacharacters that are recognized in character classes are backslash,
|
||||
hyphen (only where it can be interpreted as specifying a range), circumflex
|
||||
(only at the start), opening square bracket (only when it can be interpreted as
|
||||
introducing a POSIX class name - see the next section), and the terminating
|
||||
closing square bracket. However, escaping other non-alphanumeric characters
|
||||
does no harm.
|
||||
introducing a POSIX class name, or for a special compatibility feature - see
|
||||
the next two sections), and the terminating closing square bracket. However,
|
||||
escaping other non-alphanumeric characters does no harm.
|
||||
.
|
||||
.
|
||||
.SH "POSIX CHARACTER CLASSES"
|
||||
@ -1290,15 +1340,17 @@ are:
|
||||
lower lower case letters
|
||||
print printing characters, including space
|
||||
punct printing characters, excluding letters and digits and space
|
||||
space white space (not quite the same as \es)
|
||||
space white space (the same as \es from PCRE 8.34)
|
||||
upper upper case letters
|
||||
word "word" characters (same as \ew)
|
||||
xdigit hexadecimal digits
|
||||
.sp
|
||||
The "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), and
|
||||
space (32). Notice that this list includes the VT character (code 11). This
|
||||
makes "space" different to \es, which does not include VT (for Perl
|
||||
compatibility).
|
||||
The default "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13),
|
||||
and space (32). If locale-specific matching is taking place, the list of space
|
||||
characters may be different; there may be fewer or more of them. "Space" used
|
||||
to be different to \es, which did not include VT, for Perl compatibility.
|
||||
However, Perl changed at release 5.18, and PCRE followed at release 8.34.
|
||||
"Space" and \es now match the same set of characters.
|
||||
.P
|
||||
The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
|
||||
5.8. Another Perl extension is negation, which is indicated by a ^ character
|
||||
@ -1310,11 +1362,11 @@ matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
|
||||
syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
|
||||
supported, and an error is given if they are encountered.
|
||||
.P
|
||||
By default, in UTF modes, characters with values greater than 128 do not match
|
||||
any of the POSIX character classes. However, if the PCRE_UCP option is passed
|
||||
to \fBpcre_compile()\fP, some of the classes are changed so that Unicode
|
||||
character properties are used. This is achieved by replacing the POSIX classes
|
||||
by other sequences, as follows:
|
||||
By default, characters with values greater than 127 do not match any of the
|
||||
POSIX character classes. However, if the PCRE_UCP option is passed to
|
||||
\fBpcre_compile()\fP, some of the classes are changed so that Unicode character
|
||||
properties are used. This is achieved by replacing certain POSIX classes by
|
||||
other sequences, as follows:
|
||||
.sp
|
||||
[:alnum:] becomes \ep{Xan}
|
||||
[:alpha:] becomes \ep{L}
|
||||
@ -1325,9 +1377,54 @@ by other sequences, as follows:
|
||||
[:upper:] becomes \ep{Lu}
|
||||
[:word:] becomes \ep{Xwd}
|
||||
.sp
|
||||
Negated versions, such as [:^alpha:] use \eP instead of \ep. The other POSIX
|
||||
classes are unchanged, and match only characters with code points less than
|
||||
128.
|
||||
Negated versions, such as [:^alpha:] use \eP instead of \ep. Three other POSIX
|
||||
classes are handled specially in UCP mode:
|
||||
.TP 10
|
||||
[:graph:]
|
||||
This matches characters that have glyphs that mark the page when printed. In
|
||||
Unicode property terms, it matches all characters with the L, M, N, P, S, or Cf
|
||||
properties, except for:
|
||||
.sp
|
||||
U+061C Arabic Letter Mark
|
||||
U+180E Mongolian Vowel Separator
|
||||
U+2066 - U+2069 Various "isolate"s
|
||||
.sp
|
||||
.TP 10
|
||||
[:print:]
|
||||
This matches the same characters as [:graph:] plus space characters that are
|
||||
not controls, that is, characters with the Zs property.
|
||||
.TP 10
|
||||
[:punct:]
|
||||
This matches all characters that have the Unicode P (punctuation) property,
|
||||
plus those characters whose code points are less than 128 that have the S
|
||||
(Symbol) property.
|
||||
.P
|
||||
The other POSIX classes are unchanged, and match only characters with code
|
||||
points less than 128.
|
||||
.
|
||||
.
|
||||
.SH "COMPATIBILITY FEATURE FOR WORD BOUNDARIES"
|
||||
.rs
|
||||
.sp
|
||||
In the POSIX.2 compliant library that was included in 4.4BSD Unix, the ugly
|
||||
syntax [[:<:]] and [[:>:]] is used for matching "start of word" and "end of
|
||||
word". PCRE treats these items as follows:
|
||||
.sp
|
||||
[[:<:]] is converted to \eb(?=\ew)
|
||||
[[:>:]] is converted to \eb(?<=\ew)
|
||||
.sp
|
||||
Only these exact character sequences are recognized. A sequence such as
|
||||
[a[:<:]b] provokes an error for an unrecognized POSIX class name. This support is
|
||||
not compatible with Perl. It is provided to help migrations from other
|
||||
environments, and is best not used in any new patterns. Note that \eb matches
|
||||
at the start and the end of a word (see
|
||||
.\" HTML <a href="#smallassertions">
|
||||
.\" </a>
|
||||
"Simple assertions"
|
||||
.\"
|
||||
above), and in a Perl-style pattern the preceding or following character
|
||||
normally shows which is wanted, without the need for the assertions that are
|
||||
used above in order to give exactly the POSIX behaviour.
|
||||
.
|
||||
.
|
||||
.SH "VERTICAL BAR"
|
||||
@ -1547,11 +1644,12 @@ conditions,
|
||||
.\"
|
||||
can be made by name as well as by number.
|
||||
.P
|
||||
Names consist of up to 32 alphanumeric characters and underscores. Named
|
||||
capturing parentheses are still allocated numbers as well as names, exactly as
|
||||
if the names were not present. The PCRE API provides function calls for
|
||||
extracting the name-to-number translation table from a compiled pattern. There
|
||||
is also a convenience function for extracting a captured substring by name.
|
||||
Names consist of up to 32 alphanumeric characters and underscores, but must
|
||||
start with a non-digit. Named capturing parentheses are still allocated numbers
|
||||
as well as names, exactly as if the names were not present. The PCRE API
|
||||
provides function calls for extracting the name-to-number translation table
|
||||
from a compiled pattern. There is also a convenience function for extracting a
|
||||
captured substring by name.
|
||||
.P
|
||||
By default, a name must be unique within a pattern, but it is possible to relax
|
||||
this constraint by setting the PCRE_DUPNAMES option at compile time. (Duplicate
|
||||
@ -1577,9 +1675,20 @@ for the first (and in this example, the only) subpattern of that name that
|
||||
matched. This saves searching to find which numbered subpattern it was.
|
||||
.P
|
||||
If you make a back reference to a non-unique named subpattern from elsewhere in
|
||||
the pattern, the one that corresponds to the first occurrence of the name is
|
||||
used. In the absence of duplicate numbers (see the previous section) this is
|
||||
the one with the lowest number. If you use a named reference in a condition
|
||||
the pattern, the subpatterns to which the name refers are checked in the order
|
||||
in which they appear in the overall pattern. The first one that is set is used
|
||||
for the reference. For example, this pattern matches both "foofoo" and
|
||||
"barbar" but not "foobar" or "barfoo":
|
||||
.sp
|
||||
(?:(?<n>foo)|(?<n>bar))\ek<n>
|
||||
.sp
|
||||
.P
|
||||
If you make a subroutine call to a non-unique named subpattern, the one that
|
||||
corresponds to the first occurrence of the name is used. In the absence of
|
||||
duplicate numbers (see the previous section) this is the one with the lowest
|
||||
number.
|
||||
.P
|
||||
If you use a named reference in a condition
|
||||
test (see the
|
||||
.\"
|
||||
.\" HTML <a href="#conditions">
|
||||
@ -1599,8 +1708,9 @@ documentation.
|
||||
\fBWarning:\fP You cannot use different names to distinguish between two
|
||||
subpatterns with the same number because PCRE uses only the numbers when
|
||||
matching. For this reason, an error is given at compile time if different names
|
||||
are given to subpatterns with the same number. However, you can give the same
|
||||
name to subpatterns with the same number, even when PCRE_DUPNAMES is not set.
|
||||
are given to subpatterns with the same number. However, you can always give the
|
||||
same name to subpatterns with the same number, even when PCRE_DUPNAMES is not
|
||||
set.
|
||||
.
|
||||
.
|
||||
.SH REPETITION
|
||||
@ -2271,12 +2381,7 @@ This makes the fragment independent of the parentheses in the larger pattern.
|
||||
.sp
|
||||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
||||
subpattern by name. For compatibility with earlier versions of PCRE, which had
|
||||
this facility before Perl, the syntax (?(name)...) is also recognized. However,
|
||||
there is a possible ambiguity with this syntax, because subpattern names may
|
||||
consist entirely of digits. PCRE looks first for a named subpattern; if it
|
||||
cannot find one and the name consists entirely of digits, PCRE looks for a
|
||||
subpattern of that number, which must be greater than zero. Using subpattern
|
||||
names that consist entirely of digits is not recommended.
|
||||
this facility before Perl, the syntax (?(name)...) is also recognized.
|
||||
.P
|
||||
Rewriting the above example to use a named subpattern gives this:
|
||||
.sp
|
||||
@ -2698,8 +2803,13 @@ During matching, when PCRE reaches a callout point, the external function is
|
||||
called. It is provided with the number of the callout, the position in the
|
||||
pattern, and, optionally, one item of data originally supplied by the caller of
|
||||
the matching function. The callout function may cause matching to proceed, to
|
||||
backtrack, or to fail altogether. A complete description of the interface to
|
||||
the callout function is given in the
|
||||
backtrack, or to fail altogether.
|
||||
.P
|
||||
By default, PCRE implements a number of optimizations at compile time and
|
||||
matching time, and one side-effect is that sometimes callouts are skipped. If
|
||||
you need all possible callouts to happen, you need to set options that disable
|
||||
the relevant optimizations. More details, and a complete description of the
|
||||
interface to the callout function, are given in the
|
||||
.\" HREF
|
||||
\fBpcrecallout\fP
|
||||
.\"
|
||||
@ -3060,7 +3170,7 @@ example:
|
||||
.sp
|
||||
...(*COMMIT)(*PRUNE)...
|
||||
.sp
|
||||
If there is a matching failure to the right, backtracking onto (*PRUNE) cases
|
||||
If there is a matching failure to the right, backtracking onto (*PRUNE) causes
|
||||
it to be triggered, and its action is taken. There can never be a backtrack
|
||||
onto (*COMMIT).
|
||||
.
|
||||
@ -3145,6 +3255,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 26 April 2013
|
||||
Last updated: 03 December 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,25 +1,22 @@
|
||||
.TH PCREPOSIX 3 "09 January 2012" "PCRE 8.30"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions.
|
||||
.SH "SYNOPSIS OF POSIX API"
|
||||
.SH "SYNOPSIS"
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcreposix.h>
|
||||
.PP
|
||||
.SM
|
||||
.nf
|
||||
.B int regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP,
|
||||
.ti +5n
|
||||
.B int \fIcflags\fP);
|
||||
.PP
|
||||
.B " int \fIcflags\fP);"
|
||||
.sp
|
||||
.B int regexec(regex_t *\fIpreg\fP, const char *\fIstring\fP,
|
||||
.ti +5n
|
||||
.B size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);
|
||||
.PP
|
||||
.B size_t regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,
|
||||
.ti +5n
|
||||
.B char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);
|
||||
.PP
|
||||
.B " size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);"
|
||||
.B " size_t regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,"
|
||||
.B " char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);"
|
||||
.sp
|
||||
.B void regfree(regex_t *\fIpreg\fP);
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCREPRECOMPILE 3 "24 June 2012" "PCRE 8.30"
|
||||
.TH PCREPRECOMPILE 3 "12 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH "SAVING AND RE-USING PRECOMPILED PCRE PATTERNS"
|
||||
@ -90,8 +90,8 @@ study data.
|
||||
.rs
|
||||
.sp
|
||||
Re-using a precompiled pattern is straightforward. Having reloaded it into main
|
||||
memory, called \fBpcre[16|32]_pattern_to_host_byte_order()\fP if necessary,
|
||||
you pass its pointer to \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP in
|
||||
memory, called \fBpcre[16|32]_pattern_to_host_byte_order()\fP if necessary, you
|
||||
pass its pointer to \fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP in
|
||||
the usual way.
|
||||
.P
|
||||
However, if you passed a pointer to custom character tables when the pattern
|
||||
@ -110,15 +110,19 @@ in the
|
||||
.\"
|
||||
documentation.
|
||||
.P
|
||||
\fBWarning:\fP The tables that \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP use
|
||||
must be the same as those that were used when the pattern was compiled. If this
|
||||
is not the case, the behaviour is undefined.
|
||||
.P
|
||||
If you did not provide custom character tables when the pattern was compiled,
|
||||
the pointer in the compiled pattern is NULL, which causes the matching
|
||||
functions to use PCRE's internal tables. Thus, you do not need to take any
|
||||
special action at run time in this case.
|
||||
.P
|
||||
If you saved study data with the compiled pattern, you need to create your own
|
||||
\fBpcre[16|32]_extra\fP data block and set the \fIstudy_data\fP field to point to the
|
||||
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
|
||||
\fIflags\fP field to indicate that study data is present. Then pass the
|
||||
\fBpcre[16|32]_extra\fP data block and set the \fIstudy_data\fP field to point
|
||||
to the reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in
|
||||
the \fIflags\fP field to indicate that study data is present. Then pass the
|
||||
\fBpcre[16|32]_extra\fP block to the matching function in the usual way. If the
|
||||
pattern was studied for just-in-time optimization, that data cannot be saved,
|
||||
and so is lost by a save/restore cycle.
|
||||
@ -146,6 +150,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 24 June 2012
|
||||
Copyright (c) 1997-2012 University of Cambridge.
|
||||
Last updated: 12 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRESYNTAX 3 "26 April 2013" "PCRE 8.33"
|
||||
.TH PCRESYNTAX 3 "12 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
PCRE - Perl-compatible regular expressions
|
||||
.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
|
||||
@ -29,9 +29,14 @@ documentation. This document contains a quick-reference summary of the syntax.
|
||||
\en newline (hex 0A)
|
||||
\er carriage return (hex 0D)
|
||||
\et tab (hex 09)
|
||||
\e0dd character with octal code 0dd
|
||||
\eddd character with octal code ddd, or backreference
|
||||
\eo{ddd..} character with octal code ddd..
|
||||
\exhh character with hex code hh
|
||||
\ex{hhh..} character with hex code hhh..
|
||||
.sp
|
||||
Note that \e0dd is always an octal code, and that \e8 and \e9 are the literal
|
||||
characters "8" and "9".
|
||||
.
|
||||
.
|
||||
.SH "CHARACTER TYPES"
|
||||
@ -56,9 +61,11 @@ documentation. This document contains a quick-reference summary of the syntax.
|
||||
\eW a "non-word" character
|
||||
\eX a Unicode extended grapheme cluster
|
||||
.sp
|
||||
In PCRE, by default, \ed, \eD, \es, \eS, \ew, and \eW recognize only ASCII
|
||||
characters, even in a UTF mode. However, this can be changed by setting the
|
||||
PCRE_UCP option.
|
||||
By default, \ed, \es, and \ew match only ASCII characters, even in UTF-8 mode
|
||||
or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
|
||||
happening, \es and \ew may also match characters with code points in the range
|
||||
128-255. If the PCRE_UCP option is set, the behaviour of these escape sequences
|
||||
is changed to use Unicode properties and they match many more characters.
|
||||
.
|
||||
.
|
||||
.SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP"
|
||||
@ -115,10 +122,13 @@ PCRE_UCP option.
|
||||
.sp
|
||||
Xan Alphanumeric: union of properties L and N
|
||||
Xps POSIX space: property Z or tab, NL, VT, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, FF, CR
|
||||
Xsp Perl space: property Z or tab, NL, VT, FF, CR
|
||||
Xuc Universally-named character: one that can be
|
||||
represented by a Universal Character Name
|
||||
Xwd Perl word: property Xan or underscore
|
||||
.sp
|
||||
Perl and POSIX space are now the same. Perl added VT to its space character set
|
||||
at release 5.18 and PCRE changed at release 8.34.
|
||||
.
|
||||
.
|
||||
.SH "SCRIPT NAMES FOR \ep AND \eP"
|
||||
@ -355,6 +365,9 @@ newline-setting options with similar syntax:
|
||||
(*UTF32) set UTF-32 mode: 32-bit library (PCRE_UTF32)
|
||||
(*UTF) set appropriate UTF mode for the library in use
|
||||
(*UCP) set PCRE_UCP (use Unicode properties for \ed etc)
|
||||
.sp
|
||||
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||
limits set by the caller of pcre_exec(), not increase them.
|
||||
.
|
||||
.
|
||||
.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS"
|
||||
@ -495,6 +508,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 26 April 2013
|
||||
Last updated: 12 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -1,4 +1,4 @@
|
||||
.TH PCRETEST 1 "26 April 2013" "PCRE 8.33"
|
||||
.TH PCRETEST 1 "12 November 2013" "PCRE 8.34"
|
||||
.SH NAME
|
||||
pcretest - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
@ -155,6 +155,10 @@ Output the size of each compiled pattern after it has been compiled. This is
|
||||
equivalent to adding \fB/M\fP to each regular expression. The size is given in
|
||||
bytes for both libraries.
|
||||
.TP 10
|
||||
\fB-O\fP
|
||||
Behave as if each pattern has the \fB/O\fP modifier, that is, disable
|
||||
auto-possessification for all patterns.
|
||||
.TP 10
|
||||
\fB-o\fP \fIosize\fP
|
||||
Set the number of elements in the output vector that is used when calling
|
||||
\fBpcre[16|32]_exec()\fP or \fBpcre[16|32]_dfa_exec()\fP to be \fIosize\fP. The
|
||||
@ -216,17 +220,21 @@ contains (*MARK) items there may also be differences, for the same reason. The
|
||||
should never be studied (see the \fB/S\fP pattern modifier below).
|
||||
.TP 10
|
||||
\fB-t\fP
|
||||
Run each compile, study, and match many times with a timer, and output
|
||||
resulting time per compile or match (in milliseconds). Do not set \fB-m\fP with
|
||||
\fB-t\fP, because you will then get the size output a zillion times, and the
|
||||
timing will be distorted. You can control the number of iterations that are
|
||||
used for timing by following \fB-t\fP with a number (as a separate item on the
|
||||
command line). For example, "-t 1000" would iterate 1000 times. The default is
|
||||
to iterate 500000 times.
|
||||
Run each compile, study, and match many times with a timer, and output the
|
||||
resulting times per compile, study, or match (in milliseconds). Do not set
|
||||
\fB-m\fP with \fB-t\fP, because you will then get the size output a zillion
|
||||
times, and the timing will be distorted. You can control the number of
|
||||
iterations that are used for timing by following \fB-t\fP with a number (as a
|
||||
separate item on the command line). For example, "-t 1000" iterates 1000 times.
|
||||
The default is to iterate 500000 times.
|
||||
.TP 10
|
||||
\fB-tm\fP
|
||||
This is like \fB-t\fP except that it times only the matching phase, not the
|
||||
compile or study phases.
|
||||
.TP 10
|
||||
\fB-T\fP \fB-TM\fP
|
||||
These behave like \fB-t\fP and \fB-tm\fP, but in addition, at the end of a run,
|
||||
the total times for all compiles, studies, and matches are output.
|
||||
.
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
@ -246,7 +254,7 @@ option states whether or not \fBreadline()\fP will be used.
|
||||
.P
|
||||
The program handles any number of sets of input on a single input file. Each
|
||||
set starts with a regular expression, and continues with any number of data
|
||||
lines to be matched against the pattern.
|
||||
lines to be matched against that pattern.
|
||||
.P
|
||||
Each data line is matched separately and independently. If you want to do
|
||||
multi-line matches, you have to use the \en escape sequence (or \er or \er\en,
|
||||
@ -320,6 +328,7 @@ sections.
|
||||
\fB/M\fP show compiled memory size
|
||||
\fB/m\fP set PCRE_MULTILINE
|
||||
\fB/N\fP set PCRE_NO_AUTO_CAPTURE
|
||||
\fB/O\fP set PCRE_NO_AUTO_POSSESS
|
||||
\fB/P\fP use the POSIX wrapper
|
||||
\fB/S\fP study the pattern after compilation
|
||||
\fB/s\fP set PCRE_DOTALL
|
||||
@ -376,6 +385,7 @@ options that do not correspond to anything in Perl:
|
||||
\fB/f\fP PCRE_FIRSTLINE
|
||||
\fB/J\fP PCRE_DUPNAMES
|
||||
\fB/N\fP PCRE_NO_AUTO_CAPTURE
|
||||
\fB/O\fP PCRE_NO_AUTO_POSSESS
|
||||
\fB/U\fP PCRE_UNGREEDY
|
||||
\fB/W\fP PCRE_UCP
|
||||
\fB/X\fP PCRE_EXTRA
|
||||
@ -508,8 +518,8 @@ expression has been compiled, and the results used when the expression is
|
||||
matched. There are a number of qualifying characters that may follow \fB/S\fP.
|
||||
They may appear in any order.
|
||||
.P
|
||||
If \fBS\fP is followed by an exclamation mark, \fBpcre[16|32]_study()\fP is called
|
||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||
If \fB/S\fP is followed by an exclamation mark, \fBpcre[16|32]_study()\fP is
|
||||
called with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||
\fBpcre_extra\fP block, even when studying discovers no useful information.
|
||||
.P
|
||||
If \fB/S\fP is followed by a second S character, it suppresses studying, even
|
||||
@ -585,6 +595,37 @@ The \fB/+\fP modifier works as described above. All other modifiers are
|
||||
ignored.
|
||||
.
|
||||
.
|
||||
.SS "Locking out certain modifiers"
|
||||
.rs
|
||||
.sp
|
||||
PCRE can be compiled with or without support for certain features such as
|
||||
UTF-8/16/32 or Unicode properties. Accordingly, the standard tests are split up
|
||||
into a number of different files that are selected for running depending on
|
||||
which features are available. When updating the tests, it is all too easy to
|
||||
put a new test into the wrong file by mistake; for example, to put a test that
|
||||
requires UTF support into a file that is used when it is not available. To help
|
||||
detect such mistakes as early as possible, there is a facility for locking out
|
||||
specific modifiers. If an input line for \fBpcretest\fP starts with the string
|
||||
"< forbid " the following sequence of characters is taken as a list of
|
||||
forbidden modifiers. For example, in the test files that must not use UTF or
|
||||
Unicode property support, this line appears:
|
||||
.sp
|
||||
< forbid 8W
|
||||
.sp
|
||||
This locks out the /8 and /W modifiers. An immediate error is given if they are
|
||||
subsequently encountered. If the character string contains < but not >, all the
|
||||
multi-character modifiers that begin with < are locked out. Otherwise, such
|
||||
modifiers must be explicitly listed, for example:
|
||||
.sp
|
||||
< forbid <JS><cr>
|
||||
.sp
|
||||
There must be a single space between < and "forbid" for this feature to be
|
||||
recognised. If there is not, the line is interpreted either as a request to
|
||||
re-load a pre-compiled pattern (see "SAVING AND RELOADING COMPILED PATTERNS"
|
||||
below) or, if there is another < character, as a pattern that uses < as its
|
||||
delimiter.
|
||||
.
|
||||
.
|
||||
.SH "DATA LINES"
|
||||
.rs
|
||||
.sp
|
||||
@ -608,6 +649,7 @@ recognized:
|
||||
\ev vertical tab (\ex0b)
|
||||
\ennn octal character (up to 3 octal digits); always
|
||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||
\eo{dd...} octal character (any number of octal digits)
|
||||
\exhh hexadecimal byte (up to 2 hex digits)
|
||||
\ex{hh...} hexadecimal character (any number of hex digits)
|
||||
.\" JOIN
|
||||
@ -1031,10 +1073,9 @@ exact copy of the compiled pattern. If there is additional study data, this
|
||||
writing the file, \fBpcretest\fP expects to read a new pattern.
|
||||
.P
|
||||
A saved pattern can be reloaded into \fBpcretest\fP by specifying < and a file
|
||||
name instead of a pattern. The name of the file must not contain a < character,
|
||||
as otherwise \fBpcretest\fP will interpret the line as a pattern delimited by <
|
||||
characters.
|
||||
For example:
|
||||
name instead of a pattern. There must be no space between < and the file name,
|
||||
which must not contain a < character, as otherwise \fBpcretest\fP will
|
||||
interpret the line as a pattern delimited by < characters. For example:
|
||||
.sp
|
||||
re> </some/file
|
||||
Compiled pattern loaded from /some/file
|
||||
@ -1094,6 +1135,6 @@ Cambridge CB2 3QH, England.
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 26 April 2013
|
||||
Last updated: 12 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
.fi
|
||||
|
@ -138,32 +138,35 @@ COMMAND LINE OPTIONS
|
||||
compiled. This is equivalent to adding /M to each regular
|
||||
expression. The size is given in bytes for both libraries.
|
||||
|
||||
-o osize Set the number of elements in the output vector that is used
|
||||
when calling pcre[16|32]_exec() or pcre[16|32]_dfa_exec() to
|
||||
be osize. The default value is 45, which is enough for 14
|
||||
-O Behave as if each pattern has the /O modifier, that is dis-
|
||||
able auto-possessification for all patterns.
|
||||
|
||||
-o osize Set the number of elements in the output vector that is used
|
||||
when calling pcre[16|32]_exec() or pcre[16|32]_dfa_exec() to
|
||||
be osize. The default value is 45, which is enough for 14
|
||||
capturing subexpressions for pcre[16|32]_exec() or 22 differ-
|
||||
ent matches for pcre[16|32]_dfa_exec(). The vector size can
|
||||
be changed for individual matching calls by including \O in
|
||||
ent matches for pcre[16|32]_dfa_exec(). The vector size can
|
||||
be changed for individual matching calls by including \O in
|
||||
the data line (see below).
|
||||
|
||||
-p Behave as if each pattern has the /P modifier; the POSIX
|
||||
wrapper API is used to call PCRE. None of the other options
|
||||
has any effect when -p is set. This option can be used only
|
||||
-p Behave as if each pattern has the /P modifier; the POSIX
|
||||
wrapper API is used to call PCRE. None of the other options
|
||||
has any effect when -p is set. This option can be used only
|
||||
with the 8-bit library.
|
||||
|
||||
-q Do not output the version number of pcretest at the start of
|
||||
-q Do not output the version number of pcretest at the start of
|
||||
execution.
|
||||
|
||||
-S size On Unix-like systems, set the size of the run-time stack to
|
||||
-S size On Unix-like systems, set the size of the run-time stack to
|
||||
size megabytes.
|
||||
|
||||
-s or -s+ Behave as if each pattern has the /S modifier; in other
|
||||
words, force each pattern to be studied. If -s+ is used, all
|
||||
the JIT compile options are passed to pcre[16|32]_study(),
|
||||
causing just-in-time optimization to be set up if it is
|
||||
available, for both full and partial matching. Specific JIT
|
||||
-s or -s+ Behave as if each pattern has the /S modifier; in other
|
||||
words, force each pattern to be studied. If -s+ is used, all
|
||||
the JIT compile options are passed to pcre[16|32]_study(),
|
||||
causing just-in-time optimization to be set up if it is
|
||||
available, for both full and partial matching. Specific JIT
|
||||
compile options can be selected by following -s+ with a digit
|
||||
in the range 1 to 7, which selects the JIT compile modes as
|
||||
in the range 1 to 7, which selects the JIT compile modes as
|
||||
follows:
|
||||
|
||||
1 normal match only
|
||||
@ -173,115 +176,119 @@ COMMAND LINE OPTIONS
|
||||
6 soft and hard partial match
|
||||
7 all three modes (default)
|
||||
|
||||
If -s++ is used instead of -s+ (with or without a following
|
||||
digit), the text "(JIT)" is added to the first output line
|
||||
If -s++ is used instead of -s+ (with or without a following
|
||||
digit), the text "(JIT)" is added to the first output line
|
||||
after a match or no match when JIT-compiled code was actually
|
||||
used.
|
||||
|
||||
Note that there are pattern options that can override -s,
|
||||
Note that there are pattern options that can override -s,
|
||||
either specifying no studying at all, or suppressing JIT com-
|
||||
pilation.
|
||||
|
||||
If the /I or /D option is present on a pattern (requesting
|
||||
output about the compiled pattern), information about the
|
||||
result of studying is not included when studying is caused
|
||||
only by -s and neither -i nor -d is present on the command
|
||||
line. This behaviour means that the output from tests that
|
||||
are run with and without -s should be identical, except when
|
||||
If the /I or /D option is present on a pattern (requesting
|
||||
output about the compiled pattern), information about the
|
||||
result of studying is not included when studying is caused
|
||||
only by -s and neither -i nor -d is present on the command
|
||||
line. This behaviour means that the output from tests that
|
||||
are run with and without -s should be identical, except when
|
||||
options that output information about the actual running of a
|
||||
match are set.
|
||||
|
||||
The -M, -t, and -tm options, which give information about
|
||||
resources used, are likely to produce different output with
|
||||
and without -s. Output may also differ if the /C option is
|
||||
The -M, -t, and -tm options, which give information about
|
||||
resources used, are likely to produce different output with
|
||||
and without -s. Output may also differ if the /C option is
|
||||
present on an individual pattern. This uses callouts to trace
|
||||
the matching process, and this may be different between
|
||||
studied and non-studied patterns. If the pattern contains
|
||||
(*MARK) items there may also be differences, for the same
|
||||
the matching process, and this may be different between
|
||||
studied and non-studied patterns. If the pattern contains
|
||||
(*MARK) items there may also be differences, for the same
|
||||
reason. The -s command line option can be overridden for spe-
|
||||
cific patterns that should never be studied (see the /S pat-
|
||||
cific patterns that should never be studied (see the /S pat-
|
||||
tern modifier below).
|
||||
|
||||
-t Run each compile, study, and match many times with a timer,
|
||||
and output resulting time per compile or match (in millisec-
|
||||
onds). Do not set -m with -t, because you will then get the
|
||||
size output a zillion times, and the timing will be dis-
|
||||
torted. You can control the number of iterations that are
|
||||
used for timing by following -t with a number (as a separate
|
||||
item on the command line). For example, "-t 1000" would iter-
|
||||
ate 1000 times. The default is to iterate 500000 times.
|
||||
-t Run each compile, study, and match many times with a timer,
|
||||
and output the resulting times per compile, study, or match
|
||||
(in milliseconds). Do not set -m with -t, because you will
|
||||
then get the size output a zillion times, and the timing will
|
||||
be distorted. You can control the number of iterations that
|
||||
are used for timing by following -t with a number (as a sepa-
|
||||
rate item on the command line). For example, "-t 1000" iter-
|
||||
ates 1000 times. The default is to iterate 500000 times.
|
||||
|
||||
-tm This is like -t except that it times only the matching phase,
|
||||
not the compile or study phases.
|
||||
|
||||
-T -TM These behave like -t and -tm, but in addition, at the end of
|
||||
a run, the total times for all compiles, studies, and matches
|
||||
are output.
|
||||
|
||||
|
||||
DESCRIPTION
|
||||
|
||||
If pcretest is given two filename arguments, it reads from the first
|
||||
If pcretest is given two filename arguments, it reads from the first
|
||||
and writes to the second. If it is given only one filename argument, it
|
||||
reads from that file and writes to stdout. Otherwise, it reads from
|
||||
stdin and writes to stdout, and prompts for each line of input, using
|
||||
reads from that file and writes to stdout. Otherwise, it reads from
|
||||
stdin and writes to stdout, and prompts for each line of input, using
|
||||
"re>" to prompt for regular expressions, and "data>" to prompt for data
|
||||
lines.
|
||||
|
||||
When pcretest is built, a configuration option can specify that it
|
||||
should be linked with the libreadline library. When this is done, if
|
||||
When pcretest is built, a configuration option can specify that it
|
||||
should be linked with the libreadline library. When this is done, if
|
||||
the input is from a terminal, it is read using the readline() function.
|
||||
This provides line-editing and history facilities. The output from the
|
||||
This provides line-editing and history facilities. The output from the
|
||||
-help option states whether or not readline() will be used.
|
||||
|
||||
The program handles any number of sets of input on a single input file.
|
||||
Each set starts with a regular expression, and continues with any num-
|
||||
ber of data lines to be matched against the pattern.
|
||||
Each set starts with a regular expression, and continues with any num-
|
||||
ber of data lines to be matched against that pattern.
|
||||
|
||||
Each data line is matched separately and independently. If you want to
|
||||
Each data line is matched separately and independently. If you want to
|
||||
do multi-line matches, you have to use the \n escape sequence (or \r or
|
||||
\r\n, etc., depending on the newline setting) in a single line of input
|
||||
to encode the newline sequences. There is no limit on the length of
|
||||
data lines; the input buffer is automatically extended if it is too
|
||||
to encode the newline sequences. There is no limit on the length of
|
||||
data lines; the input buffer is automatically extended if it is too
|
||||
small.
|
||||
|
||||
An empty line signals the end of the data lines, at which point a new
|
||||
regular expression is read. The regular expressions are given enclosed
|
||||
An empty line signals the end of the data lines, at which point a new
|
||||
regular expression is read. The regular expressions are given enclosed
|
||||
in any non-alphanumeric delimiters other than backslash, for example:
|
||||
|
||||
/(a|bc)x+yz/
|
||||
|
||||
White space before the initial delimiter is ignored. A regular expres-
|
||||
sion may be continued over several input lines, in which case the new-
|
||||
line characters are included within it. It is possible to include the
|
||||
White space before the initial delimiter is ignored. A regular expres-
|
||||
sion may be continued over several input lines, in which case the new-
|
||||
line characters are included within it. It is possible to include the
|
||||
delimiter within the pattern by escaping it, for example
|
||||
|
||||
/abc\/def/
|
||||
|
||||
If you do so, the escape and the delimiter form part of the pattern,
|
||||
but since delimiters are always non-alphanumeric, this does not affect
|
||||
its interpretation. If the terminating delimiter is immediately fol-
|
||||
If you do so, the escape and the delimiter form part of the pattern,
|
||||
but since delimiters are always non-alphanumeric, this does not affect
|
||||
its interpretation. If the terminating delimiter is immediately fol-
|
||||
lowed by a backslash, for example,
|
||||
|
||||
/abc/\
|
||||
|
||||
then a backslash is added to the end of the pattern. This is done to
|
||||
provide a way of testing the error condition that arises if a pattern
|
||||
then a backslash is added to the end of the pattern. This is done to
|
||||
provide a way of testing the error condition that arises if a pattern
|
||||
finishes with a backslash, because
|
||||
|
||||
/abc\/
|
||||
|
||||
is interpreted as the first line of a pattern that starts with "abc/",
|
||||
is interpreted as the first line of a pattern that starts with "abc/",
|
||||
causing pcretest to read the next line as a continuation of the regular
|
||||
expression.
|
||||
|
||||
|
||||
PATTERN MODIFIERS
|
||||
|
||||
A pattern may be followed by any number of modifiers, which are mostly
|
||||
single characters, though some of these can be qualified by further
|
||||
characters. Following Perl usage, these are referred to below as, for
|
||||
example, "the /i modifier", even though the delimiter of the pattern
|
||||
need not always be a slash, and no slash is used when writing modi-
|
||||
fiers. White space may appear between the final pattern delimiter and
|
||||
the first modifier, and between the modifiers themselves. For refer-
|
||||
ence, here is a complete list of modifiers. They fall into several
|
||||
A pattern may be followed by any number of modifiers, which are mostly
|
||||
single characters, though some of these can be qualified by further
|
||||
characters. Following Perl usage, these are referred to below as, for
|
||||
example, "the /i modifier", even though the delimiter of the pattern
|
||||
need not always be a slash, and no slash is used when writing modi-
|
||||
fiers. White space may appear between the final pattern delimiter and
|
||||
the first modifier, and between the modifiers themselves. For refer-
|
||||
ence, here is a complete list of modifiers. They fall into several
|
||||
groups that are described in detail in the following sections.
|
||||
|
||||
/8 set UTF mode
|
||||
@ -307,6 +314,7 @@ PATTERN MODIFIERS
|
||||
/M show compiled memory size
|
||||
/m set PCRE_MULTILINE
|
||||
/N set PCRE_NO_AUTO_CAPTURE
|
||||
/O set PCRE_NO_AUTO_POSSESS
|
||||
/P use the POSIX wrapper
|
||||
/S study the pattern after compilation
|
||||
/s set PCRE_DOTALL
|
||||
@ -331,8 +339,8 @@ PATTERN MODIFIERS
|
||||
Perl-compatible modifiers
|
||||
|
||||
The /i, /m, /s, and /x modifiers set the PCRE_CASELESS, PCRE_MULTILINE,
|
||||
PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when
|
||||
pcre[16|32]_compile() is called. These four modifier letters have the
|
||||
PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when
|
||||
pcre[16|32]_compile() is called. These four modifier letters have the
|
||||
same effect as they do in Perl. For example:
|
||||
|
||||
/caseless/i
|
||||
@ -340,7 +348,7 @@ PATTERN MODIFIERS
|
||||
|
||||
Modifiers for other PCRE options
|
||||
|
||||
The following table shows additional modifiers for setting PCRE com-
|
||||
The following table shows additional modifiers for setting PCRE com-
|
||||
pile-time options that do not correspond to anything in Perl:
|
||||
|
||||
/8 PCRE_UTF8 ) when using the 8-bit
|
||||
@ -359,6 +367,7 @@ PATTERN MODIFIERS
|
||||
/f PCRE_FIRSTLINE
|
||||
/J PCRE_DUPNAMES
|
||||
/N PCRE_NO_AUTO_CAPTURE
|
||||
/O PCRE_NO_AUTO_POSSESS
|
||||
/U PCRE_UNGREEDY
|
||||
/W PCRE_UCP
|
||||
/X PCRE_EXTRA
|
||||
@ -372,138 +381,138 @@ PATTERN MODIFIERS
|
||||
/<bsr_unicode> PCRE_BSR_UNICODE
|
||||
/<JS> PCRE_JAVASCRIPT_COMPAT
|
||||
|
||||
The modifiers that are enclosed in angle brackets are literal strings
|
||||
as shown, including the angle brackets, but the letters within can be
|
||||
in either case. This example sets multiline matching with CRLF as the
|
||||
The modifiers that are enclosed in angle brackets are literal strings
|
||||
as shown, including the angle brackets, but the letters within can be
|
||||
in either case. This example sets multiline matching with CRLF as the
|
||||
line ending sequence:
|
||||
|
||||
/^abc/m<CRLF>
|
||||
|
||||
As well as turning on the PCRE_UTF8/16/32 option, the /8 modifier
|
||||
causes all non-printing characters in output strings to be printed
|
||||
As well as turning on the PCRE_UTF8/16/32 option, the /8 modifier
|
||||
causes all non-printing characters in output strings to be printed
|
||||
using the \x{hh...} notation. Otherwise, those less than 0x100 are out-
|
||||
put in hex without the curly brackets.
|
||||
|
||||
Full details of the PCRE options are given in the pcreapi documenta-
|
||||
Full details of the PCRE options are given in the pcreapi documenta-
|
||||
tion.
|
||||
|
||||
Finding all matches in a string
|
||||
|
||||
Searching for all possible matches within each subject string can be
|
||||
requested by the /g or /G modifier. After finding a match, PCRE is
|
||||
Searching for all possible matches within each subject string can be
|
||||
requested by the /g or /G modifier. After finding a match, PCRE is
|
||||
called again to search the remainder of the subject string. The differ-
|
||||
ence between /g and /G is that the former uses the startoffset argument
|
||||
to pcre[16|32]_exec() to start searching at a new point within the
|
||||
entire string (which is in effect what Perl does), whereas the latter
|
||||
passes over a shortened substring. This makes a difference to the
|
||||
matching process if the pattern begins with a lookbehind assertion
|
||||
to pcre[16|32]_exec() to start searching at a new point within the
|
||||
entire string (which is in effect what Perl does), whereas the latter
|
||||
passes over a shortened substring. This makes a difference to the
|
||||
matching process if the pattern begins with a lookbehind assertion
|
||||
(including \b or \B).
|
||||
|
||||
If any call to pcre[16|32]_exec() in a /g or /G sequence matches an
|
||||
empty string, the next call is done with the PCRE_NOTEMPTY_ATSTART and
|
||||
PCRE_ANCHORED flags set in order to search for another, non-empty,
|
||||
match at the same point. If this second match fails, the start offset
|
||||
is advanced, and the normal match is retried. This imitates the way
|
||||
If any call to pcre[16|32]_exec() in a /g or /G sequence matches an
|
||||
empty string, the next call is done with the PCRE_NOTEMPTY_ATSTART and
|
||||
PCRE_ANCHORED flags set in order to search for another, non-empty,
|
||||
match at the same point. If this second match fails, the start offset
|
||||
is advanced, and the normal match is retried. This imitates the way
|
||||
Perl handles such cases when using the /g modifier or the split() func-
|
||||
tion. Normally, the start offset is advanced by one character, but if
|
||||
the newline convention recognizes CRLF as a newline, and the current
|
||||
tion. Normally, the start offset is advanced by one character, but if
|
||||
the newline convention recognizes CRLF as a newline, and the current
|
||||
character is CR followed by LF, an advance of two is used.
|
||||
|
||||
Other modifiers
|
||||
|
||||
There are yet more modifiers for controlling the way pcretest operates.
|
||||
|
||||
The /+ modifier requests that as well as outputting the substring that
|
||||
matched the entire pattern, pcretest should in addition output the
|
||||
remainder of the subject string. This is useful for tests where the
|
||||
subject contains multiple copies of the same substring. If the + modi-
|
||||
fier appears twice, the same action is taken for captured substrings.
|
||||
In each case the remainder is output on the following line with a plus
|
||||
character following the capture number. Note that this modifier must
|
||||
not immediately follow the /S modifier because /S+ and /S++ have other
|
||||
The /+ modifier requests that as well as outputting the substring that
|
||||
matched the entire pattern, pcretest should in addition output the
|
||||
remainder of the subject string. This is useful for tests where the
|
||||
subject contains multiple copies of the same substring. If the + modi-
|
||||
fier appears twice, the same action is taken for captured substrings.
|
||||
In each case the remainder is output on the following line with a plus
|
||||
character following the capture number. Note that this modifier must
|
||||
not immediately follow the /S modifier because /S+ and /S++ have other
|
||||
meanings.
|
||||
|
||||
The /= modifier requests that the values of all potential captured
|
||||
parentheses be output after a match. By default, only those up to the
|
||||
The /= modifier requests that the values of all potential captured
|
||||
parentheses be output after a match. By default, only those up to the
|
||||
highest one actually used in the match are output (corresponding to the
|
||||
return code from pcre[16|32]_exec()). Values in the offsets vector cor-
|
||||
responding to higher numbers should be set to -1, and these are output
|
||||
as "<unset>". This modifier gives a way of checking that this is hap-
|
||||
responding to higher numbers should be set to -1, and these are output
|
||||
as "<unset>". This modifier gives a way of checking that this is hap-
|
||||
pening.
|
||||
|
||||
The /B modifier is a debugging feature. It requests that pcretest out-
|
||||
put a representation of the compiled code after compilation. Normally
|
||||
this information contains length and offset values; however, if /Z is
|
||||
also present, this data is replaced by spaces. This is a special fea-
|
||||
ture for use in the automatic test scripts; it ensures that the same
|
||||
The /B modifier is a debugging feature. It requests that pcretest out-
|
||||
put a representation of the compiled code after compilation. Normally
|
||||
this information contains length and offset values; however, if /Z is
|
||||
also present, this data is replaced by spaces. This is a special fea-
|
||||
ture for use in the automatic test scripts; it ensures that the same
|
||||
output is generated for different internal link sizes.
|
||||
|
||||
The /D modifier is a PCRE debugging feature, and is equivalent to /BI,
|
||||
The /D modifier is a PCRE debugging feature, and is equivalent to /BI,
|
||||
that is, both the /B and the /I modifiers.
|
||||
|
||||
The /F modifier causes pcretest to flip the byte order of the 2-byte
|
||||
The /F modifier causes pcretest to flip the byte order of the 2-byte
|
||||
and 4-byte fields in the compiled pattern. This facility is for testing
|
||||
the feature in PCRE that allows it to execute patterns that were com-
|
||||
the feature in PCRE that allows it to execute patterns that were com-
|
||||
piled on a host with a different endianness. This feature is not avail-
|
||||
able when the POSIX interface to PCRE is being used, that is, when the
|
||||
able when the POSIX interface to PCRE is being used, that is, when the
|
||||
/P pattern modifier is specified. See also the section about saving and
|
||||
reloading compiled patterns below.
|
||||
|
||||
The /I modifier requests that pcretest output information about the
|
||||
compiled pattern (whether it is anchored, has a fixed first character,
|
||||
and so on). It does this by calling pcre[16|32]_fullinfo() after com-
|
||||
piling a pattern. If the pattern is studied, the results of that are
|
||||
The /I modifier requests that pcretest output information about the
|
||||
compiled pattern (whether it is anchored, has a fixed first character,
|
||||
and so on). It does this by calling pcre[16|32]_fullinfo() after com-
|
||||
piling a pattern. If the pattern is studied, the results of that are
|
||||
also output.
|
||||
|
||||
The /K modifier requests pcretest to show names from backtracking con-
|
||||
trol verbs that are returned from calls to pcre[16|32]_exec(). It
|
||||
causes pcretest to create a pcre[16|32]_extra block if one has not
|
||||
already been created by a call to pcre[16|32]_study(), and to set the
|
||||
PCRE_EXTRA_MARK flag and the mark field within it, every time that
|
||||
pcre[16|32]_exec() is called. If the variable that the mark field
|
||||
points to is non-NULL for a match, non-match, or partial match,
|
||||
pcretest prints the string to which it points. For a match, this is
|
||||
shown on a line by itself, tagged with "MK:". For a non-match it is
|
||||
The /K modifier requests pcretest to show names from backtracking con-
|
||||
trol verbs that are returned from calls to pcre[16|32]_exec(). It
|
||||
causes pcretest to create a pcre[16|32]_extra block if one has not
|
||||
already been created by a call to pcre[16|32]_study(), and to set the
|
||||
PCRE_EXTRA_MARK flag and the mark field within it, every time that
|
||||
pcre[16|32]_exec() is called. If the variable that the mark field
|
||||
points to is non-NULL for a match, non-match, or partial match,
|
||||
pcretest prints the string to which it points. For a match, this is
|
||||
shown on a line by itself, tagged with "MK:". For a non-match it is
|
||||
added to the message.
|
||||
|
||||
The /L modifier must be followed directly by the name of a locale, for
|
||||
The /L modifier must be followed directly by the name of a locale, for
|
||||
example,
|
||||
|
||||
/pattern/Lfr_FR
|
||||
|
||||
For this reason, it must be the last modifier. The given locale is set,
|
||||
pcre[16|32]_maketables() is called to build a set of character tables
|
||||
for the locale, and this is then passed to pcre[16|32]_compile() when
|
||||
compiling the regular expression. Without an /L (or /T) modifier, NULL
|
||||
is passed as the tables pointer; that is, /L applies only to the
|
||||
pcre[16|32]_maketables() is called to build a set of character tables
|
||||
for the locale, and this is then passed to pcre[16|32]_compile() when
|
||||
compiling the regular expression. Without an /L (or /T) modifier, NULL
|
||||
is passed as the tables pointer; that is, /L applies only to the
|
||||
expression on which it appears.
|
||||
|
||||
The /M modifier causes the size in bytes of the memory block used to
|
||||
hold the compiled pattern to be output. This does not include the size
|
||||
of the pcre[16|32] block; it is just the actual compiled data. If the
|
||||
The /M modifier causes the size in bytes of the memory block used to
|
||||
hold the compiled pattern to be output. This does not include the size
|
||||
of the pcre[16|32] block; it is just the actual compiled data. If the
|
||||
pattern is successfully studied with the PCRE_STUDY_JIT_COMPILE option,
|
||||
the size of the JIT compiled code is also output.
|
||||
|
||||
The /S modifier causes pcre[16|32]_study() to be called after the
|
||||
expression has been compiled, and the results used when the expression
|
||||
The /S modifier causes pcre[16|32]_study() to be called after the
|
||||
expression has been compiled, and the results used when the expression
|
||||
is matched. There are a number of qualifying characters that may follow
|
||||
/S. They may appear in any order.
|
||||
|
||||
If S is followed by an exclamation mark, pcre[16|32]_study() is called
|
||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||
If /S is followed by an exclamation mark, pcre[16|32]_study() is called
|
||||
with the PCRE_STUDY_EXTRA_NEEDED option, causing it always to return a
|
||||
pcre_extra block, even when studying discovers no useful information.
|
||||
|
||||
If /S is followed by a second S character, it suppresses studying, even
|
||||
if it was requested externally by the -s command line option. This
|
||||
makes it possible to specify that certain patterns are always studied,
|
||||
if it was requested externally by the -s command line option. This
|
||||
makes it possible to specify that certain patterns are always studied,
|
||||
and others are never studied, independently of -s. This feature is used
|
||||
in the test files in a few cases where the output is different when the
|
||||
pattern is studied.
|
||||
|
||||
If the /S modifier is followed by a + character, the call to
|
||||
pcre[16|32]_study() is made with all the JIT study options, requesting
|
||||
just-in-time optimization support if it is available, for both normal
|
||||
and partial matching. If you want to restrict the JIT compiling modes,
|
||||
If the /S modifier is followed by a + character, the call to
|
||||
pcre[16|32]_study() is made with all the JIT study options, requesting
|
||||
just-in-time optimization support if it is available, for both normal
|
||||
and partial matching. If you want to restrict the JIT compiling modes,
|
||||
you can follow /S+ with a digit in the range 1 to 7:
|
||||
|
||||
1 normal match only
|
||||
@ -514,40 +523,40 @@ PATTERN MODIFIERS
|
||||
7 all three modes (default)
|
||||
|
||||
If /S++ is used instead of /S+ (with or without a following digit), the
|
||||
text "(JIT)" is added to the first output line after a match or no
|
||||
text "(JIT)" is added to the first output line after a match or no
|
||||
match when JIT-compiled code was actually used.
|
||||
|
||||
Note that there is also an independent /+ modifier; it must not be
|
||||
Note that there is also an independent /+ modifier; it must not be
|
||||
given immediately after /S or /S+ because this will be misinterpreted.
|
||||
|
||||
If JIT studying is successful, the compiled JIT code will automatically
|
||||
be used when pcre[16|32]_exec() is run, except when incompatible run-
|
||||
time options are specified. For more details, see the pcrejit documen-
|
||||
tation. See also the \J escape sequence below for a way of setting the
|
||||
be used when pcre[16|32]_exec() is run, except when incompatible run-
|
||||
time options are specified. For more details, see the pcrejit documen-
|
||||
tation. See also the \J escape sequence below for a way of setting the
|
||||
size of the JIT stack.
|
||||
|
||||
Finally, if /S is followed by a minus character, JIT compilation is
|
||||
suppressed, even if it was requested externally by the -s command line
|
||||
option. This makes it possible to specify that JIT is never to be used
|
||||
Finally, if /S is followed by a minus character, JIT compilation is
|
||||
suppressed, even if it was requested externally by the -s command line
|
||||
option. This makes it possible to specify that JIT is never to be used
|
||||
for certain patterns.
|
||||
|
||||
The /T modifier must be followed by a single digit. It causes a spe-
|
||||
The /T modifier must be followed by a single digit. It causes a spe-
|
||||
cific set of built-in character tables to be passed to pcre[16|32]_com-
|
||||
pile(). It is used in the standard PCRE tests to check behaviour with
|
||||
pile(). It is used in the standard PCRE tests to check behaviour with
|
||||
different character tables. The digit specifies the tables as follows:
|
||||
|
||||
0 the default ASCII tables, as distributed in
|
||||
pcre_chartables.c.dist
|
||||
1 a set of tables defining ISO 8859 characters
|
||||
|
||||
In table 1, some characters whose codes are greater than 128 are iden-
|
||||
In table 1, some characters whose codes are greater than 128 are iden-
|
||||
tified as letters, digits, spaces, etc.
|
||||
|
||||
Using the POSIX wrapper API
|
||||
|
||||
The /P modifier causes pcretest to call PCRE via the POSIX wrapper API
|
||||
rather than its native API. This supports only the 8-bit library. When
|
||||
/P is set, the following modifiers set options for the regcomp() func-
|
||||
The /P modifier causes pcretest to call PCRE via the POSIX wrapper API
|
||||
rather than its native API. This supports only the 8-bit library. When
|
||||
/P is set, the following modifiers set options for the regcomp() func-
|
||||
tion:
|
||||
|
||||
/i REG_ICASE
|
||||
@ -558,9 +567,40 @@ PATTERN MODIFIERS
|
||||
/W REG_UCP ) the POSIX standard
|
||||
/8 REG_UTF8 )
|
||||
|
||||
The /+ modifier works as described above. All other modifiers are
|
||||
The /+ modifier works as described above. All other modifiers are
|
||||
ignored.
|
||||
|
||||
Locking out certain modifiers
|
||||
|
||||
PCRE can be compiled with or without support for certain features such
|
||||
as UTF-8/16/32 or Unicode properties. Accordingly, the standard tests
|
||||
are split up into a number of different files that are selected for
|
||||
running depending on which features are available. When updating the
|
||||
tests, it is all too easy to put a new test into the wrong file by mis-
|
||||
take; for example, to put a test that requires UTF support into a file
|
||||
that is used when it is not available. To help detect such mistakes as
|
||||
early as possible, there is a facility for locking out specific modi-
|
||||
fiers. If an input line for pcretest starts with the string "< forbid "
|
||||
the following sequence of characters is taken as a list of forbidden
|
||||
modifiers. For example, in the test files that must not use UTF or Uni-
|
||||
code property support, this line appears:
|
||||
|
||||
< forbid 8W
|
||||
|
||||
This locks out the /8 and /W modifiers. An immediate error is given if
|
||||
they are subsequently encountered. If the character string contains <
|
||||
but not >, all the multi-character modifiers that begin with < are
|
||||
locked out. Otherwise, such modifiers must be explicitly listed, for
|
||||
example:
|
||||
|
||||
< forbid <JS><cr>
|
||||
|
||||
There must be a single space between < and "forbid" for this feature to
|
||||
be recognised. If there is not, the line is interpreted either as a
|
||||
request to re-load a pre-compiled pattern (see "SAVING AND RELOADING
|
||||
COMPILED PATTERNS" below) or, if there is another < character, as a
|
||||
pattern that uses < as its delimiter.
|
||||
|
||||
|
||||
DATA LINES
|
||||
|
||||
@ -583,6 +623,7 @@ DATA LINES
|
||||
\v vertical tab (\x0b)
|
||||
\nnn octal character (up to 3 octal digits); always
|
||||
a byte unless > 255 in UTF-8 or 16-bit or 32-bit mode
|
||||
\o{dd...} octal character (any number of octal digits)
|
||||
\xhh hexadecimal byte (up to 2 hex digits)
|
||||
\x{hh...} hexadecimal character (any number of hex digits)
|
||||
\A pass the PCRE_ANCHORED option to pcre[16|32]_exec()
|
||||
@ -974,50 +1015,51 @@ SAVING AND RELOADING COMPILED PATTERNS
|
||||
writing the file, pcretest expects to read a new pattern.
|
||||
|
||||
A saved pattern can be reloaded into pcretest by specifying < and a
|
||||
file name instead of a pattern. The name of the file must not contain a
|
||||
< character, as otherwise pcretest will interpret the line as a pattern
|
||||
delimited by < characters. For example:
|
||||
file name instead of a pattern. There must be no space between < and
|
||||
the file name, which must not contain a < character, as otherwise
|
||||
pcretest will interpret the line as a pattern delimited by < charac-
|
||||
ters. For example:
|
||||
|
||||
re> </some/file
|
||||
Compiled pattern loaded from /some/file
|
||||
No study data
|
||||
|
||||
If the pattern was previously studied with the JIT optimization, the
|
||||
JIT information cannot be saved and restored, and so is lost. When the
|
||||
pattern has been loaded, pcretest proceeds to read data lines in the
|
||||
If the pattern was previously studied with the JIT optimization, the
|
||||
JIT information cannot be saved and restored, and so is lost. When the
|
||||
pattern has been loaded, pcretest proceeds to read data lines in the
|
||||
usual way.
|
||||
|
||||
You can copy a file written by pcretest to a different host and reload
|
||||
it there, even if the new host has opposite endianness to the one on
|
||||
which the pattern was compiled. For example, you can compile on an i86
|
||||
machine and run on a SPARC machine. When a pattern is reloaded on a
|
||||
You can copy a file written by pcretest to a different host and reload
|
||||
it there, even if the new host has opposite endianness to the one on
|
||||
which the pattern was compiled. For example, you can compile on an i86
|
||||
machine and run on a SPARC machine. When a pattern is reloaded on a
|
||||
host with different endianness, the confirmation message is changed to:
|
||||
|
||||
Compiled pattern (byte-inverted) loaded from /some/file
|
||||
|
||||
The test suite contains some saved pre-compiled patterns with different
|
||||
endianness. These are reloaded using "<!" instead of just "<". This
|
||||
endianness. These are reloaded using "<!" instead of just "<". This
|
||||
suppresses the "(byte-inverted)" text so that the output is the same on
|
||||
all hosts. It also forces debugging output once the pattern has been
|
||||
all hosts. It also forces debugging output once the pattern has been
|
||||
reloaded.
|
||||
|
||||
File names for saving and reloading can be absolute or relative, but
|
||||
note that the shell facility of expanding a file name that starts with
|
||||
File names for saving and reloading can be absolute or relative, but
|
||||
note that the shell facility of expanding a file name that starts with
|
||||
a tilde (~) is not available.
|
||||
|
||||
The ability to save and reload files in pcretest is intended for test-
|
||||
ing and experimentation. It is not intended for production use because
|
||||
only a single pattern can be written to a file. Furthermore, there is
|
||||
no facility for supplying custom character tables for use with a
|
||||
reloaded pattern. If the original pattern was compiled with custom
|
||||
tables, an attempt to match a subject string using a reloaded pattern
|
||||
is likely to cause pcretest to crash. Finally, if you attempt to load
|
||||
The ability to save and reload files in pcretest is intended for test-
|
||||
ing and experimentation. It is not intended for production use because
|
||||
only a single pattern can be written to a file. Furthermore, there is
|
||||
no facility for supplying custom character tables for use with a
|
||||
reloaded pattern. If the original pattern was compiled with custom
|
||||
tables, an attempt to match a subject string using a reloaded pattern
|
||||
is likely to cause pcretest to crash. Finally, if you attempt to load
|
||||
a file that is not in the correct format, the result is undefined.
|
||||
|
||||
|
||||
SEE ALSO
|
||||
|
||||
pcre(3), pcre16(3), pcre32(3), pcreapi(3), pcrecallout(3), pcrejit,
|
||||
pcre(3), pcre16(3), pcre32(3), pcreapi(3), pcrecallout(3), pcrejit,
|
||||
pcrematching(3), pcrepartial(3), pcrepattern(3), pcreprecompile(3).
|
||||
|
||||
|
||||
@ -1030,5 +1072,5 @@ AUTHOR
|
||||
|
||||
REVISION
|
||||
|
||||
Last updated: 26 April 2013
|
||||
Last updated: 12 November 2013
|
||||
Copyright (c) 1997-2013 University of Cambridge.
|
||||
|
57
pcre/maria-patches/pcre_stack_guard.diff
Normal file
57
pcre/maria-patches/pcre_stack_guard.diff
Normal file
@ -0,0 +1,57 @@
|
||||
=== modified file 'pcre/pcre.h.in'
|
||||
--- pcre/pcre.h.in 2013-09-26 14:02:17 +0000
|
||||
+++ pcre/pcre.h.in 2013-10-02 07:58:29 +0000
|
||||
@@ -486,6 +486,7 @@ PCRE_EXP_DECL void (*pcre_free)(void *)
|
||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
+PCRE_EXP_DECL int (*pcre_stack_guard)(void);
|
||||
|
||||
PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre16_free)(void *);
|
||||
@@ -504,6 +505,7 @@ PCRE_EXP_DECL void pcre_free(void *);
|
||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||
+PCRE_EXP_DECL int pcre_stack_guard(void);
|
||||
|
||||
PCRE_EXP_DECL void *pcre16_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre16_free(void *);
|
||||
|
||||
=== modified file 'pcre/pcre_compile.c'
|
||||
--- pcre/pcre_compile.c 2013-09-26 14:02:17 +0000
|
||||
+++ pcre/pcre_compile.c 2013-10-02 07:58:29 +0000
|
||||
@@ -7107,6 +7107,12 @@ unsigned int orig_bracount;
|
||||
unsigned int max_bracount;
|
||||
branch_chain bc;
|
||||
|
||||
+if (pcre_stack_guard && pcre_stack_guard())
|
||||
+{
|
||||
+ *errorcodeptr= ERR23;
|
||||
+ return FALSE;
|
||||
+}
|
||||
+
|
||||
bc.outer = bcptr;
|
||||
bc.current_branch = code;
|
||||
|
||||
|
||||
=== modified file 'pcre/pcre_globals.c'
|
||||
--- pcre/pcre_globals.c 2013-09-26 14:02:17 +0000
|
||||
+++ pcre/pcre_globals.c 2013-10-02 07:58:29 +0000
|
||||
@@ -72,6 +72,7 @@ PCRE_EXP_DATA_DEFN void (*PUBL(free))(v
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
+PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
|
||||
|
||||
#elif !defined VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
|
||||
@@ -79,6 +80,7 @@ PCRE_EXP_DATA_DEFN void (*PUBL(free))(v
|
||||
PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL;
|
||||
+PCRE_EXP_DATA_DEFN int (*PUBL(stack_guard))(void) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
|
@ -150,7 +150,10 @@ with J. */
|
||||
#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
|
||||
#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
|
||||
|
||||
#define PCRE_DFA_RESTART 0x00020000 /* D */
|
||||
/* This pair use the same bit. */
|
||||
#define PCRE_NO_AUTO_POSSESS 0x00020000 /* C1 ) Overlaid */
|
||||
#define PCRE_DFA_RESTART 0x00020000 /* D ) Overlaid */
|
||||
|
||||
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
|
||||
#define PCRE_DUPNAMES 0x00080000 /* C1 */
|
||||
#define PCRE_NEWLINE_CR 0x00100000 /* C3 E D */
|
||||
@ -277,6 +280,7 @@ with J. */
|
||||
#define PCRE_INFO_REQUIREDCHARFLAGS 22
|
||||
#define PCRE_INFO_MATCHLIMIT 23
|
||||
#define PCRE_INFO_RECURSIONLIMIT 24
|
||||
#define PCRE_INFO_MATCH_EMPTY 25
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
@ -294,6 +298,7 @@ compatible. */
|
||||
#define PCRE_CONFIG_UTF16 10
|
||||
#define PCRE_CONFIG_JITTARGET 11
|
||||
#define PCRE_CONFIG_UTF32 12
|
||||
#define PCRE_CONFIG_PARENS_LIMIT 13
|
||||
|
||||
/* Request types for pcre_study(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
@ -657,6 +662,9 @@ PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
|
||||
pcre16_jit_callback, void *);
|
||||
PCRE_EXP_DECL void pcre32_assign_jit_stack(pcre32_extra *,
|
||||
pcre32_jit_callback, void *);
|
||||
PCRE_EXP_DECL void pcre_jit_free_unused_memory(void);
|
||||
PCRE_EXP_DECL void pcre16_jit_free_unused_memory(void);
|
||||
PCRE_EXP_DECL void pcre32_jit_free_unused_memory(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
@ -163,7 +163,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
|
@ -163,7 +163,7 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
|
3474
pcre/pcre_compile.c
3474
pcre/pcre_compile.c
File diff suppressed because it is too large
Load Diff
@ -161,6 +161,10 @@ switch (what)
|
||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_PARENS_LIMIT:
|
||||
*((unsigned long int *)where) = PARENS_NEST_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT:
|
||||
*((unsigned long int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
@ -120,7 +120,7 @@ static const pcre_uint8 coptable[] = {
|
||||
0, 0, /* \P, \p */
|
||||
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
|
||||
0, /* \X */
|
||||
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
|
||||
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
|
||||
1, /* Char */
|
||||
1, /* Chari */
|
||||
1, /* not */
|
||||
@ -151,11 +151,14 @@ static const pcre_uint8 coptable[] = {
|
||||
/* Character class & ref repeats */
|
||||
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
|
||||
0, 0, /* CRRANGE, CRMINRANGE */
|
||||
0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
|
||||
0, /* CLASS */
|
||||
0, /* NCLASS */
|
||||
0, /* XCLASS - variable length */
|
||||
0, /* REF */
|
||||
0, /* REFI */
|
||||
0, /* DNREF */
|
||||
0, /* DNREFI */
|
||||
0, /* RECURSE */
|
||||
0, /* CALLOUT */
|
||||
0, /* Alt */
|
||||
@ -171,8 +174,8 @@ static const pcre_uint8 coptable[] = {
|
||||
0, 0, /* ONCE, ONCE_NC */
|
||||
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
||||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||
0, 0, /* CREF, NCREF */
|
||||
0, 0, /* RREF, NRREF */
|
||||
0, 0, /* CREF, DNCREF */
|
||||
0, 0, /* RREF, DNRREF */
|
||||
0, /* DEF */
|
||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||
@ -194,7 +197,7 @@ static const pcre_uint8 poptable[] = {
|
||||
1, 1, /* \P, \p */
|
||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
|
||||
1, /* \X */
|
||||
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
|
||||
0, 0, 0, 0, 0, 0, /* \Z, \z, $, $M, ^, ^M */
|
||||
1, /* Char */
|
||||
1, /* Chari */
|
||||
1, /* not */
|
||||
@ -220,11 +223,14 @@ static const pcre_uint8 poptable[] = {
|
||||
/* Character class & ref repeats */
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
|
||||
1, 1, /* CRRANGE, CRMINRANGE */
|
||||
1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
|
||||
1, /* CLASS */
|
||||
1, /* NCLASS */
|
||||
1, /* XCLASS - variable length */
|
||||
0, /* REF */
|
||||
0, /* REFI */
|
||||
0, /* DNREF */
|
||||
0, /* DNREFI */
|
||||
0, /* RECURSE */
|
||||
0, /* CALLOUT */
|
||||
0, /* Alt */
|
||||
@ -240,8 +246,8 @@ static const pcre_uint8 poptable[] = {
|
||||
0, 0, /* ONCE, ONCE_NC */
|
||||
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
|
||||
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
|
||||
0, 0, /* CREF, NCREF */
|
||||
0, 0, /* RREF, NRREF */
|
||||
0, 0, /* CREF, DNCREF */
|
||||
0, 0, /* RREF, DNRREF */
|
||||
0, /* DEF */
|
||||
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
|
||||
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
|
||||
@ -1094,15 +1100,23 @@ for (;;)
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
@ -1344,15 +1358,23 @@ for (;;)
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
@ -1588,15 +1610,23 @@ for (;;)
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
@ -1857,15 +1887,23 @@ for (;;)
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N;
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
|
||||
break;
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR;
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
OK = TRUE;
|
||||
break;
|
||||
|
||||
default:
|
||||
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
@ -2533,31 +2571,65 @@ for (;;)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPOSSTAR:
|
||||
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||
if (isinclass) { ADD_NEW(state_offset, 0); }
|
||||
if (isinclass)
|
||||
{
|
||||
if (*ecode == OP_CRPOSSTAR)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(state_offset, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
|
||||
if (isinclass) { count++; ADD_NEW(state_offset, count); }
|
||||
if (isinclass)
|
||||
{
|
||||
if (count > 0 && *ecode == OP_CRPOSPLUS)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
count++;
|
||||
ADD_NEW(state_offset, count);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSQUERY:
|
||||
ADD_ACTIVE(next_state_offset + 1, 0);
|
||||
if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
|
||||
if (isinclass)
|
||||
{
|
||||
if (*ecode == OP_CRPOSQUERY)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
ADD_NEW(next_state_offset + 1, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count >= (int)GET2(ecode, 1))
|
||||
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||
if (isinclass)
|
||||
{
|
||||
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
|
||||
if (*ecode == OP_CRPOSRANGE)
|
||||
{
|
||||
active_count--; /* Remove non-match possibility */
|
||||
next_active_state--;
|
||||
}
|
||||
if (++count >= max && max != 0) /* Max 0 => no limit */
|
||||
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
|
||||
else
|
||||
@ -2657,9 +2729,11 @@ for (;;)
|
||||
|
||||
condcode = code[LINK_SIZE+1];
|
||||
|
||||
/* Back reference conditions are not supported */
|
||||
/* Back reference conditions and duplicate named recursion conditions
|
||||
are not supported */
|
||||
|
||||
if (condcode == OP_CREF || condcode == OP_NCREF)
|
||||
if (condcode == OP_CREF || condcode == OP_DNCREF ||
|
||||
condcode == OP_DNRREF)
|
||||
return PCRE_ERROR_DFA_UCOND;
|
||||
|
||||
/* The DEFINE condition is always false */
|
||||
@ -2671,7 +2745,7 @@ for (;;)
|
||||
which means "test if in any recursion". We can't test for specifically
|
||||
recursed groups. */
|
||||
|
||||
else if (condcode == OP_RREF || condcode == OP_NRREF)
|
||||
else if (condcode == OP_RREF)
|
||||
{
|
||||
int value = GET2(code, LINK_SIZE + 2);
|
||||
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
|
||||
|
545
pcre/pcre_exec.c
545
pcre/pcre_exec.c
@ -107,8 +107,8 @@ because the offset vector is always a multiple of 3 long. */
|
||||
|
||||
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
|
||||
|
||||
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
|
||||
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
|
||||
static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
|
||||
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
|
||||
|
||||
#ifdef PCRE_DEBUG
|
||||
/*************************************************
|
||||
@ -167,7 +167,7 @@ match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
|
||||
{
|
||||
PCRE_PUCHAR eptr_start = eptr;
|
||||
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
|
||||
#ifdef SUPPORT_UTF
|
||||
#if defined SUPPORT_UTF && defined SUPPORT_UCP
|
||||
BOOL utf = md->utf;
|
||||
#endif
|
||||
|
||||
@ -195,8 +195,7 @@ ASCII characters. */
|
||||
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UTF
|
||||
#ifdef SUPPORT_UCP
|
||||
#if defined SUPPORT_UTF && defined SUPPORT_UCP
|
||||
if (utf)
|
||||
{
|
||||
/* Match characters up to the end of the reference. NOTE: the number of
|
||||
@ -229,7 +228,6 @@ if (caseless)
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
|
||||
@ -312,7 +310,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
|
||||
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
|
||||
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
|
||||
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
|
||||
RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
|
||||
RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
|
||||
|
||||
/* These versions of the macros use the stack, as normal. There are debugging
|
||||
versions and production versions. Note that the "rw" argument of RMATCH isn't
|
||||
@ -1173,6 +1171,7 @@ for (;;)
|
||||
ecode = md->start_code + code_offset;
|
||||
save_capture_last = md->capture_last;
|
||||
matched_once = TRUE;
|
||||
mstart = md->start_match_ptr; /* In case \K changed it */
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1245,6 +1244,7 @@ for (;;)
|
||||
eptr = md->end_match_ptr;
|
||||
ecode = md->start_code + code_offset;
|
||||
matched_once = TRUE;
|
||||
mstart = md->start_match_ptr; /* In case \K reset it */
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1274,25 +1274,32 @@ for (;;)
|
||||
|
||||
/* Control never reaches here. */
|
||||
|
||||
/* Conditional group: compilation checked that there are no more than
|
||||
two branches. If the condition is false, skipping the first branch takes us
|
||||
past the end if there is only one branch, but that's OK because that is
|
||||
exactly what going to the ket would do. */
|
||||
/* Conditional group: compilation checked that there are no more than two
|
||||
branches. If the condition is false, skipping the first branch takes us
|
||||
past the end of the item if there is only one branch, but that's exactly
|
||||
what we want. */
|
||||
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
codelink = GET(ecode, 1);
|
||||
|
||||
/* The variable codelink will be added to ecode when the condition is
|
||||
false, to get to the second branch. Setting it to the offset to the ALT
|
||||
or KET, then incrementing ecode achieves this effect. We now have ecode
|
||||
pointing to the condition or callout. */
|
||||
|
||||
codelink = GET(ecode, 1); /* Offset to the second branch */
|
||||
ecode += 1 + LINK_SIZE; /* From this opcode */
|
||||
|
||||
/* Because of the way auto-callout works during compile, a callout item is
|
||||
inserted between OP_COND and an assertion condition. */
|
||||
|
||||
if (ecode[LINK_SIZE+1] == OP_CALLOUT)
|
||||
if (*ecode == OP_CALLOUT)
|
||||
{
|
||||
if (PUBL(callout) != NULL)
|
||||
{
|
||||
PUBL(callout_block) cb;
|
||||
cb.version = 2; /* Version 1 of the callout block */
|
||||
cb.callout_number = ecode[LINK_SIZE+2];
|
||||
cb.callout_number = ecode[1];
|
||||
cb.offset_vector = md->offset_vector;
|
||||
#if defined COMPILE_PCRE8
|
||||
cb.subject = (PCRE_SPTR)md->start_subject;
|
||||
@ -1304,8 +1311,8 @@ for (;;)
|
||||
cb.subject_length = (int)(md->end_subject - md->start_subject);
|
||||
cb.start_match = (int)(mstart - md->start_subject);
|
||||
cb.current_position = (int)(eptr - md->start_subject);
|
||||
cb.pattern_position = GET(ecode, LINK_SIZE + 3);
|
||||
cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
|
||||
cb.pattern_position = GET(ecode, 2);
|
||||
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
||||
cb.capture_top = offset_top/2;
|
||||
cb.capture_last = md->capture_last & CAPLMASK;
|
||||
/* Internal change requires this for API compatibility. */
|
||||
@ -1315,207 +1322,119 @@ for (;;)
|
||||
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
|
||||
if (rrc < 0) RRETURN(rrc);
|
||||
}
|
||||
|
||||
/* Advance ecode past the callout, so it now points to the condition. We
|
||||
must adjust codelink so that the value of ecode+codelink is unchanged. */
|
||||
|
||||
ecode += PRIV(OP_lengths)[OP_CALLOUT];
|
||||
codelink -= PRIV(OP_lengths)[OP_CALLOUT];
|
||||
}
|
||||
|
||||
condcode = ecode[LINK_SIZE+1];
|
||||
/* Test the various possible conditions */
|
||||
|
||||
/* Now see what the actual condition is */
|
||||
|
||||
if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
|
||||
condition = FALSE;
|
||||
switch(condcode = *ecode)
|
||||
{
|
||||
if (md->recursive == NULL) /* Not recursing => FALSE */
|
||||
case OP_RREF: /* Numbered group recursion test */
|
||||
if (md->recursive != NULL) /* Not recursing => FALSE */
|
||||
{
|
||||
condition = FALSE;
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
|
||||
unsigned int recno = GET2(ecode, 1); /* Recursion group number*/
|
||||
condition = (recno == RREF_ANY || recno == md->recursive->group_num);
|
||||
|
||||
/* If the test is for recursion into a specific subpattern, and it is
|
||||
false, but the test was set up by name, scan the table to see if the
|
||||
name refers to any other numbers, and test them. The condition is true
|
||||
if any one is set. */
|
||||
|
||||
if (!condition && condcode == OP_NRREF)
|
||||
{
|
||||
pcre_uchar *slotA = md->name_table;
|
||||
for (i = 0; i < md->name_count; i++)
|
||||
{
|
||||
if (GET2(slotA, 0) == recno) break;
|
||||
slotA += md->name_entry_size;
|
||||
}
|
||||
|
||||
/* Found a name for the number - there can be only one; duplicate
|
||||
names for different numbers are allowed, but not vice versa. First
|
||||
scan down for duplicates. */
|
||||
|
||||
if (i < md->name_count)
|
||||
{
|
||||
pcre_uchar *slotB = slotA;
|
||||
while (slotB > md->name_table)
|
||||
{
|
||||
slotB -= md->name_entry_size;
|
||||
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
|
||||
{
|
||||
condition = GET2(slotB, 0) == md->recursive->group_num;
|
||||
if (condition) break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
/* Scan up for duplicates */
|
||||
|
||||
if (!condition)
|
||||
{
|
||||
slotB = slotA;
|
||||
for (i++; i < md->name_count; i++)
|
||||
{
|
||||
slotB += md->name_entry_size;
|
||||
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
|
||||
{
|
||||
condition = GET2(slotB, 0) == md->recursive->group_num;
|
||||
if (condition) break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Chose branch according to the condition */
|
||||
|
||||
ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
|
||||
{
|
||||
offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
|
||||
condition = offset < offset_top && md->offset_vector[offset] >= 0;
|
||||
|
||||
/* If the numbered capture is unset, but the reference was by name,
|
||||
scan the table to see if the name refers to any other numbers, and test
|
||||
them. The condition is true if any one is set. This is tediously similar
|
||||
to the code above, but not close enough to try to amalgamate. */
|
||||
|
||||
if (!condition && condcode == OP_NCREF)
|
||||
case OP_DNRREF: /* Duplicate named group recursion test */
|
||||
if (md->recursive != NULL)
|
||||
{
|
||||
unsigned int refno = offset >> 1;
|
||||
pcre_uchar *slotA = md->name_table;
|
||||
|
||||
for (i = 0; i < md->name_count; i++)
|
||||
int count = GET2(ecode, 1 + IMM2_SIZE);
|
||||
pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
|
||||
while (count-- > 0)
|
||||
{
|
||||
if (GET2(slotA, 0) == refno) break;
|
||||
slotA += md->name_entry_size;
|
||||
}
|
||||
|
||||
/* Found a name for the number - there can be only one; duplicate names
|
||||
for different numbers are allowed, but not vice versa. First scan down
|
||||
for duplicates. */
|
||||
|
||||
if (i < md->name_count)
|
||||
{
|
||||
pcre_uchar *slotB = slotA;
|
||||
while (slotB > md->name_table)
|
||||
{
|
||||
slotB -= md->name_entry_size;
|
||||
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
|
||||
{
|
||||
offset = GET2(slotB, 0) << 1;
|
||||
condition = offset < offset_top &&
|
||||
md->offset_vector[offset] >= 0;
|
||||
if (condition) break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
/* Scan up for duplicates */
|
||||
|
||||
if (!condition)
|
||||
{
|
||||
slotB = slotA;
|
||||
for (i++; i < md->name_count; i++)
|
||||
{
|
||||
slotB += md->name_entry_size;
|
||||
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
|
||||
{
|
||||
offset = GET2(slotB, 0) << 1;
|
||||
condition = offset < offset_top &&
|
||||
md->offset_vector[offset] >= 0;
|
||||
if (condition) break;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
}
|
||||
unsigned int recno = GET2(slot, 0);
|
||||
condition = recno == md->recursive->group_num;
|
||||
if (condition) break;
|
||||
slot += md->name_entry_size;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* Chose branch according to the condition */
|
||||
case OP_CREF: /* Numbered group used test */
|
||||
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||
condition = offset < offset_top && md->offset_vector[offset] >= 0;
|
||||
break;
|
||||
|
||||
ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
|
||||
}
|
||||
case OP_DNCREF: /* Duplicate named group used test */
|
||||
{
|
||||
int count = GET2(ecode, 1 + IMM2_SIZE);
|
||||
pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
|
||||
while (count-- > 0)
|
||||
{
|
||||
offset = GET2(slot, 0) << 1;
|
||||
condition = offset < offset_top && md->offset_vector[offset] >= 0;
|
||||
if (condition) break;
|
||||
slot += md->name_entry_size;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
else if (condcode == OP_DEF) /* DEFINE - always false */
|
||||
{
|
||||
condition = FALSE;
|
||||
ecode += GET(ecode, 1);
|
||||
}
|
||||
case OP_DEF: /* DEFINE - always false */
|
||||
break;
|
||||
|
||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||
md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
|
||||
an assertion. */
|
||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||
md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
|
||||
of an assertion. */
|
||||
|
||||
else
|
||||
{
|
||||
default:
|
||||
md->match_function_type = MATCH_CONDASSERT;
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);
|
||||
RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
|
||||
if (rrc == MATCH_MATCH)
|
||||
{
|
||||
if (md->end_offset_top > offset_top)
|
||||
offset_top = md->end_offset_top; /* Captures may have happened */
|
||||
condition = TRUE;
|
||||
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
|
||||
|
||||
/* Advance ecode past the assertion to the start of the first branch,
|
||||
but adjust it so that the general choosing code below works. */
|
||||
|
||||
ecode += GET(ecode, 1);
|
||||
while (*ecode == OP_ALT) ecode += GET(ecode, 1);
|
||||
ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
|
||||
}
|
||||
|
||||
/* PCRE doesn't allow the effect of (*THEN) to escape beyond an
|
||||
assertion; it is therefore treated as NOMATCH. */
|
||||
assertion; it is therefore treated as NOMATCH. Any other return is an
|
||||
error. */
|
||||
|
||||
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
|
||||
{
|
||||
RRETURN(rrc); /* Need braces because of following else */
|
||||
}
|
||||
else
|
||||
{
|
||||
condition = FALSE;
|
||||
ecode += codelink;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* We are now at the branch that is to be obeyed. As there is only one, can
|
||||
use tail recursion to avoid using another stack frame, except when there is
|
||||
unlimited repeat of a possibly empty group. In the latter case, a recursive
|
||||
call to match() is always required, unless the second alternative doesn't
|
||||
exist, in which case we can just plough on. Note that, for compatibility
|
||||
with Perl, the | in a conditional group is NOT treated as creating two
|
||||
alternatives. If a THEN is encountered in the branch, it propagates out to
|
||||
the enclosing alternative (unless nested in a deeper set of alternatives,
|
||||
of course). */
|
||||
/* Choose branch according to the condition */
|
||||
|
||||
if (condition || *ecode == OP_ALT)
|
||||
ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
|
||||
|
||||
/* We are now at the branch that is to be obeyed. As there is only one, we
|
||||
can use tail recursion to avoid using another stack frame, except when
|
||||
there is unlimited repeat of a possibly empty group. In the latter case, a
|
||||
recursive call to match() is always required, unless the second alternative
|
||||
doesn't exist, in which case we can just plough on. Note that, for
|
||||
compatibility with Perl, the | in a conditional group is NOT treated as
|
||||
creating two alternatives. If a THEN is encountered in the branch, it
|
||||
propagates out to the enclosing alternative (unless nested in a deeper set
|
||||
of alternatives, of course). */
|
||||
|
||||
if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
|
||||
{
|
||||
if (op != OP_SCOND)
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
goto TAIL_RECURSE;
|
||||
}
|
||||
|
||||
md->match_function_type = MATCH_CBEGROUP;
|
||||
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
|
||||
RRETURN(rrc);
|
||||
}
|
||||
|
||||
@ -1523,7 +1442,6 @@ for (;;)
|
||||
|
||||
else
|
||||
{
|
||||
ecode += 1 + LINK_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
@ -2089,6 +2007,7 @@ for (;;)
|
||||
|
||||
if (*ecode == OP_KETRPOS)
|
||||
{
|
||||
md->start_match_ptr = mstart; /* In case \K reset it */
|
||||
md->end_match_ptr = eptr;
|
||||
md->end_offset_top = offset_top;
|
||||
RRETURN(MATCH_KETRPOS);
|
||||
@ -2656,19 +2575,24 @@ for (;;)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
|
||||
== (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
|
||||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
|
||||
c == CHAR_FF || c == CHAR_CR)
|
||||
== (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
|
||||
(op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
@ -2742,15 +2666,7 @@ for (;;)
|
||||
similar code to character type repeats - written out again for speed.
|
||||
However, if the referenced string is the empty string, always treat
|
||||
it as matched, any number of times (otherwise there could be infinite
|
||||
loops). */
|
||||
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
caseless = op == OP_REFI;
|
||||
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||
ecode += 1 + IMM2_SIZE;
|
||||
|
||||
/* If the reference is unset, there are two possibilities:
|
||||
loops). If the reference is unset, there are two possibilities:
|
||||
|
||||
(a) In the default, Perl-compatible state, set the length negative;
|
||||
this ensures that every attempt at a match fails. We can't just fail
|
||||
@ -2760,8 +2676,39 @@ for (;;)
|
||||
so that the back reference matches an empty string.
|
||||
|
||||
Otherwise, set the length to the length of what was matched by the
|
||||
referenced subpattern. */
|
||||
referenced subpattern.
|
||||
|
||||
The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
|
||||
or to a non-duplicated named group. For a duplicated named group, OP_DNREF
|
||||
and OP_DNREFI are used. In this case we must scan the list of groups to
|
||||
which the name refers, and use the first one that is set. */
|
||||
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
caseless = op == OP_DNREFI;
|
||||
{
|
||||
int count = GET2(ecode, 1+IMM2_SIZE);
|
||||
pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
|
||||
ecode += 1 + 2*IMM2_SIZE;
|
||||
|
||||
while (count-- > 0)
|
||||
{
|
||||
offset = GET2(slot, 0) << 1;
|
||||
if (offset < offset_top && md->offset_vector[offset] >= 0) break;
|
||||
slot += md->name_entry_size;
|
||||
}
|
||||
if (count < 0)
|
||||
length = (md->jscript_compat)? 0 : -1;
|
||||
else
|
||||
length = md->offset_vector[offset+1] - md->offset_vector[offset];
|
||||
}
|
||||
goto REF_REPEAT;
|
||||
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
caseless = op == OP_REFI;
|
||||
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||
ecode += 1 + IMM2_SIZE;
|
||||
if (offset >= offset_top || md->offset_vector[offset] < 0)
|
||||
length = (md->jscript_compat)? 0 : -1;
|
||||
else
|
||||
@ -2769,6 +2716,7 @@ for (;;)
|
||||
|
||||
/* Set up for repetition, or handle the non-repeated case */
|
||||
|
||||
REF_REPEAT:
|
||||
switch (*ecode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
@ -2917,8 +2865,12 @@ for (;;)
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSPLUS:
|
||||
case OP_CRPOSQUERY:
|
||||
c = *ecode++ - OP_CRSTAR;
|
||||
minimize = (c & 1) != 0;
|
||||
if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
|
||||
else possessive = TRUE;
|
||||
min = rep_min[c]; /* Pick up values from tables; */
|
||||
max = rep_max[c]; /* zero for max => infinity */
|
||||
if (max == 0) max = INT_MAX;
|
||||
@ -2926,7 +2878,9 @@ for (;;)
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
minimize = (*ecode == OP_CRMINRANGE);
|
||||
possessive = (*ecode == OP_CRPOSRANGE);
|
||||
min = GET2(ecode, 1);
|
||||
max = GET2(ecode, 1 + IMM2_SIZE);
|
||||
if (max == 0) max = INT_MAX;
|
||||
@ -3068,6 +3022,9 @@ for (;;)
|
||||
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
|
||||
eptr += len;
|
||||
}
|
||||
|
||||
if (possessive) continue; /* No backtracking */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
|
||||
@ -3098,6 +3055,9 @@ for (;;)
|
||||
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
|
||||
eptr++;
|
||||
}
|
||||
|
||||
if (possessive) continue; /* No backtracking */
|
||||
|
||||
while (eptr >= pp)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
|
||||
@ -3113,9 +3073,10 @@ for (;;)
|
||||
/* Control never gets here */
|
||||
|
||||
|
||||
/* Match an extended character class. This opcode is encountered only
|
||||
when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
|
||||
mode, because Unicode properties are supported in non-UTF-8 mode. */
|
||||
/* Match an extended character class. In the 8-bit library, this opcode is
|
||||
encountered only when UTF-8 mode is supported. In the 16-bit and
|
||||
32-bit libraries, codepoints greater than 255 may be encountered even when
|
||||
UTF is not supported. */
|
||||
|
||||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
||||
case OP_XCLASS:
|
||||
@ -3131,8 +3092,12 @@ for (;;)
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
case OP_CRPOSSTAR:
|
||||
case OP_CRPOSPLUS:
|
||||
case OP_CRPOSQUERY:
|
||||
c = *ecode++ - OP_CRSTAR;
|
||||
minimize = (c & 1) != 0;
|
||||
if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
|
||||
else possessive = TRUE;
|
||||
min = rep_min[c]; /* Pick up values from tables; */
|
||||
max = rep_max[c]; /* zero for max => infinity */
|
||||
if (max == 0) max = INT_MAX;
|
||||
@ -3140,7 +3105,9 @@ for (;;)
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
case OP_CRPOSRANGE:
|
||||
minimize = (*ecode == OP_CRMINRANGE);
|
||||
possessive = (*ecode == OP_CRPOSRANGE);
|
||||
min = GET2(ecode, 1);
|
||||
max = GET2(ecode, 1 + IMM2_SIZE);
|
||||
if (max == 0) max = INT_MAX;
|
||||
@ -3212,6 +3179,9 @@ for (;;)
|
||||
if (!PRIV(xclass)(c, data, utf)) break;
|
||||
eptr += len;
|
||||
}
|
||||
|
||||
if (possessive) continue; /* No backtracking */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
|
||||
@ -3590,7 +3560,6 @@ for (;;)
|
||||
if (fc != cc && foc != cc) break;
|
||||
eptr++;
|
||||
}
|
||||
|
||||
if (possessive) continue; /* No backtracking */
|
||||
for (;;)
|
||||
{
|
||||
@ -3599,9 +3568,8 @@ for (;;)
|
||||
eptr--;
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
}
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
/* Control never gets here */
|
||||
}
|
||||
/* Control never gets here */
|
||||
}
|
||||
|
||||
/* Caseful comparisons (includes all multi-byte characters) */
|
||||
@ -3657,7 +3625,7 @@ for (;;)
|
||||
eptr--;
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
}
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
/* Control never gets here */
|
||||
}
|
||||
}
|
||||
/* Control never gets here */
|
||||
@ -3942,10 +3910,8 @@ for (;;)
|
||||
eptr--;
|
||||
}
|
||||
}
|
||||
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
/* Control never gets here */
|
||||
}
|
||||
/* Control never gets here */
|
||||
}
|
||||
|
||||
/* Caseful comparisons */
|
||||
@ -4079,8 +4045,7 @@ for (;;)
|
||||
eptr--;
|
||||
}
|
||||
}
|
||||
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
/* Control never gets here */
|
||||
}
|
||||
}
|
||||
/* Control never gets here */
|
||||
@ -4262,22 +4227,11 @@ for (;;)
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(c, eptr);
|
||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
||||
c == CHAR_FF || c == CHAR_CR)
|
||||
== prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
@ -4287,10 +4241,18 @@ for (;;)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(c, eptr);
|
||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
||||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
|
||||
== prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@ -5010,25 +4972,11 @@ for (;;)
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
for (fi = min;; fi++)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max) RRETURN(MATCH_NOMATCH);
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(c, eptr);
|
||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
||||
c == CHAR_FF || c == CHAR_CR)
|
||||
== prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
/* Control never gets here */
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
for (fi = min;; fi++)
|
||||
{
|
||||
@ -5041,10 +4989,18 @@ for (;;)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(c, eptr);
|
||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
||||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
|
||||
== prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
@ -5097,7 +5053,7 @@ for (;;)
|
||||
case PT_UCNC:
|
||||
for (fi = min;; fi++)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max) RRETURN(MATCH_NOMATCH);
|
||||
if (eptr >= md->end_subject)
|
||||
@ -5528,24 +5484,11 @@ for (;;)
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(c, eptr, len);
|
||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
||||
c == CHAR_FF || c == CHAR_CR)
|
||||
== prop_fail_result)
|
||||
break;
|
||||
eptr+= len;
|
||||
}
|
||||
break;
|
||||
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
|
||||
which means that Perl space and POSIX space are now identical. PCRE
|
||||
was changed at release 8.34. */
|
||||
|
||||
case PT_SPACE: /* Perl space */
|
||||
case PT_PXSPACE: /* POSIX space */
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
@ -5556,12 +5499,21 @@ for (;;)
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(c, eptr, len);
|
||||
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
|
||||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
|
||||
== prop_fail_result)
|
||||
switch(c)
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (prop_fail_result) goto ENDLOOP99; /* Break the loop */
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
|
||||
goto ENDLOOP99; /* Break the loop */
|
||||
break;
|
||||
}
|
||||
eptr+= len;
|
||||
}
|
||||
ENDLOOP99:
|
||||
break;
|
||||
|
||||
case PT_WORD:
|
||||
@ -5642,7 +5594,7 @@ for (;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* Match extended Unicode sequences. We will get here only if the
|
||||
/* Match extended Unicode grapheme clusters. We will get here only if the
|
||||
support is in the binary; otherwise a compile-time error occurs. */
|
||||
|
||||
else if (ctype == OP_EXTUNI)
|
||||
@ -5675,21 +5627,41 @@ for (;;)
|
||||
/* eptr is now past the end of the maximum run */
|
||||
|
||||
if (possessive) continue; /* No backtracking */
|
||||
|
||||
for(;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE;
|
||||
int lgb, rgb;
|
||||
PCRE_PUCHAR fptr;
|
||||
|
||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
|
||||
/* Backtracking over an extended grapheme cluster involves inspecting
|
||||
the previous two characters (if present) to see if a break is
|
||||
permitted between them. */
|
||||
|
||||
eptr--;
|
||||
for (;;) /* Move back over one extended */
|
||||
if (!utf) c = *eptr; else
|
||||
{
|
||||
if (!utf) c = *eptr; else
|
||||
BACKCHAR(eptr);
|
||||
GETCHAR(c, eptr);
|
||||
}
|
||||
rgb = UCD_GRAPHBREAK(c);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
|
||||
fptr = eptr - 1;
|
||||
if (!utf) c = *fptr; else
|
||||
{
|
||||
BACKCHAR(eptr);
|
||||
GETCHAR(c, eptr);
|
||||
BACKCHAR(fptr);
|
||||
GETCHAR(c, fptr);
|
||||
}
|
||||
if (UCD_CATEGORY(c) != ucp_M) break;
|
||||
eptr--;
|
||||
lgb = UCD_GRAPHBREAK(c);
|
||||
if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
|
||||
eptr = fptr;
|
||||
rgb = lgb;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -6211,11 +6183,8 @@ for (;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* Get here if we can't make it match with any permitted repetitions */
|
||||
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
/* Control never gets here */
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
/* There's been some horrible disaster. Arrival here can only mean there is
|
||||
something seriously wrong in the code above or the OP_xxx definitions. */
|
||||
@ -6249,15 +6218,15 @@ switch (frame->Xwhere)
|
||||
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
|
||||
LBL(65) LBL(66)
|
||||
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
|
||||
LBL(21)
|
||||
LBL(20) LBL(21)
|
||||
#endif
|
||||
#ifdef SUPPORT_UTF
|
||||
LBL(16) LBL(18) LBL(20)
|
||||
LBL(16) LBL(18)
|
||||
LBL(22) LBL(23) LBL(28) LBL(30)
|
||||
LBL(32) LBL(34) LBL(42) LBL(46)
|
||||
#ifdef SUPPORT_UCP
|
||||
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
|
||||
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
|
||||
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
|
||||
#endif /* SUPPORT_UCP */
|
||||
#endif /* SUPPORT_UTF */
|
||||
default:
|
||||
@ -6410,7 +6379,7 @@ const pcre_uint8 *start_bits = NULL;
|
||||
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
|
||||
PCRE_PUCHAR end_subject;
|
||||
PCRE_PUCHAR start_partial = NULL;
|
||||
PCRE_PUCHAR match_partial;
|
||||
PCRE_PUCHAR match_partial = NULL;
|
||||
PCRE_PUCHAR req_char_ptr = start_match - 1;
|
||||
|
||||
const pcre_study_data *study;
|
||||
@ -7178,7 +7147,7 @@ if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
|
||||
|
||||
/* Handle partial matches - disable any mark data */
|
||||
|
||||
if (start_partial != NULL)
|
||||
if (match_partial != NULL)
|
||||
{
|
||||
DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
|
||||
md->mark = NULL;
|
||||
|
@ -232,6 +232,10 @@ switch (what)
|
||||
*((pcre_uint32 *)where) = re->limit_recursion;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MATCH_EMPTY:
|
||||
*((int *)where) = (re->flags & PCRE_MATCH_EMPTY) != 0;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
|
@ -1149,6 +1149,7 @@ compatibility. */
|
||||
#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
|
||||
#define PCRE_MLSET 0x00002000 /* match limit set by regex */
|
||||
#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
|
||||
#define PCRE_MATCH_EMPTY 0x00008000 /* pattern can match empty string */
|
||||
|
||||
#if defined COMPILE_PCRE8
|
||||
#define PCRE_MODE PCRE_MODE8
|
||||
@ -1173,7 +1174,8 @@ time, run time, or study time, respectively. */
|
||||
#define PUBLIC_COMPILE_OPTIONS \
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESS| \
|
||||
PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
|
||||
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
|
||||
PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
|
||||
|
||||
@ -1531,22 +1533,25 @@ a positive value. */
|
||||
#define STRING_xdigit "xdigit"
|
||||
|
||||
#define STRING_DEFINE "DEFINE"
|
||||
#define STRING_WEIRD_STARTWORD "[:<:]]"
|
||||
#define STRING_WEIRD_ENDWORD "[:>:]]"
|
||||
|
||||
#define STRING_CR_RIGHTPAR "CR)"
|
||||
#define STRING_LF_RIGHTPAR "LF)"
|
||||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
#define STRING_CR_RIGHTPAR "CR)"
|
||||
#define STRING_LF_RIGHTPAR "LF)"
|
||||
#define STRING_CRLF_RIGHTPAR "CRLF)"
|
||||
#define STRING_ANY_RIGHTPAR "ANY)"
|
||||
#define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)"
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
|
||||
#define STRING_UTF8_RIGHTPAR "UTF8)"
|
||||
#define STRING_UTF16_RIGHTPAR "UTF16)"
|
||||
#define STRING_UTF32_RIGHTPAR "UTF32)"
|
||||
#define STRING_UTF_RIGHTPAR "UTF)"
|
||||
#define STRING_UCP_RIGHTPAR "UCP)"
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR "NO_AUTO_POSSESS)"
|
||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
|
||||
#else /* SUPPORT_UTF */
|
||||
|
||||
@ -1797,21 +1802,22 @@ only. */
|
||||
#define STRING_WEIRD_STARTWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
#define STRING_WEIRD_ENDWORD STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
|
||||
|
||||
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
#define STRING_CR_RIGHTPAR STR_C STR_R STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LF_RIGHTPAR STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_CRLF_RIGHTPAR STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANY_RIGHTPAR STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF16_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF32_RIGHTPAR STR_U STR_T STR_F STR_3 STR_2 STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
|
||||
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_AUTO_POSSESS_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_A STR_U STR_T STR_O STR_UNDERSCORE STR_P STR_O STR_S STR_S STR_E STR_S STR_S STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
|
||||
#endif /* SUPPORT_UTF */
|
||||
|
||||
@ -1853,6 +1859,17 @@ only. */
|
||||
#define PT_WORD 8 /* Word - L plus N plus underscore */
|
||||
#define PT_CLIST 9 /* Pseudo-property: match character list */
|
||||
#define PT_UCNC 10 /* Universal Character nameable character */
|
||||
#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */
|
||||
|
||||
/* The following special properties are used only in XCLASS items, when POSIX
|
||||
classes are specified and PCRE_UCP is set - in other words, for Unicode
|
||||
handling of these classes. They are not available via the \p or \P escapes like
|
||||
those in the above list, and so they do not take part in the autopossessifying
|
||||
table. */
|
||||
|
||||
#define PT_PXGRAPH 11 /* [:graph:] - characters that mark the paper */
|
||||
#define PT_PXPRINT 12 /* [:print:] - [:graph:] plus non-control spaces */
|
||||
#define PT_PXPUNCT 13 /* [:punct:] - punctuation characters */
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain characters with values greater than 255. */
|
||||
@ -1867,9 +1884,9 @@ contain characters with values greater than 255. */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns
|
||||
0 for a data character. Also, they must appear in the same order as in the opcode
|
||||
definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
|
||||
value such as \n. They must have non-zero values, as check_escape() returns 0
|
||||
for a data character. Also, they must appear in the same order as in the
|
||||
opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
|
||||
corresponds to "." in DOTALL mode rather than an escape sequence. It is also
|
||||
used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
|
||||
non-DOTALL mode, "." behaves like \N.
|
||||
@ -1892,12 +1909,31 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
ESC_E, ESC_Q, ESC_g, ESC_k,
|
||||
ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu };
|
||||
|
||||
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
|
||||
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
|
||||
that follow must also be updated to match. There are also tables called
|
||||
"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
|
||||
/********************** Opcode definitions ******************/
|
||||
|
||||
/****** NOTE NOTE NOTE ******
|
||||
|
||||
Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
|
||||
order to the list of escapes immediately above. Furthermore, values up to
|
||||
OP_DOLLM must not be changed without adjusting the table called autoposstab in
|
||||
pcre_compile.c
|
||||
|
||||
Whenever this list is updated, the two macro definitions that follow must be
|
||||
updated to match. The possessification table called "opcode_possessify" in
|
||||
pcre_compile.c must also be updated, and also the tables called "coptable"
|
||||
and "poptable" in pcre_dfa_exec.c.
|
||||
|
||||
****** NOTE NOTE NOTE ******/
|
||||
|
||||
|
||||
/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive,
|
||||
are used in a table for deciding whether a repeated character type can be
|
||||
auto-possessified. */
|
||||
|
||||
#define FIRST_AUTOTAB_OP OP_NOT_DIGIT
|
||||
#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI
|
||||
#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM
|
||||
|
||||
enum {
|
||||
OP_END, /* 0 End of pattern */
|
||||
@ -1930,10 +1966,15 @@ enum {
|
||||
OP_EODN, /* 23 End of data or \n at end of data (\Z) */
|
||||
OP_EOD, /* 24 End of data (\z) */
|
||||
|
||||
OP_CIRC, /* 25 Start of line - not multiline */
|
||||
OP_CIRCM, /* 26 Start of line - multiline */
|
||||
OP_DOLL, /* 27 End of line - not multiline */
|
||||
OP_DOLLM, /* 28 End of line - multiline */
|
||||
/* Line end assertions */
|
||||
|
||||
OP_DOLL, /* 25 End of line - not multiline */
|
||||
OP_DOLLM, /* 26 End of line - multiline */
|
||||
OP_CIRC, /* 27 Start of line - not multiline */
|
||||
OP_CIRCM, /* 28 Start of line - multiline */
|
||||
|
||||
/* Single characters; caseful must precede the caseless ones */
|
||||
|
||||
OP_CHAR, /* 29 Match one character, casefully */
|
||||
OP_CHARI, /* 30 Match one character, caselessly */
|
||||
OP_NOT, /* 31 Match one character, not the given one, casefully */
|
||||
@ -1942,7 +1983,7 @@ enum {
|
||||
/* The following sets of 13 opcodes must always be kept in step because
|
||||
the offset from the first one is used to generate the others. */
|
||||
|
||||
/**** Single characters, caseful, must precede the caseless ones ****/
|
||||
/* Repeated characters; caseful must precede the caseless ones */
|
||||
|
||||
OP_STAR, /* 33 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */
|
||||
@ -1960,7 +2001,7 @@ enum {
|
||||
OP_POSQUERY, /* 44 Posesssified query, caseful */
|
||||
OP_POSUPTO, /* 45 Possessified upto, caseful */
|
||||
|
||||
/**** Single characters, caseless, must follow the caseful ones */
|
||||
/* Repeated characters; caseless must follow the caseful ones */
|
||||
|
||||
OP_STARI, /* 46 */
|
||||
OP_MINSTARI, /* 47 */
|
||||
@ -1978,8 +2019,8 @@ enum {
|
||||
OP_POSQUERYI, /* 57 Posesssified query, caseless */
|
||||
OP_POSUPTOI, /* 58 Possessified upto, caseless */
|
||||
|
||||
/**** The negated ones must follow the non-negated ones, and match them ****/
|
||||
/**** Negated single character, caseful; must precede the caseless ones ****/
|
||||
/* The negated ones must follow the non-negated ones, and match them */
|
||||
/* Negated repeated character, caseful; must precede the caseless ones */
|
||||
|
||||
OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */
|
||||
@ -1997,7 +2038,7 @@ enum {
|
||||
OP_NOTPOSQUERY, /* 70 */
|
||||
OP_NOTPOSUPTO, /* 71 */
|
||||
|
||||
/**** Negated single character, caseless; must follow the caseful ones ****/
|
||||
/* Negated repeated character, caseless; must follow the caseful ones */
|
||||
|
||||
OP_NOTSTARI, /* 72 */
|
||||
OP_NOTMINSTARI, /* 73 */
|
||||
@ -2015,7 +2056,7 @@ enum {
|
||||
OP_NOTPOSQUERYI, /* 83 */
|
||||
OP_NOTPOSUPTOI, /* 84 */
|
||||
|
||||
/**** Character types ****/
|
||||
/* Character types */
|
||||
|
||||
OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */
|
||||
@ -2046,89 +2087,96 @@ enum {
|
||||
OP_CRRANGE, /* 104 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 105 */
|
||||
|
||||
OP_CRPOSSTAR, /* 106 Possessified versions */
|
||||
OP_CRPOSPLUS, /* 107 */
|
||||
OP_CRPOSQUERY, /* 108 */
|
||||
OP_CRPOSRANGE, /* 109 */
|
||||
|
||||
/* End of quantifier opcodes */
|
||||
|
||||
OP_CLASS, /* 106 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 107 Same, but the bitmap was created from a negative
|
||||
OP_CLASS, /* 110 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 111 Same, but the bitmap was created from a negative
|
||||
class - the difference is relevant only when a
|
||||
character > 255 is encountered. */
|
||||
OP_XCLASS, /* 108 Extended class for handling > 255 chars within the
|
||||
OP_XCLASS, /* 112 Extended class for handling > 255 chars within the
|
||||
class. This does both positive and negative. */
|
||||
OP_REF, /* 109 Match a back reference, casefully */
|
||||
OP_REFI, /* 110 Match a back reference, caselessly */
|
||||
OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 112 Call out to external function if provided */
|
||||
OP_REF, /* 113 Match a back reference, casefully */
|
||||
OP_REFI, /* 114 Match a back reference, caselessly */
|
||||
OP_DNREF, /* 115 Match a duplicate name backref, casefully */
|
||||
OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */
|
||||
OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 118 Call out to external function if provided */
|
||||
|
||||
OP_ALT, /* 113 Start of alternation */
|
||||
OP_KET, /* 114 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 115 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 116 order. They are for groups the repeat for ever. */
|
||||
OP_KETRPOS, /* 117 Possessive unlimited repeat. */
|
||||
OP_ALT, /* 119 Start of alternation */
|
||||
OP_KET, /* 120 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 121 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 122 order. They are for groups the repeat for ever. */
|
||||
OP_KETRPOS, /* 123 Possessive unlimited repeat. */
|
||||
|
||||
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
|
||||
asserts must remain in order. */
|
||||
|
||||
OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 119 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 120 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 121 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */
|
||||
OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */
|
||||
OP_ASSERT, /* 125 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 126 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 127 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */
|
||||
|
||||
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
|
||||
after the assertions, with ONCE first, as there's a test for >= ONCE for a
|
||||
subpattern that isn't an assertion. The POS versions must immediately follow
|
||||
the non-POS versions in each case. */
|
||||
|
||||
OP_ONCE, /* 123 Atomic group, contains captures */
|
||||
OP_ONCE_NC, /* 124 Atomic group containing no captures */
|
||||
OP_BRA, /* 125 Start of non-capturing bracket */
|
||||
OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */
|
||||
OP_CBRA, /* 127 Start of capturing bracket */
|
||||
OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
|
||||
OP_COND, /* 129 Conditional group */
|
||||
OP_ONCE, /* 129 Atomic group, contains captures */
|
||||
OP_ONCE_NC, /* 130 Atomic group containing no captures */
|
||||
OP_BRA, /* 131 Start of non-capturing bracket */
|
||||
OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
|
||||
OP_CBRA, /* 133 Start of capturing bracket */
|
||||
OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
|
||||
OP_COND, /* 135 Conditional group */
|
||||
|
||||
/* These five must follow the previous five, in the same order. There's a
|
||||
check for >= SBRA to distinguish the two sets. */
|
||||
|
||||
OP_SBRA, /* 130 Start of non-capturing bracket, check empty */
|
||||
OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCBRA, /* 132 Start of capturing bracket, check empty */
|
||||
OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCOND, /* 134 Conditional group, check empty */
|
||||
OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
|
||||
OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCBRA, /* 138 Start of capturing bracket, check empty */
|
||||
OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
|
||||
OP_SCOND, /* 140 Conditional group, check empty */
|
||||
|
||||
/* The next two pairs must (respectively) be kept together. */
|
||||
|
||||
OP_CREF, /* 135 Used to hold a capture number as condition */
|
||||
OP_NCREF, /* 136 Same, but generated by a name reference*/
|
||||
OP_RREF, /* 137 Used to hold a recursion number as condition */
|
||||
OP_NRREF, /* 138 Same, but generated by a name reference*/
|
||||
OP_DEF, /* 139 The DEFINE condition */
|
||||
OP_CREF, /* 141 Used to hold a capture number as condition */
|
||||
OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
|
||||
OP_RREF, /* 143 Used to hold a recursion number as condition */
|
||||
OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
|
||||
OP_DEF, /* 145 The DEFINE condition */
|
||||
|
||||
OP_BRAZERO, /* 140 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 141 order. */
|
||||
OP_BRAPOSZERO, /* 142 */
|
||||
OP_BRAZERO, /* 146 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 147 order. */
|
||||
OP_BRAPOSZERO, /* 148 */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_MARK, /* 143 always has an argument */
|
||||
OP_PRUNE, /* 144 */
|
||||
OP_PRUNE_ARG, /* 145 same, but with argument */
|
||||
OP_SKIP, /* 146 */
|
||||
OP_SKIP_ARG, /* 147 same, but with argument */
|
||||
OP_THEN, /* 148 */
|
||||
OP_THEN_ARG, /* 149 same, but with argument */
|
||||
OP_COMMIT, /* 150 */
|
||||
OP_MARK, /* 149 always has an argument */
|
||||
OP_PRUNE, /* 150 */
|
||||
OP_PRUNE_ARG, /* 151 same, but with argument */
|
||||
OP_SKIP, /* 152 */
|
||||
OP_SKIP_ARG, /* 153 same, but with argument */
|
||||
OP_THEN, /* 154 */
|
||||
OP_THEN_ARG, /* 155 same, but with argument */
|
||||
OP_COMMIT, /* 156 */
|
||||
|
||||
/* These are forced failure and success verbs */
|
||||
|
||||
OP_FAIL, /* 151 */
|
||||
OP_ACCEPT, /* 152 */
|
||||
OP_ASSERT_ACCEPT, /* 153 Used inside assertions */
|
||||
OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */
|
||||
OP_FAIL, /* 157 */
|
||||
OP_ACCEPT, /* 158 */
|
||||
OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
|
||||
OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO, /* 155 */
|
||||
OP_SKIPZERO, /* 161 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
are the correct length, in order to catch updating errors - there have been
|
||||
@ -2139,7 +2187,8 @@ enum {
|
||||
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
definitions that follow must also be updated to match. There are also tables
|
||||
called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
|
||||
called "opcode_possessify" in pcre_compile.c and "coptable" and "poptable" in
|
||||
pcre_dfa_exec.c that must be updated. */
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. These are used only
|
||||
@ -2152,7 +2201,7 @@ some cases doesn't actually use these names at all). */
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \
|
||||
"notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \
|
||||
"extuni", "\\Z", "\\z", \
|
||||
"^", "^", "$", "$", "char", "chari", "not", "noti", \
|
||||
"$", "$", "^", "^", "char", "chari", "not", "noti", \
|
||||
"*", "*?", "+", "+?", "?", "??", \
|
||||
"{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
@ -2168,7 +2217,8 @@ some cases doesn't actually use these names at all). */
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*+","++", "?+", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||
"class", "nclass", "xclass", "Ref", "Refi", \
|
||||
"*+","++", "?+", "{", \
|
||||
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
|
||||
"Recurse", "Callout", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
|
||||
"Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \
|
||||
@ -2177,7 +2227,7 @@ some cases doesn't actually use these names at all). */
|
||||
"Cond", \
|
||||
"SBra", "SBraPos", "SCBra", "SCBraPos", \
|
||||
"SCond", \
|
||||
"Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
|
||||
"Cond ref", "Cond dnref", "Cond rec", "Cond dnrec", "Cond def", \
|
||||
"Brazero", "Braminzero", "Braposzero", \
|
||||
"*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP", \
|
||||
"*THEN", "*THEN", "*COMMIT", "*FAIL", \
|
||||
@ -2202,7 +2252,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
3, 3, /* \P, \p */ \
|
||||
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \
|
||||
1, /* \X */ \
|
||||
1, 1, 1, 1, 1, 1, /* \Z, \z, ^, ^M, $, $M */ \
|
||||
1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \
|
||||
2, /* Char - the minimum length */ \
|
||||
2, /* Chari - the minimum length */ \
|
||||
2, /* not */ \
|
||||
@ -2233,11 +2283,14 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
/* Character class & ref repeats */ \
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
||||
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
|
||||
1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
|
||||
1+(32/sizeof(pcre_uchar)), /* CLASS */ \
|
||||
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \
|
||||
0, /* XCLASS - variable length */ \
|
||||
1+IMM2_SIZE, /* REF */ \
|
||||
1+IMM2_SIZE, /* REFI */ \
|
||||
1+2*IMM2_SIZE, /* DNREF */ \
|
||||
1+2*IMM2_SIZE, /* DNREFI */ \
|
||||
1+LINK_SIZE, /* RECURSE */ \
|
||||
2+2*LINK_SIZE, /* CALLOUT */ \
|
||||
1+LINK_SIZE, /* Alt */ \
|
||||
@ -2262,8 +2315,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \
|
||||
1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \
|
||||
1+LINK_SIZE, /* SCOND */ \
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \
|
||||
1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \
|
||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* CREF, DNCREF */ \
|
||||
1+IMM2_SIZE, 1+2*IMM2_SIZE, /* RREF, DNRREF */ \
|
||||
1, /* DEF */ \
|
||||
1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \
|
||||
3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \
|
||||
@ -2272,8 +2325,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||
1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */
|
||||
|
||||
/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
|
||||
condition. */
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
|
||||
#define RREF_ANY 0xffff
|
||||
|
||||
@ -2288,9 +2340,11 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
|
||||
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERRCOUNT };
|
||||
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
|
||||
ERR80, ERR81, ERR82, ERR83, ERR84, ERRCOUNT };
|
||||
|
||||
/* JIT compiling modes. The function list is indexed by them. */
|
||||
|
||||
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
|
||||
JIT_NUMBER_OF_COMPILE_MODES };
|
||||
|
||||
@ -2408,6 +2462,15 @@ typedef struct open_capitem {
|
||||
pcre_uint16 flag; /* Set TRUE if recursive back ref */
|
||||
} open_capitem;
|
||||
|
||||
/* Structure for building a list of named groups during the first pass of
|
||||
compiling. */
|
||||
|
||||
typedef struct named_group {
|
||||
const pcre_uchar *name; /* Points to the name in the pattern */
|
||||
int length; /* Length of the name */
|
||||
pcre_uint32 number; /* Group number */
|
||||
} named_group;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
@ -2420,17 +2483,21 @@ typedef struct compile_data {
|
||||
const pcre_uchar *start_code; /* The start of the compiled code */
|
||||
const pcre_uchar *start_pattern; /* The start of the pattern */
|
||||
const pcre_uchar *end_pattern; /* The end of the pattern */
|
||||
open_capitem *open_caps; /* Chain of open capture items */
|
||||
pcre_uchar *hwm; /* High watermark of workspace */
|
||||
open_capitem *open_caps; /* Chain of open capture items */
|
||||
named_group *named_groups; /* Points to vector in pre-compile */
|
||||
pcre_uchar *name_table; /* The name/number table */
|
||||
int names_found; /* Number of entries so far */
|
||||
int name_entry_size; /* Size of each entry */
|
||||
int named_group_list_size; /* Number of entries in the list */
|
||||
int workspace_size; /* Size of workspace */
|
||||
unsigned int bracount; /* Count of capturing parens as we compile */
|
||||
int final_bracount; /* Saved value after first pass */
|
||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||
int top_backref; /* Maximum back reference */
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
unsigned int namedrefcount; /* Number of backreferences by name */
|
||||
int parens_depth; /* Depth of nested parentheses */
|
||||
int assert_depth; /* Depth of nested assertions */
|
||||
pcre_uint32 external_options; /* External (initial) options */
|
||||
pcre_uint32 external_flags; /* External flag bits to be set */
|
||||
@ -2438,6 +2505,7 @@ typedef struct compile_data {
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
|
||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||
BOOL dupnames; /* Duplicate names exist */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
pcre_uchar nl[4]; /* Newline string when fixed length */
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user